libgcobol/gmath.cc - gcc - Git at Google

 /*
  * Copyright (c) 2021-2025 Symas Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are
  * met:
  *
  * * Redistributions of source code must retain the above copyright
  *   notice, this list of conditions and the following disclaimer.
  * * Redistributions in binary form must reproduce the above
  *   copyright notice, this list of conditions and the following disclaimer
  *   in the documentation and/or other materials provided with the
  *   distribution.
  * * Neither the name of the Symas Corporation nor the names of its
  *   contributors may be used to endorse or promote products derived from
  *   this software without specific prior written permission.
  *
  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 #include <ctype.h>
 #include <errno.h>
 #include <fcntl.h>
 #include <math.h>
 #include <fenv.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
 #include <algorithm>

 #include "config.h"
 #include "libgcobol-fp.h"

 #include "ec.h"
 #include "common-defs.h"
 #include "io.h"
 #include "gcobolio.h"
 #include "libgcobol.h"
 #include "common-defs.h"
 #include "gmath.h"
 #include "gcobolio.h"

 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/types.h>

 #define MAX_INTERMEDIATE_BITS 126
 #define MAX_INTERMEDIATE_DECIMALS 16

 static int
 conditional_stash(  cblc_field_t *destination,
                     size_t        destination_o,
                     size_t        destination_s,
                     bool          on_error_flag,
                     __int128      value,
                     int           rdigits,
                     cbl_round_t   rounded)
   {
   int retval = compute_error_none;
   if( !on_error_flag )
     {
     // It's an uncomplicated assignment, because there was no
     // ON SIZE ERROR phrase
     __gg__int128_to_qualified_field(destination,
                                     destination_o,
                                     destination_s,
                                     value,
                                     rdigits,
                                     rounded,
                                     &retval);
     }
   else
     {
     // This is slightly more complex, because in the event of a
     // SIZE ERROR. we need to leave the original value untouched

     unsigned char *stash = (unsigned char *)malloc(destination_s);
     memcpy(stash, destination->data+destination_o, destination_s);

     __gg__int128_to_qualified_field(destination,
                                     destination_o,
                                     destination_s,
                                     value,
                                     rdigits,
                                     rounded,
                                     &retval);
     if( retval )
       {
       // Because there was a size error, we will report that
       // upon return, and we need to put back the original value:
       memcpy(destination->data+destination_o, stash, destination_s);
       }
     free(stash);
     }
   return retval;
   }

 static int
 conditional_stash(  cblc_field_t *destination,
                     size_t        destination_o,
                     size_t        destination_s,
                     bool          on_error_flag,
                     GCOB_FP128    value,
                     cbl_round_t     rounded)
   {
   int retval = compute_error_none;
   if( !on_error_flag )
     {
     // It's an uncomplicated assignment, because there was no
     // ON SIZE ERROR phrase
     __gg__float128_to_qualified_field(destination,
                                       destination_o,
                                       value,
                                       rounded,
                                       &retval);
     }
   else
     {
     // This is slightly more complex, because in the event of a
     // SIZE ERROR. we need to leave the original value untouched
     unsigned char *stash = (unsigned char *)malloc(destination_s);
     memcpy(stash, destination->data+destination_o, destination_s);
     __gg__float128_to_qualified_field(destination,
                                       destination_o,
                                       value,
                                       rounded,
                                       &retval);
     if( retval )
       {
       // Because there was a size error, we will report that
       // upon return, and we need to put back the original value:
       memcpy(destination->data+destination_o, stash, destination_s);
       }
     free(stash);
     }
   return retval;
   }

 static
 GCOB_FP128
 divide_helper_float(GCOB_FP128 a_value,
                     GCOB_FP128 b_value,
                     int      *compute_error)
   {
   if( b_value == 0 )
     {
     // Can't divide by zero
     *compute_error |= compute_error_divide_by_zero;
     return a_value;
     }

   // Do the actual division, giving us 0.399999999999999999999999999999999971
   a_value /= b_value;

   if( __builtin_isinf(a_value) )
     {
     *compute_error |= compute_error_overflow;
     return 0;
     }

   if( __builtin_isnan(a_value) )
     {
     *compute_error |= compute_error_underflow;
     return 0;
     }

   return a_value;
   }

 static
 GCOB_FP128
 multiply_helper_float(GCOB_FP128 a_value,
                       GCOB_FP128 b_value,
                       int      *compute_error)
   {
   a_value *= b_value;

   if( __builtin_isinf(a_value) )
     {
     *compute_error |= compute_error_overflow;
     return 0;
     }

   if( __builtin_isnan(a_value) )
     {
     *compute_error |= compute_error_underflow;
     return 0;
     }

   return a_value;
   }

 static
 GCOB_FP128
 addition_helper_float(GCOB_FP128 a_value,
                       GCOB_FP128 b_value,
                       int      *compute_error)
   {
   a_value += b_value;

   if( __builtin_isinf(a_value) )
     {
     *compute_error |= compute_error_overflow;
     return 0;
     }

   if( __builtin_isnan(a_value) )
     {
     *compute_error |= compute_error_underflow;
     return 0;
     }

   return a_value;
   }

 static
 GCOB_FP128
 subtraction_helper_float(GCOB_FP128 a_value,
                          GCOB_FP128 b_value,
                          int      *compute_error)
   {
   a_value -= b_value;

   if( __builtin_isinf(a_value) )
     {
     *compute_error |= compute_error_overflow;
     return 0;
     }

   if( __builtin_isnan(a_value) )
     {
     *compute_error |= compute_error_underflow;
     return 0;
     }

   return a_value;
   }

 extern "C"
 void
 __gg__pow(  cbl_arith_format_t,
             size_t,
             size_t,
             size_t,
             cbl_round_t  *rounded,
             int           on_error_flag,
             int          *compute_error
             )
   {
   cblc_field_t **A  = __gg__treeplet_1f;
   size_t       *A_o = __gg__treeplet_1o;
   size_t       *A_s = __gg__treeplet_1s;
   cblc_field_t **B  = __gg__treeplet_2f;
   size_t       *B_o = __gg__treeplet_2o;
   size_t       *B_s = __gg__treeplet_2s;
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   GCOB_FP128 avalue = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
   GCOB_FP128 bvalue = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
   GCOB_FP128 tgt_value;

   if( avalue == 0 && bvalue == 0 )
     {
     *compute_error |= compute_error_exp_zero_by_zero;
     tgt_value = 1;
     }
   else if(avalue == 0 && bvalue < 0 )
     {
     *compute_error |= compute_error_exp_zero_by_minus;
     tgt_value = 0;
     }
   else
     {
     // Calculate our answer, in floating point:
     errno = 0;
     feclearexcept(FE_ALL_EXCEPT);
     tgt_value = FP128_FUNC(pow)(avalue, bvalue);
     if( errno || fetestexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW) )
       {
       // One of a large number of errors took place. See math_error(7) and
       // pow(3).  Let's just use this last error as a grab-bag; I didn't
       // care to go down the rabbit hole of figuring out if a floating point
       // number did or did not have a fractional part.  That way lies
       // madness.
       *compute_error |= compute_error_exp_minus_by_frac;
       // This kind of error doesn't overwrite the target, so the returned
       // value is not relevant.  Make it zero to avoid overheating the
       // converstion routine
       tgt_value = 0;
       }
     }
   if( !(*compute_error & compute_error_exp_minus_by_frac) )
     {
     *compute_error |= conditional_stash(C[0],
                                         C_o[0],
                                         C_s[0],
                                         (on_error_flag & ON_SIZE_ERROR),
                                         tgt_value,
                                         *rounded);
     }
   }

 extern "C"
 void
 __gg__process_compute_error(int compute_error)
   {
   // This routine gets called after a series of parser_op operations is
   // complete (see parser_assign()) when the source code didn't specify
   // an ON SIZE ERROR clause.
   if( compute_error & compute_error_divide_by_zero)
     {
     exception_raise(ec_size_zero_divide_e);
     }
   else if( compute_error & compute_error_truncate )
     {
     exception_raise(ec_size_truncation_e);
     }
   else if( compute_error & (    compute_error_exp_zero_by_zero
                               | compute_error_exp_zero_by_minus
                               | compute_error_exp_minus_by_frac ) )
     {
     exception_raise(ec_size_exponentiation_e);
     }
   else if( compute_error & compute_error_overflow )
     {
     exception_raise(ec_size_overflow_e);
     }
   else if( compute_error & compute_error_underflow )
     {
     exception_raise(ec_size_underflow_e);
     }
   }

 typedef unsigned __int128 uint128;
 typedef struct int256
   {
   union
     {
     unsigned char  data[32];
     uint64_t       i64 [4];
     uint128        i128[2];
     };
   }int256;

 static int
 multiply_int256_by_int64(int256 &product, const uint64_t multiplier)
   {
   // Typical use of this routine is multiplying a temporary value by
   // a factor of ten.  This is effectively left-shifting by decimal
   // digits.  See scale_int256_by_digits
   uint64_t overflows[5] = {};
   for(int i=0; i<4; i++)
     {
     uint128 temp = (uint128)product.i64[i] * multiplier;
     product.i64[i] = *(uint64_t *)(&temp);
     overflows[i+1] = *(uint64_t *)((uint8_t *)(&temp) + 8);
     }

   for(int i=1; i<4; i++)
     {
     product.i64[i] += overflows[i];
     if(product.i64[i] < overflows[i])
       {
       overflows[i+1] += 1;
       }
     }
   // Indicate that an overflow took place.  This is not useful unless the int256
   // is known to be positive.
   return overflows[4];
   }

 static int
 add_int256_to_int256(int256 &sum, const int256 addend)
   {
   uint128 overflows[3] = {};
   for(int i=0; i<2; i++)
     {
     sum.i128[i] += addend.i128[i];
     if( sum.i128[i] < addend.i128[i] )
       {
       overflows[i+1] = 1;
       }
     }
   if( overflows[1] )
     {
     sum.i128[1] += overflows[1];
     if( sum.i128[1] == 0 )
       {
       overflows[2] = 1;
       }
     }
   // Indicate that an overflow took place.  This is not useful unless the two
   // values are known to be positive.
   return (int)overflows[2];
   }

 static void
 negate_int256(int256 &val)
   {
   val.i128[0] = ~val.i128[0];
   val.i128[1] = ~val.i128[1];
   val.i128[0] += 1;
   if( !val.i128[0] )
     {
     val.i128[1] += 1;
     }
   }

 static int
 subtract_int256_from_int256(int256 &difference, int256 subtrahend)
   {
   negate_int256(subtrahend);
   return add_int256_to_int256(difference, subtrahend);
   }

 static void
 scale_int256_by_digits(int256 &val, int digits)
   {
   uint64_t pot;
   while(digits > 17)
     {
     pot = (uint64_t)__gg__power_of_ten(17);
     multiply_int256_by_int64(val, pot);
     digits -= 17;
     }
   pot = (uint64_t)__gg__power_of_ten(digits);
   multiply_int256_by_int64(val, pot);
   }

 static void
 divide_int256_by_int64(int256 &val, uint64_t divisor)
   {
   // val needs to be a positive number
   uint128 temp = 0;
   for( int i=3; i>=0; i-- )
     {
     // Left shift temp 64 bits:
     *(uint64_t *)(((uint8_t *)&temp)+8) = *(uint64_t *)(((uint8_t *)&temp)+0);

     // Put the high digit of val into the bottom of temp
     *(uint64_t *)(((uint8_t *)&temp)+0) = val.i64[i];

     // Divide that combinary by divisor to get the new digits
     val.i64[i] = temp / divisor;

     // And the new temp is that combination modulo divisor
     temp = temp % divisor;
     }
   }

 static int
 squeeze_int256(int256 &val, int &rdigits)
   {
   int overflow = 0;
   // It has been decreed that at this juncture the result must fit into
   // MAX_FIXED_POINT_DIGITS.  If the result does not, we have an OVERFLOW error.

   int is_negative = val.data[31] & 0x80;
   if( is_negative )
     {
     negate_int256(val);
     }

   // As long as there are some decimal places left, we hold our nose and right-
   // shift a too-large value rightward by decimal digits.  In other words, we
   // truncate the fractional part to make room for the integer part:
   while(rdigits > 0 && val.i128[1] )
     {
     divide_int256_by_int64(val, 10UL);
     rdigits -= 1;
     }

   // At this point, to be useful, val has to have fewer than 128 bits:
   if( val.i128[1] )
     {
     overflow = compute_error_overflow;
     }
   else
     {
     // We know that it has fewer than 128 bits.  But the remaining 128 bits need
     // to be less than 10^MAX_FIXED_POINT_DIGITS.  This gets a bit nasty here,
     // since at this writing the gcc compiler doesn't understand 128-bit
     // constants.  So, we are forced into some annoying compiler gymnastics.
 #if MAX_FIXED_POINT_DIGITS != 37
 #error MAX_FIXED_POINT_DIGITS needs to be 37
 #endif

     // These sixteen bytes comprise the binary value of 10^38
     static const uint8_t C1038[] = {0x00, 0x00, 0x00, 0x00, 0x40, 0x22, 0x8a, 0x09,
                                 0x7a, 0xc4, 0x86, 0x5a, 0xa8, 0x4c, 0x3b, 0x4b};
     static const uint128 biggest = *(uint128 *)C1038;

     // If we still have some rdigits to throw away, we can keep shrinking
     // the value:

     while(rdigits > 0 && val.i128[0] >= biggest  )
       {
       divide_int256_by_int64(val, 10UL);
       rdigits -= 1;
       }

     // And we have to make sure that rdigits isn't too big

     while(rdigits > MAX_FIXED_POINT_DIGITS)
       {
       divide_int256_by_int64(val, 10UL);
       rdigits -= 1;
       }

     if( val.i128[0] >= biggest )
       {
       overflow = compute_error_overflow;
       }
     }

   if( is_negative )
     {
     negate_int256(val);
     }

   return overflow;
   }

 static void
 get_int256_from_qualified_field(int256 &var,
                                 int &rdigits,
                                 cblc_field_t *field,
                                 size_t field_o,
                                 size_t field_s)
   {
   var.i128[0] = (uint128)__gg__binary_value_from_qualified_field( &rdigits,
                                                                   field,
                                                                   field_o,
                                                                   field_s);
   if( var.data[15] & 0x80 )
     {
     // This value is negative, so extend the sign bit:
     memset(var.data + 16, 0xFF, 16);
     }
   else
     {
     // This value is positive
     memset(var.data + 16, 0x00, 16);
     }
   }

 static int256 phase1_result;
 static int    phase1_rdigits;

 static GCOB_FP128 phase1_result_float;

 extern "C"
 void
 __gg__add_fixed_phase1( cbl_arith_format_t ,
                         size_t nA,
                         size_t ,
                         size_t ,
                         cbl_round_t  *,
                         int           ,
                         int          *compute_error
                         )
   {
   // Our job is to add together the nA fixed-point values in the A[] array

   // The result goes into the temporary phase1_result.

   cblc_field_t **A  = __gg__treeplet_1f;
   size_t       *A_o = __gg__treeplet_1o;
   size_t       *A_s = __gg__treeplet_1s;

   // Let us prime the pump with the first value of A[]
   get_int256_from_qualified_field(phase1_result, phase1_rdigits, A[0], A_o[0], A_s[0]);

   // We now go into a loop adding each of the A[] values to phase1_result:

   for( size_t i=1; i<nA; i++ )
     {
     int temp_rdigits;
     int256 temp = {};
     get_int256_from_qualified_field(temp, temp_rdigits, A[i], A_o[i], A_s[i]);

     // We have to scale the one with fewer rdigits to match the one with greater
     // rdigits:
     if( phase1_rdigits > temp_rdigits )
       {
       scale_int256_by_digits(temp, phase1_rdigits - temp_rdigits);
       temp_rdigits = phase1_rdigits;
       }
     else if( phase1_rdigits < temp_rdigits )
       {
       scale_int256_by_digits(phase1_result, temp_rdigits - phase1_rdigits);
       phase1_rdigits = temp_rdigits;
       }

     // The two numbers have the same number of rdigits.  It's now safe to add
     // them.
     add_int256_to_int256(phase1_result, temp);
     }

   // phase1_result/phase1_rdigits now reflect the sum of all A[]

   int overflow = squeeze_int256(phase1_result, phase1_rdigits);
   if( overflow )
     {
     *compute_error |= compute_error_overflow;
     }
   }

 extern "C"
 void
 __gg__addf1_fixed_phase2( cbl_arith_format_t ,
                           size_t ,
                           size_t ,
                           size_t ,
                           cbl_round_t  *rounded,
                           int           on_error_flag,
                           int          *compute_error
                           )
   {
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   // This is the assignment phase of an ADD Format 1

   // We take phase1_result and accumulate it into C
   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

   if( C[0]->type == FldFloat)
     {
     // The target we need to accumulate into is a floating-point number, so we
     // need to convert our fixed-point intermediate into floating point and
     // proceed accordingly.

     // Convert the intermediate
     GCOB_FP128 value_a = (GCOB_FP128)phase1_result.i128[0];
     value_a /= __gg__power_of_ten(phase1_rdigits);

     // Pick up the target
     GCOB_FP128 value_b = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);

     value_a += value_b;

     // At this point, we assign running_sum to *C.
     *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                         on_size_error,
                                         value_a,
                                         *rounded++);
     }
   else
     {
     // We have a fixed-point intermediate, and we are accumulating intoi a
     // fixed point target.
     int256 value_a   = phase1_result;
     int    rdigits_a = phase1_rdigits;

     int256 value_b = {};
     int rdigits_b;

     get_int256_from_qualified_field(value_b, rdigits_b, C[0], C_o[0], C_s[0]);

     // We have to scale the one with fewer rdigits to match the one with greater
     // rdigits:
     if( rdigits_a > rdigits_b )
       {
       scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
       rdigits_b = rdigits_a;
       }
     else if( rdigits_a < rdigits_b )
       {
       scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
       rdigits_a = rdigits_b;
       }

     // The two numbers have the same number of rdigits.  It's now safe to add
     // them.
     add_int256_to_int256(value_a, value_b);

     int overflow = squeeze_int256(value_a, rdigits_a);
     if( overflow )
       {
       *compute_error |= compute_error_overflow;
       }

       // At this point, we assign running_sum to *C.
     *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                         on_size_error,
                                         value_a.i128[0],
                                         rdigits_a,
                                         *rounded++);
     }
   }

 extern "C"
 void
 __gg__fixed_phase2_assign_to_c( cbl_arith_format_t ,
                                 size_t ,
                                 size_t ,
                                 size_t ,
                                 cbl_round_t  *rounded,
                                 int           on_error_flag,
                                 int          *compute_error
                                 )
   {
   // This is the assignment phase of an ADD Format 2

   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;


   // We take phase1_result and put it into C
   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

   if( C[0]->type == FldFloat)
     {
     // The target we need to accumulate into is a floating-point number, so we
     // need to convert our fixed-point intermediate into floating point and
     // proceed accordingly.

     // Convert the intermediate
     GCOB_FP128 value_a = (GCOB_FP128)phase1_result.i128[0];
     value_a /= __gg__power_of_ten(phase1_rdigits);

     *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                         on_size_error,
                                         value_a,
                                        *rounded++);
     }
   else
     {
     // We have a fixed-point intermediate, and we are accumulating intoi a
     // fixed point target.
     int256 value_a   = phase1_result;
     int    rdigits_a = phase1_rdigits;

     int overflow = squeeze_int256(value_a, rdigits_a);
     if( overflow )
       {
       *compute_error |= compute_error_overflow;
       }

       // At this point, we assign that value to *C.
     *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                         on_size_error,
                                         value_a.i128[0],
                                         rdigits_a,
                                        *rounded++);
     }
   }

 extern "C"
 void
 __gg__add_float_phase1( cbl_arith_format_t ,
                         size_t nA,
                         size_t ,
                         size_t ,
                         cbl_round_t  *,
                         int           ,
                         int          *compute_error
                         )
   {
   // Our job is to add together the nA floating-point values in the A[] array

   // The result goes into the temporary phase1_result_ffloat.

   cblc_field_t **A  = __gg__treeplet_1f;
   size_t       *A_o = __gg__treeplet_1o;
   size_t       *A_s = __gg__treeplet_1s;

   // Let us prime the pump with the first value of A[]
   phase1_result_float = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);

   // We now go into a loop adding each of the A[] values to phase1_result_flt:

   for( size_t i=1; i<nA; i++ )
     {
     GCOB_FP128 temp = __gg__float128_from_qualified_field(A[i], A_o[i], A_s[i]);
     phase1_result_float = addition_helper_float(phase1_result_float,
                                                 temp,
                                                 compute_error);
     }
   }

 extern "C"
 void
 __gg__addf1_float_phase2( cbl_arith_format_t ,
                           size_t ,
                           size_t ,
                           size_t ,
                           cbl_round_t  *rounded,
                           int           on_error_flag,
                           int          *compute_error
                           )
   {
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
   // This is the assignment phase of an ADD Format 2
   // We take phase1_result and accumulate it into C

   GCOB_FP128 temp = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
   temp = addition_helper_float(temp, phase1_result_float, compute_error);
   *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                       on_size_error,
                                       temp,
                                      *rounded++);
   }

 extern "C"
 void
 __gg__float_phase2_assign_to_c( cbl_arith_format_t ,
                           size_t ,
                           size_t ,
                           size_t ,
                           cbl_round_t  *rounded,
                           int           on_error_flag,
                           int          *compute_error
                           )
   {
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
   // This is the assignment phase of an ADD Format 2
     // We take phase1_result and put it into C

   *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                       on_size_error,
                                       phase1_result_float,
                                      *rounded++);
   }

 extern "C"
 void
 __gg__addf3(cbl_arith_format_t ,
             size_t nA,
             size_t ,
             size_t ,
             cbl_round_t  *rounded,
             int           on_error_flag,
             int          *compute_error
             )
   {
   // This is an ADD Format 3.  Each A[i] gets accumulated into each C[i].  When
   // both are fixed, we do fixed arithmetic.  When either is a FldFloat, we
   // do floating-point arithmetic.
   cblc_field_t **A  = __gg__treeplet_1f;
   size_t       *A_o = __gg__treeplet_1o;
   size_t       *A_s = __gg__treeplet_1s;

   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

   for(size_t i=0; i<nA; i++)
     {
     if( A[i]->type == FldFloat || C[i]->type == FldFloat )
       {
       GCOB_FP128 value_a = __gg__float128_from_qualified_field(A[i], A_o[i], A_s[i]);
       GCOB_FP128 value_b = __gg__float128_from_qualified_field(C[i], C_o[i], C_s[i]);

       value_a = addition_helper_float(value_a, value_b, compute_error);

         // At this point, we assign the sum to *C.
       *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                           on_size_error,
                                           value_a,
                                           *rounded++);
       }
     else
       {
       // We have are doing fixed-point arithmetic.
       int256 value_a;
       int    rdigits_a;

       int256 value_b;
       int rdigits_b;

       get_int256_from_qualified_field(value_a, rdigits_a, A[i], A_o[i], A_s[i]);
       get_int256_from_qualified_field(value_b, rdigits_b, C[i], C_o[i], C_s[i]);

       // We have to scale the one with fewer rdigits to match the one with greater
       // rdigits:
       if( rdigits_a > rdigits_b )
         {
         scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
         rdigits_b = rdigits_a;
         }
       else if( rdigits_a < rdigits_b )
         {
         scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
         rdigits_a = rdigits_b;
         }

       // The two numbers have the same number of rdigits.  It's now safe to add
       // them.
       add_int256_to_int256(value_a, value_b);

       int overflow = squeeze_int256(value_a, rdigits_a);
       if( overflow )
         {
         *compute_error |= compute_error_overflow;
         }

         // At this point, we assign the sum to *C.
       *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                           on_size_error,
                                           value_a.i128[0],
                                           rdigits_a,
                                           *rounded++);
       }
     }
   }

 extern "C"
 void
 __gg__subtractf1_fixed_phase2(cbl_arith_format_t ,
                               size_t ,
                               size_t ,
                               size_t ,
                               cbl_round_t  *rounded,
                               int           on_error_flag,
                               int          *compute_error
                               )
   {
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   // This is the assignment phase of an ADD Format 1

   // We take phase1_result and subtrace it from C
   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

   if( C[0]->type == FldFloat)
     {
     // The target we need to accumulate into is a floating-point number, so we
     // need to convert our fixed-point intermediate into floating point and
     // proceed accordingly.

     // Convert the intermediate
     GCOB_FP128 value_a = (GCOB_FP128)phase1_result.i128[0];
     value_a /= __gg__power_of_ten(phase1_rdigits);

     // Pick up the target
     GCOB_FP128 value_b = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);

     value_b -= value_a;

     // At this point, we assign the difference to *C.
     *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                         on_size_error,
                                         value_b,
                                         *rounded++);
     }
   else
     {
     // We have a fixed-point intermediate, and we are accumulating intoi a
     // fixed point target.
     int256 value_a   = phase1_result;
     int    rdigits_a = phase1_rdigits;

     int256 value_b = {};
     int rdigits_b;

     get_int256_from_qualified_field(value_b, rdigits_b, C[0], C_o[0], C_s[0]);

     // We have to scale the one with fewer rdigits to match the one with greater
     // rdigits:
     if( rdigits_a > rdigits_b )
       {
       scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
       rdigits_b = rdigits_a;
       }
     else if( rdigits_a < rdigits_b )
       {
       scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
       rdigits_a = rdigits_b;
       }

     // The two numbers have the same number of rdigits.  It's now safe to add
     // them.
     subtract_int256_from_int256(value_b, value_a);

     int overflow = squeeze_int256(value_b, rdigits_b);
     if( overflow )
       {
       *compute_error |= compute_error_overflow;
       }

       // At this point, we assign running_sum to *C.
     *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                         on_size_error,
                                         value_b.i128[0],
                                         rdigits_b,
                                         *rounded++);
     }
   }

 extern "C"
 void
 __gg__subtractf2_fixed_phase1(cbl_arith_format_t ,
                               size_t nA,
                               size_t ,
                               size_t ,
                               cbl_round_t  *rounded,
                               int           on_error_flag,
                               int          *compute_error
                               )
   {
   // This is the calculation phase of a fixed-point SUBTRACT Format 2

   cblc_field_t **B  = __gg__treeplet_2f;
   size_t       *B_o = __gg__treeplet_2o;
   size_t       *B_s = __gg__treeplet_2s;

   // Add up all the A values
   __gg__add_fixed_phase1( not_expected_e ,
                           nA,
                           0,
                           0,
                           rounded,
                           on_error_flag,
                           compute_error);

   // Subtract that from the B value:

   int256 value_a   = phase1_result;
   int    rdigits_a = phase1_rdigits;

   int256 value_b = {};
   int rdigits_b;

   get_int256_from_qualified_field(value_b, rdigits_b, B[0], B_o[0], B_s[0]);

   // We have to scale the one with fewer rdigits to match the one with greater
   // rdigits:
   if( rdigits_a > rdigits_b )
     {
     scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
     rdigits_b = rdigits_a;
     }
   else if( rdigits_a < rdigits_b )
     {
     scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
     rdigits_a = rdigits_b;
     }

   // The two numbers have the same number of rdigits.  It's now safe to add
   // them.
   subtract_int256_from_int256(value_b, value_a);

   int overflow = squeeze_int256(value_b, rdigits_b);
   if( overflow )
     {
     *compute_error |= compute_error_overflow;
     }
   phase1_result  = value_b;
   phase1_rdigits = rdigits_b;
   }


 extern "C"
 void
 __gg__subtractf1_float_phase2(cbl_arith_format_t ,
                               size_t ,
                               size_t ,
                               size_t ,
                               cbl_round_t  *rounded,
                               int           on_error_flag,
                               int          *compute_error
                               )
   {
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
   // This is the assignment phase of an ADD Format 2
   // We take phase1_result and subtract it from C

   GCOB_FP128 temp = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
   temp = subtraction_helper_float(temp, phase1_result_float, compute_error);
   *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                       on_size_error,
                                       temp,
                                      *rounded++);
   }


 extern "C"
 void
 __gg__subtractf2_float_phase1(cbl_arith_format_t ,
                               size_t nA,
                               size_t ,
                               size_t ,
                               cbl_round_t  *rounded,
                               int           on_error_flag,
                               int          *compute_error
                               )
   {
   // This is the calculation phase of a fixed-point SUBTRACT Format 2

   cblc_field_t **B  = __gg__treeplet_2f;
   size_t       *B_o = __gg__treeplet_2o;
   size_t       *B_s = __gg__treeplet_2s;

   // Add up all the A values
   __gg__add_float_phase1( not_expected_e ,
                           nA,
                           0,
                           0,
                           rounded,
                           on_error_flag,
                           compute_error
                           );

   // Subtract that from the B value:
   GCOB_FP128 value_b = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);

   // The two numbers have the same number of rdigits.  It's now safe to add
   // them.
   phase1_result_float = subtraction_helper_float(value_b, phase1_result_float, compute_error);
   }

 extern "C"
 void
 __gg__subtractf3( cbl_arith_format_t ,
                   size_t nA,
                   size_t ,
                   size_t ,
                   cbl_round_t  *rounded,
                   int           on_error_flag,
                   int          *compute_error
                   )
   {
   // This is an ADD Format 3.  Each A[i] gets accumulated into each C[i].  Each
   // SUBTRACTION is treated separately.

   cblc_field_t **A  = __gg__treeplet_1f;
   size_t       *A_o = __gg__treeplet_1o;
   size_t       *A_s = __gg__treeplet_1s;
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

   for(size_t i=0; i<nA; i++)
     {
     if( A[i]->type == FldFloat || C[i]->type == FldFloat)
       {
       GCOB_FP128 value_a = __gg__float128_from_qualified_field(A[i], A_o[i], A_s[i]);
       GCOB_FP128 value_b = __gg__float128_from_qualified_field(C[i], C_o[i], C_s[i]);

       value_b = subtraction_helper_float(value_b, value_a, compute_error);

         // At this point, we assign the sum to *C.
       *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                           on_size_error,
                                           value_b,
                                           *rounded++);
       }
     else
       {
       // We are doing fixed-point subtraction.
       int256 value_a;
       int    rdigits_a;

       int256 value_b;
       int rdigits_b;

       get_int256_from_qualified_field(value_a, rdigits_a, A[i], A_o[i], A_s[i]);
       get_int256_from_qualified_field(value_b, rdigits_b, C[i], C_o[i], C_s[i]);

       // We have to scale the one with fewer rdigits to match the one with greater
       // rdigits:
       if( rdigits_a > rdigits_b )
         {
         scale_int256_by_digits(value_b, rdigits_a - rdigits_b);
         rdigits_b = rdigits_a;
         }
       else if( rdigits_a < rdigits_b )
         {
         scale_int256_by_digits(value_a, rdigits_b - rdigits_a);
         rdigits_a = rdigits_b;
         }

       // The two numbers have the same number of rdigits.  It's now safe to add
       // them.
       subtract_int256_from_int256(value_b, value_a);

       int overflow = squeeze_int256(value_b, rdigits_b);

       if( overflow )
         {
         *compute_error |= compute_error_overflow;
         }

         // At this point, we assign the sum to *C.
       *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                           on_size_error,
                                           value_b.i128[0],
                                           rdigits_b,
                                           *rounded++);
       }
     }
   }

 static bool multiply_intermediate_is_float;
 static GCOB_FP128 multiply_intermediate_float;
 static __int128  multiply_intermediate_int128;
 static int       multiply_intermediate_rdigits;

 extern "C"
 void
 __gg__multiplyf1_phase1(cbl_arith_format_t ,
                         size_t ,
                         size_t ,
                         size_t ,
                         cbl_round_t  *,
                         int           ,
                         int          *)
   {
   // We are getting just the one value, which we are converting to the necessary
   // intermediate form

   cblc_field_t **A  = __gg__treeplet_1f;
   size_t       *A_o = __gg__treeplet_1o;
   size_t       *A_s = __gg__treeplet_1s;

   if( A[0]->type == FldFloat )
     {
     multiply_intermediate_is_float = true;
     multiply_intermediate_float = __gg__float128_from_qualified_field(A[0],
                                                                       A_o[0],
                                                                       A_s[0]);
     }
   else
     {
     multiply_intermediate_is_float = false;
     multiply_intermediate_int128 =
          __gg__binary_value_from_qualified_field(&multiply_intermediate_rdigits,
                                                  A[0],
                                                  A_o[0],
                                                  A_s[0]);
     }
   }

 static
 void multiply_int128_by_int128(int256 &ABCD,
                                __int128 ab_value,
                                __int128 cd_value)
   {
   int is_negative = ( ((uint8_t *)(&ab_value))[15]^((uint8_t *)(&cd_value))[15]) & 0x80;
   if( ab_value < 0 )
     {
     ab_value = -ab_value;
     }
   if( cd_value < 0 )
     {
     cd_value = -cd_value;
     }

   uint128 AC00;
   uint128 AD0;
   uint128 BC0;
   uint128 BD;

   // Let's extract the digits.
   uint64_t a = *(uint64_t *)((unsigned char *)(&ab_value)+8);
   uint64_t b = *(uint64_t *)((unsigned char *)(&ab_value)+0);
   uint64_t c = *(uint64_t *)((unsigned char *)(&cd_value)+8);
   uint64_t d = *(uint64_t *)((unsigned char *)(&cd_value)+0);

   // multiply (a0 + b) * (c0 + d)

   AC00 = (uint128)a * c;
   AD0  = (uint128)a * d;
   BC0  = (uint128)b * c;
   BD   = (uint128)b * d;

   // ABCD is the sum of those four pieces
   int256 temp;

   ABCD.i128[1] = 0;
   ABCD.i128[0] = BD;

   temp.i64[3] = 0;
   memcpy(&temp.i64[1], &BC0, 16);
   temp.i64[0] = 0;
   add_int256_to_int256(ABCD, temp);

   memcpy(&temp.i64[1], &AD0, 16);
   add_int256_to_int256(ABCD, temp);

   temp.i64[1] = 0;
   memcpy(&temp.i64[2], &AC00, 16);
   add_int256_to_int256(ABCD, temp);

   // ABCD is now a 256-bit integer with rdigits decimal places
   if(is_negative)
     {
     negate_int256(ABCD);
     }
   }


 extern "C"
 void
 __gg__multiplyf1_phase2(cbl_arith_format_t ,
                         size_t ,
                         size_t ,
                         size_t ,
                         cbl_round_t  *rounded,
                         int           on_error_flag,
                         int          *compute_error
                         )
   {
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
   int error_this_time=0;

   GCOB_FP128 a_value;
   GCOB_FP128 b_value;

   if( multiply_intermediate_is_float )
     {
     a_value = multiply_intermediate_float;
     if( C[0]->type == FldFloat )
       {
       b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
       goto float_float;
       }
     else
       {
       // float times fixed
       b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
       goto float_float;
       }
     }
   else
     {
     if( C[0]->type == FldFloat )
       {
       // gixed * float
       a_value = (GCOB_FP128) multiply_intermediate_int128;
       if( multiply_intermediate_rdigits )
         {
         a_value /= (GCOB_FP128)__gg__power_of_ten(multiply_intermediate_rdigits);
         }
       b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
       goto float_float;
       }
     else
       {
       // fixed times fixed

       // We have two 128-bit numbers.  Call them AB and CD, where A, B, C, D are
       // 64-bit "digits".  We need to multiply them to create a 256-bit result

       int cd_rdigits;
       __int128 ab_value = multiply_intermediate_int128;
       __int128 cd_value = __gg__binary_value_from_qualified_field(&cd_rdigits, C[0], C_o[0], C_s[0]);

       int256 ABCD;
       int rdigits = multiply_intermediate_rdigits + cd_rdigits;

       multiply_int128_by_int128(ABCD, ab_value, cd_value);

       int overflow = squeeze_int256(ABCD, rdigits);
       if( overflow )
         {
         *compute_error |= compute_error_overflow;
         }
         // At this point, we assign running_sum to *C.
       *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                           on_size_error,
                                           ABCD.i128[0],
                                           rdigits,
                                           *rounded++);

       goto done;
       }
     }
   float_float:

   a_value = multiply_helper_float(a_value, b_value, &error_this_time);

   if( error_this_time && on_size_error)
     {
     *compute_error |= error_this_time;
     rounded++;
     }
   else
     {
     *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                         on_size_error,
                                         a_value,
                                         *rounded++);
     }
   done:
   return;
   }

 extern "C"
 void
 __gg__multiplyf2( cbl_arith_format_t ,
                   size_t ,
                   size_t ,
                   size_t nC,
                   cbl_round_t  *rounded,
                   int           on_error_flag,
                   int          *compute_error
                   )
   {
   cblc_field_t **A  = __gg__treeplet_1f;
   size_t       *A_o = __gg__treeplet_1o;
   size_t       *A_s = __gg__treeplet_1s;
   cblc_field_t **B  = __gg__treeplet_2f;
   size_t       *B_o = __gg__treeplet_2o;
   size_t       *B_s = __gg__treeplet_2s;
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);

   bool      got_float = false;
   GCOB_FP128 product_float;
   int256    product_fix;
   int       product_fix_digits;

   if( A[0]->type == FldFloat || B[0]->type == FldFloat )
     {
     GCOB_FP128 a_value = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
     GCOB_FP128 b_value = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
     product_float = multiply_helper_float(a_value, b_value, compute_error);
     got_float = true;
     }
   else
     {
     int a_rdigits;
     int b_rdigits;
     __int128 a_value = __gg__binary_value_from_qualified_field(&a_rdigits, A[0], A_o[0], A_s[0]);
     __int128 b_value = __gg__binary_value_from_qualified_field(&b_rdigits, B[0], B_o[0], B_s[0]);
     product_fix_digits = a_rdigits + b_rdigits;
     multiply_int128_by_int128(product_fix, a_value, b_value);
     int overflow = squeeze_int256(product_fix, product_fix_digits);
     if( overflow )
       {
       *compute_error |= compute_error_overflow;
       }
     }

   for(size_t i=0; i<nC; i++)
     {
     if( got_float )
       {
       *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                           on_size_error,
                                           product_float,
                                           *rounded++);
       }
     else
       {
       *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                           on_size_error,
                                           product_fix.i128[0],
                                           product_fix_digits,
                                           *rounded++);
       }
     }
   }

 static void
 shift_in_place128(uint8_t *buf, int size, int bits)
   {
   // Assume that size in bytes is some multiple of sixteen.  That is, the
   // buffer we are shifting is made up of either two 128-bit bit values (for
   // an int256) or three 128-bit values (an int384)

   // "bits" is a value from zero to 127

   if( !bits )
     {
     return;
     }

   size_t places = size/16;  // This is either two or three

   uint128 temp;
   uint128 overflow = 0;

   uint128 *as128 = (uint128 *)buf;

   for( size_t i=0; i<places; i++ )
     {
     temp = as128[i] >> (128 - bits);
     as128[i] <<= bits;
     as128[i] += overflow;
     overflow = temp;
     }
   }

 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-function"

 static char *
 int256_as_decimal(int256 val)
   {
   char ach[120];
   memset(ach, 0, sizeof(ach));
   strcpy(ach, "0");
   int index = 0;
   while(val.i128[0] || val.i128[1])
     {
     int256 before;
     int256 after;
     before = val;
     after  = val;
     divide_int256_by_int64(after, 10);
     multiply_int256_by_int64(after, 10);
     uint64_t digit = before.i64[0] - after.i64[0];
     ach[index++] = digit + '0';
     divide_int256_by_int64(val, 10);
     }
   if( !index )
     {
     index = 1;
     }
   index -= 1;
   int r = 0;
   static char retval[120];
   while(index >= 0)
     {
     retval[r++] = ach[index--];
     }
   retval[r++] = '\0';
   return retval;
   }
 #pragma GCC diagnostic pop


 static void
 divide_int128_by_int128(int256   &quotient,
                         int      &quotient_rdigits,
                         __int128  dividend,
                         int       dividend_rdigits,
                         __int128  divisor,
                         int       divisor_rdigits,
                         int      *compute_error)
   {
   if( divisor == 0 )
     {
     *compute_error |= compute_error_divide_by_zero;
     memset(&quotient, 0, sizeof(quotient));
     memcpy(&quotient, &dividend, 8);
     quotient_rdigits = dividend_rdigits;
     return;
     }

   bool is_negative = false;
   if(dividend < 0)
     {
     is_negative = !is_negative;
     dividend = -dividend;
     }
   if(divisor < 0)
     {
     is_negative = !is_negative;
     divisor = -divisor;
     }

   // We are going to be referencing the 64-bit pices of the 128-bit divisor:
   uint64_t *divisor64 = (uint64_t *)&divisor;

   quotient.i128[1] = 0;
   quotient.i128[0] = dividend;

   // In order to get 0.3333333.... from 1 / 3, we are going to scale up the
   // numerator so that it has 37 rdigits:

   int scale = MAX_FIXED_POINT_DIGITS;
   scale_int256_by_digits(quotient, scale);
   quotient_rdigits = scale + dividend_rdigits - divisor_rdigits;

   // Now, let's see if we can do a simple divide-by-single-place calculation:

   if( divisor64[1] == 0 )
     {
     // Yes!  The divisor fits into 64 bits:
     divide_int256_by_int64(quotient, (uint64_t)divisor);
     }
   else
     {
     // We have to do long division, and that means Knuth's Algorithm D.  Let's
     // review where we are.
     //
     // We are using 64-bit places to divide one 128-bit number by another.  We
     // know that both are positive.  So, we are dividing
     //
     //      AB by CD, where we know C is non-zero.
     //
     // Because we want fractional digits to the right, we multiplied by 10**37,
     // which is smaller than 2**64, so we have one additional place:
     //
     //      ABx by CD
     //
     // Algorithm D requires that we left-shift ABX and CD by enough bits to make
     // turn on high-order by of CD.  This will be a value between 1 and 63
     // shifts, resulting in
     //
     //      ABxy by CD.
     //
     // We can know that the quotient will have at most two places.  We can see
     // this in the decimal analogy.  The worst case scenario is dividing
     //
     //      99 by 10
     //
     // We multiply the top by 10 to give us one fractional decimal place in the
     // result:
     //
     //      990 by 10
     //
     // To satisfy Algorithm D's requirement that C be >= b/2, we multiply both
     // dividend and divisor by 5:
     //
     //      4950 by 50
     //
     //  And then we do the work that gives us the two-place answer of 99.
     //
     //
     // Here is our four-place 256-bit "numerator"
     int256 numerator;

     // Copy the three-place ABx value into the numerator area
     memcpy(&numerator, &quotient, sizeof(quotient));

     // Calculate how many bits we need to shift CD to make the high-order bit
     // turn on:

     int bits_to_shift = 0;
     int i=15;
     while( ((uint8_t *)(&divisor))[i] == 0 )
       {
       i -= 1;
       bits_to_shift += 8;
       }
     uint8_t tail = ((uint8_t *)(&divisor))[i];
     while( !(tail & 0x80) )
       {
       bits_to_shift += 1;
       tail <<= 1;
       }

     // Shift both the numerator and the divisor that number of bits

     shift_in_place128((uint8_t *)&numerator, sizeof(numerator), bits_to_shift);
     shift_in_place128((uint8_t *)&divisor,   sizeof(divisor),   bits_to_shift);


     // We are now ready to do the guess-multiply-subtract loop.  We know that
     // the result will have two places, so we know we are going to go through
     // that loop two times.  We will build the quotient from the high-order
     // place down:

     // Let's prime the pump by loading remnant with the A value of ABxyz
     int q_place = 1;

     memset(&quotient, 0, sizeof(quotient));

     while( q_place >= 0 )
       {
       // We develop our guess for a quotient by dividing the top two places of
       // the numerator area by C
       uint128 temp;
       uint64_t *temp64 = (uint64_t *)&temp;

       temp64[1] = numerator.i64[q_place+2];
       temp64[0] = numerator.i64[q_place+1];

       quotient.i64[q_place] = temp / divisor64[1];

       // Now we multiply our 64-bit guess by the 128-bit CD to get the
       // three-place value we are going to subtract from the numerator area.

       uint64_t subber[3];
       subber[2] = 0;

       // Start with the bottom 128 bits of the "subber"
       *(uint128 *)subber = (uint128) divisor64[0] * quotient.i64[q_place];

       // Get the next 128 bits of subber
       temp             = (uint128) divisor64[1] * quotient.i64[q_place];

       // Add the top of the first product to the bottom of the second:
       subber[1] += temp64[0];

       // See if there was an overflow:
       if( subber[1] < temp64[0] )
         {
         // There was an overflow
         subber[2] = 1;
         }
       // And now put the top of the second product into place:
       subber[2] += temp64[1];

       // "subber" is now the three-place product of the two-place divisor times
       // the one-place guess

       // We now subtract the three-place subber from the appropriate place of
       // the numerator:

       uint64_t borrow = 0;
       for(size_t i=0; i<3; i++)
         {
         if( numerator.i64[q_place + i] == 0 && borrow )
           {
           // We are subtracting from zero and we have a borrow.  Leave the
           // borrow on and just do the subtraction:
           numerator.i64[q_place + i] -= subber[i];
           }
         else
           {
           uint64_t stash = numerator.i64[q_place + i];
           numerator.i64[q_place + i] -= borrow;
           numerator.i64[q_place + i] -= subber[i];
           if( numerator.i64[q_place + i] > stash )
             {
             // After subtracting, the value got bigger, which means we have
             // to borrow from the next value to the left
             borrow = 1;
             }
           else
             {
             borrow = 0;
             }
           }
         }
       // The three-place subber has been removed from the numerator.

       // Now Algorithm D comes back into play.  Knuth has proved that the guess
       // is usually correct, but sometimes can be one too large, which means
       // that the numerator area goes negative.  On rare occasions, the guess can
       // be two too large.  So, we have to make sure that the numerator area is
       // actually positive by adding subber back in.

       while( numerator.i64[q_place+2] & 0x8000000000000000UL )
         {
         // We need to add subber back into the numerator area
         uint64_t carry = 0;
         for(size_t i=0; i<3; i++)
           {
           if( numerator.i64[q_place + i] == 0xFFFFFFFFFFFFFFFFUL && carry )
             {
             // We are at the top and have a carry.  Just leave the carry on
             // and do the addition:
             numerator.i64[q_place + i] += subber[i];
             }
           else
             {
             // We are not at the top.
             uint64_t stash = numerator.i64[q_place + i];
             numerator.i64[q_place + i] += carry;
             numerator.i64[q_place + i] += subber[i];
             if( numerator.i64[q_place + i] < stash )
               {
               // The addition caused the result to get smaller, meaning that
               // we wrapped around:
               carry = 1;
               }
             else
               {
               carry = 0;
               }
             }
           }
         }
       q_place -= 1;
       }
     }
   if( is_negative )
     {
     negate_int256(quotient);
     }
   }

 extern "C"
 void
 __gg__dividef1_phase2(cbl_arith_format_t ,
                       size_t ,
                       size_t ,
                       size_t ,
                       cbl_round_t  *rounded,
                       int           on_error_flag,
                       int          *compute_error
                       )
   {
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
   int error_this_time=0;

   GCOB_FP128 a_value;
   GCOB_FP128 b_value;

   if( multiply_intermediate_is_float )
     {
     a_value = multiply_intermediate_float;
     if( C[0]->type == FldFloat )
       {
       b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
       goto float_float;
       }
     else
       {
       // float times fixed
       b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
       goto float_float;
       }
     }
   else
     {
     if( C[0]->type == FldFloat )
       {
       // gixed * float
       a_value = (GCOB_FP128) multiply_intermediate_int128;
       if( multiply_intermediate_rdigits )
         {
         a_value /= (GCOB_FP128)__gg__power_of_ten(multiply_intermediate_rdigits);
         }
       b_value = __gg__float128_from_qualified_field(C[0], C_o[0], C_s[0]);
       goto float_float;
       }
     else
       {
       // fixed times fixed

       // We have two 128-bit numbers.  Call them AB and CD, where A, B, C, D are
       // 64-bit "digits".  We need to multiply them to create a 256-bit result

       int dividend_rdigits;
       __int128 dividend = __gg__binary_value_from_qualified_field(&dividend_rdigits, C[0], C_o[0], C_s[0]);

       int quotient_rdigits;
       int256 quotient;

       divide_int128_by_int128(quotient,
                               quotient_rdigits,
                               dividend,
                               dividend_rdigits,
                               multiply_intermediate_int128,
                               multiply_intermediate_rdigits,
                               compute_error);

       int overflow = squeeze_int256(quotient, quotient_rdigits);
       if( overflow )
         {
         *compute_error |= compute_error_overflow;
         }
         // At this point, we assign the quotient to *C.
       *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                           on_size_error,
                                           quotient.i128[0],
                                           quotient_rdigits,
                                           *rounded++);

       goto done;
       }
     }
   float_float:

   b_value = divide_helper_float(b_value, a_value, &error_this_time);

   *compute_error |= error_this_time;

   if( error_this_time && on_size_error)
     {
     rounded++;
     }
   else
     {
     *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                         on_size_error,
                                         b_value,
                                         *rounded++);
     }
   done:
   return;
   }

 extern "C"
 void
 __gg__dividef23(cbl_arith_format_t ,
                 size_t ,
                 size_t ,
                 size_t nC,
                 cbl_round_t  *rounded,
                 int           on_error_flag,
                 int          *compute_error
                 )
   {
   cblc_field_t **A  = __gg__treeplet_1f;
   size_t       *A_o = __gg__treeplet_1o;
   size_t       *A_s = __gg__treeplet_1s;
   cblc_field_t **B  = __gg__treeplet_2f;
   size_t       *B_o = __gg__treeplet_2o;
   size_t       *B_s = __gg__treeplet_2s;
   cblc_field_t **C  = __gg__treeplet_3f;
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
   int error_this_time=0;

   if( A[0]->type == FldFloat ||  B[0]->type == FldFloat  )
     {
     GCOB_FP128 a_value;
     GCOB_FP128 b_value;
     GCOB_FP128 c_value;
     a_value = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
     b_value = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
     c_value = divide_helper_float(a_value, b_value, &error_this_time);

     *compute_error |= error_this_time;
     if( !error_this_time )
       {
       for(size_t i=0; i<nC; i++)
         {
         *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                             on_size_error,
                                             c_value,
                                             *rounded++);
         }
       }
     }
   else
     {
     // fixed divided by fixed
     int dividend_rdigits;
     __int128 dividend = __gg__binary_value_from_qualified_field(&dividend_rdigits, A[0], A_o[0], A_s[0]);

     int divisor_rdigits;
     __int128 divisor = __gg__binary_value_from_qualified_field(&divisor_rdigits, B[0], B_o[0], B_s[0]);

     int quotient_rdigits;
     int256 quotient;

     divide_int128_by_int128(quotient,
                             quotient_rdigits,
                             dividend,
                             dividend_rdigits,
                             divisor,
                             divisor_rdigits,
                             compute_error);

     *compute_error |= squeeze_int256(quotient, quotient_rdigits);
     if( !*compute_error )
       {
         // At this point, we assign the quotient to *C.
       for(size_t i=0; i<nC; i++)
         {
         *compute_error |= conditional_stash(C[i], C_o[i], C_s[i],
                                             on_size_error,
                                             quotient.i128[0],
                                             quotient_rdigits,
                                             *rounded++);
         }
       }
     }
   }

 extern "C"
 void
 __gg__dividef45(cbl_arith_format_t ,
                 size_t ,
                 size_t ,
                 size_t ,
                 cbl_round_t  *rounded_p,
                 int           on_error_flag,
                 int          *compute_error
                 )
   {
   cblc_field_t **A  = __gg__treeplet_1f;  // Numerator
   size_t       *A_o = __gg__treeplet_1o;
   size_t       *A_s = __gg__treeplet_1s;
   cblc_field_t **B  = __gg__treeplet_2f;  // Denominator
   size_t       *B_o = __gg__treeplet_2o;
   size_t       *B_s = __gg__treeplet_2s;
   cblc_field_t **C  = __gg__treeplet_3f;  // Has remainder, then quotient
   size_t       *C_o = __gg__treeplet_3o;
   size_t       *C_s = __gg__treeplet_3s;

   bool on_size_error = !!(on_error_flag & ON_SIZE_ERROR);
   int error_this_time=0;

   if( A[0]->type == FldFloat ||  B[0]->type == FldFloat  )
     {
     GCOB_FP128 a_value;
     GCOB_FP128 b_value;
     GCOB_FP128 c_value;
     a_value = __gg__float128_from_qualified_field(A[0], A_o[0], A_s[0]);
     b_value = __gg__float128_from_qualified_field(B[0], B_o[0], B_s[0]);
     c_value = divide_helper_float(a_value, b_value, &error_this_time);

     *compute_error |= error_this_time;

     if( !error_this_time )
       {
       *compute_error |= conditional_stash(C[1], C_o[1], C_s[1],
                                           on_size_error,
                                           c_value,
                                           *rounded_p++);
       // This is floating point, and there is a remainder, and we don't know
       // what that means.  Set the remainder to zero
       if( !*compute_error )
         {
         c_value = 0;
         *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                             on_size_error,
                                             c_value,
                                             *rounded_p++);
         }
       }
     }
   else
     {
     // fixed divided by fixed
     int dividend_rdigits;
     __int128 dividend = __gg__binary_value_from_qualified_field(&dividend_rdigits, A[0], A_o[0], A_s[0]);

     int divisor_rdigits;
     __int128 divisor = __gg__binary_value_from_qualified_field(&divisor_rdigits, B[0], B_o[0], B_s[0]);

     int quotient_rdigits;
     int256 quotient;

     divide_int128_by_int128(quotient,
                             quotient_rdigits,
                             dividend,
                             dividend_rdigits,
                             divisor,
                             divisor_rdigits,
                             compute_error);

     *compute_error |= squeeze_int256(quotient, quotient_rdigits);
     if( !*compute_error )
       {
       // We are going to need the unrounded quotient to calculate the remainder
       __int128 unrounded_quotient = 0;
       int      unrounded_quotient_digits;
       rounded_p += 1;// Skip the rounded value for the remainder
       cbl_round_t rounded = *rounded_p;
       switch(rounded)
         {
         case truncation_e:
           {
           *compute_error |= conditional_stash(C[1], C_o[1], C_s[1],
                                               on_size_error,
                                               quotient.i128[0],
                                               quotient_rdigits,
                                               *rounded_p++);
           unrounded_quotient = __gg__binary_value_from_qualified_field(
                                                         &unrounded_quotient_digits,
                                                         C[1], C_o[1], C_s[1]);
           break;
           }
         default:
           {
           conditional_stash(C[1], C_o[1], C_s[1],
                             false,
                             quotient.i128[0],
                             quotient_rdigits,
                             truncation_e);
           unrounded_quotient = __gg__binary_value_from_qualified_field(
                                                         &unrounded_quotient_digits,
                                                         C[1], C_o[1], C_s[1]);
           // At this point, we assign the rounded quotient to *C.
           *compute_error |= conditional_stash(C[1], C_o[1], C_s[1],
                                               on_size_error,
                                               quotient.i128[0],
                                               quotient_rdigits,
                                               *rounded_p++);
           break;
           }
         }
       if( !*compute_error )
         {
         // We need to calculate the remainder

         // Remainders in COBOL are seriously weird.  The NIST suite
         // has an example where 174 is divided by 16.  The quotient
         // is a 999.9, and the remainder is a 9999

         // So, here goes:  174 by 16 is 10.875.  The unrounded
         // assignment to Q is thus 10.8
         // You then multiply 10.8 by 16, giving 172.8
         // That gets subtracted from 174, giving 1.2
         // That gets assigned to the 9999 remainder, which is
         // thus 1

         // Any mathematician would walk away, slowly, shaking their head.

         // We need to multiply the unrounded quotient by the divisor.
         int256 temp;
         int    temp_rdigits;
         // Step 1: Multiply the unrounded quotient by the divisor
         multiply_int128_by_int128(temp, unrounded_quotient, divisor);
         temp_rdigits = unrounded_quotient_digits + divisor_rdigits;

         int256 odividend = {};
         memcpy(&odividend, &dividend, sizeof(dividend));

         // We need to line up the rdigits so that we can subtract temp from
         // odividend:

         if( temp_rdigits < dividend_rdigits )
           {
           scale_int256_by_digits(temp, dividend_rdigits-temp_rdigits);
           temp_rdigits = dividend_rdigits;
           }
         else if(temp_rdigits > dividend_rdigits)
           {
           scale_int256_by_digits(odividend, temp_rdigits-dividend_rdigits);
           }

         subtract_int256_from_int256(odividend, temp);

         *compute_error |= squeeze_int256(odividend, temp_rdigits);

         if( !*compute_error )
           {
           *compute_error |= conditional_stash(C[0], C_o[0], C_s[0],
                                               on_size_error,
                                               odividend.i128[0],
                                               temp_rdigits,
                                               truncation_e);
           }
         }
       }
     }
   }