| /* |
| * Included file to common source float/double checking |
| * The following macros should be defined: |
| * TYPE -- floating point type |
| * NAME -- convert a name to include the type |
| * UNS_TYPE -- type to hold TYPE as an unsigned number |
| * EXP_SIZE -- size in bits of the exponent |
| * MAN_SIZE -- size in bits of the mantissa |
| * UNS_ABS -- absolute value for UNS_TYPE |
| * FABS -- absolute value function for TYPE |
| * FMAX -- maximum function for TYPE |
| * FMIN -- minimum function for TYPE |
| * SQRT -- square root function for TYPE |
| * RMIN -- minimum random number to generate |
| * RMAX -- maximum random number to generate |
| * ASMDIV -- assembler instruction to do divide |
| * ASMSQRT -- assembler instruction to do square root |
| * BDIV -- # of bits of inaccuracy to allow for division |
| * BRSQRT -- # of bits of inaccuracy to allow for 1/sqrt |
| * INIT_DIV -- Initial values to test 1/x against |
| * INIT_RSQRT -- Initial values to test 1/sqrt(x) against |
| */ |
| |
| typedef union |
| { |
| UNS_TYPE i; |
| TYPE x; |
| } NAME (union); |
| |
| /* |
| * Input/output arrays. |
| */ |
| |
| static NAME (union) NAME (div_input) [] __attribute__((__aligned__(32))) = INIT_DIV; |
| static NAME (union) NAME (rsqrt_input)[] __attribute__((__aligned__(32))) = INIT_RSQRT; |
| |
| #define DIV_SIZE (sizeof (NAME (div_input)) / sizeof (TYPE)) |
| #define RSQRT_SIZE (sizeof (NAME (rsqrt_input)) / sizeof (TYPE)) |
| |
| static TYPE NAME (div_expected)[DIV_SIZE] __attribute__((__aligned__(32))); |
| static TYPE NAME (div_output) [DIV_SIZE] __attribute__((__aligned__(32))); |
| |
| static TYPE NAME (rsqrt_expected)[RSQRT_SIZE] __attribute__((__aligned__(32))); |
| static TYPE NAME (rsqrt_output) [RSQRT_SIZE] __attribute__((__aligned__(32))); |
| |
| |
| /* |
| * Crack a floating point number into sign bit, exponent, and mantissa. |
| */ |
| |
| static void |
| NAME (crack) (TYPE number, unsigned int *p_sign, unsigned *p_exponent, UNS_TYPE *p_mantissa) |
| { |
| NAME (union) u; |
| UNS_TYPE bits; |
| |
| u.x = number; |
| bits = u.i; |
| |
| *p_sign = (unsigned int)((bits >> (EXP_SIZE + MAN_SIZE)) & 0x1); |
| *p_exponent = (unsigned int)((bits >> MAN_SIZE) & ((((UNS_TYPE)1) << EXP_SIZE) - 1)); |
| *p_mantissa = bits & ((((UNS_TYPE)1) << MAN_SIZE) - 1); |
| return; |
| } |
| |
| |
| /* |
| * Prevent optimizer from eliminating + 0.0 to remove -0.0. |
| */ |
| |
| volatile TYPE NAME (math_diff_0) = ((TYPE) 0.0); |
| |
| /* |
| * Return negative if two numbers are significanly different or return the |
| * number of bits that are different in the mantissa. |
| */ |
| |
| static int |
| NAME (math_diff) (TYPE a, TYPE b, int bits) |
| { |
| TYPE zero = NAME (math_diff_0); |
| unsigned int sign_a, sign_b; |
| unsigned int exponent_a, exponent_b; |
| UNS_TYPE mantissa_a, mantissa_b, diff; |
| int i; |
| |
| /* eliminate signed zero. */ |
| a += zero; |
| b += zero; |
| |
| /* special case Nan. */ |
| if (__builtin_isnan (a)) |
| return (__builtin_isnan (b) ? 0 : -1); |
| |
| if (a == b) |
| return 0; |
| |
| /* special case infinity. */ |
| if (__builtin_isinf (a)) |
| return (__builtin_isinf (b) ? 0 : -1); |
| |
| /* punt on denormal numbers. */ |
| if (!__builtin_isnormal (a) || !__builtin_isnormal (b)) |
| return -1; |
| |
| NAME (crack) (a, &sign_a, &exponent_a, &mantissa_a); |
| NAME (crack) (b, &sign_b, &exponent_b, &mantissa_b); |
| |
| /* If the sign is different, there is no hope. */ |
| if (sign_a != sign_b) |
| return -1; |
| |
| /* If the exponent is off by 1, see if the values straddle the power of two, |
| and adjust things to do the mantassa check if we can. */ |
| if ((exponent_a == (exponent_b+1)) || (exponent_a == (exponent_b-1))) |
| { |
| TYPE big = FMAX (a, b); |
| TYPE small = FMIN (a, b); |
| TYPE diff = FABS (a - b); |
| unsigned int sign_big, sign_small, sign_test; |
| unsigned int exponent_big, exponent_small, exponent_test; |
| UNS_TYPE mantissa_big, mantissa_small, mantissa_test; |
| |
| NAME (crack) (big, &sign_big, &exponent_big, &mantissa_big); |
| NAME (crack) (small, &sign_small, &exponent_small, &mantissa_small); |
| |
| NAME (crack) (small - diff, &sign_test, &exponent_test, &mantissa_test); |
| if ((sign_test == sign_small) && (exponent_test == exponent_small)) |
| { |
| mantissa_a = mantissa_small; |
| mantissa_b = mantissa_test; |
| } |
| |
| else |
| { |
| NAME (crack) (big + diff, &sign_test, &exponent_test, &mantissa_test); |
| if ((sign_test == sign_big) && (exponent_test == exponent_big)) |
| { |
| mantissa_a = mantissa_big; |
| mantissa_b = mantissa_test; |
| } |
| |
| else |
| return -1; |
| } |
| } |
| |
| else if (exponent_a != exponent_b) |
| return -1; |
| |
| diff = UNS_ABS (mantissa_a - mantissa_b); |
| for (i = MAN_SIZE; i > 0; i--) |
| { |
| if ((diff & ((UNS_TYPE)1) << (i-1)) != 0) |
| return i; |
| } |
| |
| return -1; |
| } |
| |
| |
| /* |
| * Turn off inlining to make code inspection easier. |
| */ |
| |
| static void NAME (asm_div) (void) __attribute__((__noinline__)); |
| static void NAME (vector_div) (void) __attribute__((__noinline__)); |
| static void NAME (scalar_div) (void) __attribute__((__noinline__)); |
| static void NAME (asm_rsqrt) (void) __attribute__((__noinline__)); |
| static void NAME (vector_rsqrt) (void) __attribute__((__noinline__)); |
| static void NAME (scalar_rsqrt) (void) __attribute__((__noinline__)); |
| static void NAME (check_div) (const char *) __attribute__((__noinline__)); |
| static void NAME (check_rsqrt) (const char *) __attribute__((__noinline__)); |
| static void NAME (run) (void) __attribute__((__noinline__)); |
| |
| |
| /* |
| * Division function that might be vectorized. |
| */ |
| |
| static void |
| NAME (vector_div) (void) |
| { |
| size_t i; |
| |
| for (i = 0; i < DIV_SIZE; i++) |
| NAME (div_output)[i] = ((TYPE) 1.0) / NAME (div_input)[i].x; |
| } |
| |
| /* |
| * Division function that is not vectorized. |
| */ |
| |
| static void |
| NAME (scalar_div) (void) |
| { |
| size_t i; |
| |
| for (i = 0; i < DIV_SIZE; i++) |
| { |
| TYPE x = ((TYPE) 1.0) / NAME (div_input)[i].x; |
| TYPE y; |
| __asm__ ("" : "=d" (y) : "0" (x)); |
| NAME (div_output)[i] = y; |
| } |
| } |
| |
| /* |
| * Generate the division instruction via asm. |
| */ |
| |
| static void |
| NAME (asm_div) (void) |
| { |
| size_t i; |
| |
| for (i = 0; i < DIV_SIZE; i++) |
| { |
| TYPE x; |
| __asm__ (ASMDIV " %0,%1,%2" |
| : "=d" (x) |
| : "d" ((TYPE) 1.0), "d" (NAME (div_input)[i].x)); |
| NAME (div_expected)[i] = x; |
| } |
| } |
| |
| /* |
| * Reciprocal square root function that might be vectorized. |
| */ |
| |
| static void |
| NAME (vector_rsqrt) (void) |
| { |
| size_t i; |
| |
| for (i = 0; i < RSQRT_SIZE; i++) |
| NAME (rsqrt_output)[i] = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x); |
| } |
| |
| /* |
| * Reciprocal square root function that is not vectorized. |
| */ |
| |
| static void |
| NAME (scalar_rsqrt) (void) |
| { |
| size_t i; |
| |
| for (i = 0; i < RSQRT_SIZE; i++) |
| { |
| TYPE x = ((TYPE) 1.0) / SQRT (NAME (rsqrt_input)[i].x); |
| TYPE y; |
| __asm__ ("" : "=d" (y) : "0" (x)); |
| NAME (rsqrt_output)[i] = y; |
| } |
| } |
| |
| /* |
| * Generate the 1/sqrt instructions via asm. |
| */ |
| |
| static void |
| NAME (asm_rsqrt) (void) |
| { |
| size_t i; |
| |
| for (i = 0; i < RSQRT_SIZE; i++) |
| { |
| TYPE x; |
| TYPE y; |
| __asm__ (ASMSQRT " %0,%1" : "=d" (x) : "d" (NAME (rsqrt_input)[i].x)); |
| __asm__ (ASMDIV " %0,%1,%2" : "=d" (y) : "d" ((TYPE) 1.0), "d" (x)); |
| NAME (rsqrt_expected)[i] = y; |
| } |
| } |
| |
| |
| /* |
| * Functions to abort or report errors. |
| */ |
| |
| static int NAME (error_count) = 0; |
| |
| #ifdef VERBOSE |
| static int NAME (max_bits_div) = 0; |
| static int NAME (max_bits_rsqrt) = 0; |
| #endif |
| |
| |
| /* |
| * Compare the expected value with the value we got. |
| */ |
| |
| static void |
| NAME (check_div) (const char *test) |
| { |
| size_t i; |
| int b; |
| |
| for (i = 0; i < DIV_SIZE; i++) |
| { |
| TYPE exp = NAME (div_expected)[i]; |
| TYPE out = NAME (div_output)[i]; |
| b = NAME (math_diff) (exp, out, BDIV); |
| |
| #ifdef VERBOSE |
| if (b != 0) |
| { |
| NAME (union) u_in = NAME (div_input)[i]; |
| NAME (union) u_exp; |
| NAME (union) u_out; |
| char explanation[64]; |
| const char *p_exp; |
| |
| if (b < 0) |
| p_exp = "failed"; |
| else |
| { |
| p_exp = explanation; |
| sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : ""); |
| } |
| |
| u_exp.x = exp; |
| u_out.x = out; |
| printf ("%s %s %s for 1.0 / %g [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n", |
| TNAME (TYPE), test, p_exp, |
| (double) u_in.x, (unsigned long long) u_in.i, |
| (double) exp, (unsigned long long) u_exp.i, |
| (double) out, (unsigned long long) u_out.i); |
| } |
| #endif |
| |
| if (b < 0 || b > BDIV) |
| NAME (error_count)++; |
| |
| #ifdef VERBOSE |
| if (b > NAME (max_bits_div)) |
| NAME (max_bits_div) = b; |
| #endif |
| } |
| } |
| |
| static void |
| NAME (check_rsqrt) (const char *test) |
| { |
| size_t i; |
| int b; |
| |
| for (i = 0; i < RSQRT_SIZE; i++) |
| { |
| TYPE exp = NAME (rsqrt_expected)[i]; |
| TYPE out = NAME (rsqrt_output)[i]; |
| b = NAME (math_diff) (exp, out, BRSQRT); |
| |
| #ifdef VERBOSE |
| if (b != 0) |
| { |
| NAME (union) u_in = NAME (rsqrt_input)[i]; |
| NAME (union) u_exp; |
| NAME (union) u_out; |
| char explanation[64]; |
| const char *p_exp; |
| |
| if (b < 0) |
| p_exp = "failed"; |
| else |
| { |
| p_exp = explanation; |
| sprintf (explanation, "%d bit error%s", b, (b > BDIV) ? ", failed" : ""); |
| } |
| |
| u_exp.x = exp; |
| u_out.x = out; |
| printf ("%s %s %s for 1 / sqrt (%g) [0x%llx], expected %g [0x%llx], got %g [0x%llx]\n", |
| TNAME (TYPE), test, p_exp, |
| (double) u_in.x, (unsigned long long) u_in.i, |
| (double) exp, (unsigned long long) u_exp.i, |
| (double) out, (unsigned long long) u_out.i); |
| } |
| #endif |
| |
| if (b < 0 || b > BRSQRT) |
| NAME (error_count)++; |
| |
| #ifdef VERBOSE |
| if (b > NAME (max_bits_rsqrt)) |
| NAME (max_bits_rsqrt) = b; |
| #endif |
| } |
| } |
| |
| |
| /* |
| * Now do everything. |
| */ |
| |
| static void |
| NAME (run) (void) |
| { |
| #ifdef VERBOSE |
| printf ("start run_%s, divide size = %ld, rsqrt size = %ld, %d bit%s for a/b, %d bit%s for 1/sqrt(a)\n", |
| TNAME (TYPE), |
| (long)DIV_SIZE, |
| (long)RSQRT_SIZE, |
| BDIV, (BDIV == 1) ? "" : "s", |
| BRSQRT, (BRSQRT == 1) ? "" : "s"); |
| #endif |
| |
| NAME (asm_div) (); |
| |
| NAME (scalar_div) (); |
| NAME (check_div) ("scalar"); |
| |
| NAME (vector_div) (); |
| NAME (check_div) ("vector"); |
| |
| NAME (asm_rsqrt) (); |
| |
| NAME (scalar_rsqrt) (); |
| NAME (check_rsqrt) ("scalar"); |
| |
| NAME (vector_rsqrt) (); |
| NAME (check_rsqrt) ("vector"); |
| |
| #ifdef VERBOSE |
| printf ("end run_%s, errors = %d, max div bits = %d, max rsqrt bits = %d\n", |
| TNAME (TYPE), |
| NAME (error_count), |
| NAME (max_bits_div), |
| NAME (max_bits_rsqrt)); |
| #endif |
| } |