/* gcc/testsuite/gcc.target/i386/avx512fp16vl-reduce-op-1.c - gcc - Git at Google */

 /* { dg-do run { target avx512fp16 } } */
 /* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */

 /* Forward declaration: do_test is named by DO_TEST below, ahead of its
    definition near the end of the file.  */
 static void do_test (void);

 /* Defined before "avx512-check.h" is included; presumably that header's
    harness invokes DO_TEST and keys its feature check off AVX512FP16 --
    confirm against avx512-check.h.  */
 #define DO_TEST do_test
 #define AVX512FP16

 #include <immintrin.h>
 #include "avx512-check.h"

 /* 16-element inputs handed to the 256-bit reduction checks.  */
 __m256h a1 = { -39.3f16, -180.9f16, 13.4f16, 35.4f16, -41.1f16, -14.4f16, 24.5f16, 53.54f16,
 	       238.4f16, -134.8f16, 24.5f16, 35.6f16, -346.7f16, -43.4f16, -535.3f16, 324.7f16 };
 __m256h a2 = { 82.5f16, 21.4f16, 24.4f16, 53.4f16, 23.5f16, -24.4f16, -34.5f16, -32.5f16,
 	       23.6f16, -13.4f16, 24.5f16, 35.5f16, -34.4f16, -24.5f16, -34.5f16, 13.5f16 };

 /* 8-element inputs handed to the 128-bit reduction checks.  Every
    initializer carries the f16 suffix so no element depends on an implicit
    float -> _Float16 conversion.  */
 __m128h b1 = { 1.25f16, 2.25f16, -0.25f16, 4.0f16, -2.0f16, 4.0f16, -3.0f16, 2.0f16 };
 __m128h b2 = { -0.5f16, -1.0f16, 1.0f16, -1.0f16, 1.0f16, 1.0f16, 2.0f16, 4.0f16 };
 __m128h b3 = { 1.25f16, 2.25f16, -4.25f16, 4.0f16, -2.4f16, 4.0f16, -3.0f16, 2.0f16 };
 __m128h b4 = { -4.5f16, 7.6f16, 0.7f16, -8.2f16, 2.1f16, 2.4f16, -2.0f16, 1.4f16 };

 /* Out-of-line (noinline, noclone) call of _mm256_reduce_add_ph on A;
    returns the intrinsic's result unchanged.  */
 __attribute__((noinline, noclone)) _Float16
 test_reduce_256_add_ph (__m256h a)
 {
   _Float16 reduced = _mm256_reduce_add_ph (a);
   return reduced;
 }

 /* Out-of-line (noinline, noclone) call of _mm256_reduce_mul_ph on A;
    returns the intrinsic's result unchanged.  */
 __attribute__((noinline, noclone)) _Float16
 test_reduce_256_mul_ph (__m256h a)
 {
   _Float16 reduced = _mm256_reduce_mul_ph (a);
   return reduced;
 }

 /* Out-of-line (noinline, noclone) call of _mm256_reduce_max_ph on A;
    returns the intrinsic's result unchanged.  */
 __attribute__((noinline, noclone)) _Float16
 test_reduce_256_max_ph (__m256h a)
 {
   _Float16 reduced = _mm256_reduce_max_ph (a);
   return reduced;
 }

 /* Out-of-line (noinline, noclone) call of _mm256_reduce_min_ph on A;
    returns the intrinsic's result unchanged.  */
 __attribute__((noinline, noclone)) _Float16
 test_reduce_256_min_ph (__m256h a)
 {
   _Float16 reduced = _mm256_reduce_min_ph (a);
   return reduced;
 }

 /* Out-of-line (noinline, noclone) call of _mm_reduce_add_ph on B;
    returns the intrinsic's result unchanged.  */
 __attribute__((noinline, noclone)) _Float16
 test_reduce_add_ph (__m128h b)
 {
   _Float16 reduced = _mm_reduce_add_ph (b);
   return reduced;
 }

 /* Out-of-line (noinline, noclone) call of _mm_reduce_mul_ph on B;
    returns the intrinsic's result unchanged.  */
 __attribute__((noinline, noclone)) _Float16
 test_reduce_mul_ph (__m128h b)
 {
   _Float16 reduced = _mm_reduce_mul_ph (b);
   return reduced;
 }

 /* Out-of-line (noinline, noclone) call of _mm_reduce_max_ph on B;
    returns the intrinsic's result unchanged.  */
 __attribute__((noinline, noclone)) _Float16
 test_reduce_max_ph (__m128h b)
 {
   _Float16 reduced = _mm_reduce_max_ph (b);
   return reduced;
 }

 /* Out-of-line (noinline, noclone) call of _mm_reduce_min_ph on B;
    returns the intrinsic's result unchanged.  */
 __attribute__((noinline, noclone)) _Float16
 test_reduce_min_ph (__m128h b)
 {
   _Float16 reduced = _mm_reduce_min_ph (b);
   return reduced;
 }

 /* Element count used by the loop-based min/max reference for the 256-bit
    checks.  */
 #define SIZE 16
 /* Reference add/mul reduction of the 16-element vector A, written to the
    r3 variable of the expanding scope.  Element i is first combined with
    element i + 8; the eight partial results are then folded j with j + 4,
    then 0 with 2 and 1 with 3, then the final pair.  Expands to several
    statements and ends without a semicolon.  */
 #define REF_ADDMUL(op, a) \
   __m128h ref_half = _mm_setzero_ph (); \
   for (int lane = 0; lane < 8; lane++) \
     ref_half[lane] = (_Float16) a[lane] op (_Float16) a[lane + 8]; \
   _Float16 ref_q0 = ref_half[0] op ref_half[4]; \
   _Float16 ref_q1 = ref_half[1] op ref_half[5]; \
   _Float16 ref_q2 = ref_half[2] op ref_half[6]; \
   _Float16 ref_q3 = ref_half[3] op ref_half[7]; \
   _Float16 ref_p0 = ref_q0 op ref_q2; \
   _Float16 ref_p1 = ref_q1 op ref_q3; \
   r3 = ref_p0 op ref_p1

 /* Check one 256-bit add/mul reduction of A.  The intrinsic called
    directly must equal the result of the out-of-line wrapper and the
    REF_ADDMUL reference, which stores into r3; any mismatch aborts.  */
 #define TESTOP(opname, op, a) \
   do { \
     _Float16 direct = _mm256_reduce_##opname##_ph (a); \
     _Float16 wrapped = test_reduce_256_##opname##_ph (a); \
     _Float16 r3 = a[0]; \
     if (direct != wrapped) \
       __builtin_abort (); \
     REF_ADDMUL (op, a); \
     if (direct != r3) \
       __builtin_abort (); \
   } while (0)

 /* Run the add check and then the mul check on vector A.  */
 #define TEST_ADDMUL_PH(a) \
   do { \
     TESTOP (add, +, a); \
     TESTOP (mul, *, a); \
   } while (0)

 static void
 test_256_addmul_ph (void)
 {
   TEST_ADDMUL_PH (a1);
   TEST_ADDMUL_PH (a2);
 }

 #undef TESTOP
 /* Check one 256-bit min/max reduction of A.  The intrinsic called
    directly must equal the out-of-line wrapper and a sequential reference
    that starts from a[0] and folds elements 1 .. SIZE - 1 into r3.  OP is
    a token sequence that makes "r3 = r3 OP a[i]" a selection between r3
    and a[i], so the names r3 and i are part of this macro's contract.  */
 #define TESTOP(opname, op, a) \
   do { \
     _Float16 direct = _mm256_reduce_##opname##_ph (a); \
     _Float16 wrapped = test_reduce_256_##opname##_ph (a); \
     _Float16 r3 = a[0]; \
     if (direct != wrapped) \
       __builtin_abort (); \
     for (int i = 1; i < SIZE; i++) \
       { \
         r3 = r3 op a[i]; \
       } \
     if (direct != r3) \
       __builtin_abort (); \
   } while (0)

 /* Run the min check and then the max check on vector A.  The OP argument
    keeps r3 when it compares below (min) or above (max) a[i] and takes
    a[i] otherwise.  */
 #define TEST_MINMAX_PH(a) \
   do { \
     TESTOP (min, < a[i] ? r3 :, a); \
     TESTOP (max, > a[i] ? r3 :, a); \
   } while (0)

 /* Run the min and max reduction checks on each 256-bit input, a1 first
    and then a2.  */
 static void
 test_256_minmax_ph (void)
 {
   TEST_MINMAX_PH (a1);
   TEST_MINMAX_PH (a2);
 }

 /* Run every 256-bit check: add/mul first, then min/max.  */
 static void
 test_256_ph (void)
 {
   test_256_addmul_ph ();
   test_256_minmax_ph ();
 }

 /* Switch the element count to 8 for the 128-bit checks.  */
 #undef SIZE
 #define SIZE 8

 #undef REF_ADDMUL
 /* Reference add/mul reduction of the 8-element vector A, written to the
    r3 variable of the expanding scope.  Element j is combined with
    element j + 4, then results 0 with 2 and 1 with 3, then the final
    pair.  Expands to several statements and ends without a semicolon.  */
 #define REF_ADDMUL(op, a) \
   _Float16 ref_q0 = a[0] op a[4]; \
   _Float16 ref_q1 = a[1] op a[5]; \
   _Float16 ref_q2 = a[2] op a[6]; \
   _Float16 ref_q3 = a[3] op a[7]; \
   _Float16 ref_p0 = ref_q0 op ref_q2; \
   _Float16 ref_p1 = ref_q1 op ref_q3; \
   r3 = ref_p0 op ref_p1

 #undef TESTOP
 /* Check one 128-bit add/mul reduction of A.  The intrinsic called
    directly must equal the result of the out-of-line wrapper and the
    REF_ADDMUL reference, which stores into r3; any mismatch aborts.  */
 #define TESTOP(opname, op, a) \
   do { \
     _Float16 direct = _mm_reduce_##opname##_ph (a); \
     _Float16 wrapped = test_reduce_##opname##_ph (a); \
     _Float16 r3 = a[0]; \
     if (direct != wrapped) \
       __builtin_abort (); \
     REF_ADDMUL (op, a); \
     if (direct != r3) \
       __builtin_abort (); \
   } while (0)

 #undef TEST_ADDMUL_PH
 /* Run the add check and then the mul check on vector A.  */
 #define TEST_ADDMUL_PH(a) \
   do { \
     TESTOP (add, +, a); \
     TESTOP (mul, *, a); \
   } while (0)

 static void
 test_128_addmul_ph (void)
 {
   TEST_ADDMUL_PH (b1);
   TEST_ADDMUL_PH (b2);
   TEST_ADDMUL_PH (b3);
   TEST_ADDMUL_PH (b4);
 }

 #undef TESTOP
 /* Check one 128-bit min/max reduction of B.  The intrinsic called
    directly must equal the out-of-line wrapper and a sequential reference
    that starts from b[0] and folds elements 1 .. SIZE - 1 into r3.  OP is
    a token sequence that makes "r3 = r3 OP b[i]" a selection between r3
    and b[i], so the names r3 and i are part of this macro's contract.  */
 #define TESTOP(opname, op, b) \
   do { \
     _Float16 direct = _mm_reduce_##opname##_ph (b); \
     _Float16 wrapped = test_reduce_##opname##_ph (b); \
     _Float16 r3 = b[0]; \
     if (direct != wrapped) \
       __builtin_abort (); \
     for (int i = 1; i < SIZE; i++) \
       { \
         r3 = r3 op b[i]; \
       } \
     if (direct != r3) \
       __builtin_abort (); \
   } while (0)

 #undef TEST_MINMAX_PH
 /* Run the min check and then the max check on vector B.  The OP argument
    keeps r3 when it compares below (min) or above (max) b[i] and takes
    b[i] otherwise.  */
 #define TEST_MINMAX_PH(b) \
   do { \
     TESTOP (min, < b[i] ? r3 :, b); \
     TESTOP (max, > b[i] ? r3 :, b); \
   } while (0)

 /* Run the min and max reduction checks on each 128-bit input, in the
    order b1, b2, b3, b4.  */
 static void
 test_128_minmax_ph (void)
 {
   TEST_MINMAX_PH (b1);
   TEST_MINMAX_PH (b2);
   TEST_MINMAX_PH (b3);
   TEST_MINMAX_PH (b4);
 }

 /* Run every 128-bit check: add/mul first, then min/max.  */
 static void
 test_128_ph (void)
 {
   test_128_addmul_ph ();
   test_128_minmax_ph ();
 }

 /* Test body named by DO_TEST: runs the 256-bit checks, then the 128-bit
    checks.  A failing check aborts inside the callee.  */
 static void
 do_test (void)
 {
   test_256_ph ();
   test_128_ph ();
 }


 /* Drop the helper macros defined for the checks above.  */
 #undef SIZE
 #undef REF_ADDMUL
 #undef TESTOP
 #undef TEST_ADDMUL_PH
 #undef TEST_MINMAX_PH
	/* { dg-do run { target avx512fp16 } } */
	/* { dg-options "-O2 -mavx512fp16 -mavx512vl" } */

	/* Forward declaration: do_test is named by DO_TEST below, ahead of its
	   definition near the end of the file.  */
	static void do_test (void);

	/* Defined before "avx512-check.h" is included; presumably that header's
	   harness invokes DO_TEST and keys its feature check off AVX512FP16 --
	   confirm against avx512-check.h.  */
	#define DO_TEST do_test
	#define AVX512FP16

	#include <immintrin.h>
	#include "avx512-check.h"

	/* 16-element inputs handed to the 256-bit reduction checks.  */
	__m256h a1 = { -39.3f16, -180.9f16, 13.4f16, 35.4f16, -41.1f16, -14.4f16, 24.5f16, 53.54f16,
	               238.4f16, -134.8f16, 24.5f16, 35.6f16, -346.7f16, -43.4f16, -535.3f16, 324.7f16 };
	__m256h a2 = { 82.5f16, 21.4f16, 24.4f16, 53.4f16, 23.5f16, -24.4f16, -34.5f16, -32.5f16,
	               23.6f16, -13.4f16, 24.5f16, 35.5f16, -34.4f16, -24.5f16, -34.5f16, 13.5f16 };

	/* 8-element inputs handed to the 128-bit reduction checks.  Every
	   initializer carries the f16 suffix so no element depends on an implicit
	   float -> _Float16 conversion.  */
	__m128h b1 = { 1.25f16, 2.25f16, -0.25f16, 4.0f16, -2.0f16, 4.0f16, -3.0f16, 2.0f16 };
	__m128h b2 = { -0.5f16, -1.0f16, 1.0f16, -1.0f16, 1.0f16, 1.0f16, 2.0f16, 4.0f16 };
	__m128h b3 = { 1.25f16, 2.25f16, -4.25f16, 4.0f16, -2.4f16, 4.0f16, -3.0f16, 2.0f16 };
	__m128h b4 = { -4.5f16, 7.6f16, 0.7f16, -8.2f16, 2.1f16, 2.4f16, -2.0f16, 1.4f16 };

	/* Out-of-line (noinline, noclone) call of _mm256_reduce_add_ph on A;
	   returns the intrinsic's result unchanged.  */
	__attribute__((noinline, noclone)) _Float16
	test_reduce_256_add_ph (__m256h a)
	{
	  _Float16 reduced = _mm256_reduce_add_ph (a);
	  return reduced;
	}

	/* Out-of-line (noinline, noclone) call of _mm256_reduce_mul_ph on A;
	   returns the intrinsic's result unchanged.  */
	__attribute__((noinline, noclone)) _Float16
	test_reduce_256_mul_ph (__m256h a)
	{
	  _Float16 reduced = _mm256_reduce_mul_ph (a);
	  return reduced;
	}

	/* Out-of-line (noinline, noclone) call of _mm256_reduce_max_ph on A;
	   returns the intrinsic's result unchanged.  */
	__attribute__((noinline, noclone)) _Float16
	test_reduce_256_max_ph (__m256h a)
	{
	  _Float16 reduced = _mm256_reduce_max_ph (a);
	  return reduced;
	}

	/* Out-of-line (noinline, noclone) call of _mm256_reduce_min_ph on A;
	   returns the intrinsic's result unchanged.  */
	__attribute__((noinline, noclone)) _Float16
	test_reduce_256_min_ph (__m256h a)
	{
	  _Float16 reduced = _mm256_reduce_min_ph (a);
	  return reduced;
	}

	/* Out-of-line (noinline, noclone) call of _mm_reduce_add_ph on B;
	   returns the intrinsic's result unchanged.  */
	__attribute__((noinline, noclone)) _Float16
	test_reduce_add_ph (__m128h b)
	{
	  _Float16 reduced = _mm_reduce_add_ph (b);
	  return reduced;
	}

	/* Out-of-line (noinline, noclone) call of _mm_reduce_mul_ph on B;
	   returns the intrinsic's result unchanged.  */
	__attribute__((noinline, noclone)) _Float16
	test_reduce_mul_ph (__m128h b)
	{
	  _Float16 reduced = _mm_reduce_mul_ph (b);
	  return reduced;
	}

	/* Out-of-line (noinline, noclone) call of _mm_reduce_max_ph on B;
	   returns the intrinsic's result unchanged.  */
	__attribute__((noinline, noclone)) _Float16
	test_reduce_max_ph (__m128h b)
	{
	  _Float16 reduced = _mm_reduce_max_ph (b);
	  return reduced;
	}

	/* Out-of-line (noinline, noclone) call of _mm_reduce_min_ph on B;
	   returns the intrinsic's result unchanged.  */
	__attribute__((noinline, noclone)) _Float16
	test_reduce_min_ph (__m128h b)
	{
	  _Float16 reduced = _mm_reduce_min_ph (b);
	  return reduced;
	}

	/* Element count used by the loop-based min/max reference for the 256-bit
	   checks.  */
	#define SIZE 16
	/* Reference add/mul reduction of the 16-element vector A, written to the
	   r3 variable of the expanding scope.  Element i is first combined with
	   element i + 8; the eight partial results are then folded j with j + 4,
	   then 0 with 2 and 1 with 3, then the final pair.  Expands to several
	   statements and ends without a semicolon.  */
	#define REF_ADDMUL(op, a) \
	  __m128h ref_half = _mm_setzero_ph (); \
	  for (int lane = 0; lane < 8; lane++) \
	    ref_half[lane] = (_Float16) a[lane] op (_Float16) a[lane + 8]; \
	  _Float16 ref_q0 = ref_half[0] op ref_half[4]; \
	  _Float16 ref_q1 = ref_half[1] op ref_half[5]; \
	  _Float16 ref_q2 = ref_half[2] op ref_half[6]; \
	  _Float16 ref_q3 = ref_half[3] op ref_half[7]; \
	  _Float16 ref_p0 = ref_q0 op ref_q2; \
	  _Float16 ref_p1 = ref_q1 op ref_q3; \
	  r3 = ref_p0 op ref_p1

	/* Check one 256-bit add/mul reduction of A.  The intrinsic called
	   directly must equal the result of the out-of-line wrapper and the
	   REF_ADDMUL reference, which stores into r3; any mismatch aborts.  */
	#define TESTOP(opname, op, a) \
	  do { \
	    _Float16 direct = _mm256_reduce_##opname##_ph (a); \
	    _Float16 wrapped = test_reduce_256_##opname##_ph (a); \
	    _Float16 r3 = a[0]; \
	    if (direct != wrapped) \
	      __builtin_abort (); \
	    REF_ADDMUL (op, a); \
	    if (direct != r3) \
	      __builtin_abort (); \
	  } while (0)

	/* Run the add check and then the mul check on vector A.  */
	#define TEST_ADDMUL_PH(a) \
	  do { \
	    TESTOP (add, +, a); \
	    TESTOP (mul, *, a); \
	  } while (0)

	static void
	test_256_addmul_ph (void)
	{
	TEST_ADDMUL_PH (a1);
	TEST_ADDMUL_PH (a2);
	}

	#undef TESTOP
	/* Check one 256-bit min/max reduction of A.  The intrinsic called
	   directly must equal the out-of-line wrapper and a sequential reference
	   that starts from a[0] and folds elements 1 .. SIZE - 1 into r3.  OP is
	   a token sequence that makes "r3 = r3 OP a[i]" a selection between r3
	   and a[i], so the names r3 and i are part of this macro's contract.  */
	#define TESTOP(opname, op, a) \
	  do { \
	    _Float16 direct = _mm256_reduce_##opname##_ph (a); \
	    _Float16 wrapped = test_reduce_256_##opname##_ph (a); \
	    _Float16 r3 = a[0]; \
	    if (direct != wrapped) \
	      __builtin_abort (); \
	    for (int i = 1; i < SIZE; i++) \
	      { \
	        r3 = r3 op a[i]; \
	      } \
	    if (direct != r3) \
	      __builtin_abort (); \
	  } while (0)

	/* Run the min check and then the max check on vector A.  The OP argument
	   keeps r3 when it compares below (min) or above (max) a[i] and takes
	   a[i] otherwise.  */
	#define TEST_MINMAX_PH(a) \
	  do { \
	    TESTOP (min, < a[i] ? r3 :, a); \
	    TESTOP (max, > a[i] ? r3 :, a); \
	  } while (0)

	/* Run the min and max reduction checks on each 256-bit input, a1 first
	   and then a2.  */
	static void
	test_256_minmax_ph (void)
	{
	  TEST_MINMAX_PH (a1);
	  TEST_MINMAX_PH (a2);
	}

	/* Run every 256-bit check: add/mul first, then min/max.  */
	static void
	test_256_ph (void)
	{
	  test_256_addmul_ph ();
	  test_256_minmax_ph ();
	}

	/* Switch the element count to 8 for the 128-bit checks.  */
	#undef SIZE
	#define SIZE 8

	#undef REF_ADDMUL
	/* Reference add/mul reduction of the 8-element vector A, written to the
	   r3 variable of the expanding scope.  Element j is combined with
	   element j + 4, then results 0 with 2 and 1 with 3, then the final
	   pair.  Expands to several statements and ends without a semicolon.  */
	#define REF_ADDMUL(op, a) \
	  _Float16 ref_q0 = a[0] op a[4]; \
	  _Float16 ref_q1 = a[1] op a[5]; \
	  _Float16 ref_q2 = a[2] op a[6]; \
	  _Float16 ref_q3 = a[3] op a[7]; \
	  _Float16 ref_p0 = ref_q0 op ref_q2; \
	  _Float16 ref_p1 = ref_q1 op ref_q3; \
	  r3 = ref_p0 op ref_p1

	#undef TESTOP
	/* Check one 128-bit add/mul reduction of A.  The intrinsic called
	   directly must equal the result of the out-of-line wrapper and the
	   REF_ADDMUL reference, which stores into r3; any mismatch aborts.  */
	#define TESTOP(opname, op, a) \
	  do { \
	    _Float16 direct = _mm_reduce_##opname##_ph (a); \
	    _Float16 wrapped = test_reduce_##opname##_ph (a); \
	    _Float16 r3 = a[0]; \
	    if (direct != wrapped) \
	      __builtin_abort (); \
	    REF_ADDMUL (op, a); \
	    if (direct != r3) \
	      __builtin_abort (); \
	  } while (0)

	#undef TEST_ADDMUL_PH
	/* Run the add check and then the mul check on vector A.  */
	#define TEST_ADDMUL_PH(a) \
	  do { \
	    TESTOP (add, +, a); \
	    TESTOP (mul, *, a); \
	  } while (0)

	static void
	test_128_addmul_ph (void)
	{
	TEST_ADDMUL_PH (b1);
	TEST_ADDMUL_PH (b2);
	TEST_ADDMUL_PH (b3);
	TEST_ADDMUL_PH (b4);
	}

	#undef TESTOP
	/* Check one 128-bit min/max reduction of B.  The intrinsic called
	   directly must equal the out-of-line wrapper and a sequential reference
	   that starts from b[0] and folds elements 1 .. SIZE - 1 into r3.  OP is
	   a token sequence that makes "r3 = r3 OP b[i]" a selection between r3
	   and b[i], so the names r3 and i are part of this macro's contract.  */
	#define TESTOP(opname, op, b) \
	  do { \
	    _Float16 direct = _mm_reduce_##opname##_ph (b); \
	    _Float16 wrapped = test_reduce_##opname##_ph (b); \
	    _Float16 r3 = b[0]; \
	    if (direct != wrapped) \
	      __builtin_abort (); \
	    for (int i = 1; i < SIZE; i++) \
	      { \
	        r3 = r3 op b[i]; \
	      } \
	    if (direct != r3) \
	      __builtin_abort (); \
	  } while (0)

	#undef TEST_MINMAX_PH
	/* Run the min check and then the max check on vector B.  The OP argument
	   keeps r3 when it compares below (min) or above (max) b[i] and takes
	   b[i] otherwise.  */
	#define TEST_MINMAX_PH(b) \
	  do { \
	    TESTOP (min, < b[i] ? r3 :, b); \
	    TESTOP (max, > b[i] ? r3 :, b); \
	  } while (0)

	/* Run the min and max reduction checks on each 128-bit input, in the
	   order b1, b2, b3, b4.  */
	static void
	test_128_minmax_ph (void)
	{
	  TEST_MINMAX_PH (b1);
	  TEST_MINMAX_PH (b2);
	  TEST_MINMAX_PH (b3);
	  TEST_MINMAX_PH (b4);
	}

	/* Run every 128-bit check: add/mul first, then min/max.  */
	static void
	test_128_ph (void)
	{
	  test_128_addmul_ph ();
	  test_128_minmax_ph ();
	}

	/* Test body named by DO_TEST: runs the 256-bit checks, then the 128-bit
	   checks.  A failing check aborts inside the callee.  */
	static void
	do_test (void)
	{
	  test_256_ph ();
	  test_128_ph ();
	}


	/* Drop the helper macros defined for the checks above.  */
	#undef SIZE
	#undef REF_ADDMUL
	#undef TESTOP
	#undef TEST_ADDMUL_PH
	#undef TEST_MINMAX_PH