gcc/testsuite/gcc.target/i386/avx512f-vect-fmaddsubXXXps.c - gcc - Git at Google

 /* { dg-do run } */
 /* { dg-require-effective-target avx512f } */
 /* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -save-temps" } */

 #include "fma-check.h"
 /* Kernel under test.  A, B and C are treated as N consecutive groups of
    16 floats.  Within each group A is updated in place: even offsets
    (0, 2, ..., 14) become b*c - a and odd offsets (1, 3, ..., 15) become
    b*c + a.  Only A is restrict-qualified.

    The group is deliberately written out one lane per statement; the
    dg-final directive in this file scans the generated assembly for a
    fmaddsub...ps instruction operating on %zmm registers.

    noipa is a GCC function attribute; it is documented as disabling
    interprocedural optimisation between this function and its callers.  */
 void __attribute__((noipa))
 check_fmaddsub (float * __restrict a, float *b, float *c, int n)
 {
   for (int i = 0; i < n; ++i)
     {
       /* Even lane: multiply then subtract the old value of a.  */
       a[16*i + 0] = b[16*i + 0] * c[16*i + 0] - a[16*i + 0];
       /* Odd lane: multiply then add the old value of a.  */
       a[16*i + 1] = b[16*i + 1] * c[16*i + 1] + a[16*i + 1];
       /* The remaining 14 lanes repeat the same subtract/add alternation.  */
       a[16*i + 2] = b[16*i + 2] * c[16*i + 2] - a[16*i + 2];
       a[16*i + 3] = b[16*i + 3] * c[16*i + 3] + a[16*i + 3];
       a[16*i + 4] = b[16*i + 4] * c[16*i + 4] - a[16*i + 4];
       a[16*i + 5] = b[16*i + 5] * c[16*i + 5] + a[16*i + 5];
       a[16*i + 6] = b[16*i + 6] * c[16*i + 6] - a[16*i + 6];
       a[16*i + 7] = b[16*i + 7] * c[16*i + 7] + a[16*i + 7];
       a[16*i + 8] = b[16*i + 8] * c[16*i + 8] - a[16*i + 8];
       a[16*i + 9] = b[16*i + 9] * c[16*i + 9] + a[16*i + 9];
       a[16*i + 10] = b[16*i + 10] * c[16*i + 10] - a[16*i + 10];
       a[16*i + 11] = b[16*i + 11] * c[16*i + 11] + a[16*i + 11];
       a[16*i + 12] = b[16*i + 12] * c[16*i + 12] - a[16*i + 12];
       a[16*i + 13] = b[16*i + 13] * c[16*i + 13] + a[16*i + 13];
       a[16*i + 14] = b[16*i + 14] * c[16*i + 14] - a[16*i + 14];
       a[16*i + 15] = b[16*i + 15] * c[16*i + 15] + a[16*i + 15];
     }
 }

 /* Run-time driver: fill three 16-element arrays, push one 16-float group
    through check_fmaddsub and abort if any lane differs from the
    reference table.  */
 static void
 fma_test (void)
 {
   float acc[16], lhs[16], rhs[16];
   int lane;

   /* Nothing to verify when the builtin reports no AVX512F support.  */
   if (__builtin_cpu_supports ("avx512f") == 0)
     return;

   for (lane = 0; lane < 16; lane++)
     {
       acc[lane] = lane;
       lhs[lane] = 3*lane;
       rhs[lane] = 7*lane;
     }

   check_fmaddsub (acc, lhs, rhs, 1);

   /* Reference values: 21*i*i - i for even i, 21*i*i + i for odd i.  */
   const float expected[16] = {
     0., 22., 82., 192., 332., 530., 750., 1036.,
     1336, 1710., 2090., 2552., 3012., 3562., 4102., 4740.
   };

   for (lane = 0; lane < 16; lane++)
     {
       if (acc[lane] == expected[lane])
         continue;
       __builtin_abort ();
     }
 }

 /* { dg-final { scan-assembler {(?n)fmaddsub...ps[ \t].*%zmm[0-9]} } } */
	/* { dg-do run } */
	/* { dg-require-effective-target avx512f } */
	/* { dg-options "-O3 -mavx512f -mprefer-vector-width=512 -save-temps" } */

	#include "fma-check.h"
	/* Kernel under test.  A, B and C are treated as N consecutive groups
	   of 16 floats.  Within each group A is updated in place: even
	   offsets (0, 2, ..., 14) become b*c - a and odd offsets
	   (1, 3, ..., 15) become b*c + a.  Only A is restrict-qualified.

	   B and C must be pointers and every index must read 16*i + k; the
	   asterisks had been stripped from this copy, leaving scalar
	   parameters and "16i" subscripts that do not compile.

	   The group is written out one lane per statement on purpose: this
	   test scans the generated assembly for a fmaddsub...ps instruction
	   on %zmm registers.  */
	void __attribute__((noipa))
	check_fmaddsub (float * __restrict a, float *b, float *c, int n)
	{
	  for (int i = 0; i < n; ++i)
	    {
	      /* Even lane: multiply then subtract the old value of a.  */
	      a[16*i + 0] = b[16*i + 0] * c[16*i + 0] - a[16*i + 0];
	      /* Odd lane: multiply then add the old value of a.  */
	      a[16*i + 1] = b[16*i + 1] * c[16*i + 1] + a[16*i + 1];
	      /* The remaining 14 lanes repeat the same alternation.  */
	      a[16*i + 2] = b[16*i + 2] * c[16*i + 2] - a[16*i + 2];
	      a[16*i + 3] = b[16*i + 3] * c[16*i + 3] + a[16*i + 3];
	      a[16*i + 4] = b[16*i + 4] * c[16*i + 4] - a[16*i + 4];
	      a[16*i + 5] = b[16*i + 5] * c[16*i + 5] + a[16*i + 5];
	      a[16*i + 6] = b[16*i + 6] * c[16*i + 6] - a[16*i + 6];
	      a[16*i + 7] = b[16*i + 7] * c[16*i + 7] + a[16*i + 7];
	      a[16*i + 8] = b[16*i + 8] * c[16*i + 8] - a[16*i + 8];
	      a[16*i + 9] = b[16*i + 9] * c[16*i + 9] + a[16*i + 9];
	      a[16*i + 10] = b[16*i + 10] * c[16*i + 10] - a[16*i + 10];
	      a[16*i + 11] = b[16*i + 11] * c[16*i + 11] + a[16*i + 11];
	      a[16*i + 12] = b[16*i + 12] * c[16*i + 12] - a[16*i + 12];
	      a[16*i + 13] = b[16*i + 13] * c[16*i + 13] + a[16*i + 13];
	      a[16*i + 14] = b[16*i + 14] * c[16*i + 14] - a[16*i + 14];
	      a[16*i + 15] = b[16*i + 15] * c[16*i + 15] + a[16*i + 15];
	    }
	}

	/* Run-time check: seed three 16-float arrays with i, 3*i and 7*i,
	   run a single group through check_fmaddsub and abort on the first
	   lane that does not match the reference table.  */
	static void
	fma_test (void)
	{
	  enum { LANES = 16 };

	  /* Bail out when the builtin reports no AVX512F support.  */
	  if (!__builtin_cpu_supports ("avx512f"))
	    return;

	  float a[LANES], b[LANES], c[LANES];
	  int k = 0;
	  while (k < LANES)
	    {
	      a[k] = k;
	      b[k] = 3*k;
	      c[k] = 7*k;
	      ++k;
	    }

	  check_fmaddsub (a, b, c, 1);

	  /* Reference values: 21*i*i - i for even i, 21*i*i + i for odd i.  */
	  const float d[LANES] = { 0., 22., 82., 192., 332., 530., 750., 1036.,
				   1336, 1710., 2090., 2552., 3012., 3562.,
				   4102., 4740. };
	  k = 0;
	  while (k < LANES)
	    {
	      if (a[k] != d[k])
		__builtin_abort ();
	      ++k;
	    }
	}

	/* { dg-final { scan-assembler {(?n)fmaddsub...ps[ \t].*%zmm[0-9]} } } */