gcc/testsuite/gcc.target/aarch64/vect_fp16_1.c - gcc - Git at Google

 /* { dg-do compile } */
 /* { dg-options "-O3 -fno-vect-cost-model" } */

 /* Check that we vectorize to a full 128-bit vector for _Float16 and __fp16
    types.  */

 /* Enable ARMv8.2-A+fp16 so we have access to the vector instructions.  */
 #pragma GCC target ("arch=armv8.2-a+fp16")

 _Float16
 sum_Float16 (_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
 	     _Float16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
 	     _Float16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
 {
   for (int i = 0; i < 256; i++)
     a[i] = b[i] + c[i];
 }

 _Float16
 sum_fp16 (__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
 	  __fp16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
 	  __fp16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
 {
   for (int i = 0; i < 256; i++)
     a[i] = b[i] + c[i];
 }

 /* Two FADD operations on "8h" data widths, one from sum_Float16, one from
    sum_fp16.  */
 /* { dg-final { scan-assembler-times "fadd\tv\[0-9\]\+.8h" 2 } } */
	/* { dg-do compile } */
	/* { dg-options "-O3 -fno-vect-cost-model" } */

	/* Check that we vectorize to a full 128-bit vector for _Float16 and __fp16
	types. */

	/* Enable ARMv8.2-A+fp16 so we have access to the vector instructions. */
	#pragma GCC target ("arch=armv8.2-a+fp16")

	_Float16
	sum_Float16 (_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
	_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
	_Float16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
	{
	for (int i = 0; i < 256; i++)
	a[i] = b[i] + c[i];
	}

	_Float16
	sum_fp16 (__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) a,
	__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) b,
	__fp16 *__restrict__ __attribute__ ((__aligned__ (16))) c)
	{
	for (int i = 0; i < 256; i++)
	a[i] = b[i] + c[i];
	}

	/* Two FADD operations on "8h" data widths, one from sum_Float16, one from
	sum_fp16. */
	/* { dg-final { scan-assembler-times "fadd\tv\[0-9\]\+.8h" 2 } } */