/* blob: 5a5a3d4b992a886dc988ee1dc156a6bdb31b5e42 [file] [log] [blame] */
/* { dg-do run } */
/* { dg-options "-O2 -mavx512fp16 -mavx512vl -ftree-vectorize -fvect-cost-model=unlimited -mprefer-vector-width=256 -fdump-tree-slp-details" } */
/* { dg-require-effective-target avx512fp16 } */
/* { dg-final { scan-tree-dump-times "basic block part vectorized using (?:32|64) byte vectors" 1 "slp2" } }*/
/* { dg-final { scan-tree-dump-times {(?n)add new stmt:.*VEC_PERM_EXPR.*\{ 2, 3, 0, 1, 8, 9, 6, 7, 14, 15, 12, 13, 4, 5, 10, 11 \}} 1 "slp2" } } */
#include <string.h>
static void do_test (void);
#define DO_TEST do_test
#define AVX512FP16
#include "avx512-check.h"
/* Kernel under test: copy eight _Complex _Float16 elements from B to A in a
   fixed shuffled order, a[k] = b[sel[k]] with sel = { 1, 0, 4, 3, 7, 6, 2, 5 }.

   Each complex element is a pair of _Float16 lanes, so in lane terms this is
   the selector { 2, 3, 0, 1, 8, 9, 6, 7, 14, 15, 12, 13, 4, 5, 10, 11 } that
   the dg-final scan at the top of the file looks for in the slp2 dump.

   B is __restrict-qualified, i.e. the caller promises the two buffers do not
   overlap.  noipa keeps interprocedural optimizations (including inlining)
   away from this function.  The statements are intentionally left as
   straight-line code; the dump scans depend on their exact form.  */
void
__attribute__((noipa))
foo_ph (_Complex _Float16* a, _Complex _Float16* __restrict b)
{
/* Elements 0 and 1 are swapped.  */
a[0] = b[1];
a[1] = b[0];
/* Elements 2..7 are gathered from b[4], b[3], b[7], b[6], b[2], b[5].  */
a[2] = b[4];
a[3] = b[3];
a[4] = b[7];
a[5] = b[6];
a[6] = b[2];
a[7] = b[5];
}
/* Runtime check for foo_ph.

   Fills a 32-byte source buffer with the byte values 0..31, builds the
   expected shuffled image by hand, runs foo_ph on a zeroed destination and
   aborts if the destination differs from the expectation.  Aborts as well
   if any allocation fails.  All buffers are released before returning.  */
void
do_test (void)
{
  /* foo_ph touches elements 0..7 of each buffer; at 4 bytes per
     _Complex _Float16 element that is 32 bytes.  */
  enum { NBYTES = 32, ELT_BYTES = 4 };

  _Complex _Float16* ph_src = (_Complex _Float16*) malloc (NBYTES);
  _Complex _Float16* ph_dst = (_Complex _Float16*) malloc (NBYTES);
  _Complex _Float16* ph_exp = (_Complex _Float16*) malloc (NBYTES);
  char* p = (char* ) malloc (NBYTES);

  /* Treat an allocation failure as a test failure instead of writing
     through a null pointer.  */
  if (!ph_src || !ph_dst || !ph_exp || !p)
    __builtin_abort ();

  __builtin_memset (ph_dst, 0, NBYTES);

  /* Source image: byte k holds the value k, so every 4-byte element is
     identifiable by its contents.  */
  for (int i = 0; i != NBYTES; i++)
    p[i] = i;
  __builtin_memcpy (ph_src, p, NBYTES);

  /* Expected image, one byte lane of each 4-byte element per iteration:
     destination element k receives the bytes of source element sel[k],
     with sel = { 1, 0, 4, 3, 7, 6, 2, 5 } as in foo_ph.  */
  for (int i = 0; i != ELT_BYTES; i++)
    {
      p[i] = i + 4;        /* a[0] = b[1] */
      p[i + 4] = i;        /* a[1] = b[0] */
      p[i + 8] = i + 16;   /* a[2] = b[4] */
      p[i + 12] = i + 12;  /* a[3] = b[3] */
      p[i + 16] = i + 28;  /* a[4] = b[7] */
      p[i + 20] = i + 24;  /* a[5] = b[6] */
      p[i + 24] = i + 8;   /* a[6] = b[2] */
      p[i + 28] = i + 20;  /* a[7] = b[5] */
    }
  __builtin_memcpy (ph_exp, p, NBYTES);

  foo_ph (ph_dst, ph_src);
  if (__builtin_memcmp (ph_dst, ph_exp, NBYTES) != 0)
    __builtin_abort ();

  __builtin_free (p);
  __builtin_free (ph_exp);
  __builtin_free (ph_dst);
  __builtin_free (ph_src);
}