/* blob: 02e02dc04b67e68bc14464a0433643209fba52bd [file] [log] [blame] */
/* { dg-do run { target aarch64_sve256_hw } } */
/* { dg-options "-march=armv8-a+sve -msve-vector-bits=256 -fopenmp -O2" } */
#include <arm_sve.h>
/* User-defined OpenMP "+" reduction for svint32_t.  Partial results are
   combined with svadd_s32_z under an all-true predicate, and each private
   copy starts as svindex_s32 (0, 0), i.e. base 0 with step 0, so every
   lane begins at zero.  */
#pragma omp declare reduction (+:svint32_t: omp_out = svadd_s32_z (svptrue_b32(), omp_in, omp_out)) \
initializer (omp_priv = svindex_s32 (0, 0))
/* Reduce an SVE vector and a scalar counter over a plain parallel region
   and check that the two results agree.  Aborts on mismatch.  */
void __attribute__ ((noipa))
parallel_reduction ()
{
  int zeros[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  int ones[8] = {1, 1, 1, 1, 1, 1, 1, 1};
  int nthreads = 0;
  svint32_t acc = svld1_s32 (svptrue_b32 (), zeros);

  /* Every thread replaces its private vector with all-ones and bumps its
     private counter once, so after the reduction each lane of acc and the
     counter both hold the number of threads that ran the region.  */
#pragma omp parallel reduction (+:acc, nthreads)
  {
    acc = svld1_s32 (svptrue_b32 (), ones);
    nthreads += 1;
  }

  /* Eight 32-bit lanes (256-bit vectors), each holding the thread count.  */
  int64_t lane_total = svaddv_s32 (svptrue_b32 (), acc);
  if (lane_total != nthreads * 8)
    __builtin_abort ();
}
/* Reduce an SVE vector over a work-shared loop: eight iterations each add
   an all-ones vector to the accumulator.  Aborts unless the lanes of the
   reduced vector sum to 64.  */
void __attribute__ ((noipa))
for_reduction ()
{
  int zeros[8] = {0, 0, 0, 0, 0, 0, 0, 0};
  int ones[8] = {1, 1, 1, 1, 1, 1, 1, 1};
  svint32_t acc = svld1_s32 (svptrue_b32 (), zeros);

#pragma omp parallel for reduction (+:acc)
  for (int iter = 0; iter < 8; iter++)
    acc += svld1_s32 (svptrue_b32 (), ones);

  /* 8 iterations, each adding 1 to all 8 lanes.  */
  int64_t lane_total = svaddv_s32 (svptrue_b32 (), acc);
  if (lane_total != 64)
    __builtin_abort ();
}
/* Reduce an SVE vector over an "omp simd" loop: sixteen iterations each
   add an all-ones vector to va, which starts with every lane at zero.
   Aborts unless the lanes of the reduced vector sum to 128.  */
void __attribute__ ((noipa))
simd_reduction ()
{
  int a[8];
  svint32_t va = svindex_s32 (0, 0);
  int j;

  /* Fill the source array at run time rather than with an initializer.  */
  for (j = 0; j < 8; j++)
    a[j] = 1;

#pragma omp simd reduction (+:va)
  for (j = 0; j < 16; j++)
    va += svld1_s32 (svptrue_b32 (), a);

  /* 16 iterations, each adding 1 to all 8 lanes.  */
  int64_t res = svaddv_s32 (svptrue_b32 (), va);
  if (res != 128)
    __builtin_abort ();
}
/* Inclusive-scan reduction of an SVE vector in a parallel loop.  Each of
   the 8 iterations adds the all-ones vector loaded from a to va, and the
   statement after the scan directive stores the running value of va into
   the j-th 8-element slice of b.  Aborts unless the lanes of the final va
   sum to 64 and the first element of slice k equals k + 1.  */
void __attribute__ ((noipa))
inscan_reduction_incl ()
{
svint32_t va = svindex_s32 (0, 0);
int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
int b[64] = { 0 };
int j;
int64_t res = 0;
#pragma omp parallel for reduction (inscan, +:va)
for (j = 0; j < 8; j++)
{
/* Input phase: this iteration's contribution to the scan.  */
va += svld1_s32 (svptrue_b32 (), a);
#pragma omp scan inclusive (va)
/* Scan phase: va includes the contribution of iteration j itself.  */
svst1_s32 (svptrue_b32 (), b + j * 8, va);
}
res = svaddv_s32 (svptrue_b32 (), va);
if (res != 64)
__builtin_abort ();
/* Only the first element of each stored vector is checked.  */
for (j = 0; j < 64; j+=8)
if (b[j] != (j / 8 + 1))
__builtin_abort ();
}
/* Exclusive-scan reduction of an SVE vector in a parallel loop.  The
   statement before the scan directive stores the running value of va into
   the j-th 8-element slice of b, and the statement after it adds the
   all-ones vector loaded from a to va.  Aborts unless the lanes of the
   final va sum to 64 and the first element of slice k equals k.  */
void __attribute__ ((noipa))
inscan_reduction_excl ()
{
svint32_t va = svindex_s32 (0, 0);
int a[8] = {1, 1, 1, 1, 1, 1, 1, 1};
int b[64] = { 0 };
int j;
int64_t res = 0;
#pragma omp parallel for reduction (inscan, +:va)
for (j = 0; j < 8; j++)
{
/* Scan phase: va excludes the contribution of iteration j, so the first
   slice is expected to hold zeros.  */
svst1_s32 (svptrue_b32 (), b + j * 8, va);
#pragma omp scan exclusive (va)
/* Input phase: this iteration's contribution to the scan.  */
va += svld1_s32 (svptrue_b32 (), a);
}
res = svaddv_s32 (svptrue_b32 (), va);
if (res != 64)
__builtin_abort ();
/* Only the first element of each stored vector is checked.  */
for (j = 0; j < 64; j+=8)
if (b[j] != j / 8)
__builtin_abort ();
}
int
main ()
{
parallel_reduction ();
for_reduction ();
simd_reduction ();
inscan_reduction_incl ();
inscan_reduction_excl ();
}