blob: 34a388301434a07dc09dd6273a6c4ccdb038b275 [file] [log] [blame]
/* { dg-do run } */
/* { dg-options "-O2" } */
/* { dg-additional-options "-msse2" { target sse2_runtime } } */
/* { dg-additional-options "-mavx" { target avx_runtime } } */
extern void abort ();
int a[1024] __attribute__((aligned (32))) = { 1 };
int b[1024] __attribute__((aligned (32))) = { 1 };
unsigned char c[1024] __attribute__((aligned (32))) = { 1 };
int k, m;
__UINTPTR_TYPE__ u, u2, u3;
__attribute__((noinline, noclone)) int
foo (int *p)
{
int i, s = 0, s2 = 0, t, t2;
#pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \
lastprivate (t2)
for (i = 0; i < 512; i++)
{
a[i] *= p[i];
t2 = k + p[i];
k += m + 1;
s += p[i] + k;
c[i]++;
}
#pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \
lastprivate (t, u, u2, u3)
for (i = 512; i < 1024; i++)
{
a[i] *= p[i];
k += m + 1;
t = k + p[i];
u = (__UINTPTR_TYPE__) &k;
u2 = (__UINTPTR_TYPE__) &s2;
u3 = (__UINTPTR_TYPE__) &t;
s2 += t;
c[i]++;
}
return s + s2 + t + t2;
}
__attribute__((noinline, noclone)) long int
bar (int *p, long int n, long int o)
{
long int i, s = 0, s2 = 0, t, t2;
#pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \
lastprivate (t2)
for (i = 0; i < n; i++)
{
a[i] *= p[i];
t2 = k + p[i];
k += m + 1;
s += p[i] + k;
c[i]++;
}
#pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \
lastprivate (t, u, u2, u3)
for (i = n; i < o; i++)
{
a[i] *= p[i];
k += m + 1;
t = k + p[i];
u = (__UINTPTR_TYPE__) &k;
u2 = (__UINTPTR_TYPE__) &s2;
u3 = (__UINTPTR_TYPE__) &t;
s2 += t;
c[i]++;
}
return s + s2 + t + t2;
}
int
main ()
{
#if __SIZEOF_INT__ >= 4
int i;
k = 4;
m = 2;
for (i = 0; i < 1024; i++)
{
a[i] = i - 512;
b[i] = (i - 51) % 39;
c[i] = (unsigned char) i;
}
int s = foo (b);
for (i = 0; i < 1024; i++)
{
if (b[i] != (i - 51) % 39
|| a[i] != (i - 512) * b[i]
|| c[i] != (unsigned char) (i + 1))
abort ();
a[i] = i - 512;
}
if (k != 4 + 3 * 1024
|| s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023]))
abort ();
k = 4;
s = bar (b, 512, 1024);
for (i = 0; i < 1024; i++)
{
if (b[i] != (i - 51) % 39
|| a[i] != (i - 512) * b[i]
|| c[i] != (unsigned char) (i + 2))
abort ();
a[i] = i - 512;
}
if (k != 4 + 3 * 1024
|| s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023]))
abort ();
k = 4;
s = bar (b, 511, 1021);
for (i = 0; i < 1021; i++)
{
if (b[i] != (i - 51) % 39
|| a[i] != (i - 512) * b[i]
|| c[i] != (unsigned char) (i + 3))
abort ();
a[i] = i - 512;
}
for (i = 1021; i < 1024; i++)
if (b[i] != (i - 51) % 39
|| a[i] != i - 512
|| c[i] != (unsigned char) (i + 2))
abort ();
if (k != 4 + 3 * 1021
|| s != 1586803 + (4 + 3 * 510 + b[510]) + (4 + 3 * 1021 + b[1020]))
abort ();
#endif
return 0;
}