| /* { dg-do run } */ |
| /* { dg-additional-options "-msse2" { target sse2_runtime } } */ |
| /* { dg-additional-options "-mavx" { target avx_runtime } } */ |
| |
| extern void abort (); |
| int a[1024] __attribute__((aligned (32))) = { 1 }; |
| int b[1024] __attribute__((aligned (32))) = { 1 }; |
| unsigned char c[1024] __attribute__((aligned (32))) = { 1 }; |
| int k, m; |
| __UINTPTR_TYPE__ u, u2, u3; |
| |
| __attribute__((noinline, noclone)) int |
| foo (int *p) |
| { |
| int i, s = 0, s2 = 0, t, t2; |
| #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \ |
| lastprivate (t2) |
| for (i = 0; i < 512; i++) |
| { |
| a[i] *= p[i]; |
| t2 = k + p[i]; |
| k += m + 1; |
| s += p[i] + k; |
| c[i]++; |
| } |
| #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \ |
| lastprivate (t, u, u2, u3) |
| for (i = 512; i < 1024; i++) |
| { |
| a[i] *= p[i]; |
| k += m + 1; |
| t = k + p[i]; |
| u = (__UINTPTR_TYPE__) &k; |
| u2 = (__UINTPTR_TYPE__) &s2; |
| u3 = (__UINTPTR_TYPE__) &t; |
| s2 += t; |
| c[i]++; |
| } |
| return s + s2 + t + t2; |
| } |
| |
| __attribute__((noinline, noclone)) long int |
| bar (int *p, long int n, long int o) |
| { |
| long int i, s = 0, s2 = 0, t, t2; |
| #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s) \ |
| lastprivate (t2) |
| for (i = 0; i < n; i++) |
| { |
| a[i] *= p[i]; |
| t2 = k + p[i]; |
| k += m + 1; |
| s += p[i] + k; |
| c[i]++; |
| } |
| #pragma omp simd aligned(a, b, p : 32) linear(k: m + 1) reduction(+:s2) \ |
| lastprivate (t, u, u2, u3) |
| for (i = n; i < o; i++) |
| { |
| a[i] *= p[i]; |
| k += m + 1; |
| t = k + p[i]; |
| u = (__UINTPTR_TYPE__) &k; |
| u2 = (__UINTPTR_TYPE__) &s2; |
| u3 = (__UINTPTR_TYPE__) &t; |
| s2 += t; |
| c[i]++; |
| } |
| return s + s2 + t + t2; |
| } |
| |
| int |
| main () |
| { |
| #if __SIZEOF_INT__ >= 4 |
| int i; |
| k = 4; |
| m = 2; |
| for (i = 0; i < 1024; i++) |
| { |
| a[i] = i - 512; |
| b[i] = (i - 51) % 39; |
| c[i] = (unsigned char) i; |
| } |
| int s = foo (b); |
| for (i = 0; i < 1024; i++) |
| { |
| if (b[i] != (i - 51) % 39 |
| || a[i] != (i - 512) * b[i] |
| || c[i] != (unsigned char) (i + 1)) |
| abort (); |
| a[i] = i - 512; |
| } |
| if (k != 4 + 3 * 1024 |
| || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023])) |
| abort (); |
| k = 4; |
| s = bar (b, 512, 1024); |
| for (i = 0; i < 1024; i++) |
| { |
| if (b[i] != (i - 51) % 39 |
| || a[i] != (i - 512) * b[i] |
| || c[i] != (unsigned char) (i + 2)) |
| abort (); |
| a[i] = i - 512; |
| } |
| if (k != 4 + 3 * 1024 |
| || s != 1596127 + (4 + 3 * 511 + b[511]) + (4 + 3 * 1024 + b[1023])) |
| abort (); |
| k = 4; |
| s = bar (b, 511, 1021); |
| for (i = 0; i < 1021; i++) |
| { |
| if (b[i] != (i - 51) % 39 |
| || a[i] != (i - 512) * b[i] |
| || c[i] != (unsigned char) (i + 3)) |
| abort (); |
| a[i] = i - 512; |
| } |
| for (i = 1021; i < 1024; i++) |
| if (b[i] != (i - 51) % 39 |
| || a[i] != i - 512 |
| || c[i] != (unsigned char) (i + 2)) |
| abort (); |
| if (k != 4 + 3 * 1021 |
| || s != 1586803 + (4 + 3 * 510 + b[510]) + (4 + 3 * 1021 + b[1020])) |
| abort (); |
| #endif |
| return 0; |
| } |