| /* { dg-require-effective-target size32plus } */ |
| /* { dg-additional-options "-O2 -fopenmp -fdump-tree-vect-details" } */ |
| /* { dg-additional-options "-msse2" { target sse2_runtime } } */ |
| /* { dg-additional-options "-mavx" { target avx_runtime } } */ |
| /* { dg-final { scan-tree-dump-times "vectorized \[2-6] loops" 2 "vect" { target sse2_runtime } } } */ |
| |
| extern void abort (void); |
| int r, a[1024], b[1024]; |
| unsigned short r2, b2[1024]; |
| unsigned char r3, b3[1024]; |
| |
| __attribute__((noipa)) void |
| foo (int *a, int *b, unsigned short *b2, unsigned char *b3) |
| { |
| #pragma omp for simd reduction (inscan, +:r, r2, r3) if (simd:0) |
| for (int i = 0; i < 1024; i++) |
| { |
| { r += a[i]; r2 += a[i]; r3 += a[i]; } |
| #pragma omp scan inclusive(r, r2, r3) |
| { |
| b[i] = r; |
| b2[i] = r2; |
| b3[i] = r3; |
| } |
| } |
| } |
| |
| __attribute__((noipa)) int |
| bar (unsigned short *s2p, unsigned char *s3p) |
| { |
| int s = 0; |
| unsigned short s2 = 0; |
| unsigned char s3 = 0; |
| #pragma omp parallel |
| #pragma omp for simd reduction (inscan, +:s, s2, s3) simdlen (1) |
| for (int i = 0; i < 1024; i++) |
| { |
| { |
| s += 2 * a[i]; |
| s2 += 2 * a[i]; |
| s3 += 2 * a[i]; |
| } |
| #pragma omp scan inclusive(s, s2, s3) |
| { b[i] = s; b2[i] = s2; b3[i] = s3; } |
| } |
| *s2p = s2; |
| *s3p = s3; |
| return s; |
| } |
| |
| __attribute__((noipa)) void |
| baz (int *a, int *b, unsigned short *b2, unsigned char *b3) |
| { |
| #pragma omp parallel for simd reduction (inscan, +:r, r2, r3) |
| for (int i = 0; i < 1024; i++) |
| { |
| { |
| r += a[i]; |
| r2 += a[i]; |
| r3 += a[i]; |
| } |
| #pragma omp scan inclusive(r, r2, r3) |
| { |
| b[i] = r; |
| b2[i] = r2; |
| b3[i] = r3; |
| } |
| } |
| } |
| |
| __attribute__((noipa)) int |
| qux (unsigned short *s2p, unsigned char *s3p) |
| { |
| int s = 0; |
| unsigned short s2 = 0; |
| unsigned char s3 = 0; |
| #pragma omp parallel for simd reduction (inscan, +:s, s2, s3) |
| for (int i = 0; i < 1024; i++) |
| { |
| { s += 2 * a[i]; s2 += 2 * a[i]; s3 += 2 * a[i]; } |
| #pragma omp scan inclusive(s, s2, s3) |
| { b[i] = s; b2[i] = s2; b3[i] = s3; } |
| } |
| *s2p = s2; |
| *s3p = s3; |
| return s; |
| } |
| |
| int |
| main () |
| { |
| int s = 0; |
| unsigned short s2; |
| unsigned char s3; |
| for (int i = 0; i < 1024; ++i) |
| { |
| a[i] = i; |
| b[i] = -1; |
| b2[i] = -1; |
| b3[i] = -1; |
| asm ("" : "+g" (i)); |
| } |
| #pragma omp parallel |
| foo (a, b, b2, b3); |
| if (r != 1024 * 1023 / 2 |
| || r2 != (unsigned short) r |
| || r3 != (unsigned char) r) |
| abort (); |
| for (int i = 0; i < 1024; ++i) |
| { |
| s += i; |
| if (b[i] != s |
| || b2[i] != (unsigned short) s |
| || b3[i] != (unsigned char) s) |
| abort (); |
| else |
| { |
| b[i] = 25; |
| b2[i] = 24; |
| b3[i] = 26; |
| } |
| } |
| if (bar (&s2, &s3) != 1024 * 1023) |
| abort (); |
| if (s2 != (unsigned short) (1024 * 1023) |
| || s3 != (unsigned char) (1024 * 1023)) |
| abort (); |
| s = 0; |
| for (int i = 0; i < 1024; ++i) |
| { |
| s += 2 * i; |
| if (b[i] != s |
| || b2[i] != (unsigned short) s |
| || b3[i] != (unsigned char) s) |
| abort (); |
| else |
| { |
| b[i] = -1; |
| b2[i] = -1; |
| b3[i] = -1; |
| } |
| } |
| r = 0; |
| r2 = 0; |
| r3 = 0; |
| baz (a, b, b2, b3); |
| if (r != 1024 * 1023 / 2 |
| || r2 != (unsigned short) r |
| || r3 != (unsigned char) r) |
| abort (); |
| s = 0; |
| for (int i = 0; i < 1024; ++i) |
| { |
| s += i; |
| if (b[i] != s |
| || b2[i] != (unsigned short) s |
| || b3[i] != (unsigned char) s) |
| abort (); |
| else |
| { |
| b[i] = 25; |
| b2[i] = 24; |
| b3[i] = 26; |
| } |
| } |
| s2 = 0; |
| s3 = 0; |
| if (qux (&s2, &s3) != 1024 * 1023) |
| abort (); |
| if (s2 != (unsigned short) (1024 * 1023) |
| || s3 != (unsigned char) (1024 * 1023)) |
| abort (); |
| s = 0; |
| for (int i = 0; i < 1024; ++i) |
| { |
| s += 2 * i; |
| if (b[i] != s |
| || b2[i] != (unsigned short) s |
| || b3[i] != (unsigned char) s) |
| abort (); |
| } |
| return 0; |
| } |