| /* { dg-do compile } */ |
| /* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=scalable" } */ |
| |
| #include <stdint.h> |
| |
| void consumer (void *); /* Only declared in the visible source; every |
| loop in TEST_LOOP hands X to it once the loop has finished. */ |
| /* TEST_LOOP (TYPE) defines |
| void multi_loop_TYPE (TYPE *x, TYPE val1, TYPE val2, int n). |
| The function runs the same loop three times. Iteration I adds VAL1 to |
| x[2 * I] and VAL2 to x[2 * I + 1], so X is accessed as N adjacent pairs |
| (indices 0 .. 2 * N - 1); nothing is touched when N <= 0. After each |
| loop the array pointer is passed to consumer. |
| NOTE(review): the consumer calls presumably keep the three loops |
| separate and make the updated array observable -- confirm the intent. */ |
| #define TEST_LOOP(TYPE) \ |
| void \ |
| multi_loop_##TYPE (TYPE *x, TYPE val1, TYPE val2, int n) \ |
| { \ |
| for (int i = 0; i < n; ++i) /* pass 1 */ \ |
| { \ |
| x[i * 2] += val1; /* first element of pair i */ \ |
| x[i * 2 + 1] += val2; /* second element of pair i */ \ |
| } \ |
| consumer (x); \ |
| for (int i = 0; i < n; ++i) /* pass 2: same update again */ \ |
| { \ |
| x[i * 2] += val1; \ |
| x[i * 2 + 1] += val2; \ |
| } \ |
| consumer (x); \ |
| for (int i = 0; i < n; ++i) /* pass 3: same update again */ \ |
| { \ |
| x[i * 2] += val1; \ |
| x[i * 2 + 1] += val2; \ |
| } \ |
| consumer (x); \ |
| } |
| |
| /* Instantiate multi_loop_TYPE for the 8-, 16-, 32- and 64-bit unsigned |
| element types from <stdint.h>. |
| NOTE(review): the per-type remarks that follow quote fixed iteration |
| counts, but the loops in TEST_LOOP run to the runtime bound N and the |
| dg-options line requests a scalable vector length, so no such count is |
| visible in this file -- the remarks may have been inherited from a |
| fixed-trip-count variant of this test; confirm or drop. |
| One iteration is enough. */ |
| TEST_LOOP (uint8_t); |
| TEST_LOOP (uint16_t); |
| /* Two iterations are enough. Complete unrolling makes sense |
| even at -O2. */ |
| TEST_LOOP (uint32_t); |
| /* Four iterations are needed; ought to stay a loop. */ |
| TEST_LOOP (uint64_t); |
| |
| /* { dg-final { scan-assembler {\tld1b\tz[0-9]\.b} } } */ |
| /* { dg-final { scan-assembler {\tld1h\tz[0-9]\.h} } } */ |
| /* { dg-final { scan-assembler {\tld1w\tz[0-9]\.s} } } */ |
| /* { dg-final { scan-assembler {\tld1d\tz[0-9]\.d} } } */ |
| /* { dg-final { scan-assembler-not {\tldr\tz[0-9]} } } */ |
| /* { dg-final { scan-assembler-not {\tstr\tz[0-9]} } } */ |
| /* { dg-final { scan-assembler-not {\tldr\tp[0-9]} } } */ |
| /* { dg-final { scan-assembler-not {\tstr\tp[0-9]} } } */ |