/* { dg-options "-O2 -ftree-vectorize" } */

/* Vectorizer test: each loop reads a narrow signed element indirectly
   through a signed 32-bit index and accumulates it into a wider signed
   element.  The assembler scans at the end of the file expect the
   widening to be done by sign-extending gather loads (ld1sb, ld1sh,
   ld1sw with a vector offset) and forbid any separate sxt* instruction.  */

#include <stdint.h>

/* Define the function f_<TYPE1>_<TYPE2>, which performs

     DST[I] += SRC1[I] + SRC2[INDEX[I]]

   for every I in [0, N).  DST and SRC1 hold TYPE1 elements; SRC2 holds
   TYPE2 elements and is only accessed through the int32_t INDEX array.
   Nothing is read or written when N <= 0.  Every pointer is
   restrict-qualified, so DST must not overlap any of the other arrays.  */
#define TEST_LOOP(TYPE1, TYPE2)                                         \
  void                                                                  \
  f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst, TYPE1 *restrict src1,       \
                       TYPE2 *restrict src2, int32_t *restrict index,   \
                       int n)                                           \
  {                                                                     \
    for (int i = 0; i < n; ++i)                                         \
      dst[i] += src1[i] + src2[index[i]];                               \
  }

/* Apply T to every (TYPE1, TYPE2) pair under test.  In each of the six
   pairs TYPE1 is strictly wider than TYPE2.  */
#define TEST_ALL(T)                                                     \
  T (int16_t, int8_t)                                                   \
  T (int32_t, int8_t)                                                   \
  T (int64_t, int8_t)                                                   \
  T (int32_t, int16_t)                                                  \
  T (int64_t, int16_t)                                                  \
  T (int64_t, int32_t)

/* Emit the six kernels, f_int16_t_int8_t through f_int64_t_int32_t.  */
TEST_ALL (TEST_LOOP)

/* Sign-extending gather loads of the TYPE2 element (vector offset in a z
   register), one scan per element-size / lane-size combination.  */
/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw\]\n} 2 } } */
/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld1sh\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 1\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld1sh\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 1\]\n} 1 } } */
/* { dg-final { scan-assembler-times {\tld1sw\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 2\]\n} 1 } } */

/* Contiguous (scalar base + scalar offset) loads of 32-bit words; the
   ld1sw form sign-extends into 64-bit lanes.  NOTE(review): the counts
   presumably cover the INDEX loads plus the int32_t DST/SRC1 loads --
   re-derive them if TEST_ALL changes.  */
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 7 } } */
/* { dg-final { scan-assembler-times {\tld1sw\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 3 } } */

/* No stand-alone sign-extension instruction may appear.  */
/* { dg-final { scan-assembler-not {\tsxt.\t} } } */