/* { dg-do compile } */
/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */

/* Compile-only test.  Each function below stores to X either a
   conditionally loaded element of Z (selected when the matching element
   of Y is nonzero) or a fallback value, handling a group of 2, 4 or 8
   adjacent elements per loop iteration.  The dg-final directives at the
   end of the file check which ld1w/st1w/ld2w/st2w instructions appear
   in the generated assembly.  */

#include <stdint.h>

/* Define mask_slp_<TYPE_COND>_2_<ALT_VAL>, which handles two adjacent
   elements per iteration.  The even lane falls back to 1 and the odd
   lane falls back to ALT_VAL when the corresponding Y element is zero.

   TYPE_COND is the element type of Z; it must be a single token because
   it is pasted into the function name.  The loop touches index i + 1
   for every i < n, so all accesses stay below index N only when N is a
   multiple of 2.  */
#define MASK_SLP_2(TYPE_COND, ALT_VAL) \
void __attribute__ ((noinline, noclone)) \
mask_slp_##TYPE_COND##_2_##ALT_VAL (int *restrict x, int *restrict y, \
				    TYPE_COND *restrict z, int n) \
{ \
  for (int i = 0; i < n; i += 2) \
    { \
      x[i] = y[i] ? z[i] : 1; \
      x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \
    } \
}

/* Define mask_slp_<TYPE_COND>_4_<ALT_VAL>.  Same selection as
   MASK_SLP_2, but four adjacent elements are handled per iteration:
   even lanes fall back to 1, odd lanes fall back to ALT_VAL.  All
   accesses stay below index N only when N is a multiple of 4.  */
#define MASK_SLP_4(TYPE_COND, ALT_VAL) \
void __attribute__ ((noinline, noclone)) \
mask_slp_##TYPE_COND##_4_##ALT_VAL (int *restrict x, int *restrict y, \
				    TYPE_COND *restrict z, int n) \
{ \
  for (int i = 0; i < n; i += 4) \
    { \
      x[i] = y[i] ? z[i] : 1; \
      x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \
      x[i + 2] = y[i + 2] ? z[i + 2] : 1; \
      x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL; \
    } \
}

/* Define mask_slp_<TYPE_COND>_8_<ALT_VAL>.  Same selection as
   MASK_SLP_2, but eight adjacent elements are handled per iteration:
   even lanes fall back to 1, odd lanes fall back to ALT_VAL.  All
   accesses stay below index N only when N is a multiple of 8.  */
#define MASK_SLP_8(TYPE_COND, ALT_VAL) \
void __attribute__ ((noinline, noclone)) \
mask_slp_##TYPE_COND##_8_##ALT_VAL (int *restrict x, int *restrict y, \
				    TYPE_COND *restrict z, int n) \
{ \
  for (int i = 0; i < n; i += 8) \
    { \
      x[i] = y[i] ? z[i] : 1; \
      x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \
      x[i + 2] = y[i + 2] ? z[i + 2] : 1; \
      x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL; \
      x[i + 4] = y[i + 4] ? z[i + 4] : 1; \
      x[i + 5] = y[i + 5] ? z[i + 5] : ALT_VAL; \
      x[i + 6] = y[i + 6] ? z[i + 6] : 1; \
      x[i + 7] = y[i + 7] ? z[i + 7] : ALT_VAL; \
    } \
}

/* Define mask_slp_<TYPE_COND>_FAIL.  Two elements are handled per
   iteration as in MASK_SLP_2, but the odd lane's fallback is
   x[z[i + 1]]: a load from X indexed by a Z element instead of a
   constant, so Z must hold valid indices into X wherever the odd Y
   element is zero.
   NOTE(review): the _FAIL suffix suggests this shape is expected not
   to be treated as an SLP group -- confirm against the vectorizer.  */
#define MASK_SLP_FAIL(TYPE_COND) \
void __attribute__ ((noinline, noclone)) \
mask_slp_##TYPE_COND##_FAIL (int *restrict x, int *restrict y, \
			     TYPE_COND *restrict z, int n) \
{ \
  for (int i = 0; i < n; i += 2) \
    { \
      x[i] = y[i] ? z[i] : 1; \
      x[i + 1] = y[i + 1] ? z[i + 1] : x[z[i + 1]]; \
    } \
}

/* Instantiate every group size for 8-bit, int and 64-bit Z elements.
   ALT_VAL == 1 makes both fallback constants equal; ALT_VAL == 2 makes
   the even and odd fallbacks differ.  */
MASK_SLP_2(int8_t, 1)
MASK_SLP_2(int8_t, 2)
MASK_SLP_2(int, 1)
MASK_SLP_2(int, 2)
MASK_SLP_2(int64_t, 1)
MASK_SLP_2(int64_t, 2)

MASK_SLP_4(int8_t, 1)
MASK_SLP_4(int8_t, 2)
MASK_SLP_4(int, 1)
MASK_SLP_4(int, 2)
MASK_SLP_4(int64_t, 1)
MASK_SLP_4(int64_t, 2)

MASK_SLP_8(int8_t, 1)
MASK_SLP_8(int8_t, 2)
MASK_SLP_8(int, 1)
MASK_SLP_8(int, 2)
MASK_SLP_8(int64_t, 1)
MASK_SLP_8(int64_t, 2)

MASK_SLP_FAIL(int8_t)
MASK_SLP_FAIL(int)
MASK_SLP_FAIL(int64_t)

/* No ld2w/st2w may be emitted; ld1w and st1w must appear exactly 48 and
   40 times respectively.  */
/* { dg-final { scan-assembler-not {\tld2w\t} } } */
/* { dg-final { scan-assembler-not {\tst2w\t} } } */
/* { dg-final { scan-assembler-times {\tld1w\t} 48 } } */
/* { dg-final { scan-assembler-times {\tst1w\t} 40 } } */