/* i?86 does not have V2SF, x32 does though.  */
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O3 -mavx -mfma" } */

/* Six single-precision coefficients.  Read as the 3x3 matrix

       | m11  m12  0 |
       | m21  m22  0 |
       | dx   dy   1 |

   the arithmetic in multiply () below is an ordinary matrix product.
   The field order is significant: the four m* fields are contiguous and
   are followed by the dx/dy pair, which is the grouping the expectation
   comment near the dg-final directives (V4SF and V2SF) refers to.  */
struct Matrix
{
  float m11;
  float m12;
  float m21;
  float m22;
  float dx;
  float dy;
};

/* Return the product A * B of the two matrices, using the 3x3 reading
   described on struct Matrix.

   A and B are only read; the result is returned by value.  Neither
   pointer is checked for NULL.

   NOTE: this is a code-generation test.  The dg-final directives in this
   file count the vadd/vmul/vfma instructions the compiler emits for this
   function, so keep the operand order and the shape of every expression
   exactly as written.  */
struct Matrix multiply(const struct Matrix *a, const struct Matrix *b)
{
  struct Matrix out;

  /* 2x2 part: rows of A times columns of B.  */
  out.m11 = a->m11*b->m11 + a->m12*b->m21;
  out.m12 = a->m11*b->m12 + a->m12*b->m22;
  out.m21 = a->m21*b->m11 + a->m22*b->m21;
  out.m22 = a->m21*b->m12 + a->m22*b->m22;

  /* Last row: (a->dx, a->dy) times B's 2x2 part, plus (b->dx, b->dy).  */
  out.dx = a->dx*b->m11 + a->dy*b->m21 + b->dx;
  out.dy = a->dx*b->m12 + a->dy*b->m22 + b->dy;
  return out;
}

/* The whole kernel should be vectorized with V4SF and V2SF operations.  */
/* { dg-final { scan-assembler-times "vadd" 1 } } */
/* { dg-final { scan-assembler-times "vmul" 2 } } */
/* { dg-final { scan-assembler-times "vfma" 2 } } */