| /* { dg-do compile } */ |
| /* { dg-require-effective-target power10_ok } */ |
| /* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */ |
| |
| typedef unsigned char vec_t __attribute__((vector_size(16))); |
| |
| void |
| foo0 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_xvi4ger8 (&acc, vec0, vec1); |
| __builtin_mma_xvi4ger8pp (&acc, vec0, vec1); |
| dst[0] = acc; |
| } |
| |
| void |
| foo1 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_xvi8ger4 (&acc, vec0, vec1); |
| __builtin_mma_xvi8ger4pp (&acc, vec0, vec1); |
| __builtin_mma_xvi8ger4spp(&acc, vec0, vec1); |
| dst[1] = acc; |
| } |
| |
| void |
| foo2 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_xvi16ger2 (&acc, vec0, vec1); |
| __builtin_mma_xvi16ger2pp (&acc, vec0, vec1); |
| dst[2] = acc; |
| } |
| |
| void |
| foo3 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_xvi16ger2s (&acc, vec0, vec1); |
| __builtin_mma_xvi16ger2spp (&acc, vec0, vec1); |
| dst[3] = acc; |
| } |
| |
| void |
| foo4 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_xvf16ger2 (&acc, vec0, vec1); |
| __builtin_mma_xvf16ger2pp (&acc, vec0, vec1); |
| __builtin_mma_xvf16ger2pn (&acc, vec0, vec1); |
| dst[4] = acc; |
| } |
| |
| void |
| foo4b (__vector_quad *dst, __vector_quad *src, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| acc = src[0]; |
| __builtin_mma_xvf16ger2np (&acc, vec0, vec1); |
| __builtin_mma_xvf16ger2nn (&acc, vec0, vec1); |
| dst[4] = acc; |
| } |
| |
| void |
| foo5 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_xvbf16ger2 (&acc, vec0, vec1); |
| __builtin_mma_xvbf16ger2pp (&acc, vec0, vec1); |
| __builtin_mma_xvbf16ger2pn (&acc, vec0, vec1); |
| dst[5] = acc; |
| } |
| |
| void |
| foo5b (__vector_quad *dst, __vector_quad *src, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| acc = src[0]; |
| __builtin_mma_xvbf16ger2np (&acc, vec0, vec1); |
| __builtin_mma_xvbf16ger2nn (&acc, vec0, vec1); |
| dst[5] = acc; |
| } |
| |
| void |
| foo6 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_xvf32ger (&acc, vec0, vec1); |
| __builtin_mma_xvf32gerpp (&acc, vec0, vec1); |
| __builtin_mma_xvf32gerpn (&acc, vec0, vec1); |
| dst[6] = acc; |
| } |
| |
| void |
| foo6b (__vector_quad *dst, __vector_quad *src, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| acc = src[0]; |
| __builtin_mma_xvf32gernp (&acc, vec0, vec1); |
| __builtin_mma_xvf32gernn (&acc, vec0, vec1); |
| dst[6] = acc; |
| } |
| |
| void |
| foo7 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_pmxvi4ger8 (&acc, vec0, vec1, 15, 15, 255); |
| __builtin_mma_pmxvi4ger8pp (&acc, vec0, vec1, 15, 15, 255); |
| dst[7] = acc; |
| } |
| |
| void |
| foo8 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_pmxvi8ger4 (&acc, vec0, vec1, 15, 15, 15); |
| __builtin_mma_pmxvi8ger4pp (&acc, vec0, vec1, 15, 15, 15); |
| __builtin_mma_pmxvi8ger4spp(&acc, vec0, vec1, 15, 15, 15); |
| dst[8] = acc; |
| } |
| |
| void |
| foo9 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_pmxvi16ger2 (&acc, vec0, vec1, 15, 15, 3); |
| __builtin_mma_pmxvi16ger2pp (&acc, vec0, vec1, 15, 15, 3); |
| dst[9] = acc; |
| } |
| |
| void |
| foo10 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_pmxvi16ger2s (&acc, vec0, vec1, 15, 15, 3); |
| __builtin_mma_pmxvi16ger2spp (&acc, vec0, vec1, 15, 15, 3); |
| dst[10] = acc; |
| } |
| |
| void |
| foo11 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_pmxvf16ger2 (&acc, vec0, vec1, 15, 15, 3); |
| __builtin_mma_pmxvf16ger2pp (&acc, vec0, vec1, 15, 15, 3); |
| __builtin_mma_pmxvf16ger2pn (&acc, vec0, vec1, 15, 15, 3); |
| dst[11] = acc; |
| } |
| |
| void |
| foo11b (__vector_quad *dst, __vector_quad *src, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| acc = src[0]; |
| __builtin_mma_pmxvf16ger2np (&acc, vec0, vec1, 15, 15, 3); |
| __builtin_mma_pmxvf16ger2nn (&acc, vec0, vec1, 15, 15, 3); |
| dst[11] = acc; |
| } |
| |
| void |
| foo12 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_pmxvbf16ger2 (&acc, vec0, vec1, 15, 15, 3); |
| __builtin_mma_pmxvbf16ger2pp (&acc, vec0, vec1, 15, 15, 3); |
| __builtin_mma_pmxvbf16ger2pn (&acc, vec0, vec1, 15, 15, 3); |
| dst[12] = acc; |
| } |
| |
| void |
| foo12b (__vector_quad *dst, __vector_quad *src, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| acc = src[0]; |
| __builtin_mma_pmxvbf16ger2np (&acc, vec0, vec1, 15, 15, 3); |
| __builtin_mma_pmxvbf16ger2nn (&acc, vec0, vec1, 15, 15, 3); |
| dst[12] = acc; |
| } |
| |
| void |
| foo13 (__vector_quad *dst, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| __builtin_mma_pmxvf32ger (&acc, vec0, vec1, 15, 15); |
| __builtin_mma_pmxvf32gerpp (&acc, vec0, vec1, 15, 15); |
| __builtin_mma_pmxvf32gerpn (&acc, vec0, vec1, 15, 15); |
| dst[13] = acc; |
| } |
| |
| void |
| foo13b (__vector_quad *dst, __vector_quad *src, vec_t *vec) |
| { |
| __vector_quad acc; |
| vec_t vec0 = vec[0]; |
| vec_t vec1 = vec[1]; |
| |
| acc = src[0]; |
| __builtin_mma_pmxvf32gernp (&acc, vec0, vec1, 15, 15); |
| __builtin_mma_pmxvf32gernn (&acc, vec0, vec1, 15, 15); |
| dst[13] = acc; |
| } |
| |
| /* { dg-final { scan-assembler-times {\mlxv\M} 40 } } */ |
| /* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */ |
| /* { dg-final { scan-assembler-times {\mstxvp\M} 40 } } */ |
| /* { dg-final { scan-assembler-times {\mxxmfacc\M} 20 } } */ |
| /* { dg-final { scan-assembler-times {\mxxmtacc\M} 6 } } */ |
| /* { dg-final { scan-assembler-times {\mxvbf16ger2\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvbf16ger2nn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvbf16ger2np\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvbf16ger2pn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvbf16ger2pp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvf16ger2\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvf16ger2nn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvf16ger2np\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvf16ger2pn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvf16ger2pp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvf32ger\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvf32gernn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvf32gernp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvf32gerpn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvf32gerpp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvi16ger2\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvi16ger2pp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvi16ger2s\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvi16ger2spp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvi4ger8\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvi4ger8pp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvi8ger4\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvi8ger4pp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mxvi8ger4spp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvbf16ger2\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvbf16ger2nn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvbf16ger2np\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvbf16ger2pn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvbf16ger2pp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvf16ger2\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvf16ger2nn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvf16ger2np\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvf16ger2pn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvf16ger2pp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvf32ger\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvf32gernn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvf32gernp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvf32gerpn\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvf32gerpp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvi16ger2\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvi16ger2pp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvi16ger2s\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvi16ger2spp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvi4ger8\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvi4ger8pp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvi8ger4\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvi8ger4pp\M} 1 } } */ |
| /* { dg-final { scan-assembler-times {\mpmxvi8ger4spp\M} 1 } } */ |