/* blob: 69ee826e1be912e1d261f3936ff8169da91c3a1a [file] [log] [blame] (repository viewer header; not part of the test) */
/* { dg-do compile } */
/* { dg-require-effective-target power10_ok } */
/* { dg-options "-Wno-psabi -mdejagnu-cpu=power10 -O2" } */
/* 16-byte vector of unsigned char.  Every function below uses this one type
   for both vector operands it passes to the MMA builtins.  */
typedef unsigned char vec_t __attribute__((vector_size(16)));
/* Exercise __builtin_mma_xvi4ger8 and __builtin_mma_xvi4ger8pp.

   DST: array of accumulators; the result is stored to dst[0].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   The local accumulator has no initializer; the xvi4ger8 call is the first
   thing to touch it.  NOTE(review): presumably the unsuffixed form writes the
   whole accumulator and the "pp" form accumulates into it -- confirm against
   the Power ISA 3.1 MMA description.  */
void
foo0 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_xvi4ger8 (&acc, vec0, vec1);
__builtin_mma_xvi4ger8pp (&acc, vec0, vec1);
dst[0] = acc;
}
/* Exercise __builtin_mma_xvi8ger4, __builtin_mma_xvi8ger4pp and
   __builtin_mma_xvi8ger4spp.

   DST: array of accumulators; the result is stored to dst[1].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   The local accumulator has no initializer; the xvi8ger4 call is the first
   thing to touch it.  NOTE(review): presumably the unsuffixed form writes the
   whole accumulator and the suffixed forms accumulate into it -- confirm
   against the Power ISA 3.1 MMA description.  */
void
foo1 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_xvi8ger4 (&acc, vec0, vec1);
__builtin_mma_xvi8ger4pp (&acc, vec0, vec1);
__builtin_mma_xvi8ger4spp (&acc, vec0, vec1);
dst[1] = acc;
}
/* Exercise __builtin_mma_xvi16ger2 and __builtin_mma_xvi16ger2pp.

   DST: array of accumulators; the result is stored to dst[2].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   The local accumulator has no initializer; the xvi16ger2 call is the first
   thing to touch it.  NOTE(review): presumably the unsuffixed form writes the
   whole accumulator -- confirm against the Power ISA 3.1 MMA description.  */
void
foo2 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_xvi16ger2 (&acc, vec0, vec1);
__builtin_mma_xvi16ger2pp (&acc, vec0, vec1);
dst[2] = acc;
}
/* Exercise __builtin_mma_xvi16ger2s and __builtin_mma_xvi16ger2spp.

   DST: array of accumulators; the result is stored to dst[3].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   The local accumulator has no initializer; the xvi16ger2s call is the first
   thing to touch it.  NOTE(review): presumably that form writes the whole
   accumulator -- confirm against the Power ISA 3.1 MMA description.  */
void
foo3 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_xvi16ger2s (&acc, vec0, vec1);
__builtin_mma_xvi16ger2spp (&acc, vec0, vec1);
dst[3] = acc;
}
/* Exercise __builtin_mma_xvf16ger2, __builtin_mma_xvf16ger2pp and
   __builtin_mma_xvf16ger2pn.

   DST: array of accumulators; the result is stored to dst[4].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   The local accumulator has no initializer; the xvf16ger2 call is the first
   thing to touch it.  NOTE(review): presumably the unsuffixed form writes the
   whole accumulator -- confirm against the Power ISA 3.1 MMA description.  */
void
foo4 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_xvf16ger2 (&acc, vec0, vec1);
__builtin_mma_xvf16ger2pp (&acc, vec0, vec1);
__builtin_mma_xvf16ger2pn (&acc, vec0, vec1);
dst[4] = acc;
}
/* Exercise __builtin_mma_xvf16ger2np and __builtin_mma_xvf16ger2nn.

   DST: array of accumulators; the result is stored to dst[4] (the same index
	foo4 writes).
   SRC: array of accumulators; src[0] seeds the local accumulator.
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   Unlike foo4, no unsuffixed builtin is called here, so the accumulator is
   given a defined value from memory before the first builtin.  */
void
foo4b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
/* Seed the accumulator from memory before the accumulating builtins.  */
acc = src[0];
__builtin_mma_xvf16ger2np (&acc, vec0, vec1);
__builtin_mma_xvf16ger2nn (&acc, vec0, vec1);
dst[4] = acc;
}
/* Exercise __builtin_mma_xvbf16ger2, __builtin_mma_xvbf16ger2pp and
   __builtin_mma_xvbf16ger2pn.

   DST: array of accumulators; the result is stored to dst[5].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   The local accumulator has no initializer; the xvbf16ger2 call is the first
   thing to touch it.  NOTE(review): presumably the unsuffixed form writes the
   whole accumulator -- confirm against the Power ISA 3.1 MMA description.  */
void
foo5 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_xvbf16ger2 (&acc, vec0, vec1);
__builtin_mma_xvbf16ger2pp (&acc, vec0, vec1);
__builtin_mma_xvbf16ger2pn (&acc, vec0, vec1);
dst[5] = acc;
}
/* Exercise __builtin_mma_xvbf16ger2np and __builtin_mma_xvbf16ger2nn.

   DST: array of accumulators; the result is stored to dst[5] (the same index
	foo5 writes).
   SRC: array of accumulators; src[0] seeds the local accumulator.
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   Unlike foo5, no unsuffixed builtin is called here, so the accumulator is
   given a defined value from memory before the first builtin.  */
void
foo5b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
/* Seed the accumulator from memory before the accumulating builtins.  */
acc = src[0];
__builtin_mma_xvbf16ger2np (&acc, vec0, vec1);
__builtin_mma_xvbf16ger2nn (&acc, vec0, vec1);
dst[5] = acc;
}
/* Exercise __builtin_mma_xvf32ger, __builtin_mma_xvf32gerpp and
   __builtin_mma_xvf32gerpn.

   DST: array of accumulators; the result is stored to dst[6].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   The local accumulator has no initializer; the xvf32ger call is the first
   thing to touch it.  NOTE(review): presumably the unsuffixed form writes the
   whole accumulator -- confirm against the Power ISA 3.1 MMA description.  */
void
foo6 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_xvf32ger (&acc, vec0, vec1);
__builtin_mma_xvf32gerpp (&acc, vec0, vec1);
__builtin_mma_xvf32gerpn (&acc, vec0, vec1);
dst[6] = acc;
}
/* Exercise __builtin_mma_xvf32gernp and __builtin_mma_xvf32gernn.

   DST: array of accumulators; the result is stored to dst[6] (the same index
	foo6 writes).
   SRC: array of accumulators; src[0] seeds the local accumulator.
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   Unlike foo6, no unsuffixed builtin is called here, so the accumulator is
   given a defined value from memory before the first builtin.  */
void
foo6b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
/* Seed the accumulator from memory before the accumulating builtins.  */
acc = src[0];
__builtin_mma_xvf32gernp (&acc, vec0, vec1);
__builtin_mma_xvf32gernn (&acc, vec0, vec1);
dst[6] = acc;
}
/* Exercise the "pm" builtins __builtin_mma_pmxvi4ger8 and
   __builtin_mma_pmxvi4ger8pp.

   DST: array of accumulators; the result is stored to dst[7].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   NOTE(review): the three trailing immediates (15, 15, 255) are presumably
   the x, y and product masks with every bit set -- confirm against the GCC
   MMA builtin documentation.  The local accumulator has no initializer; the
   pmxvi4ger8 call is the first thing to touch it.  */
void
foo7 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_pmxvi4ger8 (&acc, vec0, vec1, 15, 15, 255);
__builtin_mma_pmxvi4ger8pp (&acc, vec0, vec1, 15, 15, 255);
dst[7] = acc;
}
/* Exercise the "pm" builtins __builtin_mma_pmxvi8ger4,
   __builtin_mma_pmxvi8ger4pp and __builtin_mma_pmxvi8ger4spp.

   DST: array of accumulators; the result is stored to dst[8].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   NOTE(review): the three trailing immediates (15, 15, 15) are presumably
   the x, y and product masks with every bit set -- confirm against the GCC
   MMA builtin documentation.  The local accumulator has no initializer; the
   pmxvi8ger4 call is the first thing to touch it.  */
void
foo8 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_pmxvi8ger4 (&acc, vec0, vec1, 15, 15, 15);
__builtin_mma_pmxvi8ger4pp (&acc, vec0, vec1, 15, 15, 15);
__builtin_mma_pmxvi8ger4spp (&acc, vec0, vec1, 15, 15, 15);
dst[8] = acc;
}
/* Exercise the "pm" builtins __builtin_mma_pmxvi16ger2 and
   __builtin_mma_pmxvi16ger2pp.

   DST: array of accumulators; the result is stored to dst[9].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   NOTE(review): the three trailing immediates (15, 15, 3) are presumably
   the x, y and product masks with every bit set -- confirm against the GCC
   MMA builtin documentation.  The local accumulator has no initializer; the
   pmxvi16ger2 call is the first thing to touch it.  */
void
foo9 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_pmxvi16ger2 (&acc, vec0, vec1, 15, 15, 3);
__builtin_mma_pmxvi16ger2pp (&acc, vec0, vec1, 15, 15, 3);
dst[9] = acc;
}
/* Exercise the "pm" builtins __builtin_mma_pmxvi16ger2s and
   __builtin_mma_pmxvi16ger2spp.

   DST: array of accumulators; the result is stored to dst[10].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   NOTE(review): the three trailing immediates (15, 15, 3) are presumably
   the x, y and product masks with every bit set -- confirm against the GCC
   MMA builtin documentation.  The local accumulator has no initializer; the
   pmxvi16ger2s call is the first thing to touch it.  */
void
foo10 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_pmxvi16ger2s (&acc, vec0, vec1, 15, 15, 3);
__builtin_mma_pmxvi16ger2spp (&acc, vec0, vec1, 15, 15, 3);
dst[10] = acc;
}
/* Exercise the "pm" builtins __builtin_mma_pmxvf16ger2,
   __builtin_mma_pmxvf16ger2pp and __builtin_mma_pmxvf16ger2pn.

   DST: array of accumulators; the result is stored to dst[11].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   NOTE(review): the three trailing immediates (15, 15, 3) are presumably
   the x, y and product masks with every bit set -- confirm against the GCC
   MMA builtin documentation.  The local accumulator has no initializer; the
   pmxvf16ger2 call is the first thing to touch it.  */
void
foo11 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_pmxvf16ger2 (&acc, vec0, vec1, 15, 15, 3);
__builtin_mma_pmxvf16ger2pp (&acc, vec0, vec1, 15, 15, 3);
__builtin_mma_pmxvf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
dst[11] = acc;
}
/* Exercise the "pm" builtins __builtin_mma_pmxvf16ger2np and
   __builtin_mma_pmxvf16ger2nn.

   DST: array of accumulators; the result is stored to dst[11] (the same
	index foo11 writes).
   SRC: array of accumulators; src[0] seeds the local accumulator.
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   NOTE(review): the three trailing immediates (15, 15, 3) are presumably
   the x, y and product masks with every bit set -- confirm against the GCC
   MMA builtin documentation.  */
void
foo11b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
/* Seed the accumulator from memory before the accumulating builtins.  */
acc = src[0];
__builtin_mma_pmxvf16ger2np (&acc, vec0, vec1, 15, 15, 3);
__builtin_mma_pmxvf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
dst[11] = acc;
}
/* Exercise the "pm" builtins __builtin_mma_pmxvbf16ger2,
   __builtin_mma_pmxvbf16ger2pp and __builtin_mma_pmxvbf16ger2pn.

   DST: array of accumulators; the result is stored to dst[12].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   NOTE(review): the three trailing immediates (15, 15, 3) are presumably
   the x, y and product masks with every bit set -- confirm against the GCC
   MMA builtin documentation.  The local accumulator has no initializer; the
   pmxvbf16ger2 call is the first thing to touch it.  */
void
foo12 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_pmxvbf16ger2 (&acc, vec0, vec1, 15, 15, 3);
__builtin_mma_pmxvbf16ger2pp (&acc, vec0, vec1, 15, 15, 3);
__builtin_mma_pmxvbf16ger2pn (&acc, vec0, vec1, 15, 15, 3);
dst[12] = acc;
}
/* Exercise the "pm" builtins __builtin_mma_pmxvbf16ger2np and
   __builtin_mma_pmxvbf16ger2nn.

   DST: array of accumulators; the result is stored to dst[12] (the same
	index foo12 writes).
   SRC: array of accumulators; src[0] seeds the local accumulator.
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   NOTE(review): the three trailing immediates (15, 15, 3) are presumably
   the x, y and product masks with every bit set -- confirm against the GCC
   MMA builtin documentation.  */
void
foo12b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
/* Seed the accumulator from memory before the accumulating builtins.  */
acc = src[0];
__builtin_mma_pmxvbf16ger2np (&acc, vec0, vec1, 15, 15, 3);
__builtin_mma_pmxvbf16ger2nn (&acc, vec0, vec1, 15, 15, 3);
dst[12] = acc;
}
/* Exercise the "pm" builtins __builtin_mma_pmxvf32ger,
   __builtin_mma_pmxvf32gerpp and __builtin_mma_pmxvf32gerpn.

   DST: array of accumulators; the result is stored to dst[13].
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   These builtins take only two trailing immediates (15, 15), one fewer than
   the other "pm" builtins in this file.  NOTE(review): presumably the x and
   y masks with every bit set -- confirm against the GCC MMA builtin
   documentation.  The local accumulator has no initializer; the pmxvf32ger
   call is the first thing to touch it.  */
void
foo13 (__vector_quad *dst, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
__builtin_mma_pmxvf32ger (&acc, vec0, vec1, 15, 15);
__builtin_mma_pmxvf32gerpp (&acc, vec0, vec1, 15, 15);
__builtin_mma_pmxvf32gerpn (&acc, vec0, vec1, 15, 15);
dst[13] = acc;
}
/* Exercise the "pm" builtins __builtin_mma_pmxvf32gernp and
   __builtin_mma_pmxvf32gernn.

   DST: array of accumulators; the result is stored to dst[13] (the same
	index foo13 writes).
   SRC: array of accumulators; src[0] seeds the local accumulator.
   VEC: array of vectors; vec[0] and vec[1] are the two operands.

   These builtins take only two trailing immediates (15, 15).
   NOTE(review): presumably the x and y masks with every bit set -- confirm
   against the GCC MMA builtin documentation.  */
void
foo13b (__vector_quad *dst, __vector_quad *src, vec_t *vec)
{
__vector_quad acc;
vec_t vec0 = vec[0];
vec_t vec1 = vec[1];
/* Seed the accumulator from memory before the accumulating builtins.  */
acc = src[0];
__builtin_mma_pmxvf32gernp (&acc, vec0, vec1, 15, 15);
__builtin_mma_pmxvf32gernn (&acc, vec0, vec1, 15, 15);
dst[13] = acc;
}
/* { dg-final { scan-assembler-times {\mlxv\M} 40 } } */
/* { dg-final { scan-assembler-times {\mlxvp\M} 12 } } */
/* { dg-final { scan-assembler-times {\mstxvp\M} 40 } } */
/* { dg-final { scan-assembler-times {\mxxmfacc\M} 20 } } */
/* { dg-final { scan-assembler-times {\mxxmtacc\M} 6 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvbf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32ger\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gernn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gernp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gerpn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvf32gerpp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2s\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi16ger2spp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi4ger8\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi4ger8pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi8ger4\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi8ger4pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mxvi8ger4spp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvbf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2nn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2np\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2pn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32ger\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gernn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gernp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gerpn\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvf32gerpp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2s\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi16ger2spp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi4ger8\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi4ger8pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi8ger4\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi8ger4pp\M} 1 } } */
/* { dg-final { scan-assembler-times {\mpmxvi8ger4spp\M} 1 } } */