/* Repository-viewer header (not part of the test): blob ac4f821e77143e93ecb23db71b7100b37df770f2.  */
/* { dg-do assemble { target { aarch64*-*-* } } } */
/* { dg-require-effective-target arm_v8_2a_i8mm_ok } */
/* { dg-add-options arm_v8_2a_i8mm } */
/* { dg-additional-options "-save-temps" } */
/* { dg-final { check-function-bodies "**" "" {-O[^0]} } } */
/* { dg-skip-if "" { *-*-* } { "-fno-fat-lto-objects" } } */
#include <arm_neon.h>
/* Unsigned-Signed Dot Product instructions. */
/*
**ufoo:
**\s+usdot\s+v0\.2s, v1\.8b, v2\.8b
**\s+ret
*/
/* Pass accumulator R, unsigned bytes X and signed bytes Y straight to
   vusdot_s32 and return its result.  */
int32x2_t
ufoo (int32x2_t r, uint8x8_t x, int8x8_t y)
{
  int32x2_t acc = vusdot_s32 (r, x, y);
  return acc;
}
/*
**ufooq:
**\s+usdot\s+v0\.4s, v1\.16b, v2\.16b
**\s+ret
*/
/* 128-bit counterpart of the plain form: forward R, X and Y to
   vusdotq_s32 and return its result.  */
int32x4_t
ufooq (int32x4_t r, uint8x16_t x, int8x16_t y)
{
  int32x4_t acc = vusdotq_s32 (r, x, y);
  return acc;
}
/*
**ufoo_lane:
**\s+usdot\s+v0\.2s, v1\.8b, v2\.4b\[0\]
**\s+ret
*/
/* Call vusdot_lane_s32 on R, X and the 64-bit vector Y with a constant
   lane index of 0.  */
int32x2_t
ufoo_lane (int32x2_t r, uint8x8_t x, int8x8_t y)
{
  int32x2_t acc = vusdot_lane_s32 (r, x, y, 0);
  return acc;
}
/*
**ufoo_laneq:
**\s+usdot\s+v0\.2s, v1\.8b, v2\.4b\[2\]
**\s+ret
*/
/* Call vusdot_laneq_s32 on R, X and the 128-bit vector Y with a
   constant lane index of 2.  */
int32x2_t
ufoo_laneq (int32x2_t r, uint8x8_t x, int8x16_t y)
{
  int32x2_t acc = vusdot_laneq_s32 (r, x, y, 2);
  return acc;
}
/*
**ufooq_lane:
**\s+usdot\s+v0\.4s, v1\.16b, v2\.4b\[1\]
**\s+ret
*/
/* Call vusdotq_lane_s32 on the 128-bit R and X and the 64-bit vector Y
   with a constant lane index of 1.  */
int32x4_t
ufooq_lane (int32x4_t r, uint8x16_t x, int8x8_t y)
{
  int32x4_t acc = vusdotq_lane_s32 (r, x, y, 1);
  return acc;
}
/*
**ufooq_laneq:
**\s+usdot\s+v0\.4s, v1\.16b, v2\.4b\[3\]
**\s+ret
*/
/* Call vusdotq_laneq_s32 on R, X and the 128-bit vector Y with a
   constant lane index of 3.  */
int32x4_t
ufooq_laneq (int32x4_t r, uint8x16_t x, int8x16_t y)
{
  int32x4_t acc = vusdotq_laneq_s32 (r, x, y, 3);
  return acc;
}
/* Signed-Unsigned Dot Product instructions. */
/*
**sfoo_lane:
**\s+sudot\s+v0\.2s, v1\.8b, v2\.4b\[0\]
**\s+ret
*/
/* Call vsudot_lane_s32 on R, signed bytes X and the 64-bit unsigned
   vector Y with a constant lane index of 0.  */
int32x2_t
sfoo_lane (int32x2_t r, int8x8_t x, uint8x8_t y)
{
  int32x2_t acc = vsudot_lane_s32 (r, x, y, 0);
  return acc;
}
/*
**sfoo_laneq:
**\s+sudot\s+v0\.2s, v1\.8b, v2\.4b\[2\]
**\s+ret
*/
/* Call vsudot_laneq_s32 on R, signed bytes X and the 128-bit unsigned
   vector Y with a constant lane index of 2.  */
int32x2_t
sfoo_laneq (int32x2_t r, int8x8_t x, uint8x16_t y)
{
  int32x2_t acc = vsudot_laneq_s32 (r, x, y, 2);
  return acc;
}
/*
**sfooq_lane:
**\s+sudot\s+v0\.4s, v1\.16b, v2\.4b\[1\]
**\s+ret
*/
/* Call vsudotq_lane_s32 on the 128-bit R and signed X and the 64-bit
   unsigned vector Y with a constant lane index of 1.  */
int32x4_t
sfooq_lane (int32x4_t r, int8x16_t x, uint8x8_t y)
{
  int32x4_t acc = vsudotq_lane_s32 (r, x, y, 1);
  return acc;
}
/*
**sfooq_laneq:
**\s+sudot\s+v0\.4s, v1\.16b, v2\.4b\[3\]
**\s+ret
*/
/* Call vsudotq_laneq_s32 on R, signed bytes X and the 128-bit unsigned
   vector Y with a constant lane index of 3.  */
int32x4_t
sfooq_laneq (int32x4_t r, int8x16_t x, uint8x16_t y)
{
  int32x4_t acc = vsudotq_laneq_s32 (r, x, y, 3);
  return acc;
}
/*
**ufoo_untied:
**\s+mov\s+v0\.8b, v1\.8b
**\s+usdot\s+v0\.2s, v2\.8b, v3\.8b
**\s+ret
*/
/* Same call as the plain vusdot_s32 case, but with a leading parameter
   UNUSED that is never read.  The expected body preceding this function
   starts with a MOV into v0 before the USDOT.  */
int32x2_t
ufoo_untied (int32x2_t unused, int32x2_t r, uint8x8_t x, int8x8_t y)
{
  int32x2_t acc = vusdot_s32 (r, x, y);
  return acc;
}
/*
**ufooq_laneq_untied:
**\s+mov\s+v0\.16b, v1\.16b
**\s+usdot\s+v0\.4s, v2\.16b, v3\.4b\[3\]
**\s+ret
*/
/* Same call as the vusdotq_laneq_s32 case with lane index 3, but with a
   leading parameter UNUSED that is never read.  The expected body
   preceding this function starts with a MOV into v0 before the USDOT.  */
int32x4_t
ufooq_laneq_untied (int32x2_t unused, int32x4_t r, uint8x16_t x,
		    int8x16_t y)
{
  int32x4_t acc = vusdotq_laneq_s32 (r, x, y, 3);
  return acc;
}