/* blob: c43f0199adcd348370edabf045a532e9abb436e7 [file] [log] [blame] */
/* { dg-do compile } */
/* { dg-options "-O2" } */
/* { dg-require-effective-target lp64 } */
#include <stdint.h>
/* A one-byte set of a variable value should be a scalar store.
**set1byte:
** strb w1, \[x0\]
** ret
*/
/* Set the first byte at SRC to the run-time value C through
   __builtin_memset with a constant length of 1.  SRC is declared as
   int64_t *, but only a single byte is written.  The function is marked
   noinline; the check-function-bodies directive in this file matches
   its assembly against the pattern in the preceding comment.  */
void __attribute__((__noinline__))
set1byte (int64_t *src, char c)
{
__builtin_memset (src, c, 1);
}
/* Special cases for setting 0. */
/* 1-byte should be STRB with wzr
**set0byte:
** strb wzr, \[x0\]
** ret
*/
/* Zero the first byte at SRC through __builtin_memset with a constant
   value of 0 and a constant length of 1.  The preceding pattern expects
   the store to use the zero register directly.  Marked noinline; the
   check-function-bodies directive in this file matches the assembly of
   this function by name.  */
void __attribute__((__noinline__))
set0byte (int64_t *src)
{
__builtin_memset (src, 0, 1);
}
/* 35 bytes would become 4 scalar instructions, so favour NEON.
**set0neon:
** movi v0.4s, 0
** stp q0, q0, \[x0\]
** str wzr, \[x0, 31\]
** ret
*/
/* Zero the first 35 bytes at SRC through __builtin_memset with constant
   arguments.  The preceding pattern expects a vector-register store pair
   followed by a word store at offset 31, so the final store overlaps
   the bytes already written in order to reach byte 34.  Marked noinline;
   the check-function-bodies directive in this file matches the assembly
   of this function by name.  */
void __attribute__((__noinline__))
set0neon (int64_t *src)
{
/* The length 35 is the case under test; see the comment above the
   function.  */
__builtin_memset (src, 0, 35);
}
/* 36 bytes should be scalar, however.
**set0scalar:
** stp xzr, xzr, \[x0\]
** stp xzr, xzr, \[x0, 16\]
** str wzr, \[x0, 32\]
** ret
*/
/* Zero the first 36 bytes at SRC through __builtin_memset with constant
   arguments.  The preceding pattern expects only zero-register stores:
   two store pairs followed by one word store at offset 32.  Marked
   noinline; the check-function-bodies directive in this file matches
   the assembly of this function by name.  */
void __attribute__((__noinline__))
set0scalar (int64_t *src)
{
/* The length 36 is the case under test; see the comment above the
   function.  */
__builtin_memset (src, 0, 36);
}
/* A 256-byte set is still expanded inline.
**set256byte:
** dup v0.16b, w1
** stp q0, q0, \[x0\]
** stp q0, q0, \[x0, 32\]
** stp q0, q0, \[x0, 64\]
** stp q0, q0, \[x0, 96\]
** stp q0, q0, \[x0, 128\]
** stp q0, q0, \[x0, 160\]
** stp q0, q0, \[x0, 192\]
** stp q0, q0, \[x0, 224\]
** ret
*/
/* Fill the first 256 bytes at SRC with the run-time value C through
   __builtin_memset with a constant length.  The preceding pattern
   expects C to be duplicated into a vector register and written with
   eight store pairs, with no call to memset.  Marked noinline; the
   check-function-bodies directive in this file matches the assembly of
   this function by name.  */
void __attribute__((__noinline__))
set256byte (int64_t *src, char c)
{
/* 256 is the largest length this file expects to be expanded inline.  */
__builtin_memset (src, c, 256);
}
/* More than 256 bytes becomes a call to memset.
**set257byte:
** mov x2, 257
** mov w1, 99
** b memset
*/
/* Fill the first 257 bytes at SRC with the constant character 'c'
   through __builtin_memset.  The preceding pattern expects the length
   (257) and the value (99, the character code of 'c') to be loaded into
   argument registers, followed by a branch to memset rather than an
   inline expansion.  Marked noinline; the check-function-bodies
   directive in this file matches the assembly of this function by
   name.  */
void __attribute__((__noinline__))
set257byte (int64_t *src)
{
/* 257 is one byte past the largest length expanded inline in this file.  */
__builtin_memset (src, 'c', 257);
}
/* { dg-final { check-function-bodies "**" "" "" } } */