blob: d8c6748d4ee911c6869083b8811e9ac713a75861 [file] [log] [blame]
/* { dg-do compile } */
/* { dg-require-effective-target arm_neon_ok } */
/* { dg-require-effective-target arm_thumb2_ok } */
/* { dg-options "-O2 -mthumb" } */
/* { dg-add-options arm_neon } */
/* { dg-prune-output "switch .* conflicts with" } */
#include <arm_neon.h>
#include <stddef.h>
void *
memset (DST, C, LENGTH)
void *DST;
int C;
size_t LENGTH;
{
void* DST0 = DST;
unsigned char C_BYTE = C;
if (__builtin_expect(LENGTH < 4, 1)) {
size_t i = 0;
while (i < LENGTH) {
((char*)DST)[i] = C_BYTE;
i++;
}
return DST;
}
const char* DST_end = (char*)DST + LENGTH;
while ((uintptr_t)DST % 4 != 0) {
*(char*) (DST++) = C_BYTE;
}
uint32_t C_SHORTWORD = (uint32_t)(unsigned char)(C_BYTE) * 0x01010101;
if (__builtin_expect(DST_end - (char*)DST >= 16, 0)) {
while ((uintptr_t)DST % 16 != 0) {
*((uint32_t*)((char*)(DST) + (0))) = C_SHORTWORD;
DST += 4;
}
uint8x16_t C_WORD = vdupq_n_u8(C_BYTE);
size_t i = 0;
LENGTH = DST_end - (char*)DST;
while (i + 16 * 16 <= LENGTH) {
*((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 1))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 2))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 3))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 4))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 5))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 6))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 7))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 8))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 9))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 10))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 11))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 12))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 13))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 14))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 15))) = C_WORD;
i += 16 * 16;
}
while (i + 16 * 4 <= LENGTH) {
*((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 1))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 2))) = C_WORD;
*((uint8x16_t*)((char*)(DST) + (i + 16 * 3))) = C_WORD;
i += 16 * 4;
}
while (i + 16 <= LENGTH) {
*((uint8x16_t*)((char*)(DST) + (i))) = C_WORD;
i += 16;
}
DST += i;
}
while (4 <= DST_end - (char*)DST) {
*((uint32_t*)((char*)(DST) + (0))) = C_SHORTWORD;
DST += 4;
}
while ((char*)DST < DST_end) {
*((char*)DST) = C_BYTE;
DST++;
}
return DST0;
}