blob: 06a4a2ec6dfe56b0c16b9bc6752e0b7e486cd441 [file] [log] [blame]
/* { dg-do run } */
/* { dg-require-effective-target xop } */
/* { dg-options "-O2 -mxop" } */
#include "xop-check.h"
#include <x86intrin.h>
#include <string.h>
#define NUM 10
union
{
__m128i x[NUM];
unsigned char ssi[NUM * 16];
unsigned short si[NUM * 8];
unsigned int li[NUM * 4];
unsigned long long lli[NUM * 2];
} dst, res, src1;
static void
init_byte ()
{
int i;
for (i=0; i < NUM * 16; i++)
src1.ssi[i] = i;
}
static void
init_word ()
{
int i;
for (i=0; i < NUM * 8; i++)
src1.si[i] = i;
}
static void
init_dword ()
{
int i;
for (i=0; i < NUM * 4; i++)
src1.li[i] = i;
}
static int
check_byte2word ()
{
int i, j, s, t, check_fails = 0;
for (i = 0; i < NUM * 16; i = i + 16)
{
for (j = 0; j < 8; j++)
{
t = i + (2 * j);
s = (i / 2) + j;
res.si[s] = src1.ssi[t] + src1.ssi[t + 1] ;
if (res.si[s] != dst.si[s])
check_fails++;
}
}
return check_fails;
}
static int
check_byte2dword ()
{
int i, j, s, t, check_fails = 0;
for (i = 0; i < NUM * 16; i = i + 16)
{
for (j = 0; j < 4; j++)
{
t = i + (4 * j);
s = (i / 4) + j;
res.li[s] = (src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
+ src1.ssi[t + 3]);
if (res.li[s] != dst.li[s])
check_fails++;
}
}
return check_fails;
}
static int
check_byte2qword ()
{
int i, j, s, t, check_fails = 0;
for (i = 0; i < NUM * 16; i = i + 16)
{
for (j = 0; j < 2; j++)
{
t = i + (8 * j);
s = (i / 8) + j;
res.lli[s] = ((src1.ssi[t] + src1.ssi[t + 1]) + (src1.ssi[t + 2]
+ src1.ssi[t + 3])) + ((src1.ssi[t + 4] + src1.ssi[t +5])
+ (src1.ssi[t + 6] + src1.ssi[t + 7]));
if (res.lli[s] != dst.lli[s])
check_fails++;
}
}
return check_fails;
}
static int
check_word2dword ()
{
int i, j, s, t, check_fails = 0;
for (i = 0; i < NUM * 8; i = i + 8)
{
for (j = 0; j < 4; j++)
{
t = i + (2 * j);
s = (i / 2) + j;
res.li[s] = src1.si[t] + src1.si[t + 1] ;
if (res.li[s] != dst.li[s])
check_fails++;
}
}
return check_fails;
}
static int
check_word2qword ()
{
int i, j, s, t, check_fails = 0;
for (i = 0; i < NUM * 8; i = i + 8)
{
for (j = 0; j < 2; j++)
{
t = i + (4 * j);
s = (i / 4) + j;
res.lli[s] = (src1.si[t] + src1.si[t + 1]) + (src1.si[t + 2]
+ src1.si[t + 3]);
if (res.lli[s] != dst.lli[s])
check_fails++;
}
}
return check_fails;
}
static int
check_dword2qword ()
{
int i, j, s, t, check_fails = 0;
for (i = 0; i < NUM * 4; i = i + 4)
{
for (j = 0; j < 2; j++)
{
t = i + (2 * j);
s = (i / 2) + j;
res.lli[s] = src1.li[t] + src1.li[t + 1] ;
if (res.lli[s] != dst.lli[s])
check_fails++;
}
}
return check_fails;
}
static void
xop_test (void)
{
int i;
/* Check haddubw */
init_byte ();
for (i = 0; i < NUM; i++)
dst.x[i] = _mm_haddw_epu8 (src1.x[i]);
if (check_byte2word())
abort ();
/* Check haddubd */
for (i = 0; i < NUM; i++)
dst.x[i] = _mm_haddd_epu8 (src1.x[i]);
if (check_byte2dword())
abort ();
/* Check haddubq */
for (i = 0; i < NUM; i++)
dst.x[i] = _mm_haddq_epu8 (src1.x[i]);
if (check_byte2qword())
abort ();
/* Check hadduwd */
init_word ();
for (i = 0; i < NUM; i++)
dst.x[i] = _mm_haddd_epu16 (src1.x[i]);
if (check_word2dword())
abort ();
/* Check haddbuwq */
for (i = 0; i < NUM; i++)
dst.x[i] = _mm_haddq_epu16 (src1.x[i]);
if (check_word2qword())
abort ();
/* Check hadudq */
init_dword ();
for (i = 0; i < NUM; i++)
dst.x[i] = _mm_haddq_epu32 (src1.x[i]);
if (check_dword2qword())
abort ();
}