blob: 999b5c8edabd926d042aeca3fa2127070a30d806 [file] [log] [blame]
#include <nmmintrin.h>
#include <string.h>
/* One bit per status flag in the flags word built by cmp_flags.  In the
   code visible here only CFLAG, ZFLAG, SFLAG and OFLAG are ever set;
   AFLAG and PFLAG are defined but always left clear.  */
#define CFLAG (1 << 0)
#define ZFLAG (1 << 1)
#define SFLAG (1 << 2)
#define OFLAG (1 << 3)
#define AFLAG (1 << 4)
#define PFLAG (1 << 5)
/* Fill RES with the all-pairs equality matrix of X and Y.

   X and Y are arrays (or pointer expressions) whose elements are 1 or 2
   bytes wide; RES is a two-dimensional array indexed [Y index][X index].
   After expansion RES[j][i] is 1 when X[i] == Y[j] and 0 otherwise.

   The element count comes from the element size: (1 ^ 3) * 8 == 16 for
   1-byte elements and (2 ^ 3) * 8 == 8 for 2-byte elements.

   Every argument is parenthesized so that expression arguments (for
   example "p + 0") group correctly.  Each argument is evaluated many
   times, so it must be free of side effects.  The expansion is a plain
   brace block, usable with or without a trailing semicolon.  */
#define PCMPSTR_EQ(X, Y, RES) \
  { \
    int pcmpstr_n_ = (sizeof (*(X)) ^ 3) * 8; \
    int pcmpstr_i_, pcmpstr_j_; \
    for (pcmpstr_i_ = 0; pcmpstr_i_ < pcmpstr_n_; pcmpstr_i_++) \
      for (pcmpstr_j_ = 0; pcmpstr_j_ < pcmpstr_n_; pcmpstr_j_++) \
        (RES)[pcmpstr_j_][pcmpstr_i_] \
          = ((X)[pcmpstr_i_] == (Y)[pcmpstr_j_]); \
  }
/* Fill RES with the range-comparison matrix of X and Y.

   X is read as consecutive (lower, upper) pairs: X[0]/X[1], X[2]/X[3],
   and so on.  For every Y[j] and every pair starting at even index i,
   RES[j][i] is 1 when Y[j] >= X[i] and RES[j][i + 1] is 1 when
   Y[j] <= X[i + 1]; otherwise the cell is 0.  The comparison is signed
   or unsigned according to the element type of the arrays passed in.

   The element count comes from the element size: (1 ^ 3) * 8 == 16 for
   1-byte elements and (2 ^ 3) * 8 == 8 for 2-byte elements.

   Every argument is parenthesized so that expression arguments group
   correctly.  Each argument is evaluated many times, so it must be free
   of side effects.  The expansion is a plain brace block, usable with or
   without a trailing semicolon.  */
#define PCMPSTR_RNG(X, Y, RES) \
  { \
    int pcmpstr_n_ = (sizeof (*(X)) ^ 3) * 8; \
    int pcmpstr_i_, pcmpstr_j_; \
    for (pcmpstr_j_ = 0; pcmpstr_j_ < pcmpstr_n_; pcmpstr_j_++) \
      for (pcmpstr_i_ = 0; pcmpstr_i_ < pcmpstr_n_ - 1; pcmpstr_i_ += 2) \
        { \
          (RES)[pcmpstr_j_][pcmpstr_i_] \
            = ((Y)[pcmpstr_j_] >= (X)[pcmpstr_i_]); \
          (RES)[pcmpstr_j_][pcmpstr_i_ + 1] \
            = ((Y)[pcmpstr_j_] <= (X)[pcmpstr_i_ + 1]); \
        } \
  }
/* Overwrite the cells of the comparison matrix RES that involve an
   element past the end of either operand.

   RES is indexed [row][column]; columns at or beyond LA and rows at or
   beyond LB are treated as invalid.  Only the leading DIM x DIM corner
   is visited.  The replacement value depends on the aggregation selected
   by bits 2-3 of MODE:

     column valid, row invalid      -> 0 for every aggregation
     column invalid, equal-any      -> 0
     column invalid, ranges         -> 0
     column invalid, equal-each     -> 1 if the row is invalid too, else 0
     column invalid, equal-ordered  -> 1

   Cells where both the column and the row are valid are left alone.  */
static void
override_invalid (unsigned char res[16][16], int la, int lb,
                  const int mode, int dim)
{
  const int aggregation = mode & 0x0C;
  int row, col;

  for (row = 0; row < dim; row++)
    {
      const int row_invalid = row >= lb;

      for (col = 0; col < dim; col++)
        {
          if (col < la)
            {
              /* Valid column: only forced when the row is invalid.  */
              if (row_invalid)
                res[row][col] = 0;
              continue;
            }

          /* Invalid column: the forced value depends on the aggregation.  */
          if (aggregation == _SIDD_CMP_EQUAL_ORDERED)
            res[row][col] = 1;
          else if (aggregation == _SIDD_CMP_EQUAL_EACH)
            res[row][col] = row_invalid ? 1 : 0;
          else if (aggregation == _SIDD_CMP_EQUAL_ANY
                   || aggregation == _SIDD_CMP_RANGES)
            res[row][col] = 0;
        }
    }
}
/* Build the element-by-element comparison matrix RES for operands A
   and B.

   Bits 0-1 of MODE choose how the 128-bit operands are viewed (unsigned
   bytes, unsigned words, signed bytes or signed words).  When bits 2-3
   select _SIDD_CMP_RANGES the matrix is produced by PCMPSTR_RNG,
   otherwise by PCMPSTR_EQ.  A supplies the column elements and B the row
   elements.  Finally override_invalid rewrites the cells that lie beyond
   the lengths LA and LB, using 16 as the dimension for byte modes and 8
   for word modes.  */
static void
calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
             unsigned char res[16][16])
{
  union
    {
      __m128i x;
      signed char sc[16];
      unsigned char uc[16];
      signed short ss[8];
      unsigned short us[8];
    } cols, rows;
  const int elem_kind = mode & 3;
  const int dim = (mode & 1) ? 8 : 16;

  cols.x = a;
  rows.x = b;

  if ((mode & 0x0C) == _SIDD_CMP_RANGES)
    {
      switch (elem_kind)
        {
        case _SIDD_UBYTE_OPS:
          PCMPSTR_RNG (cols.uc, rows.uc, res);
          break;
        case _SIDD_UWORD_OPS:
          PCMPSTR_RNG (cols.us, rows.us, res);
          break;
        case _SIDD_SBYTE_OPS:
          PCMPSTR_RNG (cols.sc, rows.sc, res);
          break;
        case _SIDD_SWORD_OPS:
          PCMPSTR_RNG (cols.ss, rows.ss, res);
          break;
        }
    }
  else
    {
      switch (elem_kind)
        {
        case _SIDD_UBYTE_OPS:
          PCMPSTR_EQ (cols.uc, rows.uc, res);
          break;
        case _SIDD_UWORD_OPS:
          PCMPSTR_EQ (cols.us, rows.us, res);
          break;
        case _SIDD_SBYTE_OPS:
          PCMPSTR_EQ (cols.sc, rows.sc, res);
          break;
        case _SIDD_SWORD_OPS:
          PCMPSTR_EQ (cols.ss, rows.ss, res);
          break;
        }
    }

  override_invalid (res, la, lb, mode, dim);
}
/* Compute the result bitmask of a packed string comparison of A against
   B (the value the callers in this file store through their res2
   pointer).

   LA and LB are the operand lengths in elements.  Their magnitude is
   used and saturated to the element count DIM (16 for byte modes, 8 for
   word modes).  Bits 2-3 of MODE select how the comparison matrix from
   calc_matrix is reduced to one bit per element of B:

     equal-any      bit i set if row i has any non-zero cell
     ranges         bit j set if row j satisfies any (lower, upper) pair
     equal-each     bit i set if diagonal cell [i][i] is non-zero
     equal-ordered  bit i set if the diagonal starting at row i, column 0
                    is non-zero all the way to the last row

   Bits 4-5 of MODE then select the polarity: unchanged, all bits
   inverted, or only the bits below LB inverted.  The return value is
   truncated to DIM bits.  */
static int
calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
{
  unsigned char mtx[16][16];
  int i, j, k, dim, res = 0;

  memset (mtx, 0, sizeof (mtx));

  dim = (mode & 1) == 0 ? 16 : 8;

  /* Saturate |la| and |lb| to dim.  The lower bound is clamped before
     negating so that INT_MIN is never negated (signed overflow).  */
  if (la < -dim)
    la = -dim;
  if (la < 0)
    la = -la;
  if (la > dim)
    la = dim;

  if (lb < -dim)
    lb = -dim;
  if (lb < 0)
    lb = -lb;
  if (lb > dim)
    lb = dim;

  calc_matrix (a, la, b, lb, mode, mtx);

  switch ((mode & 0x0C))
    {
    case _SIDD_CMP_EQUAL_ANY:
      for (i = 0; i < dim; i++)
        for (j = 0; j < dim; j++)
          if (mtx[i][j])
            res |= (1 << i);
      break;

    case _SIDD_CMP_RANGES:
      /* Columns i and i + 1 hold the lower and upper bound tests.  */
      for (i = 0; i < dim; i += 2)
        for (j = 0; j < dim; j++)
          if (mtx[j][i] && mtx[j][i + 1])
            res |= (1 << j);
      break;

    case _SIDD_CMP_EQUAL_EACH:
      for (i = 0; i < dim; i++)
        if (mtx[i][i])
          res |= (1 << i);
      break;

    case _SIDD_CMP_EQUAL_ORDERED:
      for (i = 0; i < dim; i++)
        {
          unsigned char val = 1;

          /* Walk the diagonal that starts at row i, column 0.  */
          for (j = 0, k = i; j < dim - i && k < dim; j++, k++)
            val &= mtx[k][j];

          if (val)
            res |= (1 << i);
          else
            res &= ~(1 << i);
        }
      break;
    }

  switch ((mode & 0x30))
    {
    case _SIDD_POSITIVE_POLARITY:
    case _SIDD_MASKED_POSITIVE_POLARITY:
      break;

    case _SIDD_NEGATIVE_POLARITY:
      res ^= -1;
      break;

    case _SIDD_MASKED_NEGATIVE_POLARITY:
      /* Invert only the bits that correspond to elements below lb.  */
      for (i = 0; i < lb; i++)
        if (res & (1 << i))
          res &= ~(1 << i);
        else
          res |= (1 << i);
      break;
    }

  return res & ((dim == 8) ? 0xFF : 0xFFFF);
}
/* Compute the flags word that accompanies a packed string comparison.

   A and B are the two operands, LA and LB their explicit lengths, MODE
   the control byte, and RES2 the result bitmask already computed for the
   comparison.  IS_IMPLICIT is non-zero for the implicit-length forms, in
   which case LA and LB are ignored and the operands are scanned for a
   zero element instead.

   The return value is an OR of CFLAG, ZFLAG, SFLAG and OFLAG; AFLAG and
   PFLAG are never set.  */
static int
cmp_flags (__m128i a, int la, __m128i b, int lb,
           int mode, int res2, int is_implicit)
{
  int i;
  int flags = 0;
  int is_bytes_mode = (mode & 1) == 0;
  union
    {
      __m128i x;
      unsigned char uc[16];
      unsigned short us[8];
    } d, s;

  d.x = a;
  s.x = b;

  /* CF: reset if (RES2 == 0), set otherwise.  */
  if (res2 != 0)
    flags |= CFLAG;

  if (is_implicit)
    {
      /* ZF: set if any byte/word of src xmm operand is null, reset
         otherwise.
         SF: set if any byte/word of dst xmm operand is null, reset
         otherwise.  */
      if (is_bytes_mode)
        {
          for (i = 0; i < 16; i++)
            {
              if (s.uc[i] == 0)
                flags |= ZFLAG;
              if (d.uc[i] == 0)
                flags |= SFLAG;
            }
        }
      else
        {
          for (i = 0; i < 8; i++)
            {
              if (s.us[i] == 0)
                flags |= ZFLAG;
              if (d.us[i] == 0)
                flags |= SFLAG;
            }
        }
    }
  else
    {
      /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise.
         SF: set if abs value of EAX/RAX < 16 (8), reset otherwise.  */
      int max_ind = is_bytes_mode ? 16 : 8;

      /* |len| < max_ind is tested as -max_ind < len < max_ind so that a
         length of INT_MIN is never negated (signed overflow).  */
      if (lb > -max_ind && lb < max_ind)
        flags |= ZFLAG;

      if (la > -max_ind && la < max_ind)
        flags |= SFLAG;
    }

  /* OF: equal to RES2[0].  */
  if ((res2 & 0x1))
    flags |= OFLAG;

  /* AF: Reset.
     PF: Reset.  */
  return flags;
}
/* Index-returning form of the comparison.

   Computes the result bitmask with calc_res, stores it in *RES2, and
   returns the position of one set bit in it: the highest set bit when
   bit 6 (0x40) of MODE is set, the lowest set bit otherwise.  When no
   bit is set the element count (16 for byte modes, 8 for word modes) is
   returned.  */
static int
cmp_indexed (__m128i a, int la, __m128i b, int lb,
             const int mode, int *res2)
{
  const int width = (mode & 1) ? 8 : 16;
  const int bits = calc_res (a, la, b, lb, mode);
  int pos;

  *res2 = bits;

  if (mode & 0x40)
    {
      /* Scan downwards from the top bit.  */
      pos = width - 1;
      while (pos >= 0 && !(bits & (1 << pos)))
        pos--;
      return pos < 0 ? width : pos;
    }

  /* Scan upwards from bit 0; stops at width when nothing is set.  */
  pos = 0;
  while (pos < width && !(bits & (1 << pos)))
    pos++;
  return pos;
}
/* Mask-returning form of the comparison.

   Computes the result bitmask with calc_res and stores it in *RES2.  The
   returned vector starts out all zero and is then filled in one of two
   ways:

     bit 6 (0x40) of MODE set    every result bit is widened to a whole
                                 element: all ones when the bit is set,
                                 zero otherwise (16 bytes or 8 words).
     bit 6 (0x40) of MODE clear  the leading bytes of the bitmask are
                                 copied to the start of the vector: two
                                 bytes in byte mode, one in word mode.  */
static __m128i
cmp_masked (__m128i a, int la, __m128i b, int lb,
            const int mode, int *res2)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } out;
  const int word_mode = (mode & 1) != 0;
  const int bits = calc_res (a, la, b, lb, mode);
  int n;

  memset (&out, 0, sizeof (out));

  if (!(mode & 0x40))
    {
      /* Same bytes the int/short/char overlay would transfer.  */
      memcpy (&out, &bits, word_mode ? sizeof (char) : sizeof (short));
    }
  else if (word_mode)
    {
      for (n = 0; n < 8; n++)
        out.s[n] = (bits & (1 << n)) ? -1 : 0;
    }
  else
    {
      for (n = 0; n < 16; n++)
        out.c[n] = (bits & (1 << n)) ? -1 : 0;
    }

  *res2 = bits;

  return out.x;
}
/* Return the number of elements of A that precede its first zero
   element.  Elements are 16-bit words when bit 0 of MODE is set and
   bytes otherwise.  If no element is zero, the full element count (8
   words or 16 bytes) is returned.  */
static int
calc_str_len (__m128i a, const int mode)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } view;
  int len = 0;

  view.x = a;

  if (mode & 1)
    {
      while (len < 8 && view.s[len] != 0)
        len++;
    }
  else
    {
      while (len < 16 && view.c[len] != 0)
        len++;
    }

  return len;
}
/* Explicit-length, index-returning comparison.  Runs cmp_indexed on *A
   and *B with the caller-supplied lengths LA and LB and returns its
   index.  When FLAGS is not NULL, *FLAGS receives the value of cmp_flags
   computed in explicit-length mode.  */
static inline int
cmp_ei (__m128i *a, int la, __m128i *b, int lb,
        const int mode, int *flags)
{
  int bits;
  const int pos = cmp_indexed (*a, la, *b, lb, mode, &bits);

  if (flags)
    *flags = cmp_flags (*a, la, *b, lb, mode, bits, 0);

  return pos;
}
/* Implicit-length, index-returning comparison.  The operand lengths are
   obtained from calc_str_len, then cmp_indexed is run on *A and *B and
   its index returned.  When FLAGS is not NULL, *FLAGS receives the value
   of cmp_flags computed in implicit-length mode.  */
static inline int
cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
{
  const int len_a = calc_str_len (*a, mode);
  const int len_b = calc_str_len (*b, mode);
  int bits;
  const int pos = cmp_indexed (*a, len_a, *b, len_b, mode, &bits);

  if (flags)
    *flags = cmp_flags (*a, len_a, *b, len_b, mode, bits, 1);

  return pos;
}
/* Explicit-length, mask-returning comparison.  Runs cmp_masked on *A and
   *B with the caller-supplied lengths LA and LB and returns its vector.
   When FLAGS is not NULL, *FLAGS receives the value of cmp_flags
   computed in explicit-length mode.  */
static inline __m128i
cmp_em (__m128i *a, int la, __m128i *b, int lb,
        const int mode, int *flags )
{
  int bits;
  const __m128i result = cmp_masked (*a, la, *b, lb, mode, &bits);

  if (flags)
    *flags = cmp_flags (*a, la, *b, lb, mode, bits, 0);

  return result;
}
static inline __m128i
cmp_im (__m128i *a, __m128i *b, const int mode, int *flags)
{
int la, lb;
int res2;
__m128i mask;
la = calc_str_len (*a, mode);
lb = calc_str_len (*b, mode);
mask = cmp_masked (*a, la, *b, lb, mode, &res2);
if (flags != NULL)
*flags = cmp_flags (*a, la, *b, lb, mode, res2, 1);
return mask;
}