| #include <nmmintrin.h> |
| #include <string.h> |
| |
/* Bit masks used to build the flag word returned by cmp_flags.  Each
   mask occupies its own bit so the flags can be OR-ed together.  */
#define CFLAG (1 << 0)  /* CF */
#define ZFLAG (1 << 1)  /* ZF */
#define SFLAG (1 << 2)  /* SF */
#define OFLAG (1 << 3)  /* OF */
#define AFLAG (1 << 4)  /* AF */
#define PFLAG (1 << 5)  /* PF */
| |
/* Fill the equality matrix for two 16-byte operands.

   X and Y are arrays of 1-byte or 2-byte elements; the number of
   elements compared is 16 / sizeof (element), i.e. 16 for bytes and 8
   for 2-byte words.  For every pair (i, j) in that range
   RES[j][i] is set to 1 when X[i] == Y[j] and to 0 otherwise, so rows
   are indexed by Y and columns by X.  Entries of RES outside that range
   are left untouched.

   The macro expands to a single statement (do/while (0)) and uses
   non-reserved, suffixed local names so it is safe inside unbraced
   if/else bodies.  Arguments are evaluated several times; pass plain
   lvalues.  */
#define PCMPSTR_EQ(X, Y, RES) \
  do                                                                    \
    {                                                                   \
      const int pcmpstr_n_ = (int) (16 / sizeof (*(X)));                \
      int pcmpstr_i_, pcmpstr_j_;                                       \
      for (pcmpstr_i_ = 0; pcmpstr_i_ < pcmpstr_n_; pcmpstr_i_++)       \
        for (pcmpstr_j_ = 0; pcmpstr_j_ < pcmpstr_n_; pcmpstr_j_++)     \
          (RES)[pcmpstr_j_][pcmpstr_i_]                                 \
            = ((X)[pcmpstr_i_] == (Y)[pcmpstr_j_]);                     \
    }                                                                   \
  while (0)
| |
/* Fill the range-comparison matrix for two 16-byte operands.

   X holds consecutive (lower, upper) bound pairs and Y holds the
   elements to classify; both are arrays of 1-byte or 2-byte elements.
   With N = 16 / sizeof (element), for every j < N and every even
   i < N - 1:
     RES[j][i]     = (Y[j] >= X[i])        lower-bound check
     RES[j][i + 1] = (Y[j] <= X[i + 1])    upper-bound check
   Comparisons use the element type of the arguments, so signedness
   follows the arrays passed in.  Entries of RES outside the N x N
   corner are left untouched.

   The macro expands to a single statement (do/while (0)) and uses
   non-reserved, suffixed local names.  Arguments are evaluated several
   times; pass plain lvalues.  */
#define PCMPSTR_RNG(X, Y, RES) \
  do                                                                    \
    {                                                                   \
      const int pcmpstr_n_ = (int) (16 / sizeof (*(X)));                \
      int pcmpstr_i_, pcmpstr_j_;                                       \
      for (pcmpstr_j_ = 0; pcmpstr_j_ < pcmpstr_n_; pcmpstr_j_++)       \
        for (pcmpstr_i_ = 0; pcmpstr_i_ < pcmpstr_n_ - 1;               \
             pcmpstr_i_ += 2)                                           \
          {                                                             \
            (RES)[pcmpstr_j_][pcmpstr_i_]                               \
              = ((Y)[pcmpstr_j_] >= (X)[pcmpstr_i_]);                   \
            (RES)[pcmpstr_j_][pcmpstr_i_ + 1]                           \
              = ((Y)[pcmpstr_j_] <= (X)[pcmpstr_i_ + 1]);               \
          }                                                             \
    }                                                                   \
  while (0)
| |
/* Overwrite the entries of the DIM x DIM comparison matrix RES that
   involve elements beyond the string lengths.

   Columns are indexed by the first operand (valid while < LA), rows by
   the second operand (valid while < LB).  Entries where both indices
   are valid are left unchanged.  A valid column paired with an invalid
   row is forced to 0.  For an invalid column the forced value depends
   on the aggregation bits of MODE (MODE & 0x0C):
     _SIDD_CMP_EQUAL_ANY, _SIDD_CMP_RANGES  -> 0
     _SIDD_CMP_EQUAL_EACH                   -> 1 if the row is invalid
                                               too, else 0
     _SIDD_CMP_EQUAL_ORDERED                -> 1  */
static void
override_invalid (unsigned char res[16][16], int la, int lb,
                  const int mode, int dim)
{
  const int aggregation = mode & 0x0C;
  int row, col;

  for (row = 0; row < dim; row++)
    {
      const int row_is_valid = row < lb;

      for (col = 0; col < dim; col++)
        {
          if (col < la)
            {
              /* Valid column: only touched when the row is invalid.  */
              if (!row_is_valid)
                res[row][col] = 0;
              continue;
            }

          /* Invalid column: value is dictated by the aggregation.  */
          if (aggregation == _SIDD_CMP_EQUAL_ORDERED)
            res[row][col] = 1;
          else if (aggregation == _SIDD_CMP_EQUAL_EACH)
            res[row][col] = row_is_valid ? 0 : 1;
          else
            res[row][col] = 0;
        }
    }
}
| |
/* Build the element-by-element comparison matrix for operands A and B.

   The element type is selected by MODE & 3 (unsigned/signed bytes or
   words).  When the aggregation bits (MODE & 0x0C) equal
   _SIDD_CMP_RANGES the matrix is filled with PCMPSTR_RNG, otherwise
   with PCMPSTR_EQ; in both cases A supplies the columns and B the rows.
   Finally override_invalid patches the entries that fall outside the
   lengths LA and LB.  */
static void
calc_matrix (__m128i a, int la, __m128i b, int lb, const int mode,
             unsigned char res[16][16])
{
  union
    {
      __m128i x;
      signed char sc[16];
      unsigned char uc[16];
      signed short ss[8];
      unsigned short us[8];
    } lhs, rhs;
  const int use_ranges = (mode & 0x0C) == _SIDD_CMP_RANGES;
  const int elems = (mode & 1) ? 8 : 16;

  lhs.x = a;
  rhs.x = b;

  if (use_ranges)
    {
      switch (mode & 3)
        {
        case _SIDD_UBYTE_OPS:
          PCMPSTR_RNG (lhs.uc, rhs.uc, res);
          break;
        case _SIDD_UWORD_OPS:
          PCMPSTR_RNG (lhs.us, rhs.us, res);
          break;
        case _SIDD_SBYTE_OPS:
          PCMPSTR_RNG (lhs.sc, rhs.sc, res);
          break;
        case _SIDD_SWORD_OPS:
          PCMPSTR_RNG (lhs.ss, rhs.ss, res);
          break;
        }
    }
  else
    {
      switch (mode & 3)
        {
        case _SIDD_UBYTE_OPS:
          PCMPSTR_EQ (lhs.uc, rhs.uc, res);
          break;
        case _SIDD_UWORD_OPS:
          PCMPSTR_EQ (lhs.us, rhs.us, res);
          break;
        case _SIDD_SBYTE_OPS:
          PCMPSTR_EQ (lhs.sc, rhs.sc, res);
          break;
        case _SIDD_SWORD_OPS:
          PCMPSTR_EQ (lhs.ss, rhs.ss, res);
          break;
        }
    }

  override_invalid (res, la, lb, mode, elems);
}
| |
/* Compute the result bit vector of a string comparison of A (length
   LA) against B (length LB) under control byte MODE.

   Steps:
     1. The lengths are replaced by their absolute values, saturated to
        the element count DIM (16 for byte modes, 8 for word modes).
     2. calc_matrix builds the comparison matrix (rows = B elements,
        columns = A elements).
     3. The matrix is reduced to one bit per element according to the
        aggregation bits (MODE & 0x0C).
     4. The polarity bits (MODE & 0x30) optionally invert the result;
        the masked-negative form inverts only the bits below LB.

   Returns the result restricted to the low DIM bits.  */
static int
calc_res (__m128i a, int la, __m128i b, int lb, const int mode)
{
  unsigned char mtx[16][16];
  int i, j, dim, res = 0;

  memset (mtx, 0, sizeof (mtx));

  dim = (mode & 1) == 0 ? 16 : 8;

  /* Saturating absolute value.  The range test comes first so that
     INT_MIN is never negated (that would be signed overflow); any
     magnitude above DIM simply becomes DIM.  */
  if (la < -dim || la > dim)
    la = dim;
  else if (la < 0)
    la = -la;

  if (lb < -dim || lb > dim)
    lb = dim;
  else if (lb < 0)
    lb = -lb;

  calc_matrix (a, la, b, lb, mode, mtx);

  switch ((mode & 0x0C))
    {
    case _SIDD_CMP_EQUAL_ANY:
      /* Bit i is set when row i matches any column.  */
      for (i = 0; i < dim; i++)
        for (j = 0; j < dim; j++)
          if (mtx[i][j])
            res |= (1 << i);
      break;

    case _SIDD_CMP_RANGES:
      /* Bit j is set when row j passes both checks of any
         (lower, upper) column pair.  */
      for (i = 0; i < dim; i += 2)
        for (j = 0; j < dim; j++)
          if (mtx[j][i] && mtx[j][i + 1])
            res |= (1 << j);
      break;

    case _SIDD_CMP_EQUAL_EACH:
      /* Bit i is the diagonal entry.  */
      for (i = 0; i < dim; i++)
        if (mtx[i][i])
          res |= (1 << i);
      break;

    case _SIDD_CMP_EQUAL_ORDERED:
      /* Bit i is set when the whole diagonal starting at row i,
         column 0 is non-zero.  */
      for (i = 0; i < dim; i++)
        {
          unsigned char val = 1;

          for (j = 0; i + j < dim; j++)
            val &= mtx[i + j][j];

          if (val)
            res |= (1 << i);
        }
      break;
    }

  switch ((mode & 0x30))
    {
    case _SIDD_POSITIVE_POLARITY:
    case _SIDD_MASKED_POSITIVE_POLARITY:
      break;

    case _SIDD_NEGATIVE_POLARITY:
      res ^= -1;
      break;

    case _SIDD_MASKED_NEGATIVE_POLARITY:
      /* Flip only bits 0 .. lb-1; lb is in [0, dim] here, so the
         shift is well defined.  */
      res ^= (1 << lb) - 1;
      break;
    }

  return res & ((dim == 8) ? 0xFF : 0xFFFF);
}
| |
| static int |
| cmp_flags (__m128i a, int la, __m128i b, int lb, |
| int mode, int res2, int is_implicit) |
| { |
| int i; |
| int flags = 0; |
| int is_bytes_mode = (mode & 1) == 0; |
| union |
| { |
| __m128i x; |
| unsigned char uc[16]; |
| unsigned short us[8]; |
| } d, s; |
| |
| d.x = a; |
| s.x = b; |
| |
| /* CF: reset if (RES2 == 0), set otherwise. */ |
| if (res2 != 0) |
| flags |= CFLAG; |
| |
| if (is_implicit) |
| { |
| /* ZF: set if any byte/word of src xmm operand is null, reset |
| otherwise. |
| SF: set if any byte/word of dst xmm operand is null, reset |
| otherwise. */ |
| |
| if (is_bytes_mode) |
| { |
| for (i = 0; i < 16; i++) |
| { |
| if (s.uc[i] == 0) |
| flags |= ZFLAG; |
| if (d.uc[i] == 0) |
| flags |= SFLAG; |
| } |
| } |
| else |
| { |
| for (i = 0; i < 8; i++) |
| { |
| if (s.us[i] == 0) |
| flags |= ZFLAG; |
| if (d.us[i] == 0) |
| flags |= SFLAG; |
| } |
| } |
| } |
| else |
| { |
| /* ZF: set if abs value of EDX/RDX < 16 (8), reset otherwise. |
| SF: set if abs value of EAX/RAX < 16 (8), reset otherwise. */ |
| int max_ind = is_bytes_mode ? 16 : 8; |
| |
| if (la < 0) |
| la = -la; |
| if (lb < 0) |
| lb = -lb; |
| |
| if (lb < max_ind) |
| flags |= ZFLAG; |
| if (la < max_ind) |
| flags |= SFLAG; |
| } |
| |
| /* OF: equal to RES2[0]. */ |
| if ((res2 & 0x1)) |
| flags |= OFLAG; |
| |
| /* AF: Reset. |
| PF: Reset. */ |
| return flags; |
| } |
| |
/* Run the comparison via calc_res and return the index of one set bit
   of its result.

   When bit 0x40 of MODE is set the highest set bit is reported,
   otherwise the lowest.  If no bit is set the element count (16 for
   byte modes, 8 for word modes) is returned.  The raw result bit
   vector is stored in *RES2.  */
static int
cmp_indexed (__m128i a, int la, __m128i b, int lb,
             const int mode, int *res2)
{
  const int nelem = (mode & 1) ? 8 : 16;
  const int bits = calc_res (a, la, b, lb, mode);
  int found = nelem;
  int pos;

  if (mode & 0x40)
    {
      /* Scan downwards for the most significant set bit.  */
      pos = nelem;
      while (pos-- > 0)
        if (bits & (1 << pos))
          {
            found = pos;
            break;
          }
    }
  else
    {
      /* Scan upwards for the least significant set bit.  */
      for (pos = 0; pos < nelem; ++pos)
        if (bits & (1 << pos))
          {
            found = pos;
            break;
          }
    }

  *res2 = bits;
  return found;
}
| |
/* Run the comparison via calc_res and return its result as a 128-bit
   mask.

   When bit 0x40 of MODE is set, each element of the returned vector
   (byte or word, per MODE & 1) is all-ones if the corresponding result
   bit is set and zero otherwise.  When it is clear, the vector is zero
   except for its first short (byte modes) or first char (word modes),
   which is copied from the first short/char of the int result as laid
   out in memory.  The raw result bit vector is stored in *RES2.  */
static __m128i
cmp_masked (__m128i a, int la, __m128i b, int lb,
            const int mode, int *res2)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } out;
  union
    {
      int i;
      char c[4];
      short s[2];
    } bits;
  const int nelem = (mode & 1) ? 8 : 16;
  int k;

  bits.i = calc_res (a, la, b, lb, mode);

  memset (&out, 0, sizeof (out));

  if (!(mode & 0x40))
    {
      /* Bit-mask form: copy the packed result bits.  */
      if (nelem == 8)
        out.c[0] = bits.c[0];
      else
        out.s[0] = bits.s[0];
    }
  else if (nelem == 8)
    {
      /* Expanded form, word elements.  */
      for (k = 0; k < 8; k++)
        out.s[k] = (bits.i & (1 << k)) ? -1 : 0;
    }
  else
    {
      /* Expanded form, byte elements.  */
      for (k = 0; k < 16; k++)
        out.c[k] = (bits.i & (1 << k)) ? -1 : 0;
    }

  *res2 = bits.i;

  return out.x;
}
| |
/* Return the number of leading non-zero elements of A.

   Elements are 16-bit words when bit 0 of MODE is set and bytes
   otherwise.  If no element is zero the element count (8 or 16) is
   returned.  */
static int
calc_str_len (__m128i a, const int mode)
{
  union
    {
      __m128i x;
      char c[16];
      short s[8];
    } v;
  const int wide = mode & 1;
  const int limit = wide ? 8 : 16;
  int n = 0;

  v.x = a;

  while (n < limit && (wide ? v.s[n] != 0 : v.c[n] != 0))
    n++;

  return n;
}
| |
/* Index-returning comparison with caller-supplied lengths LA and LB.
   Returns the index computed by cmp_indexed; when FLAGS is not null it
   also receives the flag word from cmp_flags (explicit-length form).  */
static inline int
cmp_ei (__m128i *a, int la, __m128i *b, int lb,
        const int mode, int *flags)
{
  int intres2;
  const int ndx = cmp_indexed (*a, la, *b, lb, mode, &intres2);

  if (flags)
    *flags = cmp_flags (*a, la, *b, lb, mode, intres2, 0);

  return ndx;
}
| |
/* Index-returning comparison where the lengths are derived from the
   operands with calc_str_len.  Returns the index computed by
   cmp_indexed; when FLAGS is not null it also receives the flag word
   from cmp_flags (implicit-length form).  */
static inline int
cmp_ii (__m128i *a, __m128i *b, const int mode, int *flags)
{
  const int len_a = calc_str_len (*a, mode);
  const int len_b = calc_str_len (*b, mode);
  int intres2;
  const int ndx = cmp_indexed (*a, len_a, *b, len_b, mode, &intres2);

  if (flags)
    *flags = cmp_flags (*a, len_a, *b, len_b, mode, intres2, 1);

  return ndx;
}
| |
| static inline __m128i |
| cmp_em (__m128i *a, int la, __m128i *b, int lb, |
| const int mode, int *flags ) |
| { |
| int res2; |
| __m128i mask = cmp_masked (*a, la, *b, lb, mode, &res2); |
| |
| if (flags != NULL) |
| *flags = cmp_flags (*a, la, *b, lb, mode, res2, 0); |
| |
| return mask; |
| } |
| |
| static inline __m128i |
| cmp_im (__m128i *a, __m128i *b, const int mode, int *flags) |
| { |
| int la, lb; |
| int res2; |
| __m128i mask; |
| |
| la = calc_str_len (*a, mode); |
| lb = calc_str_len (*b, mode); |
| |
| mask = cmp_masked (*a, la, *b, lb, mode, &res2); |
| if (flags != NULL) |
| *flags = cmp_flags (*a, la, *b, lb, mode, res2, 1); |
| |
| return mask; |
| } |