blob: 515ee0c1728569fb4c2fb34595f502aa764f288e [file] [log] [blame]
/* Copyright (C) 2013-2021 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512FINTRIN_H_INCLUDED
#define _AVX512FINTRIN_H_INCLUDED
#ifndef __AVX512F__
#pragma GCC push_options
#pragma GCC target("avx512f")
#define __DISABLE_AVX512F__
#endif /* __AVX512F__ */
/* Internal data types for implementing the intrinsics. */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
/* Unaligned version of the same type. */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
extern __inline __mmask16
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_int2mask (int __M)
{
return (__mmask16) __M;
}
extern __inline int
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask2int (__mmask16 __M)
{
return (int) __M;
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi64 (long long __A, long long __B, long long __C,
long long __D, long long __E, long long __F,
long long __G, long long __H)
{
return __extension__ (__m512i) (__v8di)
{ __H, __G, __F, __E, __D, __C, __B, __A };
}
/* Create the vector [A B C D E F G H I J K L M N O P]. */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
int __E, int __F, int __G, int __H,
int __I, int __J, int __K, int __L,
int __M, int __N, int __O, int __P)
{
return __extension__ (__m512i)(__v16si)
{ __P, __O, __N, __M, __L, __K, __J, __I,
__H, __G, __F, __E, __D, __C, __B, __A };
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28,
short __q27, short __q26, short __q25, short __q24,
short __q23, short __q22, short __q21, short __q20,
short __q19, short __q18, short __q17, short __q16,
short __q15, short __q14, short __q13, short __q12,
short __q11, short __q10, short __q09, short __q08,
short __q07, short __q06, short __q05, short __q04,
short __q03, short __q02, short __q01, short __q00)
{
return __extension__ (__m512i)(__v32hi){
__q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
__q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
__q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
__q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31
};
}
extern __inline __m512i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60,
char __q59, char __q58, char __q57, char __q56,
char __q55, char __q54, char __q53, char __q52,
char __q51, char __q50, char __q49, char __q48,
char __q47, char __q46, char __q45, char __q44,
char __q43, char __q42, char __q41, char __q40,
char __q39, char __q38, char __q37, char __q36,
char __q35, char __q34, char __q33, char __q32,
char __q31, char __q30, char __q29, char __q28,
char __q27, char __q26, char __q25, char __q24,
char __q23, char __q22, char __q21, char __q20,
char __q19, char __q18, char __q17, char __q16,
char __q15, char __q14, char __q13, char __q12,
char __q11, char __q10, char __q09, char __q08,
char __q07, char __q06, char __q05, char __q04,
char __q03, char __q02, char __q01, char __q00)
{
return __extension__ (__m512i)(__v64qi){
__q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07,
__q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15,
__q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23,
__q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31,
__q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39,
__q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47,
__q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55,
__q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63
};
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_pd (double __A, double __B, double __C, double __D,
double __E, double __F, double __G, double __H)
{
return __extension__ (__m512d)
{ __H, __G, __F, __E, __D, __C, __B, __A };
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ps (float __A, float __B, float __C, float __D,
float __E, float __F, float __G, float __H,
float __I, float __J, float __K, float __L,
float __M, float __N, float __O, float __P)
{
return __extension__ (__m512)
{ __P, __O, __N, __M, __L, __K, __J, __I,
__H, __G, __F, __E, __D, __C, __B, __A };
}
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
_mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
e8,e9,e10,e11,e12,e13,e14,e15) \
_mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
_mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
_mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ps (void)
{
__m512 __Y = __Y;
return __Y;
}
#define _mm512_undefined _mm512_undefined_ps
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_pd (void)
{
__m512d __Y = __Y;
return __Y;
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_epi32 (void)
{
__m512i __Y = __Y;
return __Y;
}
#define _mm512_undefined_si512 _mm512_undefined_epi32
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi8 (char __A)
{
return __extension__ (__m512i)(__v64qi)
{ __A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A };
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi16 (short __A)
{
return __extension__ (__m512i)(__v32hi)
{ __A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A };
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_pd (double __A)
{
return __extension__ (__m512d)(__v8df)
{ __A, __A, __A, __A, __A, __A, __A, __A };
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ps (float __A)
{
return __extension__ (__m512)(__v16sf)
{ __A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A };
}
/* Create the vector [A B C D A B C D A B C D A B C D]. */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
return __extension__ (__m512i)(__v16si)
{ __D, __C, __B, __A, __D, __C, __B, __A,
__D, __C, __B, __A, __D, __C, __B, __A };
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
long long __D)
{
return __extension__ (__m512i) (__v8di)
{ __D, __C, __B, __A, __D, __C, __B, __A };
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
return __extension__ (__m512d)
{ __D, __C, __B, __A, __D, __C, __B, __A };
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
return __extension__ (__m512)
{ __D, __C, __B, __A, __D, __C, __B, __A,
__D, __C, __B, __A, __D, __C, __B, __A };
}
#define _mm512_setr4_epi64(e0,e1,e2,e3) \
_mm512_set4_epi64(e3,e2,e1,e0)
#define _mm512_setr4_epi32(e0,e1,e2,e3) \
_mm512_set4_epi32(e3,e2,e1,e0)
#define _mm512_setr4_pd(e0,e1,e2,e3) \
_mm512_set4_pd(e3,e2,e1,e0)
#define _mm512_setr4_ps(e0,e1,e2,e3) \
_mm512_set4_ps(e3,e2,e1,e0)
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
{
return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero (void)
{
return _mm512_setzero_ps ();
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_pd (void)
{
return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_epi32 (void)
{
return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_si512 (void)
{
return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_pd (void const *__P)
{
return *(__m512d *) __P;
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
(__v8df) __W,
(__mmask8) __U);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
{
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_pd (void *__P, __m512d __A)
{
*(__m512d *) __P = __A;
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
{
__builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
(__mmask8) __U);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ps (void const *__P)
{
return *(__m512 *) __P;
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
(__v16sf) __W,
(__mmask16) __U);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
{
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ps (void *__P, __m512 __A)
{
*(__m512 *) __P = __A;
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
{
__builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi64 (void const *__P)
{
return *(__m512i *) __P;
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi64 (void *__P, __m512i __A)
{
*(__m512i *) __P = __A;
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
__builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_si512 (void const *__P)
{
return *(__m512i *) __P;
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi32 (void const *__P)
{
return *(__m512i *) __P;
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_si512 (void *__P, __m512i __A)
{
*(__m512i *) __P = __A;
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi32 (void *__P, __m512i __A)
{
*(__m512i *) __P = __A;
}
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
__builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16su) __A * (__v16su) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
_mm512_setzero_si512 (),
__M);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __W, __M);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mullox_epi64 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v8du) __A * (__v8du) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
{
return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B));
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v8du) __A + (__v8du) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v8du) __A - (__v8du) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_undefined_pd (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi64 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16su) __A + (__v16su) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epi32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di) __W, __M);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_setzero_si512 (),
__M);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16su) __A - (__v16su) __B);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epu32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di) __W, __M);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_setzero_si512 (),
__M);
}
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
unsigned int __B)
{
return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#else
#define _mm512_slli_epi64(X, C) \
((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_undefined_epi32 (),\
(__mmask8)-1))
#define _mm512_mask_slli_epi64(W, U, X, C) \
((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)(W),\
(__mmask8)(U)))
#define _mm512_maskz_slli_epi64(U, X, C) \
((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_setzero_si512 (),\
(__mmask8)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi64 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#else
#define _mm512_srli_epi64(X, C) \
((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_undefined_epi32 (),\
(__mmask8)-1))
#define _mm512_mask_srli_epi64(W, U, X, C) \
((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)(W),\
(__mmask8)(U)))
#define _mm512_maskz_srli_epi64(U, X, C) \
((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_setzero_si512 (),\
(__mmask8)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi64 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
unsigned int __B)
{
return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#else
#define _mm512_srai_epi64(X, C) \
((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_undefined_epi32 (),\
(__mmask8)-1))
#define _mm512_mask_srai_epi64(W, U, X, C) \
((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)(W),\
(__mmask8)(U)))
#define _mm512_maskz_srai_epi64(U, X, C) \
((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_setzero_si512 (),\
(__mmask8)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi64 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
unsigned int __B)
{
return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#else
#define _mm512_slli_epi32(X, C) \
((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_undefined_epi32 (),\
(__mmask16)-1))
#define _mm512_mask_slli_epi32(W, U, X, C) \
((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)(W),\
(__mmask16)(U)))
#define _mm512_maskz_slli_epi32(U, X, C) \
((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_setzero_si512 (),\
(__mmask16)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi32 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#else
#define _mm512_srli_epi32(X, C) \
((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_undefined_epi32 (),\
(__mmask16)-1))
#define _mm512_mask_srli_epi32(W, U, X, C) \
((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)(W),\
(__mmask16)(U)))
#define _mm512_maskz_srli_epi32(U, X, C) \
((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_setzero_si512 (),\
(__mmask16)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi32 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
unsigned int __B)
{
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#else
#define _mm512_srai_epi32(X, C) \
((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_undefined_epi32 (),\
(__mmask16)-1))
#define _mm512_mask_srai_epi32(W, U, X, C) \
((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)(W),\
(__mmask16)(U)))
#define _mm512_maskz_srai_epi32(U, X, C) \
((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_setzero_si512 (),\
(__mmask16)(U)))
#endif
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi32 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
(__v2df) __B,
__R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
__m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
(__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
const int __R)
{
return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
(__v2df) __B,
(__v2df)
_mm_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
(__v4sf) __B,
__R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
__m128 __B, const int __R)
{
return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
(__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
const int __R)
{
return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
(__v4sf) __B,
(__v4sf)
_mm_setzero_ps (),
(__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
(__v2df) __B,
__R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
__m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
(__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
const int __R)
{
return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
(__v2df) __B,
(__v2df)
_mm_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
(__v4sf) __B,
__R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
__m128 __B, const int __R)
{
return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
(__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
const int __R)
{
return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
(__v4sf) __B,
(__v4sf)
_mm_setzero_ps (),
(__mmask8) __U, __R);
}
#else
#define _mm_add_round_sd(A, B, C) \
(__m128d)__builtin_ia32_addsd_round(A, B, C)
#define _mm_mask_add_round_sd(W, U, A, B, C) \
(__m128d)__builtin_ia32_addsd_mask_round(A, B, W, U, C)
#define _mm_maskz_add_round_sd(U, A, B, C) \
(__m128d)__builtin_ia32_addsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
#define _mm_add_round_ss(A, B, C) \
(__m128)__builtin_ia32_addss_round(A, B, C)
#define _mm_mask_add_round_ss(W, U, A, B, C) \
(__m128)__builtin_ia32_addss_mask_round(A, B, W, U, C)
#define _mm_maskz_add_round_ss(U, A, B, C) \
(__m128)__builtin_ia32_addss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
#define _mm_sub_round_sd(A, B, C) \
(__m128d)__builtin_ia32_subsd_round(A, B, C)
#define _mm_mask_sub_round_sd(W, U, A, B, C) \
(__m128d)__builtin_ia32_subsd_mask_round(A, B, W, U, C)
#define _mm_maskz_sub_round_sd(U, A, B, C) \
(__m128d)__builtin_ia32_subsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
#define _mm_sub_round_ss(A, B, C) \
(__m128)__builtin_ia32_subss_round(A, B, C)
#define _mm_mask_sub_round_ss(W, U, A, B, C) \
(__m128)__builtin_ia32_subss_mask_round(A, B, W, U, C)
#define _mm_maskz_sub_round_ss(U, A, B, C) \
(__m128)__builtin_ia32_subss_mask_round(A, B, (__v4sf)_mm_setzero_ps(), U, C)
#endif
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
const int __imm)
{
return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __C, __imm,
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
__m512i __C, const int __imm)
{
return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __C, __imm,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
__m512i __C, const int __imm)
{
return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
(__v8di) __B,
(__v8di) __C,
__imm, (__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
const int __imm)
{
return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __C,
__imm, (__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
__m512i __C, const int __imm)
{
return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __C,
__imm, (__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
__m512i __C, const int __imm)
{
return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
(__v16si) __B,
(__v16si) __C,
__imm, (__mmask16) __U);
}
#else
#define _mm512_ternarylogic_epi64(A, B, C, I) \
((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
(__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
(__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
(__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I) \
((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
(__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
(__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
(__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
(__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
(__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
(__mmask16)(U)))
#endif
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_ps (__m512 __A)
{
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
(__v2df) __A);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
(__v2df) __A,
(__v2df) __W,
(__mmask8) __U);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B,
(__v2df) __A,
(__v2df) _mm_setzero_ps (),
(__mmask8) __U);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
(__v4sf) __A);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
(__v4sf) __A,
(__v4sf) __W,
(__mmask8) __U);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B,
(__v4sf) __A,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_ps (__m512 __A)
{
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
(__v2df) __A);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
(__v2df) __A,
(__v2df) __W,
(__mmask8) __U);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B,
(__v2df) __A,
(__v2df) _mm_setzero_pd (),
(__mmask8) __U);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
(__v4sf) __A);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
(__v4sf) __A,
(__v4sf) __W,
(__mmask8) __U);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B,
(__v4sf) __A,
(__v4sf) _mm_setzero_ps (),
(__mmask8) __U);
}
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_pd (__m512d __A, const int __R)
{
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
const int __R)
{
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_ps (__m512 __A, const int __R)
{
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
{
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
(__v2df) __A,
(__v2df)
_mm_setzero_pd (),
(__mmask8) -1, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B,
const int __R)
{
return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
(__v2df) __A,
(__v2df) __W,
(__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B,
(__v2df) __A,
(__v2df)
_mm_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
(__v4sf) __A,
(__v4sf)
_mm_setzero_ps (),
(__mmask8) -1, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B,
const int __R)
{
return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
(__v4sf) __A,
(__v4sf) __W,
(__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B,
(__v4sf) __A,
(__v4sf)
_mm_setzero_ps (),
(__mmask8) __U, __R);
}
#else
#define _mm512_sqrt_round_pd(A, C) \
(__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
(__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C)
#define _mm512_maskz_sqrt_round_pd(U, A, C) \
(__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_sqrt_round_ps(A, C) \
(__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
(__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C)
#define _mm512_maskz_sqrt_round_ps(U, A, C) \
(__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm_sqrt_round_sd(A, B, C) \
(__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
(__v2df) _mm_setzero_pd (), -1, C)
#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
(__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, W, U, C)
#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
(__m128d)__builtin_ia32_sqrtsd_mask_round (B, A, \
(__v2df) _mm_setzero_pd (), U, C)
#define _mm_sqrt_round_ss(A, B, C) \
(__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
(__v4sf) _mm_setzero_ps (), -1, C)
#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
(__m128)__builtin_ia32_sqrtss_mask_round (B, A, W, U, C)
#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
(__m128)__builtin_ia32_sqrtss_mask_round (B, A, \
(__v4sf) _mm_setzero_ps (), U, C)
#endif
#define _mm_mask_sqrt_sd(W, U, A, B) \
_mm_mask_sqrt_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_sqrt_sd(U, A, B) \
_mm_maskz_sqrt_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_mask_sqrt_ss(W, U, A, B) \
_mm_mask_sqrt_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION)
#define _mm_maskz_sqrt_ss(U, A, B) \
_mm_maskz_sqrt_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION)
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi8_epi32 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi8_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi16_epi32 (__m256i __A)
{
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
{
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
{
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi16_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_epi64 (__m256i __X)
{
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
{
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
{
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu8_epi32 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu8_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu16_epi32 (__m256i __A)
{
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
{
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
(__v16si) __W,
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
{
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu16_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu32_epi64 (__m256i __X)
{
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
{
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
(__v8di) __W,
(__mmask8) __U);
}
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
{
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
__m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __W,
(__mmask8) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
const int __R)
{
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
__m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __W,
(__mmask8) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
const int __R)
{
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
#else
#define _mm512_add_round_pd(A, B, C) \
(__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)
#define _mm512_maskz_add_round_pd(U, A, B, C) \
(__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_add_round_ps(A, B, C) \
(__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)
#define _mm512_maskz_add_round_ps(U, A, B, C) \
(__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_sub_round_pd(A, B, C) \
(__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)
#define _mm512_maskz_sub_round_pd(U, A, B, C) \
(__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_sub_round_ps(A, B, C) \
(__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)
#define _mm512_maskz_sub_round_ps(U, A, B, C) \
(__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#endif
#ifdef __OPTIMIZE__
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
__m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __W,
(__mmask8) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
const int __R)
{
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
{
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
(__v8df) __V,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
__m512d __V, const int __R)
{
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
(__v8df) __V,
(__v8df) __W,
(__mmask8) __U, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
const int __R)
{
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
(__v8df) __V,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
(__v2df) __B,
__R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
__m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
(__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
const int __R)
{
return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A,
(__v2df) __B,
(__v2df)
_mm_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
(__v4sf) __B,
__R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
__m128 __B, const int __R)
{
return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
(__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
const int __R)
{
return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A,
(__v4sf) __B,
(__v4sf)
_mm_setzero_ps (),
(__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
(__v2df) __B,
__R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
__m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
(__v2df) __B,
(__v2df) __W,
(__mmask8) __U, __R);
}
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
const int __R)
{
return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A,
(__v2df) __B,
(__v2df)
_mm_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
(__v4sf) __B,
__R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
__m128 __B, const int __R)
{
return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
(__v4sf) __B,
(__v4sf) __W,
(__mmask8) __U, __R);
}
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
const int __R)
{
return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A,
(__v4sf) __B,
(__v4sf)
_mm_setzero_ps (),
(__mmask8) __U, __R);
}
#else
#define _mm512_mul_round_pd(A, B, C) \
(__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
#define _mm512_maskz_mul_round_pd(U, A, B, C) \
(__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_mul_round_ps(A, B, C) \
(__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
#define _mm512_maskz_mul_round_ps(U, A, B, C) \
(__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_div_round_pd(A, B, C) \
(__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_div_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
#define _mm512_maskz_div_round_pd(U, A, B, C) \
(__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_div_round_ps(A, B, C) \
(__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_div_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
#define _mm512_maskz_div_round_ps(U, A, B, C) \
(__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm_mul_round_sd(A, B, C) \
(__m128d)__builtin_ia32_mulsd_round(A, B, C)
#define _mm_mask_mul_round_sd(W, U, A, B, C) \
(__m128d)__builtin_ia32_mulsd_mask_round(A, B, W, U, C)
#define _mm_maskz_mul_round_sd(U, A, B, C) \
(__m128d)__builtin_ia32_mulsd_mask_round(A, B, (__v2df)_mm_setzero_pd(), U, C)
#define _mm_mul_round_ss(A, B, C) \
(__m128)__builtin_ia32_mulss_round(A, B, C)
#define _mm_mask_mul_round_ss(W, U, A, B, C) \
(