| /* Copyright (C) 2013-2022 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #ifndef _IMMINTRIN_H_INCLUDED |
| #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." |
| #endif |
| |
| #ifndef _AVX512FINTRIN_H_INCLUDED |
| #define _AVX512FINTRIN_H_INCLUDED |
| |
| #ifndef __AVX512F__ |
| #pragma GCC push_options |
| #pragma GCC target("avx512f") |
| #define __DISABLE_AVX512F__ |
| #endif /* __AVX512F__ */ |
| |
/* Element-typed 512-bit vectors used by the intrinsic bodies below.  Every
   one of them is 64 bytes wide; the name spells out lane count and lane
   type.  */

/* Floating-point lanes.  */
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef double __v8df __attribute__ ((__vector_size__ (64)));

/* Integer lanes, narrowest first (the byte vector uses plain `char').  */
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));

/* Explicitly unsigned integer lanes, narrowest first.  */
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));

/* User-visible vector types.  The Intel API is flexible enough that these
   must be allowed to alias other vector types and their scalar components,
   hence __may_alias__.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));

/* The same three types with the alignment requirement lowered to one
   byte, for accesses to unaligned storage.  */
typedef float __m512_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef double __m512d_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));
typedef long long __m512i_u __attribute__ ((__vector_size__ (64), __may_alias__, __aligned__ (1)));

/* Mask types used by the 8-lane and 16-lane masked intrinsics.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
| |
| extern __inline __mmask16 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_int2mask (int __M) |
| { |
| return (__mmask16) __M; |
| } |
| |
| extern __inline int |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask2int (__mmask16 __M) |
| { |
| return (int) __M; |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set_epi64 (long long __A, long long __B, long long __C, |
| long long __D, long long __E, long long __F, |
| long long __G, long long __H) |
| { |
| return __extension__ (__m512i) (__v8di) |
| { __H, __G, __F, __E, __D, __C, __B, __A }; |
| } |
| |
| /* Create the vector [A B C D E F G H I J K L M N O P]. */ |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set_epi32 (int __A, int __B, int __C, int __D, |
| int __E, int __F, int __G, int __H, |
| int __I, int __J, int __K, int __L, |
| int __M, int __N, int __O, int __P) |
| { |
| return __extension__ (__m512i)(__v16si) |
| { __P, __O, __N, __M, __L, __K, __J, __I, |
| __H, __G, __F, __E, __D, __C, __B, __A }; |
| } |
| |
| extern __inline __m512i |
| __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set_epi16 (short __q31, short __q30, short __q29, short __q28, |
| short __q27, short __q26, short __q25, short __q24, |
| short __q23, short __q22, short __q21, short __q20, |
| short __q19, short __q18, short __q17, short __q16, |
| short __q15, short __q14, short __q13, short __q12, |
| short __q11, short __q10, short __q09, short __q08, |
| short __q07, short __q06, short __q05, short __q04, |
| short __q03, short __q02, short __q01, short __q00) |
| { |
| return __extension__ (__m512i)(__v32hi){ |
| __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, |
| __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15, |
| __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23, |
| __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31 |
| }; |
| } |
| |
| extern __inline __m512i |
| __attribute__((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set_epi8 (char __q63, char __q62, char __q61, char __q60, |
| char __q59, char __q58, char __q57, char __q56, |
| char __q55, char __q54, char __q53, char __q52, |
| char __q51, char __q50, char __q49, char __q48, |
| char __q47, char __q46, char __q45, char __q44, |
| char __q43, char __q42, char __q41, char __q40, |
| char __q39, char __q38, char __q37, char __q36, |
| char __q35, char __q34, char __q33, char __q32, |
| char __q31, char __q30, char __q29, char __q28, |
| char __q27, char __q26, char __q25, char __q24, |
| char __q23, char __q22, char __q21, char __q20, |
| char __q19, char __q18, char __q17, char __q16, |
| char __q15, char __q14, char __q13, char __q12, |
| char __q11, char __q10, char __q09, char __q08, |
| char __q07, char __q06, char __q05, char __q04, |
| char __q03, char __q02, char __q01, char __q00) |
| { |
| return __extension__ (__m512i)(__v64qi){ |
| __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, |
| __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15, |
| __q16, __q17, __q18, __q19, __q20, __q21, __q22, __q23, |
| __q24, __q25, __q26, __q27, __q28, __q29, __q30, __q31, |
| __q32, __q33, __q34, __q35, __q36, __q37, __q38, __q39, |
| __q40, __q41, __q42, __q43, __q44, __q45, __q46, __q47, |
| __q48, __q49, __q50, __q51, __q52, __q53, __q54, __q55, |
| __q56, __q57, __q58, __q59, __q60, __q61, __q62, __q63 |
| }; |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set_pd (double __A, double __B, double __C, double __D, |
| double __E, double __F, double __G, double __H) |
| { |
| return __extension__ (__m512d) |
| { __H, __G, __F, __E, __D, __C, __B, __A }; |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set_ps (float __A, float __B, float __C, float __D, |
| float __E, float __F, float __G, float __H, |
| float __I, float __J, float __K, float __L, |
| float __M, float __N, float __O, float __P) |
| { |
| return __extension__ (__m512) |
| { __P, __O, __N, __M, __L, __K, __J, __I, |
| __H, __G, __F, __E, __D, __C, __B, __A }; |
| } |
| |
/* "Reversed" constructors.  The arguments are listed starting with lane 0,
   so each macro hands them to the matching _mm512_set_* intrinsic in the
   opposite order.  */
#define _mm512_setr_epi64(v0, v1, v2, v3, v4, v5, v6, v7) \
  _mm512_set_epi64 (v7, v6, v5, v4, v3, v2, v1, v0)

#define _mm512_setr_epi32(v0, v1, v2, v3, v4, v5, v6, v7, \
                          v8, v9, v10, v11, v12, v13, v14, v15) \
  _mm512_set_epi32 (v15, v14, v13, v12, v11, v10, v9, v8, \
                    v7, v6, v5, v4, v3, v2, v1, v0)

#define _mm512_setr_pd(v0, v1, v2, v3, v4, v5, v6, v7) \
  _mm512_set_pd (v7, v6, v5, v4, v3, v2, v1, v0)

#define _mm512_setr_ps(v0, v1, v2, v3, v4, v5, v6, v7, \
                       v8, v9, v10, v11, v12, v13, v14, v15) \
  _mm512_set_ps (v15, v14, v13, v12, v11, v10, v9, v8, \
                 v7, v6, v5, v4, v3, v2, v1, v0)
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_undefined_ps (void) |
| { |
| __m512 __Y = __Y; |
| return __Y; |
| } |
| |
| #define _mm512_undefined _mm512_undefined_ps |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_undefined_pd (void) |
| { |
| __m512d __Y = __Y; |
| return __Y; |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_undefined_epi32 (void) |
| { |
| __m512i __Y = __Y; |
| return __Y; |
| } |
| |
| #define _mm512_undefined_si512 _mm512_undefined_epi32 |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set1_epi8 (char __A) |
| { |
| return __extension__ (__m512i)(__v64qi) |
| { __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A }; |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set1_epi16 (short __A) |
| { |
| return __extension__ (__m512i)(__v32hi) |
| { __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A }; |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set1_pd (double __A) |
| { |
| return __extension__ (__m512d)(__v8df) |
| { __A, __A, __A, __A, __A, __A, __A, __A }; |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set1_ps (float __A) |
| { |
| return __extension__ (__m512)(__v16sf) |
| { __A, __A, __A, __A, __A, __A, __A, __A, |
| __A, __A, __A, __A, __A, __A, __A, __A }; |
| } |
| |
| /* Create the vector [A B C D A B C D A B C D A B C D]. */ |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set4_epi32 (int __A, int __B, int __C, int __D) |
| { |
| return __extension__ (__m512i)(__v16si) |
| { __D, __C, __B, __A, __D, __C, __B, __A, |
| __D, __C, __B, __A, __D, __C, __B, __A }; |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set4_epi64 (long long __A, long long __B, long long __C, |
| long long __D) |
| { |
| return __extension__ (__m512i) (__v8di) |
| { __D, __C, __B, __A, __D, __C, __B, __A }; |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set4_pd (double __A, double __B, double __C, double __D) |
| { |
| return __extension__ (__m512d) |
| { __D, __C, __B, __A, __D, __C, __B, __A }; |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_set4_ps (float __A, float __B, float __C, float __D) |
| { |
| return __extension__ (__m512) |
| { __D, __C, __B, __A, __D, __C, __B, __A, |
| __D, __C, __B, __A, __D, __C, __B, __A }; |
| } |
| |
/* "Reversed" four-value constructors: the arguments are listed lowest lane
   first and are passed to the matching _mm512_set4_* intrinsic in the
   opposite order.  */
#define _mm512_setr4_epi64(v0, v1, v2, v3) \
  _mm512_set4_epi64 (v3, v2, v1, v0)

#define _mm512_setr4_epi32(v0, v1, v2, v3) \
  _mm512_set4_epi32 (v3, v2, v1, v0)

#define _mm512_setr4_pd(v0, v1, v2, v3) \
  _mm512_set4_pd (v3, v2, v1, v0)

#define _mm512_setr4_ps(v0, v1, v2, v3) \
  _mm512_set4_ps (v3, v2, v1, v0)
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_setzero_ps (void) |
| { |
| return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, |
| 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_setzero (void) |
| { |
| return _mm512_setzero_ps (); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_setzero_pd (void) |
| { |
| return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_setzero_epi32 (void) |
| { |
| return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_setzero_si512 (void) |
| { |
| return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) |
| { |
| return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, |
| (__v8df) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) |
| { |
| return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A, |
| (__v8df) |
| _mm512_setzero_pd (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) |
| { |
| return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, |
| (__v16sf) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) |
| { |
| return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A, |
| (__v16sf) |
| _mm512_setzero_ps (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_load_pd (void const *__P) |
| { |
| return *(__m512d *) __P; |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) |
| { |
| return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, |
| (__v8df) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_load_pd (__mmask8 __U, void const *__P) |
| { |
| return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, |
| (__v8df) |
| _mm512_setzero_pd (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline void |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_store_pd (void *__P, __m512d __A) |
| { |
| *(__m512d *) __P = __A; |
| } |
| |
| extern __inline void |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A) |
| { |
| __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_load_ps (void const *__P) |
| { |
| return *(__m512 *) __P; |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) |
| { |
| return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, |
| (__v16sf) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_load_ps (__mmask16 __U, void const *__P) |
| { |
| return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, |
| (__v16sf) |
| _mm512_setzero_ps (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline void |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_store_ps (void *__P, __m512 __A) |
| { |
| *(__m512 *) __P = __A; |
| } |
| |
| extern __inline void |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A) |
| { |
| __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) |
| { |
| return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) |
| { |
| return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_load_epi64 (void const *__P) |
| { |
| return *(__m512i *) __P; |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) |
| { |
| return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) |
| { |
| return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline void |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_store_epi64 (void *__P, __m512i __A) |
| { |
| *(__m512i *) __P = __A; |
| } |
| |
| extern __inline void |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) |
| { |
| __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) |
| { |
| return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) |
| { |
| return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_load_si512 (void const *__P) |
| { |
| return *(__m512i *) __P; |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_load_epi32 (void const *__P) |
| { |
| return *(__m512i *) __P; |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) |
| { |
| return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) |
| { |
| return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline void |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_store_si512 (void *__P, __m512i __A) |
| { |
| *(__m512i *) __P = __A; |
| } |
| |
| extern __inline void |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_store_epi32 (void *__P, __m512i __A) |
| { |
| *(__m512i *) __P = __A; |
| } |
| |
| extern __inline void |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) |
| { |
| __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mullo_epi32 (__m512i __A, __m512i __B) |
| { |
| return (__m512i) ((__v16su) __A * (__v16su) __B); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B) |
| { |
| return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, |
| (__v16si) __B, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| __M); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) |
| { |
| return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A, |
| (__v16si) __B, |
| (__v16si) __W, __M); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mullox_epi64 (__m512i __A, __m512i __B) |
| { |
| return (__m512i) ((__v8du) __A * (__v8du) __B); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_mullox_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) |
| { |
| return _mm512_mask_mov_epi64 (__W, __M, _mm512_mullox_epi64 (__A, __B)); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sllv_epi32 (__m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_srav_epi32 (__m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_srlv_epi32 (__m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_add_epi64 (__m512i __A, __m512i __B) |
| { |
| return (__m512i) ((__v8du) __A + (__v8du) __B); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
| { |
| return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, |
| (__v8di) __B, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
| { |
| return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A, |
| (__v8di) __B, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sub_epi64 (__m512i __A, __m512i __B) |
| { |
| return (__m512i) ((__v8du) __A - (__v8du) __B); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) |
| { |
| return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, |
| (__v8di) __B, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B) |
| { |
| return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A, |
| (__v8di) __B, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sllv_epi64 (__m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, |
| (__v8di) __Y, |
| (__v8di) |
| _mm512_undefined_pd (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, |
| (__v8di) __Y, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, |
| (__v8di) __Y, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_srav_epi64 (__m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, |
| (__v8di) __Y, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, |
| (__v8di) __Y, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X, |
| (__v8di) __Y, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_srlv_epi64 (__m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, |
| (__v8di) __Y, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, |
| (__v8di) __Y, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X, |
| (__v8di) __Y, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_add_epi32 (__m512i __A, __m512i __B) |
| { |
| return (__m512i) ((__v16su) __A + (__v16su) __B); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
| { |
| return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, |
| (__v16si) __B, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
| { |
| return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A, |
| (__v16si) __B, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mul_epi32 (__m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v8di) __W, __M); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| __M); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sub_epi32 (__m512i __A, __m512i __B) |
| { |
| return (__m512i) ((__v16su) __A - (__v16su) __B); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) |
| { |
| return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, |
| (__v16si) __B, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B) |
| { |
| return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A, |
| (__v16si) __B, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mul_epu32 (__m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v8di) __W, __M); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X, |
| (__v16si) __Y, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| __M); |
| } |
| |
/* _mm512_slli_epi64 and its mask/maskz forms, built on
   __builtin_ia32_psllqi512_mask with a scalar count.  They are
   always-inline functions when __OPTIMIZE__ is defined and macros
   expanding to the same builtin call otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  __v8di __undef = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i)
    __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, __undef,
                                   (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  __v8di __src = (__v8di) __W;

  return (__m512i)
    __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, __src,
                                   (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i)
    __builtin_ia32_psllqi512_mask ((__v8di) __A, __B, __zero,
                                   (__mmask8) __U);
}
#else
/* Macro forms: the count argument is converted to int.  */
#define _mm512_slli_epi64(A, B) \
  ((__m512i) \
   __builtin_ia32_psllqi512_mask ((__v8di) (__m512i) (A), (int) (B), \
                                  (__v8di) (__m512i) \
                                  _mm512_undefined_epi32 (), \
                                  (__mmask8) -1))

#define _mm512_mask_slli_epi64(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_psllqi512_mask ((__v8di) (__m512i) (A), (int) (B), \
                                  (__v8di) (__m512i) (W), \
                                  (__mmask8) (U)))

#define _mm512_maskz_slli_epi64(U, A, B) \
  ((__m512i) \
   __builtin_ia32_psllqi512_mask ((__v8di) (__m512i) (A), (int) (B), \
                                  (__v8di) (__m512i) \
                                  _mm512_setzero_si512 (), \
                                  (__mmask8) (U)))
#endif
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sll_epi64 (__m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, |
| (__v2di) __B, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, |
| (__v2di) __B, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A, |
| (__v2di) __B, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
/* _mm512_srli_epi64 and its mask/maskz forms, built on
   __builtin_ia32_psrlqi512_mask with a scalar count.  They are
   always-inline functions when __OPTIMIZE__ is defined and macros
   expanding to the same builtin call otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  __v8di __undef = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i)
    __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __undef,
                                   (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
                        __m512i __A, unsigned int __B)
{
  __v8di __src = (__v8di) __W;

  return (__m512i)
    __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __src,
                                   (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i)
    __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B, __zero,
                                   (__mmask8) __U);
}
#else
/* Macro forms: the count argument is converted to int.  */
#define _mm512_srli_epi64(A, B) \
  ((__m512i) \
   __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (A), (int) (B), \
                                  (__v8di) (__m512i) \
                                  _mm512_undefined_epi32 (), \
                                  (__mmask8) -1))

#define _mm512_mask_srli_epi64(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (A), (int) (B), \
                                  (__v8di) (__m512i) (W), \
                                  (__mmask8) (U)))

#define _mm512_maskz_srli_epi64(U, A, B) \
  ((__m512i) \
   __builtin_ia32_psrlqi512_mask ((__v8di) (__m512i) (A), (int) (B), \
                                  (__v8di) (__m512i) \
                                  _mm512_setzero_si512 (), \
                                  (__mmask8) (U)))
#endif
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_srl_epi64 (__m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, |
| (__v2di) __B, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, |
| (__v2di) __B, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A, |
| (__v2di) __B, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
/* _mm512_srai_epi64 and its mask/maskz forms, built on
   __builtin_ia32_psraqi512_mask with a scalar count.  They are
   always-inline functions when __OPTIMIZE__ is defined and macros
   expanding to the same builtin call otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  __v8di __undef = (__v8di) _mm512_undefined_epi32 ();

  return (__m512i)
    __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __undef,
                                   (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
                        unsigned int __B)
{
  __v8di __src = (__v8di) __W;

  return (__m512i)
    __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __src,
                                   (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  __v8di __zero = (__v8di) _mm512_setzero_si512 ();

  return (__m512i)
    __builtin_ia32_psraqi512_mask ((__v8di) __A, __B, __zero,
                                   (__mmask8) __U);
}
#else
/* Macro forms: the count argument is converted to int.  */
#define _mm512_srai_epi64(A, B) \
  ((__m512i) \
   __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (A), (int) (B), \
                                  (__v8di) (__m512i) \
                                  _mm512_undefined_epi32 (), \
                                  (__mmask8) -1))

#define _mm512_mask_srai_epi64(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (A), (int) (B), \
                                  (__v8di) (__m512i) (W), \
                                  (__mmask8) (U)))

#define _mm512_maskz_srai_epi64(U, A, B) \
  ((__m512i) \
   __builtin_ia32_psraqi512_mask ((__v8di) (__m512i) (A), (int) (B), \
                                  (__v8di) (__m512i) \
                                  _mm512_setzero_si512 (), \
                                  (__mmask8) (U)))
#endif
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sra_epi64 (__m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, |
| (__v2di) __B, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, |
| (__v2di) __B, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A, |
| (__v2di) __B, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
/* _mm512_slli_epi32 and its mask/maskz forms, built on
   __builtin_ia32_pslldi512_mask with a scalar count.  They are
   always-inline functions when __OPTIMIZE__ is defined and macros
   expanding to the same builtin call otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  __v16si __undef = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i)
    __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __undef,
                                   (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  __v16si __src = (__v16si) __W;

  return (__m512i)
    __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __src,
                                   (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i)
    __builtin_ia32_pslldi512_mask ((__v16si) __A, __B, __zero,
                                   (__mmask16) __U);
}
#else
/* Macro forms: the count argument is converted to int.  */
#define _mm512_slli_epi32(A, B) \
  ((__m512i) \
   __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (A), (int) (B), \
                                  (__v16si) (__m512i) \
                                  _mm512_undefined_epi32 (), \
                                  (__mmask16) -1))

#define _mm512_mask_slli_epi32(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (A), (int) (B), \
                                  (__v16si) (__m512i) (W), \
                                  (__mmask16) (U)))

#define _mm512_maskz_slli_epi32(U, A, B) \
  ((__m512i) \
   __builtin_ia32_pslldi512_mask ((__v16si) (__m512i) (A), (int) (B), \
                                  (__v16si) (__m512i) \
                                  _mm512_setzero_si512 (), \
                                  (__mmask16) (U)))
#endif
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sll_epi32 (__m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, |
| (__v4si) __B, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, |
| (__v4si) __B, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A, |
| (__v4si) __B, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
/* _mm512_srli_epi32 and its mask/maskz forms, built on
   __builtin_ia32_psrldi512_mask with a scalar count.  They are
   always-inline functions when __OPTIMIZE__ is defined and macros
   expanding to the same builtin call otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  __v16si __undef = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i)
    __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __undef,
                                   (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
                        __m512i __A, unsigned int __B)
{
  __v16si __src = (__v16si) __W;

  return (__m512i)
    __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __src,
                                   (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i)
    __builtin_ia32_psrldi512_mask ((__v16si) __A, __B, __zero,
                                   (__mmask16) __U);
}
#else
/* Macro forms: the count argument is converted to int.  */
#define _mm512_srli_epi32(A, B) \
  ((__m512i) \
   __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (A), (int) (B), \
                                  (__v16si) (__m512i) \
                                  _mm512_undefined_epi32 (), \
                                  (__mmask16) -1))

#define _mm512_mask_srli_epi32(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (A), (int) (B), \
                                  (__v16si) (__m512i) (W), \
                                  (__mmask16) (U)))

#define _mm512_maskz_srli_epi32(U, A, B) \
  ((__m512i) \
   __builtin_ia32_psrldi512_mask ((__v16si) (__m512i) (A), (int) (B), \
                                  (__v16si) (__m512i) \
                                  _mm512_setzero_si512 (), \
                                  (__mmask16) (U)))
#endif
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_srl_epi32 (__m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, |
| (__v4si) __B, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, |
| (__v4si) __B, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A, |
| (__v4si) __B, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
/* _mm512_srai_epi32 and its mask/maskz forms, built on
   __builtin_ia32_psradi512_mask with a scalar count.  They are
   always-inline functions when __OPTIMIZE__ is defined and macros
   expanding to the same builtin call otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  __v16si __undef = (__v16si) _mm512_undefined_epi32 ();

  return (__m512i)
    __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __undef,
                                   (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
                        unsigned int __B)
{
  __v16si __src = (__v16si) __W;

  return (__m512i)
    __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __src,
                                   (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  __v16si __zero = (__v16si) _mm512_setzero_si512 ();

  return (__m512i)
    __builtin_ia32_psradi512_mask ((__v16si) __A, __B, __zero,
                                   (__mmask16) __U);
}
#else
/* Macro forms: the count argument is converted to int.  */
#define _mm512_srai_epi32(A, B) \
  ((__m512i) \
   __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (A), (int) (B), \
                                  (__v16si) (__m512i) \
                                  _mm512_undefined_epi32 (), \
                                  (__mmask16) -1))

#define _mm512_mask_srai_epi32(W, U, A, B) \
  ((__m512i) \
   __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (A), (int) (B), \
                                  (__v16si) (__m512i) (W), \
                                  (__mmask16) (U)))

#define _mm512_maskz_srai_epi32(U, A, B) \
  ((__m512i) \
   __builtin_ia32_psradi512_mask ((__v16si) (__m512i) (A), (int) (B), \
                                  (__v16si) (__m512i) \
                                  _mm512_setzero_si512 (), \
                                  (__mmask16) (U)))
#endif
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sra_epi32 (__m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, |
| (__v4si) __B, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, |
| (__v4si) __B, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B) |
| { |
| return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A, |
| (__v4si) __B, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
/* Scalar add/sub wrappers that forward an explicit rounding operand
   (__R, or C in the macro forms) to the *_round / *_mask_round builtins.
   They are always-inline functions when __OPTIMIZE__ is defined and
   macros otherwise.

   The mask forms pass __W as the pass-through operand; the maskz forms
   pass the result of _mm_setzero_pd () / _mm_setzero_ps ().  */
#ifdef __OPTIMIZE__
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_addsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_add_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_add_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_addss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
                                               (__v2df) __B,
                                               __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_sd (__m128d __W, __mmask8 __U, __m128d __A,
                       __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df) __W,
                                                    (__mmask8) __U, __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_sd (__mmask8 __U, __m128d __A, __m128d __B,
                        const int __R)
{
  return (__m128d) __builtin_ia32_subsd_mask_round ((__v2df) __A,
                                                    (__v2df) __B,
                                                    (__v2df)
                                                    _mm_setzero_pd (),
                                                    (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
                                              (__v4sf) __B,
                                              __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_sub_round_ss (__m128 __W, __mmask8 __U, __m128 __A,
                       __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf) __W,
                                                   (__mmask8) __U, __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_maskz_sub_round_ss (__mmask8 __U, __m128 __A, __m128 __B,
                        const int __R)
{
  return (__m128) __builtin_ia32_subss_mask_round ((__v4sf) __A,
                                                   (__v4sf) __B,
                                                   (__v4sf)
                                                   _mm_setzero_ps (),
                                                   (__mmask8) __U, __R);
}

#else
/* Every expansion is wrapped in an outer pair of parentheses.  Without
   them a postfix operator written after the macro call (for example a
   subscript) would bind to the builtin call before the leading cast is
   applied, so the macro would not behave like the inline function it
   stands in for.  */
#define _mm_add_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_addsd_round (A, B, C))

#define _mm_mask_add_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_addsd_mask_round (A, B, W, U, C))

#define _mm_maskz_add_round_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_addsd_mask_round (A, B, (__v2df) _mm_setzero_pd (), U, C))

#define _mm_add_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_addss_round (A, B, C))

#define _mm_mask_add_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_addss_mask_round (A, B, W, U, C))

#define _mm_maskz_add_round_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_addss_mask_round (A, B, (__v4sf) _mm_setzero_ps (), U, C))

#define _mm_sub_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_subsd_round (A, B, C))

#define _mm_mask_sub_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_subsd_mask_round (A, B, W, U, C))

#define _mm_maskz_sub_round_sd(U, A, B, C) \
  ((__m128d) \
   __builtin_ia32_subsd_mask_round (A, B, (__v2df) _mm_setzero_pd (), U, C))

#define _mm_sub_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_subss_round (A, B, C))

#define _mm_mask_sub_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_subss_mask_round (A, B, W, U, C))

#define _mm_maskz_sub_round_ss(U, A, B, C) \
  ((__m128) \
   __builtin_ia32_subss_mask_round (A, B, (__v4sf) _mm_setzero_ps (), U, C))

#endif
| |
/* Named constants for describing a ternary logic operation on three
   vectors A, B and C.  Each value is an 8-bit pattern:
     _MM_TERNLOG_A = 0xF0 = 1111 0000
     _MM_TERNLOG_B = 0xCC = 1100 1100
     _MM_TERNLOG_C = 0xAA = 1010 1010  */
typedef enum
{
  _MM_TERNLOG_A = 0xF0,
  _MM_TERNLOG_B = 0xCC,
  _MM_TERNLOG_C = 0xAA
} _MM_TERNLOG_ENUM;
| |
/* Ternary logic wrappers for 64-bit (pternlogq512) and 32-bit
   (pternlogd512) element vectors.  The immediate is narrowed to
   unsigned char before it is handed to the builtin.  The unmasked and
   mask forms call the *_mask builtin (with an all-ones mask or __U);
   the maskz forms call the *_maskz builtin.  They are always-inline
   functions when __OPTIMIZE__ is defined and macros otherwise.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
                           const int __imm)
{
  __v8di __va = (__v8di) __A;
  __v8di __vb = (__v8di) __B;
  __v8di __vc = (__v8di) __C;

  return (__m512i)
    __builtin_ia32_pternlogq512_mask (__va, __vb, __vc,
                                      (unsigned char) __imm,
                                      (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
                                __m512i __C, const int __imm)
{
  __v8di __va = (__v8di) __A;
  __v8di __vb = (__v8di) __B;
  __v8di __vc = (__v8di) __C;

  return (__m512i)
    __builtin_ia32_pternlogq512_mask (__va, __vb, __vc,
                                      (unsigned char) __imm,
                                      (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int __imm)
{
  __v8di __va = (__v8di) __A;
  __v8di __vb = (__v8di) __B;
  __v8di __vc = (__v8di) __C;

  return (__m512i)
    __builtin_ia32_pternlogq512_maskz (__va, __vb, __vc,
                                       (unsigned char) __imm,
                                       (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
                           const int __imm)
{
  __v16si __va = (__v16si) __A;
  __v16si __vb = (__v16si) __B;
  __v16si __vc = (__v16si) __C;

  return (__m512i)
    __builtin_ia32_pternlogd512_mask (__va, __vb, __vc,
                                      (unsigned char) __imm,
                                      (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
                                __m512i __C, const int __imm)
{
  __v16si __va = (__v16si) __A;
  __v16si __vb = (__v16si) __B;
  __v16si __vc = (__v16si) __C;

  return (__m512i)
    __builtin_ia32_pternlogd512_mask (__va, __vb, __vc,
                                      (unsigned char) __imm,
                                      (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
                                 __m512i __C, const int __imm)
{
  __v16si __va = (__v16si) __A;
  __v16si __vb = (__v16si) __B;
  __v16si __vc = (__v16si) __C;

  return (__m512i)
    __builtin_ia32_pternlogd512_maskz (__va, __vb, __vc,
                                       (unsigned char) __imm,
                                       (__mmask16) __U);
}
#else
#define _mm512_ternarylogic_epi64(A, B, C, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), \
                                     (__v8di) (__m512i) (B), \
                                     (__v8di) (__m512i) (C), \
                                     (unsigned char) (IMM), \
                                     (__mmask8) -1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogq512_mask ((__v8di) (__m512i) (A), \
                                     (__v8di) (__m512i) (B), \
                                     (__v8di) (__m512i) (C), \
                                     (unsigned char) (IMM), \
                                     (__mmask8) (U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogq512_maskz ((__v8di) (__m512i) (A), \
                                      (__v8di) (__m512i) (B), \
                                      (__v8di) (__m512i) (C), \
                                      (unsigned char) (IMM), \
                                      (__mmask8) (U)))
#define _mm512_ternarylogic_epi32(A, B, C, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), \
                                     (__v16si) (__m512i) (B), \
                                     (__v16si) (__m512i) (C), \
                                     (unsigned char) (IMM), \
                                     (__mmask16) -1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogd512_mask ((__v16si) (__m512i) (A), \
                                     (__v16si) (__m512i) (B), \
                                     (__v16si) (__m512i) (C), \
                                     (unsigned char) (IMM), \
                                     (__mmask16) (U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, IMM) \
  ((__m512i) \
   __builtin_ia32_pternlogd512_maskz ((__v16si) (__m512i) (A), \
                                      (__v16si) (__m512i) (B), \
                                      (__v16si) (__m512i) (C), \
                                      (unsigned char) (IMM), \
                                      (__mmask16) (U)))
#endif
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_rcp14_pd (__m512d __A) |
| { |
| return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, |
| (__v8df) |
| _mm512_undefined_pd (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) |
| { |
| return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, |
| (__v8df) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) |
| { |
| return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, |
| (__v8df) |
| _mm512_setzero_pd (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_rcp14_ps (__m512 __A) |
| { |
| return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, |
| (__v16sf) |
| _mm512_undefined_ps (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) |
| { |
| return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, |
| (__v16sf) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) |
| { |
| return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, |
| (__v16sf) |
| _mm512_setzero_ps (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_rcp14_sd (__m128d __A, __m128d __B) |
| { |
| return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B, |
| (__v2df) __A); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
| { |
| return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B, |
| (__v2df) __A, |
| (__v2df) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) |
| { |
| return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __B, |
| (__v2df) __A, |
| (__v2df) _mm_setzero_ps (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_rcp14_ss (__m128 __A, __m128 __B) |
| { |
| return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B, |
| (__v4sf) __A); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
| { |
| return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B, |
| (__v4sf) __A, |
| (__v4sf) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) |
| { |
| return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __B, |
| (__v4sf) __A, |
| (__v4sf) _mm_setzero_ps (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_rsqrt14_pd (__m512d __A) |
| { |
| return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, |
| (__v8df) |
| _mm512_undefined_pd (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) |
| { |
| return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, |
| (__v8df) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) |
| { |
| return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, |
| (__v8df) |
| _mm512_setzero_pd (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_rsqrt14_ps (__m512 __A) |
| { |
| return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, |
| (__v16sf) |
| _mm512_undefined_ps (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) |
| { |
| return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, |
| (__v16sf) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) |
| { |
| return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, |
| (__v16sf) |
| _mm512_setzero_ps (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_rsqrt14_sd (__m128d __A, __m128d __B) |
| { |
| return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B, |
| (__v2df) __A); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) |
| { |
| return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B, |
| (__v2df) __A, |
| (__v2df) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) |
| { |
| return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __B, |
| (__v2df) __A, |
| (__v2df) _mm_setzero_pd (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_rsqrt14_ss (__m128 __A, __m128 __B) |
| { |
| return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B, |
| (__v4sf) __A); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) |
| { |
| return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B, |
| (__v4sf) __A, |
| (__v4sf) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) |
| { |
| return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __B, |
| (__v4sf) __A, |
| (__v4sf) _mm_setzero_ps (), |
| (__mmask8) __U); |
| } |
| |
| #ifdef __OPTIMIZE__ |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sqrt_round_pd (__m512d __A, const int __R) |
| { |
| return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, |
| (__v8df) |
| _mm512_undefined_pd (), |
| (__mmask8) -1, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
| const int __R) |
| { |
| return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, |
| (__v8df) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R) |
| { |
| return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A, |
| (__v8df) |
| _mm512_setzero_pd (), |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sqrt_round_ps (__m512 __A, const int __R) |
| { |
| return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, |
| (__v16sf) |
| _mm512_undefined_ps (), |
| (__mmask16) -1, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R) |
| { |
| return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, |
| (__v16sf) __W, |
| (__mmask16) __U, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R) |
| { |
| return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A, |
| (__v16sf) |
| _mm512_setzero_ps (), |
| (__mmask16) __U, __R); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R) |
| { |
| return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, |
| (__v2df) __A, |
| (__v2df) |
| _mm_setzero_pd (), |
| (__mmask8) -1, __R); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mask_sqrt_round_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B, |
| const int __R) |
| { |
| return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, |
| (__v2df) __A, |
| (__v2df) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_maskz_sqrt_round_sd (__mmask8 __U, __m128d __A, __m128d __B, const int __R) |
| { |
| return (__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) __B, |
| (__v2df) __A, |
| (__v2df) |
| _mm_setzero_pd (), |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R) |
| { |
| return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, |
| (__v4sf) __A, |
| (__v4sf) |
| _mm_setzero_ps (), |
| (__mmask8) -1, __R); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mask_sqrt_round_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B, |
| const int __R) |
| { |
| return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, |
| (__v4sf) __A, |
| (__v4sf) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_maskz_sqrt_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R) |
| { |
| return (__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) __B, |
| (__v4sf) __A, |
| (__v4sf) |
| _mm_setzero_ps (), |
| (__mmask8) __U, __R); |
| } |
| #else |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtpd512_mask.  The expansion and every argument are
   parenthesized, and arguments are cast as in the inline version, so the
   macro composes like an ordinary function call.  */
#define _mm512_sqrt_round_pd(A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) _mm512_undefined_pd (), (__mmask8) -1, (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtpd512_mask with W as the second operand and U as the
   mask.  The expansion and every argument are parenthesized and cast so
   the macro composes like an ordinary function call.  */
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (W), (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtpd512_mask with a vector from _mm512_setzero_pd () as
   the second operand and U as the mask.  The expansion and every argument
   are parenthesized and cast so the macro composes like an ordinary
   function call.  */
#define _mm512_maskz_sqrt_round_pd(U, A, C) \
  ((__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) _mm512_setzero_pd (), (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtps512_mask with a mask of -1.  The expansion and
   every argument are parenthesized and cast so the macro composes like an
   ordinary function call.  */
#define _mm512_sqrt_round_ps(A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) _mm512_undefined_ps (), (__mmask16) -1, (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtps512_mask with W as the second operand and U as the
   mask.  The expansion and every argument are parenthesized and cast so
   the macro composes like an ordinary function call.  */
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (W), (__mmask16) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtps512_mask with a vector from _mm512_setzero_ps () as
   the second operand and U as the mask.  The expansion and every argument
   are parenthesized and cast so the macro composes like an ordinary
   function call.  */
#define _mm512_maskz_sqrt_round_ps(U, A, C) \
  ((__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) _mm512_setzero_ps (), (__mmask16) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtsd_mask_round with operands in (B, A) order, a zero
   vector, and a mask of -1.  The expansion and every argument are
   parenthesized and cast so the macro composes like an ordinary function
   call.  */
#define _mm_sqrt_round_sd(A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) (__m128d) (B), \
      (__v2df) (__m128d) (A), (__v2df) _mm_setzero_pd (), \
      (__mmask8) -1, (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtsd_mask_round with operands in (B, A) order, then W
   and the mask U.  The expansion and every argument are parenthesized and
   cast so the macro composes like an ordinary function call.  */
#define _mm_mask_sqrt_round_sd(W, U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) (__m128d) (B), \
      (__v2df) (__m128d) (A), (__v2df) (__m128d) (W), \
      (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtsd_mask_round with operands in (B, A) order, then a
   zero vector and the mask U.  The expansion and every argument are
   parenthesized and cast so the macro composes like an ordinary function
   call.  */
#define _mm_maskz_sqrt_round_sd(U, A, B, C) \
  ((__m128d) __builtin_ia32_sqrtsd_mask_round ((__v2df) (__m128d) (B), \
      (__v2df) (__m128d) (A), (__v2df) _mm_setzero_pd (), \
      (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtss_mask_round with operands in (B, A) order, a zero
   vector, and a mask of -1.  The expansion and every argument are
   parenthesized and cast so the macro composes like an ordinary function
   call.  */
#define _mm_sqrt_round_ss(A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) (__m128) (B), \
      (__v4sf) (__m128) (A), (__v4sf) _mm_setzero_ps (), \
      (__mmask8) -1, (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtss_mask_round with operands in (B, A) order, then W
   and the mask U.  The expansion and every argument are parenthesized and
   cast so the macro composes like an ordinary function call.  */
#define _mm_mask_sqrt_round_ss(W, U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) (__m128) (B), \
      (__v4sf) (__m128) (A), (__v4sf) (__m128) (W), \
      (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_sqrtss_mask_round with operands in (B, A) order, then a
   zero vector and the mask U.  The expansion and every argument are
   parenthesized and cast so the macro composes like an ordinary function
   call.  */
#define _mm_maskz_sqrt_round_ss(U, A, B, C) \
  ((__m128) __builtin_ia32_sqrtss_mask_round ((__v4sf) (__m128) (B), \
      (__v4sf) (__m128) (A), (__v4sf) _mm_setzero_ps (), \
      (__mmask8) (U), (C)))
| #endif |
| |
/* _mm_mask_sqrt_round_sd with the rounding argument fixed to
   _MM_FROUND_CUR_DIRECTION.  The expansion is wrapped in parentheses
   because the callee can itself be a macro that expands to a cast
   expression.  */
#define _mm_mask_sqrt_sd(W, U, A, B) \
  (_mm_mask_sqrt_round_sd ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION))
| |
/* _mm_maskz_sqrt_round_sd with the rounding argument fixed to
   _MM_FROUND_CUR_DIRECTION.  The expansion is wrapped in parentheses
   because the callee can itself be a macro that expands to a cast
   expression.  */
#define _mm_maskz_sqrt_sd(U, A, B) \
  (_mm_maskz_sqrt_round_sd ((U), (A), (B), _MM_FROUND_CUR_DIRECTION))
| |
/* _mm_mask_sqrt_round_ss with the rounding argument fixed to
   _MM_FROUND_CUR_DIRECTION.  The expansion is wrapped in parentheses
   because the callee can itself be a macro that expands to a cast
   expression.  */
#define _mm_mask_sqrt_ss(W, U, A, B) \
  (_mm_mask_sqrt_round_ss ((W), (U), (A), (B), _MM_FROUND_CUR_DIRECTION))
| |
/* _mm_maskz_sqrt_round_ss with the rounding argument fixed to
   _MM_FROUND_CUR_DIRECTION.  The expansion is wrapped in parentheses
   because the callee can itself be a macro that expands to a cast
   expression.  */
#define _mm_maskz_sqrt_ss(U, A, B) \
  (_mm_maskz_sqrt_round_ss ((U), (A), (B), _MM_FROUND_CUR_DIRECTION))
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepi8_epi32 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepi8_epi64 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepi16_epi32 (__m256i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepi16_epi64 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepi32_epi64 (__m256i __X) |
| { |
| return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) |
| { |
| return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X) |
| { |
| return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepu8_epi32 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepu8_epi64 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepu16_epi32 (__m256i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepu16_epi64 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepu32_epi64 (__m256i __X) |
| { |
| return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) |
| { |
| return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X) |
| { |
| return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| #ifdef __OPTIMIZE__ |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_add_round_pd (__m512d __A, __m512d __B, const int __R) |
| { |
| return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, |
| (__v8df) __B, |
| (__v8df) |
| _mm512_undefined_pd (), |
| (__mmask8) -1, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
| __m512d __B, const int __R) |
| { |
| return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, |
| (__v8df) __B, |
| (__v8df) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
| const int __R) |
| { |
| return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A, |
| (__v8df) __B, |
| (__v8df) |
| _mm512_setzero_pd (), |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_add_round_ps (__m512 __A, __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) |
| _mm512_undefined_ps (), |
| (__mmask16) -1, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
| __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) __W, |
| (__mmask16) __U, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) |
| _mm512_setzero_ps (), |
| (__mmask16) __U, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R) |
| { |
| return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, |
| (__v8df) __B, |
| (__v8df) |
| _mm512_undefined_pd (), |
| (__mmask8) -1, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
| __m512d __B, const int __R) |
| { |
| return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, |
| (__v8df) __B, |
| (__v8df) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
| const int __R) |
| { |
| return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A, |
| (__v8df) __B, |
| (__v8df) |
| _mm512_setzero_pd (), |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) |
| _mm512_undefined_ps (), |
| (__mmask16) -1, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
| __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) __W, |
| (__mmask16) __U, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) |
| _mm512_setzero_ps (), |
| (__mmask16) __U, __R); |
| } |
| #else |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_addpd512_mask with a mask of -1.  The expansion and
   every argument are parenthesized and cast so the macro composes like an
   ordinary function call.  */
#define _mm512_add_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_undefined_pd (), \
      (__mmask8) -1, (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_addpd512_mask with W as the third operand and U as the
   mask.  The expansion and every argument are parenthesized and cast so
   the macro composes like an ordinary function call.  */
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) (__m512d) (W), \
      (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_addpd512_mask with a vector from _mm512_setzero_pd () as
   the third operand and U as the mask.  The expansion and every argument
   are parenthesized and cast so the macro composes like an ordinary
   function call.  */
#define _mm512_maskz_add_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_addpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_setzero_pd (), \
      (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_addps512_mask with a mask of -1.  The expansion and
   every argument are parenthesized and cast so the macro composes like an
   ordinary function call.  */
#define _mm512_add_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) -1, (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_addps512_mask with W as the third operand and U as the
   mask.  The expansion and every argument are parenthesized and cast so
   the macro composes like an ordinary function call.  */
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) (__m512) (W), \
      (__mmask16) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_addps512_mask with a vector from _mm512_setzero_ps () as
   the third operand and U as the mask.  The expansion and every argument
   are parenthesized and cast so the macro composes like an ordinary
   function call.  */
#define _mm512_maskz_add_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_addps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_subpd512_mask with a mask of -1.  The expansion and
   every argument are parenthesized and cast so the macro composes like an
   ordinary function call.  */
#define _mm512_sub_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_undefined_pd (), \
      (__mmask8) -1, (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_subpd512_mask with W as the third operand and U as the
   mask.  The expansion and every argument are parenthesized and cast so
   the macro composes like an ordinary function call.  */
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) (__m512d) (W), \
      (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_subpd512_mask with a vector from _mm512_setzero_pd () as
   the third operand and U as the mask.  The expansion and every argument
   are parenthesized and cast so the macro composes like an ordinary
   function call.  */
#define _mm512_maskz_sub_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_subpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_setzero_pd (), \
      (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_subps512_mask with a mask of -1.  The expansion and
   every argument are parenthesized and cast so the macro composes like an
   ordinary function call.  */
#define _mm512_sub_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) -1, (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_subps512_mask with W as the third operand and U as the
   mask.  The expansion and every argument are parenthesized and cast so
   the macro composes like an ordinary function call.  */
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) (__m512) (W), \
      (__mmask16) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_subps512_mask with a vector from _mm512_setzero_ps () as
   the third operand and U as the mask.  The expansion and every argument
   are parenthesized and cast so the macro composes like an ordinary
   function call.  */
#define _mm512_maskz_sub_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_subps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) (U), (C)))
| #endif |
| |
| #ifdef __OPTIMIZE__ |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R) |
| { |
| return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, |
| (__v8df) __B, |
| (__v8df) |
| _mm512_undefined_pd (), |
| (__mmask8) -1, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A, |
| __m512d __B, const int __R) |
| { |
| return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, |
| (__v8df) __B, |
| (__v8df) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B, |
| const int __R) |
| { |
| return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A, |
| (__v8df) __B, |
| (__v8df) |
| _mm512_setzero_pd (), |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) |
| _mm512_undefined_ps (), |
| (__mmask16) -1, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
| __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) __W, |
| (__mmask16) __U, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) |
| _mm512_setzero_ps (), |
| (__mmask16) __U, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_div_round_pd (__m512d __M, __m512d __V, const int __R) |
| { |
| return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, |
| (__v8df) __V, |
| (__v8df) |
| _mm512_undefined_pd (), |
| (__mmask8) -1, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M, |
| __m512d __V, const int __R) |
| { |
| return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, |
| (__v8df) __V, |
| (__v8df) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V, |
| const int __R) |
| { |
| return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M, |
| (__v8df) __V, |
| (__v8df) |
| _mm512_setzero_pd (), |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_div_round_ps (__m512 __A, __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) |
| _mm512_undefined_ps (), |
| (__mmask16) -1, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A, |
| __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) __W, |
| (__mmask16) __U, __R); |
| } |
| |
| extern __inline __m512 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R) |
| { |
| return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A, |
| (__v16sf) __B, |
| (__v16sf) |
| _mm512_setzero_ps (), |
| (__mmask16) __U, __R); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mul_round_sd (__m128d __A, __m128d __B, const int __R) |
| { |
| return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A, |
| (__v2df) __B, |
| __R); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mask_mul_round_sd (__m128d __W, __mmask8 __U, __m128d __A, |
| __m128d __B, const int __R) |
| { |
| return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, |
| (__v2df) __B, |
| (__v2df) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_maskz_mul_round_sd (__mmask8 __U, __m128d __A, __m128d __B, |
| const int __R) |
| { |
| return (__m128d) __builtin_ia32_mulsd_mask_round ((__v2df) __A, |
| (__v2df) __B, |
| (__v2df) |
| _mm_setzero_pd (), |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mul_round_ss (__m128 __A, __m128 __B, const int __R) |
| { |
| return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A, |
| (__v4sf) __B, |
| __R); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mask_mul_round_ss (__m128 __W, __mmask8 __U, __m128 __A, |
| __m128 __B, const int __R) |
| { |
| return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, |
| (__v4sf) __B, |
| (__v4sf) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_maskz_mul_round_ss (__mmask8 __U, __m128 __A, __m128 __B, |
| const int __R) |
| { |
| return (__m128) __builtin_ia32_mulss_mask_round ((__v4sf) __A, |
| (__v4sf) __B, |
| (__v4sf) |
| _mm_setzero_ps (), |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_div_round_sd (__m128d __A, __m128d __B, const int __R) |
| { |
| return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A, |
| (__v2df) __B, |
| __R); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mask_div_round_sd (__m128d __W, __mmask8 __U, __m128d __A, |
| __m128d __B, const int __R) |
| { |
| return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, |
| (__v2df) __B, |
| (__v2df) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m128d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_maskz_div_round_sd (__mmask8 __U, __m128d __A, __m128d __B, |
| const int __R) |
| { |
| return (__m128d) __builtin_ia32_divsd_mask_round ((__v2df) __A, |
| (__v2df) __B, |
| (__v2df) |
| _mm_setzero_pd (), |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_div_round_ss (__m128 __A, __m128 __B, const int __R) |
| { |
| return (__m128) __builtin_ia32_divss_round ((__v4sf) __A, |
| (__v4sf) __B, |
| __R); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_mask_div_round_ss (__m128 __W, __mmask8 __U, __m128 __A, |
| __m128 __B, const int __R) |
| { |
| return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, |
| (__v4sf) __B, |
| (__v4sf) __W, |
| (__mmask8) __U, __R); |
| } |
| |
| extern __inline __m128 |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm_maskz_div_round_ss (__mmask8 __U, __m128 __A, __m128 __B, |
| const int __R) |
| { |
| return (__m128) __builtin_ia32_divss_mask_round ((__v4sf) __A, |
| (__v4sf) __B, |
| (__v4sf) |
| _mm_setzero_ps (), |
| (__mmask8) __U, __R); |
| } |
| |
| #else |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_mulpd512_mask with a mask of -1.  The expansion and
   every argument are parenthesized and cast so the macro composes like an
   ordinary function call.  */
#define _mm512_mul_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_undefined_pd (), \
      (__mmask8) -1, (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_mulpd512_mask with W as the third operand and U as the
   mask.  The expansion and every argument are parenthesized and cast so
   the macro composes like an ordinary function call.  */
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) (__m512d) (W), \
      (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_mulpd512_mask with a vector from _mm512_setzero_pd () as
   the third operand and U as the mask.  The expansion and every argument
   are parenthesized and cast so the macro composes like an ordinary
   function call.  */
#define _mm512_maskz_mul_round_pd(U, A, B, C) \
  ((__m512d) __builtin_ia32_mulpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_setzero_pd (), \
      (__mmask8) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_mulps512_mask with a mask of -1.  The expansion and
   every argument are parenthesized and cast so the macro composes like an
   ordinary function call.  */
#define _mm512_mul_round_ps(A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) _mm512_undefined_ps (), \
      (__mmask16) -1, (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_mulps512_mask with W as the third operand and U as the
   mask.  The expansion and every argument are parenthesized and cast so
   the macro composes like an ordinary function call.  */
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) (__m512) (W), \
      (__mmask16) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_mulps512_mask with a vector from _mm512_setzero_ps () as
   the third operand and U as the mask.  The expansion and every argument
   are parenthesized and cast so the macro composes like an ordinary
   function call.  */
#define _mm512_maskz_mul_round_ps(U, A, B, C) \
  ((__m512) __builtin_ia32_mulps512_mask ((__v16sf) (__m512) (A), \
      (__v16sf) (__m512) (B), (__v16sf) _mm512_setzero_ps (), \
      (__mmask16) (U), (C)))
| |
/* Macro form used when __OPTIMIZE__ is not defined; expands directly to
   __builtin_ia32_divpd512_mask with a mask of -1.  The expansion and
   every argument are parenthesized and cast so the macro composes like an
   ordinary function call.  */
#define _mm512_div_round_pd(A, B, C) \
  ((__m512d) __builtin_ia32_divpd512_mask ((__v8df) (__m512d) (A), \
      (__v8df) (__m512d) (B), (__v8df) _mm512_undefined_pd (), \
      (__mmask8) -1, (C)))
| |
| |