| /* Copyright (C) 2013-2015 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #ifndef _IMMINTRIN_H_INCLUDED |
| #error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead." |
| #endif |
| |
| #ifndef _AVX512FINTRIN_H_INCLUDED |
| #define _AVX512FINTRIN_H_INCLUDED |
| |
| #ifndef __AVX512F__ |
| #pragma GCC push_options |
| #pragma GCC target("avx512f") |
| #define __DISABLE_AVX512F__ |
| #endif /* __AVX512F__ */ |
| |
| /* Internal data types for implementing the intrinsics. */ |
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));

/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components.  */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));

/* Write-mask types: one bit per vector lane (8 x 64-bit lanes or
   16 x 32-bit lanes).  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
| |
/* Create the vector [A B C D E F G H].  The initializer is reversed
   because the last argument becomes element 0.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi64 (long long __A, long long __B, long long __C,
		  long long __D, long long __E, long long __F,
		  long long __G, long long __H)
{
  return __extension__ (__m512i) (__v8di)
	 { __H, __G, __F, __E, __D, __C, __B, __A };
}
| |
| /* Create the vector [A B C D E F G H I J K L M N O P]. */ |
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
		  int __E, int __F, int __G, int __H,
		  int __I, int __J, int __K, int __L,
		  int __M, int __N, int __O, int __P)
{
  /* Initializer is reversed: last argument is element 0.  */
  return __extension__ (__m512i)(__v16si)
	 { __P, __O, __N, __M, __L, __K, __J, __I,
	   __H, __G, __F, __E, __D, __C, __B, __A };
}
| |
/* Create the vector [A B C D E F G H] of doubles; last argument is
   element 0.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_pd (double __A, double __B, double __C, double __D,
	       double __E, double __F, double __G, double __H)
{
  return __extension__ (__m512d)
	 { __H, __G, __F, __E, __D, __C, __B, __A };
}
| |
/* Create the vector [A .. P] of floats; last argument is element 0.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ps (float __A, float __B, float __C, float __D,
	       float __E, float __F, float __G, float __H,
	       float __I, float __J, float __K, float __L,
	       float __M, float __N, float __O, float __P)
{
  return __extension__ (__m512)
	 { __P, __O, __N, __M, __L, __K, __J, __I,
	   __H, __G, __F, __E, __D, __C, __B, __A };
}
| |
/* "setr" = set reversed: arguments are given lowest element first, so
   these simply forward to the set forms with the arguments flipped.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)			      \
  _mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7,			      \
			  e8,e9,e10,e11,e12,e13,e14,e15)		      \
  _mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)			      \
  _mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)

#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
  _mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
| |
/* Return a 512-bit vector with undefined contents.  The self-initialization
   (__Y = __Y) is a GCC idiom that marks the variable as deliberately
   uninitialized, suppressing -Wuninitialized without emitting code.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ps (void)
{
  __m512 __Y = __Y;
  return __Y;
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_pd (void)
{
  __m512d __Y = __Y;
  return __Y;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_epi32 (void)
{
  __m512i __Y = __Y;
  return __Y;
}

/* Older name for _mm512_undefined_epi32.  */
#define _mm512_undefined_si512 _mm512_undefined_epi32
| |
/* Broadcast the byte A to all 64 lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi8 (char __A)
{
  return __extension__ (__m512i)(__v64qi)
	 { __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A };
}
| |
/* Broadcast the 16-bit value A to all 32 lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi16 (short __A)
{
  return __extension__ (__m512i)(__v32hi)
	 { __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A,
	   __A, __A, __A, __A, __A, __A, __A, __A };
}
| |
/* Broadcast the double A to all 8 lanes via vbroadcastsd.  The source is
   a 128-bit vector whose low element is A; the all-ones mask selects every
   lane, so the pass-through operand is left undefined.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_pd (double __A)
{
  return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
						  (__v2df) { __A, },
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1);
}
| |
/* Broadcast the float A to all 16 lanes via vbroadcastss.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ps (float __A)
{
  return (__m512) __builtin_ia32_broadcastss512 (__extension__
						 (__v4sf) { __A, },
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1);
}
| |
| /* Create the vector [A B C D A B C D A B C D A B C D]. */ |
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
  /* The 4-element group repeats 4 times; D is element 0 of each group.  */
  return __extension__ (__m512i)(__v16si)
	 { __D, __C, __B, __A, __D, __C, __B, __A,
	   __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Create the vector [A B C D A B C D] of 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
		   long long __D)
{
  return __extension__ (__m512i) (__v8di)
	 { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Create the vector [A B C D A B C D] of doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
  return __extension__ (__m512d)
	 { __D, __C, __B, __A, __D, __C, __B, __A };
}

/* Create the vector [A B C D A B C D A B C D A B C D] of floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
  return __extension__ (__m512)
	 { __D, __C, __B, __A, __D, __C, __B, __A,
	   __D, __C, __B, __A, __D, __C, __B, __A };
}
| |
/* Reversed-order (lowest element first) variants of the set4 forms.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3)				      \
  _mm512_set4_epi64(e3,e2,e1,e0)

#define _mm512_setr4_epi32(e0,e1,e2,e3)				      \
  _mm512_set4_epi32(e3,e2,e1,e0)

#define _mm512_setr4_pd(e0,e1,e2,e3)				      \
  _mm512_set4_pd(e3,e2,e1,e0)

#define _mm512_setr4_ps(e0,e1,e2,e3)				      \
  _mm512_set4_ps(e3,e2,e1,e0)
| |
/* Return an all-zero 512-bit vector of floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
{
  return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
				 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Return an all-zero 512-bit vector of doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_pd (void)
{
  return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}

/* Return an all-zero 512-bit integer vector.  Identical to
   _mm512_setzero_si512; the element width is irrelevant for zero.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_epi32 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_si512 (void)
{
  return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
| |
/* Masked register moves.  "mask" (merge) forms take lanes from __A where
   the corresponding bit of __U is set, otherwise from __W; "maskz" (zero)
   forms zero the unselected lanes instead.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U);
}
| |
/* Aligned 512-bit double loads/stores (vmovapd); __P must be 64-byte
   aligned.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_pd (void const *__P)
{
  return *(__m512d *) __P;
}

/* Merge-masking load: unselected lanes come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
						   (__v8df) __W,
						   (__mmask8) __U);
}

/* Zero-masking load: unselected lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
{
  return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_pd (void *__P, __m512d __A)
{
  *(__m512d *) __P = __A;
}

/* Masked store: only lanes selected by __U are written to memory.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
{
  __builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
				   (__mmask8) __U);
}
| |
/* Aligned 512-bit float loads/stores (vmovaps); __P must be 64-byte
   aligned.  Masking semantics as for the pd forms above.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ps (void const *__P)
{
  return *(__m512 *) __P;
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
						  (__v16sf) __W,
						  (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
{
  return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ps (void *__P, __m512 __A)
{
  *(__m512 *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
{
  __builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
				   (__mmask16) __U);
}
| |
/* 64-bit-element integer moves, aligned loads and stores (vmovdqa64).
   Merge (mask) and zero (maskz) masking as described above.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
						     (__v8di) __W,
						     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
						     (__v8di)
						     _mm512_setzero_si512 (),
						     (__mmask8) __U);
}

/* Aligned load; __P must be 64-byte aligned.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi64 (void const *__P)
{
  return *(__m512i *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
							(__v8di) __W,
							(__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
							(__v8di)
							_mm512_setzero_si512 (),
							(__mmask8) __U);
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi64 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

/* Masked store: only lanes selected by __U are written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
  __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
					(__mmask8) __U);
}
| |
/* 32-bit-element integer moves, aligned loads and stores (vmovdqa32).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
						     (__v16si) __W,
						     (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
  return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
						     (__v16si)
						     _mm512_setzero_si512 (),
						     (__mmask16) __U);
}

/* Aligned whole-register loads; __P must be 64-byte aligned.
   _mm512_load_si512 and _mm512_load_epi32 are identical.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_si512 (void const *__P)
{
  return *(__m512i *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi32 (void const *__P)
{
  return *(__m512i *) __P;
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
							(__v16si) __W,
							(__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
  return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
							(__v16si)
							_mm512_setzero_si512 (),
							(__mmask16) __U);
}

/* Aligned whole-register stores; _mm512_store_si512 and
   _mm512_store_epi32 are identical.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_si512 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi32 (void *__P, __m512i __A)
{
  *(__m512i *) __P = __A;
}

extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
  __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
					(__mmask16) __U);
}
| |
/* Low 32 bits of the 32x32 product, per lane.  The unmasked form uses a
   generic vector multiply on the unsigned type so overflow wraps instead
   of being undefined behavior.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A * (__v16su) __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
						  (__v16si) __B,
						  (__v16si) __W, __M);
}
| |
/* Per-lane variable left shift of 32-bit elements (vpsllvd): each lane of
   __X is shifted by the count in the corresponding lane of __Y.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
| |
/* Per-lane variable arithmetic (sign-extending) right shift of 32-bit
   elements (vpsravd).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
| |
/* Per-lane variable logical (zero-filling) right shift of 32-bit elements
   (vpsrlvd).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* Merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* Zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
| |
/* Per-lane 64-bit addition.  The unmasked form uses unsigned vector
   arithmetic so overflow wraps instead of being undefined behavior.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A + (__v8du) __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
| |
/* Per-lane 64-bit subtraction; unsigned arithmetic for wrapping.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v8du) __A - (__v8du) __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
						 (__v8di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_sllv_epi64 (__m512i __X, __m512i __Y) |
| { |
| return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X, |
| (__v8di) __Y, |
| (__v8di) |
| _mm512_undefined_pd (), |
| (__mmask8) -1); |
| } |
| |
/* Merge-masking form of the per-lane 64-bit variable left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
| |
/* Per-lane variable arithmetic right shift of 64-bit elements (vpsravq).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* Merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
| |
/* Per-lane variable logical right shift of 64-bit elements (vpsrlvq).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* Merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
						 (__v8di) __Y,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
| |
/* Per-lane 32-bit addition; unsigned arithmetic for wrapping.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A + (__v16su) __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}
| |
/* Signed widening multiply (vpmuldq): multiplies the even (low) 32-bit
   lanes of __X and __Y, producing eight 64-bit products; masks are
   therefore 8-bit.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epi32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
						  (__v16si) __Y,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  __M);
}
| |
/* Per-lane 32-bit subtraction; unsigned arithmetic for wrapping.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
  return (__m512i) ((__v16su) __A - (__v16su) __B);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
  return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
						 (__v16si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}
| |
/* Unsigned widening multiply (vpmuludq): even 32-bit lanes of __X and
   __Y produce eight 64-bit products; masks are 8-bit.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epu32 (__m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
						   (__v16si) __Y,
						   (__v8di)
						   _mm512_undefined_epi32 (),
						   (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
						   (__v16si) __Y,
						   (__v8di) __W, __M);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
  return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
						   (__v16si) __Y,
						   (__v8di)
						   _mm512_setzero_si512 (),
						   __M);
}
| |
/* Immediate-count 64-bit left shifts.  When optimizing, the inline forms
   are used and __B folds to the required immediate; without optimization
   that folding cannot happen, so macro forms keep the count a literal
   constant in the builtin call.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* Zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_slli_epi64(X, C)					  \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_slli_epi64(W, U, X, C)				  \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_slli_epi64(U, X, C)				  \
  ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
| |
/* 64-bit left shift where every lane is shifted by the same count, taken
   from the low 64 bits of the 128-bit operand __B (vpsllq).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* Merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* Zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
| |
/* Immediate-count 64-bit logical right shifts; inline vs. macro split for
   the same immediate-folding reason as _mm512_slli_epi64 above.  */
#ifdef __OPTIMIZE__
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* Merge-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
			__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* Zero-masking form.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
#define _mm512_srli_epi64(X, C)					  \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_srli_epi64(W, U, X, C)				  \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srli_epi64(U, X, C)				  \
  ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
| |
/* Shift the eight 64-bit elements of __A right logically (zero fill)
   by the count held in the low quadword of __B (VPSRLQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
| |
#ifdef __OPTIMIZE__
/* Shift the eight 64-bit elements of __A right arithmetically (sign
   fill) by the immediate count __B (VPSRAQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_undefined_epi32 (),
						  (__mmask8) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di) __W,
						  (__mmask8) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
						  (__v8di)
						  _mm512_setzero_si512 (),
						  (__mmask8) __U);
}
#else
/* -O0 fallbacks: the count C is forwarded as a constant expression.  */
#define _mm512_srai_epi64(X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask8)-1))

#define _mm512_mask_srai_epi64(W, U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)(W),\
    (__mmask8)(U)))

#define _mm512_maskz_srai_epi64(U, X, C) \
  ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
    (__v8di)(__m512i)_mm512_setzero_si512 (),\
    (__mmask8)(U)))
#endif
| |
/* Shift the eight 64-bit elements of __A right arithmetically (sign
   fill) by the count held in the low quadword of __B (VPSRAQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi64 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_undefined_epi32 (),
						 (__mmask8) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di) __W,
						 (__mmask8) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
						 (__v2di) __B,
						 (__v8di)
						 _mm512_setzero_si512 (),
						 (__mmask8) __U);
}
| |
#ifdef __OPTIMIZE__
/* Shift the sixteen 32-bit elements of __A left by the immediate count
   __B, shifting in zeros (VPSLLD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* -O0 fallbacks: the count C is forwarded as a constant expression.  */
#define _mm512_slli_epi32(X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_slli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_slli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
| |
/* Shift the sixteen 32-bit elements of __A left by the count held in
   the low quadword of __B, shifting in zeros (VPSLLD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si)
						 _mm512_undefined_epi32 (),
						 (__mmask16) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}
| |
#ifdef __OPTIMIZE__
/* Shift the sixteen 32-bit elements of __A right logically (zero fill)
   by the immediate count __B (VPSRLD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
			__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* -O0 fallbacks: the count C is forwarded as a constant expression.  */
#define _mm512_srli_epi32(X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_srli_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srli_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
| |
/* Shift the sixteen 32-bit elements of __A right logically (zero fill)
   by the count held in the low quadword of __B (VPSRLD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si)
						 _mm512_undefined_epi32 (),
						 (__mmask16) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}
| |
#ifdef __OPTIMIZE__
/* Shift the sixteen 32-bit elements of __A right arithmetically (sign
   fill) by the immediate count __B (VPSRAD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_undefined_epi32 (),
						  (__mmask16) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
			unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si) __W,
						  (__mmask16) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
  return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
						  (__v16si)
						  _mm512_setzero_si512 (),
						  (__mmask16) __U);
}
#else
/* -O0 fallbacks: the count C is forwarded as a constant expression.  */
#define _mm512_srai_epi32(X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_undefined_epi32 (),\
    (__mmask16)-1))

#define _mm512_mask_srai_epi32(W, U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)(W),\
    (__mmask16)(U)))

#define _mm512_maskz_srai_epi32(U, X, C) \
  ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
    (__v16si)(__m512i)_mm512_setzero_si512 (),\
    (__mmask16)(U)))
#endif
| |
/* Shift the sixteen 32-bit elements of __A right arithmetically (sign
   fill) by the count held in the low quadword of __B (VPSRAD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi32 (__m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si)
						 _mm512_undefined_epi32 (),
						 (__mmask16) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si) __W,
						 (__mmask16) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
  return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
						 (__v4si) __B,
						 (__v16si)
						 _mm512_setzero_si512 (),
						 (__mmask16) __U);
}
| |
#ifdef __OPTIMIZE__
/* Scalar add/subtract with an explicit rounding mode __R
   (_MM_FROUND_* constant).  The operation is applied to the low
   elements of __A and __B; the upper elements of the result come
   from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

| #else |
| #define _mm_add_round_sd(A, B, C) \ |
| (__m128d)__builtin_ia32_addsd_round(A, B, C) |
| |
| #define _mm_add_round_ss(A, B, C) \ |
| (__m128)__builtin_ia32_addss_round(A, B, C) |
| |
| #define _mm_sub_round_sd(A, B, C) \ |
| (__m128d)__builtin_ia32_subsd_round(A, B, C) |
| |
| #define _mm_sub_round_ss(A, B, C) \ |
| (__m128)__builtin_ia32_subss_round(A, B, C) |
| #endif |
| |
#ifdef __OPTIMIZE__
/* Bitwise ternary logic (VPTERNLOGQ): for every bit position, the
   three corresponding bits of __A, __B and __C index into the 8-bit
   immediate __imm, which supplies the result bit.  The builtin's
   _mask form merges unselected elements from __A; the _maskz form
   zeroes them.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
						     (__v8di) __B,
						     (__v8di) __C, __imm,
						     (__mmask8) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
				__m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
						     (__v8di) __B,
						     (__v8di) __C, __imm,
						     (__mmask8) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
				 __m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
						      (__v8di) __B,
						      (__v8di) __C,
						      __imm, (__mmask8) __U);
}

/* 32-bit-element forms of the above (VPTERNLOGD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
			   const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__v16si) __C,
						     __imm, (__mmask16) -1);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
				__m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
						     (__v16si) __B,
						     (__v16si) __C,
						     __imm, (__mmask16) __U);
}

extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
				 __m512i __C, const int __imm)
{
  return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
						      (__v16si) __B,
						      (__v16si) __C,
						      __imm, (__mmask16) __U);
}
#else
/* -O0 fallbacks: the truth-table immediate I is forwarded as a
   constant expression.  */
#define _mm512_ternarylogic_epi64(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A), \
    (__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I) \
  ((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A), \
    (__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I), \
    (__mmask16)(U)))
#endif
| |
/* Approximate reciprocal (VRCP14PD/VRCP14PS) of each element of __A;
   per Intel's documentation the maximum relative error is 2^-14.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
						   (__v8df)
						   _mm512_undefined_pd (),
						   (__mmask8) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
						   (__v8df) __W,
						   (__mmask8) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
						   (__v8df)
						   _mm512_setzero_pd (),
						   (__mmask8) __U);
}

/* Single-precision variants of the above.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
						  (__v16sf)
						  _mm512_undefined_ps (),
						  (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
						  (__v16sf) __W,
						  (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
						  (__v16sf)
						  _mm512_setzero_ps (),
						  (__mmask16) __U);
}
| |
/* Scalar approximate reciprocal: the low element of the result is the
   approximate reciprocal of the low element of __B, the upper element
   is copied from __A — hence the swapped (__B, __A) builtin operand
   order.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
					   (__v2df) __A);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
					  (__v4sf) __A);
}
| |
/* Approximate reciprocal square root (VRSQRT14PD/VRSQRT14PS) of each
   element of __A; per Intel's documentation the maximum relative error
   is 2^-14.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_pd (__m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
						     (__v8df)
						     _mm512_undefined_pd (),
						     (__mmask8) -1);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
						     (__v8df) __W,
						     (__mmask8) __U);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
  return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
						     (__v8df)
						     _mm512_setzero_pd (),
						     (__mmask8) __U);
}

/* Single-precision variants of the above.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_ps (__m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
						    (__v16sf)
						    _mm512_undefined_ps (),
						    (__mmask16) -1);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
						    (__v16sf) __W,
						    (__mmask16) __U);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
  return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
						    (__v16sf)
						    _mm512_setzero_ps (),
						    (__mmask16) __U);
}
| |
/* Scalar approximate reciprocal square root: the low element of the
   result comes from the low element of __B, the upper element is
   copied from __A — hence the swapped (__B, __A) builtin operand
   order.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_sd (__m128d __A, __m128d __B)
{
  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
					     (__v2df) __A);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_ss (__m128 __A, __m128 __B)
{
  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
					    (__v4sf) __A);
}
| |
#ifdef __OPTIMIZE__
/* Square root of each element of __A under the explicit rounding mode
   __R (_MM_FROUND_* constant).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_pd (__m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_undefined_pd (),
						  (__mmask8) -1, __R);
}

/* As above; result elements whose bit in __U is clear are taken
   from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			   const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df) __W,
						  (__mmask8) __U, __R);
}

/* As above; result elements whose bit in __U is clear are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
  return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
						  (__v8df)
						  _mm512_setzero_pd (),
						  (__mmask8) __U, __R);
}

/* Single-precision variants of the above.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_ps (__m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_undefined_ps (),
						 (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf) __W,
						 (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
  return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
						 (__v16sf)
						 _mm512_setzero_ps (),
						 (__mmask16) __U, __R);
}

/* Scalar forms: the low element of the result is the square root of
   the low element of __B, the upper elements are copied from __A —
   hence the swapped (__B, __A) builtin operand order.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
						(__v2df) __A,
						__R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
					       (__v4sf) __A,
					       __R);
}
| #else |
| #define _mm512_sqrt_round_pd(A, C) \ |
| (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_undefined_pd(), -1, C) |
| |
| #define _mm512_mask_sqrt_round_pd(W, U, A, C) \ |
| (__m512d)__builtin_ia32_sqrtpd512_mask(A, W, U, C) |
| |
| #define _mm512_maskz_sqrt_round_pd(U, A, C) \ |
| (__m512d)__builtin_ia32_sqrtpd512_mask(A, (__v8df)_mm512_setzero_pd(), U, C) |
| |
| #define _mm512_sqrt_round_ps(A, C) \ |
| (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_undefined_ps(), -1, C) |
| |
| #define _mm512_mask_sqrt_round_ps(W, U, A, C) \ |
| (__m512)__builtin_ia32_sqrtps512_mask(A, W, U, C) |
| |
| #define _mm512_maskz_sqrt_round_ps(U, A, C) \ |
| (__m512)__builtin_ia32_sqrtps512_mask(A, (__v16sf)_mm512_setzero_ps(), U, C) |
| |
| #define _mm_sqrt_round_sd(A, B, C) \ |
| (__m128d)__builtin_ia32_sqrtsd_round(A, B, C) |
| |
| #define _mm_sqrt_round_ss(A, B, C) \ |
| (__m128)__builtin_ia32_sqrtss_round(A, B, C) |
| #endif |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepi8_epi32 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepi8_epi64 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepi16_epi32 (__m256i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepi16_epi64 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepi32_epi64 (__m256i __X) |
| { |
| return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X) |
| { |
| return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X) |
| { |
| return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepu8_epi32 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, |
| (__v16si) |
| _mm512_undefined_epi32 (), |
| (__mmask16) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, |
| (__v16si) __W, |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A, |
| (__v16si) |
| _mm512_setzero_si512 (), |
| (__mmask16) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_cvtepu8_epi64 (__m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, |
| (__v8di) |
| _mm512_undefined_epi32 (), |
| (__mmask8) -1); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, |
| (__v8di) __W, |
| (__mmask8) __U); |
| } |
| |
| extern __inline __m512i |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) |
| { |
| return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A, |
| (__v8di) |
| _mm512_setzero_si512 (), |
| (__mmask8) __U); |
| } |
| |
/* Zero-extend 16 unsigned 16-bit integers in __A to 32-bit integers.
   Unmasked form: all lanes written; pass-through operand undefined.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu16_epi32 (__m256i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
						    (__v16si)
						    _mm512_undefined_epi32 (),
						    (__mmask16) -1);
}

/* Merge-masking form: lanes whose bit in __U is clear are copied
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
						    (__v16si) __W,
						    (__mmask16) __U);
}

/* Zero-masking form: lanes whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
						    (__v16si)
						    _mm512_setzero_si512 (),
						    (__mmask16) __U);
}
| |
/* Zero-extend 8 unsigned 16-bit integers in __A to 64-bit integers.
   Unmasked form: all lanes written; pass-through operand undefined.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu16_epi64 (__m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
						    (__v8di)
						    _mm512_undefined_epi32 (),
						    (__mmask8) -1);
}

/* Merge-masking form: lanes whose bit in __U is clear are copied
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
						    (__v8di) __W,
						    (__mmask8) __U);
}

/* Zero-masking form: lanes whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
{
  return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
						    (__v8di)
						    _mm512_setzero_si512 (),
						    (__mmask8) __U);
}
| |
/* Zero-extend 8 unsigned 32-bit integers in __X to 64-bit integers.
   Unmasked form: all lanes written; pass-through operand undefined.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu32_epi64 (__m256i __X)
{
  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
						    (__v8di)
						    _mm512_undefined_epi32 (),
						    (__mmask8) -1);
}

/* Merge-masking form: lanes whose bit in __U is clear are copied
   from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
{
  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
						    (__v8di) __W,
						    (__mmask8) __U);
}

/* Zero-masking form: lanes whose bit in __U is clear are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
{
  return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
						    (__v8di)
						    _mm512_setzero_si512 (),
						    (__mmask8) __U);
}
| |
#ifdef __OPTIMIZE__
/* Packed add/subtract with an explicit rounding-mode immediate __R.
   Inline-function forms are only usable when optimizing, because __R
   must reach the builtin as a compile-time constant; the #else branch
   below provides macro equivalents for -O0.  Each operation comes in
   three flavors: unmasked (mask -1, pass-through undefined),
   merge-masked (__W supplies masked-off lanes) and zero-masked
   (masked-off lanes zeroed).  */

/* Add packed double-precision values with rounding mode __R.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

/* Add packed single-precision values with rounding mode __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

/* Subtract packed double-precision values with rounding mode __R.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

/* Subtract packed single-precision values with rounding mode __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}
#else
/* -O0 fallbacks: macros keep the rounding-mode argument C a literal
   constant expression all the way to the builtin.  */
#define _mm512_add_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_addpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_add_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_addps512_mask(A, B, W, U, C)

#define _mm512_maskz_add_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_addps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_sub_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, W, U, C)

#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_subpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_sub_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_subps512_mask(A, B, W, U, C)

#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_subps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#endif
| |
#ifdef __OPTIMIZE__
/* Packed/scalar multiply and divide with an explicit rounding-mode
   immediate __R.  Inline forms require __OPTIMIZE__ so that __R stays
   a compile-time constant; macro fallbacks are in the #else branch.  */

/* Multiply packed double-precision values with rounding mode __R.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

/* Multiply packed single-precision values with rounding mode __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

/* Divide packed double-precision values __M / __V with rounding mode
   __R.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
			  __m512d __V, const int __R)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
			   const int __R)
{
  return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
						 (__v8df) __V,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

/* Divide packed single-precision values __A / __B with rounding mode
   __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

/* Scalar multiply/divide of the low element with rounding mode __R;
   upper elements come from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
					       (__v2df) __B,
					       __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
					      (__v4sf) __B,
					      __R);
}

#else
/* -O0 fallbacks: macros keep the rounding-mode argument C a literal
   constant expression all the way to the builtin.  */
#define _mm512_mul_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)

#define _mm512_maskz_mul_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_mul_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)

#define _mm512_maskz_mul_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm512_div_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_div_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)

#define _mm512_maskz_div_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_div_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_div_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)

#define _mm512_maskz_div_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm_mul_round_sd(A, B, C)               \
    (__m128d)__builtin_ia32_mulsd_round(A, B, C)

#define _mm_mul_round_ss(A, B, C)               \
    (__m128)__builtin_ia32_mulss_round(A, B, C)

#define _mm_div_round_sd(A, B, C)               \
    (__m128d)__builtin_ia32_divsd_round(A, B, C)

#define _mm_div_round_ss(A, B, C)               \
    (__m128)__builtin_ia32_divss_round(A, B, C)
#endif
| |
#ifdef __OPTIMIZE__
/* Packed max/min with an explicit rounding-mode immediate __R
   (relevant to exception suppression, e.g. _MM_FROUND_NO_EXC).
   Inline forms require __OPTIMIZE__ so __R stays a compile-time
   constant; macro fallbacks follow in the #else branch.  */

/* Maximum of packed double-precision values.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

/* Maximum of packed single-precision values.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}

/* Minimum of packed double-precision values.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_undefined_pd (),
						 (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			  __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df) __W,
						 (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			   const int __R)
{
  return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
						 (__v8df) __B,
						 (__v8df)
						 _mm512_setzero_pd (),
						 (__mmask8) __U, __R);
}

/* Minimum of packed single-precision values.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_undefined_ps (),
						(__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			  __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf) __W,
						(__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
						(__v16sf) __B,
						(__v16sf)
						_mm512_setzero_ps (),
						(__mmask16) __U, __R);
}
#else
/* -O0 fallbacks: macros keep the rounding-mode argument R a literal
   constant expression all the way to the builtin.  */
#define _mm512_max_round_pd(A, B, R)            \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_max_round_pd(W, U, A, B, R) \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)

#define _mm512_maskz_max_round_pd(U, A, B, R)   \
    (__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
/* Non-optimizing macro form of _mm512_max_round_ps.  The unmasked
   pass-through operand must be an undefined single-precision vector:
   use _mm512_undefined_ps (), matching the __OPTIMIZE__ inline
   definition and the other *_ps macros.  The previous definition
   wrongly called _mm512_undefined_pd ().  */
#define _mm512_max_round_ps(A, B, R)            \
    (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)

/* Remaining -O0 macro fallbacks for max/min with rounding mode R.  */
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
    (__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)

#define _mm512_maskz_max_round_ps(U, A, B, R)   \
    (__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)

#define _mm512_min_round_pd(A, B, R)            \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)

#define _mm512_mask_min_round_pd(W, U, A, B, R) \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)

#define _mm512_maskz_min_round_pd(U, A, B, R)   \
    (__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)

#define _mm512_min_round_ps(A, B, R)            \
    (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)

#define _mm512_mask_min_round_ps(W, U, A, B, R) \
    (__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)

#define _mm512_maskz_min_round_ps(U, A, B, R)   \
    (__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
#endif
| |
#ifdef __OPTIMIZE__
/* Scale (vscalef): multiply each element of __A by 2 raised to the
   floor of the corresponding element of __B, with rounding mode __R.
   Inline forms require __OPTIMIZE__ so __R stays a compile-time
   constant; macro fallbacks follow in the #else branch.  */

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df)
						    _mm512_undefined_pd (),
						    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
			     __m512d __B, const int __R)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df) __W,
						    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			      const int __R)
{
  return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df)
						    _mm512_setzero_pd (),
						    (__mmask8) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_undefined_ps (),
						   (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
			     __m512 __B, const int __R)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf) __W,
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
			      const int __R)
{
  return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf)
						   _mm512_setzero_ps (),
						   (__mmask16) __U, __R);
}

/* Scalar scalef of the low element; upper elements come from __A.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
{
  return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
						  (__v2df) __B,
						  __R);
}

extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
{
  return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
						 (__v4sf) __B,
						 __R);
}
#else
/* -O0 fallbacks: macros keep the rounding-mode argument C a literal
   constant expression all the way to the builtin.  */
#define _mm512_scalef_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)

#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)

#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)

#define _mm512_scalef_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)

#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)

#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)

#define _mm_scalef_round_sd(A, B, C)               \
    (__m128d)__builtin_ia32_scalefsd_round(A, B, C)

#define _mm_scalef_round_ss(A, B, C)               \
    (__m128)__builtin_ia32_scalefss_round(A, B, C)
#endif
| |
#ifdef __OPTIMIZE__
/* Fused multiply-add family with an explicit rounding-mode immediate
   __R.  Masking conventions for FMA differ from the two-operand ops:
   _mask keeps __A for masked-off lanes, _mask3 keeps __C (hence the
   dedicated *_mask3 builtins), _maskz zeroes them.  fmsub is
   expressed where possible by negating the __C operand of the fmadd
   builtin.  */

/* (__A * __B) + __C, double precision.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df) __C,
						    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
			    __m512d __C, const int __R)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    (__v8df) __C,
						    (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
			     __mmask8 __U, const int __R)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
						     (__v8df) __B,
						     (__v8df) __C,
						     (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			     __m512d __C, const int __R)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
						     (__v8df) __B,
						     (__v8df) __C,
						     (__mmask8) __U, __R);
}

/* (__A * __B) + __C, single precision.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf) __C,
						   (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
			    __m512 __C, const int __R)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   (__v16sf) __C,
						   (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
			     __mmask16 __U, const int __R)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
						    (__v16sf) __B,
						    (__v16sf) __C,
						    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
			     __m512 __C, const int __R)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
						    (__v16sf) __B,
						    (__v16sf) __C,
						    (__mmask16) __U, __R);
}

/* (__A * __B) - __C, double precision: fmadd with __C negated.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    -(__v8df) __C,
						    (__mmask8) -1, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
			    __m512d __C, const int __R)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
						    (__v8df) __B,
						    -(__v8df) __C,
						    (__mmask8) __U, __R);
}

/* mask3 keeps __C for masked-off lanes, so negating __C would corrupt
   the pass-through: use the dedicated vfmsub mask3 builtin instead.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
			     __mmask8 __U, const int __R)
{
  return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
						     (__v8df) __B,
						     (__v8df) __C,
						     (__mmask8) __U, __R);
}

extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
			     __m512d __C, const int __R)
{
  return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
						     (__v8df) __B,
						     -(__v8df) __C,
						     (__mmask8) __U, __R);
}

/* (__A * __B) - __C, single precision: fmadd with __C negated.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   -(__v16sf) __C,
						   (__mmask16) -1, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
			    __m512 __C, const int __R)
{
  return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
						   (__v16sf) __B,
						   -(__v16sf) __C,
						   (__mmask16) __U, __R);
}

/* See the pd mask3 variant: dedicated vfmsub builtin preserves __C.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
			     __mmask16 __U, const int __R)
{
  return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
						    (__v16sf) __B,
						    (__v16sf) __C,
						    (__mmask16) __U, __R);
}

extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
			     __m512 __C, const int __R)
{
  return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
						    (__v16sf) __B,
						    -(__v16sf) __C,
						    (__mmask16) __U, __R);
}

/* Alternating (add on even lanes, subtract on odd), double precision.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
{
  return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
						       (__v8df) __B,
						       (__v8df) __C,
						       (__mmask8) -1, __R);
}
| |
| extern __inline __m512d |
| __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) |
| _mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B, |
| __m512d __C, const int __R) |
| { |
|