blob: 4bd7dd3d62e1fcc0655027f13de7f3eaf9d6000c [file] [log] [blame]
/* Copyright (C) 2013-2015 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#ifndef _IMMINTRIN_H_INCLUDED
#error "Never use <avx512fintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVX512FINTRIN_H_INCLUDED
#define _AVX512FINTRIN_H_INCLUDED
#ifndef __AVX512F__
#pragma GCC push_options
#pragma GCC target("avx512f")
#define __DISABLE_AVX512F__
#endif /* __AVX512F__ */
/* Internal data types for implementing the intrinsics. */
/* The __v* names are 64-byte (512-bit) element-typed vectors used only
   inside this implementation.  Signed/unsigned pairs exist so integer
   add/sub/mul can be done on the unsigned type, where wraparound is
   well defined (signed overflow would be UB).  */
typedef double __v8df __attribute__ ((__vector_size__ (64)));
typedef float __v16sf __attribute__ ((__vector_size__ (64)));
typedef long long __v8di __attribute__ ((__vector_size__ (64)));
typedef unsigned long long __v8du __attribute__ ((__vector_size__ (64)));
typedef int __v16si __attribute__ ((__vector_size__ (64)));
typedef unsigned int __v16su __attribute__ ((__vector_size__ (64)));
typedef short __v32hi __attribute__ ((__vector_size__ (64)));
typedef unsigned short __v32hu __attribute__ ((__vector_size__ (64)));
typedef char __v64qi __attribute__ ((__vector_size__ (64)));
typedef unsigned char __v64qu __attribute__ ((__vector_size__ (64)));
/* The Intel API is flexible enough that we must allow aliasing with other
   vector types, and their scalar components. */
typedef float __m512 __attribute__ ((__vector_size__ (64), __may_alias__));
typedef long long __m512i __attribute__ ((__vector_size__ (64), __may_alias__));
typedef double __m512d __attribute__ ((__vector_size__ (64), __may_alias__));
/* Write-mask types: one bit per 64-bit (8 lanes) or 32-bit (16 lanes)
   element of a 512-bit vector.  */
typedef unsigned char __mmask8;
typedef unsigned short __mmask16;
/* Create the vector [A B C D E F G H] of 64-bit ints.  Following the
   Intel _mm_set_* convention, __A is the most-significant element, so
   the initializer lists the arguments in reverse (element 0 first).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi64 (long long __A, long long __B, long long __C,
long long __D, long long __E, long long __F,
long long __G, long long __H)
{
return __extension__ (__m512i) (__v8di)
{ __H, __G, __F, __E, __D, __C, __B, __A };
}
/* Create the vector [A B C D E F G H I J K L M N O P] of 32-bit ints.
   __A is the most-significant element; the initializer is therefore in
   reverse argument order (element 0 = __P first).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_epi32 (int __A, int __B, int __C, int __D,
int __E, int __F, int __G, int __H,
int __I, int __J, int __K, int __L,
int __M, int __N, int __O, int __P)
{
return __extension__ (__m512i)(__v16si)
{ __P, __O, __N, __M, __L, __K, __J, __I,
__H, __G, __F, __E, __D, __C, __B, __A };
}
/* Create a vector of eight doubles; __A is the most-significant
   element (initializer in reverse argument order).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_pd (double __A, double __B, double __C, double __D,
double __E, double __F, double __G, double __H)
{
return __extension__ (__m512d)
{ __H, __G, __F, __E, __D, __C, __B, __A };
}
/* Create a vector of sixteen floats; __A is the most-significant
   element (initializer in reverse argument order).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set_ps (float __A, float __B, float __C, float __D,
float __E, float __F, float __G, float __H,
float __I, float __J, float __K, float __L,
float __M, float __N, float __O, float __P)
{
return __extension__ (__m512)
{ __P, __O, __N, __M, __L, __K, __J, __I,
__H, __G, __F, __E, __D, __C, __B, __A };
}
/* The _mm512_setr_* ("set reversed") forms take arguments in
   element-index order (e0 = least significant) and simply forward to
   the corresponding _mm512_set_* with the argument list reversed.  */
#define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \
_mm512_set_epi64(e7,e6,e5,e4,e3,e2,e1,e0)
#define _mm512_setr_epi32(e0,e1,e2,e3,e4,e5,e6,e7, \
e8,e9,e10,e11,e12,e13,e14,e15) \
_mm512_set_epi32(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
#define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \
_mm512_set_pd(e7,e6,e5,e4,e3,e2,e1,e0)
#define _mm512_setr_ps(e0,e1,e2,e3,e4,e5,e6,e7,e8,e9,e10,e11,e12,e13,e14,e15) \
_mm512_set_ps(e15,e14,e13,e12,e11,e10,e9,e8,e7,e6,e5,e4,e3,e2,e1,e0)
/* Return a vector with undefined contents.  The self-initialization
   "__Y = __Y" is a deliberate idiom: it tells GCC the value is "known"
   so no uninitialized-use warning is emitted, while still letting the
   compiler pick any convenient register contents.  Do not "fix" it.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_ps (void)
{
__m512 __Y = __Y;
return __Y;
}
/* As above, for a vector of doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_pd (void)
{
__m512d __Y = __Y;
return __Y;
}
/* As above, for an integer vector.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_undefined_epi32 (void)
{
__m512i __Y = __Y;
return __Y;
}
/* Older spelling kept for source compatibility.  */
#define _mm512_undefined_si512 _mm512_undefined_epi32
/* Broadcast the 8-bit value __A to all 64 byte lanes.  Written as a
   plain vector initializer so the vectorizer/backend can select the
   best broadcast sequence.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi8 (char __A)
{
return __extension__ (__m512i)(__v64qi)
{ __A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A };
}
/* Broadcast the 16-bit value __A to all 32 word lanes.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_epi16 (short __A)
{
return __extension__ (__m512i)(__v32hi)
{ __A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A,
__A, __A, __A, __A, __A, __A, __A, __A };
}
/* Broadcast the double __A to all 8 lanes via VBROADCASTSD.  The
   merge source is undefined and the mask is all-ones, so every lane
   is written.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_pd (double __A)
{
return (__m512d) __builtin_ia32_broadcastsd512 (__extension__
(__v2df) { __A, },
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1);
}
/* Broadcast the float __A to all 16 lanes via VBROADCASTSS.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set1_ps (float __A)
{
return (__m512) __builtin_ia32_broadcastss512 (__extension__
(__v4sf) { __A, },
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1);
}
/* Create the vector [A B C D A B C D A B C D A B C D]: the 4-element
   pattern repeated across the register, with __A most significant
   within each repeat (hence the reversed initializer order).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi32 (int __A, int __B, int __C, int __D)
{
return __extension__ (__m512i)(__v16si)
{ __D, __C, __B, __A, __D, __C, __B, __A,
__D, __C, __B, __A, __D, __C, __B, __A };
}
/* As above for 64-bit ints: [A B C D A B C D].  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_epi64 (long long __A, long long __B, long long __C,
long long __D)
{
return __extension__ (__m512i) (__v8di)
{ __D, __C, __B, __A, __D, __C, __B, __A };
}
/* As above for doubles: [A B C D A B C D].  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_pd (double __A, double __B, double __C, double __D)
{
return __extension__ (__m512d)
{ __D, __C, __B, __A, __D, __C, __B, __A };
}
/* As above for floats: the 4-float pattern repeated four times.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_set4_ps (float __A, float __B, float __C, float __D)
{
return __extension__ (__m512)
{ __D, __C, __B, __A, __D, __C, __B, __A,
__D, __C, __B, __A, __D, __C, __B, __A };
}
/* Reversed-argument-order wrappers for the set4 forms.  */
#define _mm512_setr4_epi64(e0,e1,e2,e3) \
_mm512_set4_epi64(e3,e2,e1,e0)
#define _mm512_setr4_epi32(e0,e1,e2,e3) \
_mm512_set4_epi32(e3,e2,e1,e0)
#define _mm512_setr4_pd(e0,e1,e2,e3) \
_mm512_set4_pd(e3,e2,e1,e0)
#define _mm512_setr4_ps(e0,e1,e2,e3) \
_mm512_set4_ps(e3,e2,e1,e0)
/* Return an all-zero vector of 16 floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_ps (void)
{
return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
/* Return an all-zero vector of 8 doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_pd (void)
{
return __extension__ (__m512d) { 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
}
/* Return an all-zero integer vector.  Identical in effect to
   _mm512_setzero_si512 below; both names exist in the Intel API.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_epi32 (void)
{
return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
/* Return an all-zero integer vector.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_setzero_si512 (void)
{
return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
}
/* Masked move of doubles: copy lane i of __A where mask bit i of __U
   is set; lanes with a clear bit come from __W (merge masking).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U);
}
/* Zero-masked move of doubles: as above, but lanes whose mask bit is
   clear are zeroed instead of merged.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_movapd512_mask ((__v8df) __A,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U);
}
/* Masked move of floats (merge masking, 16 lanes).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U);
}
/* Zero-masked move of floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_movaps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U);
}
/* Aligned load of 8 doubles from *__P.  __P must be 64-byte aligned;
   the plain dereference lets GCC emit VMOVAPD.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_pd (void const *__P)
{
return *(__m512d *) __P;
}
/* Masked aligned load: lanes with a set mask bit are loaded from *__P,
   the rest come from __W.  Masked-off lanes do not fault.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
{
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
(__v8df) __W,
(__mmask8) __U);
}
/* Zero-masked aligned load: masked-off lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_pd (__mmask8 __U, void const *__P)
{
return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U);
}
/* Aligned store of 8 doubles to *__P (64-byte aligned).  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_pd (void *__P, __m512d __A)
{
*(__m512d *) __P = __A;
}
/* Masked aligned store: only lanes with a set mask bit are written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_pd (void *__P, __mmask8 __U, __m512d __A)
{
__builtin_ia32_storeapd512_mask ((__v8df *) __P, (__v8df) __A,
(__mmask8) __U);
}
/* Aligned load of 16 floats from *__P (64-byte aligned).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_ps (void const *__P)
{
return *(__m512 *) __P;
}
/* Masked aligned float load (merge masking).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
{
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
(__v16sf) __W,
(__mmask16) __U);
}
/* Masked aligned float load (zero masking).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_ps (__mmask16 __U, void const *__P)
{
return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U);
}
/* Aligned store of 16 floats to *__P (64-byte aligned).  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_ps (void *__P, __m512 __A)
{
*(__m512 *) __P = __A;
}
/* Masked aligned float store.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_ps (void *__P, __mmask16 __U, __m512 __A)
{
__builtin_ia32_storeaps512_mask ((__v16sf *) __P, (__v16sf) __A,
(__mmask16) __U);
}
/* Masked move of 64-bit integer lanes (merge masking from __W).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
(__v8di) __W,
(__mmask8) __U);
}
/* Masked move of 64-bit integer lanes (zero masking).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_movdqa64_512_mask ((__v8di) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Aligned load of eight 64-bit ints from *__P (64-byte aligned).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi64 (void const *__P)
{
return *(__m512i *) __P;
}
/* Masked aligned epi64 load (merge masking); masked-off lanes do not
   fault.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
{
return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
(__v8di) __W,
(__mmask8) __U);
}
/* Masked aligned epi64 load (zero masking).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
{
return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Aligned store of eight 64-bit ints to *__P (64-byte aligned).  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi64 (void *__P, __m512i __A)
{
*(__m512i *) __P = __A;
}
/* Masked aligned epi64 store: only set-mask lanes are written.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
{
__builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
(__mmask8) __U);
}
/* Masked move of 32-bit integer lanes (merge masking).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
(__v16si) __W,
(__mmask16) __U);
}
/* Masked move of 32-bit integer lanes (zero masking).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
{
return (__m512i) __builtin_ia32_movdqa32_512_mask ((__v16si) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Aligned whole-register integer load (64-byte aligned).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_si512 (void const *__P)
{
return *(__m512i *) __P;
}
/* Aligned load viewed as sixteen 32-bit ints; same operation as
   _mm512_load_si512.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_load_epi32 (void const *__P)
{
return *(__m512i *) __P;
}
/* Masked aligned epi32 load (merge masking).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
{
return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
(__v16si) __W,
(__mmask16) __U);
}
/* Masked aligned epi32 load (zero masking).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
{
return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Aligned whole-register integer store (64-byte aligned).  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_si512 (void *__P, __m512i __A)
{
*(__m512i *) __P = __A;
}
/* Aligned store viewed as 32-bit ints; same operation as
   _mm512_store_si512.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_store_epi32 (void *__P, __m512i __A)
{
*(__m512i *) __P = __A;
}
/* Masked aligned epi32 store.  */
extern __inline void
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
{
__builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
(__mmask16) __U);
}
/* Lanewise 32-bit multiply keeping the low 32 bits.  Done on the
   unsigned vector type so wraparound is well defined (signed overflow
   would be UB); low-half results are identical either way.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mullo_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16su) __A * (__v16su) __B);
}
/* Zero-masked low multiply: masked-off lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mullo_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
_mm512_setzero_si512 (),
__M);
}
/* Merge-masked low multiply: masked-off lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mullo_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_pmulld512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __W, __M);
}
/* Variable left shift: each 32-bit lane of __X shifted left by the
   corresponding lane of __Y (VPSLLVD).  Unmasked form: all-ones mask,
   undefined merge source.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masked variable left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masked variable left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Variable arithmetic right shift (sign-extending, VPSRAVD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masked variable arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masked variable arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Variable logical right shift (zero-filling, VPSRLVD).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masked variable logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi32 (__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masked variable logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi32 (__mmask16 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv16si_mask ((__v16si) __X,
(__v16si) __Y,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Lanewise 64-bit add; unsigned type so wraparound is defined.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi64 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v8du) __A + (__v8du) __B);
}
/* Merge-masked 64-bit add: masked-off lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked 64-bit add.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Lanewise 64-bit subtract; unsigned type so wraparound is defined.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi64 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v8du) __A - (__v8du) __B);
}
/* Merge-masked 64-bit subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked 64-bit subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Variable left shift: each 64-bit lane of __X shifted left by the
   corresponding lane of __Y (VPSLLVQ).  Unmasked form: all-ones mask,
   undefined merge source.
   Fix: the don't-care merge argument now uses _mm512_undefined_epi32 ()
   like every other unmasked integer shift here; the original called
   _mm512_undefined_pd (), an FP/integer mismatch that was harmless
   (the value is undefined) but inconsistent with its siblings.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sllv_epi64 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked variable 64-bit left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sllv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked variable 64-bit left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sllv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psllv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Variable 64-bit arithmetic right shift (VPSRAVQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srav_epi64 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked variable 64-bit arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srav_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked variable 64-bit arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srav_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrav8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Variable 64-bit logical right shift (VPSRLVQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srlv_epi64 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked variable 64-bit logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srlv_epi64 (__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked variable 64-bit logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srlv_epi64 (__mmask8 __U, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_psrlv8di_mask ((__v8di) __X,
(__v8di) __Y,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Lanewise 32-bit add; unsigned type so wraparound is defined.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16su) __A + (__v16su) __B);
}
/* Merge-masked 32-bit add.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masked 32-bit add.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_paddd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Signed widening multiply (VPMULDQ): multiplies the even 32-bit
   lanes of __X and __Y, yielding eight 64-bit products; note the
   result mask is therefore __mmask8.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epi32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked signed widening multiply.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epi32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di) __W, __M);
}
/* Zero-masked signed widening multiply.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epi32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuldq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_setzero_si512 (),
__M);
}
/* Lanewise 32-bit subtract; unsigned type so wraparound is defined.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_epi32 (__m512i __A, __m512i __B)
{
return (__m512i) ((__v16su) __A - (__v16su) __B);
}
/* Merge-masked 32-bit subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masked 32-bit subtract.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
{
return (__m512i) __builtin_ia32_psubd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Unsigned widening multiply (VPMULUDQ): multiplies the even 32-bit
   lanes of __X and __Y into eight 64-bit products (mask is __mmask8).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_epu32 (__m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked unsigned widening multiply.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_epu32 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di) __W, __M);
}
/* Zero-masked unsigned widening multiply.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_epu32 (__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i) __builtin_ia32_pmuludq512_mask ((__v16si) __X,
(__v16si) __Y,
(__v8di)
_mm512_setzero_si512 (),
__M);
}
/* Immediate-count shifts need __B to be a compile-time constant for
   the builtin; with optimization the inline functions constant-fold,
   otherwise macro forms are used so the literal reaches the builtin
   directly.  */
#ifdef __OPTIMIZE__
/* Shift each 64-bit lane of __A left by the immediate __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi64 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked immediate 64-bit left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
unsigned int __B)
{
return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked immediate 64-bit left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psllqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#else
/* Macro forms used at -O0: the count C must be an integer literal.  */
#define _mm512_slli_epi64(X, C) \
((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_undefined_epi32 (),\
(__mmask8)-1))
#define _mm512_mask_slli_epi64(W, U, X, C) \
((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)(W),\
(__mmask8)(U)))
#define _mm512_maskz_slli_epi64(U, X, C) \
((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_setzero_si512 (),\
(__mmask8)(U)))
#endif
/* Shift each 64-bit lane of __A left by the count in the low 64 bits
   of the XMM register __B (VPSLLQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi64 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked count-register 64-bit left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked count-register 64-bit left shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psllq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#ifdef __OPTIMIZE__
/* Logical (zero-filling) right shift of each 64-bit lane by the
   immediate __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi64 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked immediate 64-bit logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi64 (__m512i __W, __mmask8 __U,
__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked immediate 64-bit logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrlqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#else
/* Macro forms used at -O0: the count C must be an integer literal.  */
#define _mm512_srli_epi64(X, C) \
((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_undefined_epi32 (),\
(__mmask8)-1))
#define _mm512_mask_srli_epi64(W, U, X, C) \
((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)(W),\
(__mmask8)(U)))
#define _mm512_maskz_srli_epi64(U, X, C) \
((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_setzero_si512 (),\
(__mmask8)(U)))
#endif
/* Logical right shift of each 64-bit lane by the count in the low
   64 bits of __B (VPSRLQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi64 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked count-register 64-bit logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked count-register 64-bit logical right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrlq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#ifdef __OPTIMIZE__
/* Arithmetic (sign-extending) right shift of each 64-bit lane by the
   immediate __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi64 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked immediate 64-bit arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi64 (__m512i __W, __mmask8 __U, __m512i __A,
unsigned int __B)
{
return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked immediate 64-bit arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi64 (__mmask8 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psraqi512_mask ((__v8di) __A, __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#else
/* Macro forms used at -O0: the count C must be an integer literal.  */
#define _mm512_srai_epi64(X, C) \
((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_undefined_epi32 (),\
(__mmask8)-1))
#define _mm512_mask_srai_epi64(W, U, X, C) \
((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)(W),\
(__mmask8)(U)))
#define _mm512_maskz_srai_epi64(U, X, C) \
((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
(__v8di)(__m512i)_mm512_setzero_si512 (),\
(__mmask8)(U)))
#endif
/* Arithmetic right shift of each 64-bit lane by the count in the low
   64 bits of __B (VPSRAQ).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi64 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masked count-register 64-bit arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masked count-register 64-bit arithmetic right shift.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi64 (__mmask8 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psraq512_mask ((__v8di) __A,
(__v2di) __B,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#ifdef __OPTIMIZE__
/* Logical left shift of each 32-bit element of __A by the immediate
   count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_slli_epi32 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
unsigned int __B)
{
return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_slli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_pslldi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#else
/* -O0 fallbacks: expand directly to the builtin so the constant shift
   count reaches it without inlining.  */
#define _mm512_slli_epi32(X, C)   \
((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_undefined_epi32 (),\
(__mmask16)-1))
#define _mm512_mask_slli_epi32(W, U, X, C)  \
((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)(W),\
(__mmask16)(U)))
#define _mm512_maskz_slli_epi32(U, X, C)  \
((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_setzero_si512 (),\
(__mmask16)(U)))
#endif
/* Logical left shift of each 32-bit element of __A by the count held in
   the low quadword of __B (variable count form).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sll_epi32 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sll_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sll_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_pslld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#ifdef __OPTIMIZE__
/* Logical right shift of each 32-bit element of __A by the immediate
   count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srli_epi32 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi32 (__m512i __W, __mmask16 __U,
__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srli_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psrldi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#else
/* -O0 fallbacks: expand directly to the builtin so the constant shift
   count reaches it without inlining.  */
#define _mm512_srli_epi32(X, C)   \
((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_undefined_epi32 (),\
(__mmask16)-1))
#define _mm512_mask_srli_epi32(W, U, X, C)  \
((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)(W),\
(__mmask16)(U)))
#define _mm512_maskz_srli_epi32(U, X, C)  \
((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_setzero_si512 (),\
(__mmask16)(U)))
#endif
/* Logical right shift of each 32-bit element of __A by the count held in
   the low quadword of __B (variable count form).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srl_epi32 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srl_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srl_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrld512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#ifdef __OPTIMIZE__
/* Arithmetic right shift of each 32-bit element of __A by the immediate
   count __B.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_srai_epi32 (__m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi32 (__m512i __W, __mmask16 __U, __m512i __A,
unsigned int __B)
{
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_srai_epi32 (__mmask16 __U, __m512i __A, unsigned int __B)
{
return (__m512i) __builtin_ia32_psradi512_mask ((__v16si) __A, __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#else
/* -O0 fallbacks: expand directly to the builtin so the constant shift
   count reaches it without inlining.  */
#define _mm512_srai_epi32(X, C)   \
((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_undefined_epi32 (),\
(__mmask16)-1))
#define _mm512_mask_srai_epi32(W, U, X, C)  \
((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)(W),\
(__mmask16)(U)))
#define _mm512_maskz_srai_epi32(U, X, C)  \
((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
(__v16si)(__m512i)_mm512_setzero_si512 (),\
(__mmask16)(U)))
#endif
/* Arithmetic right shift of each 32-bit element of __A by the count held
   in the low quadword of __B (variable count form).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sra_epi32 (__m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sra_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sra_epi32 (__mmask16 __U, __m512i __A, __m128i __B)
{
return (__m512i) __builtin_ia32_psrad512_mask ((__v16si) __A,
(__v4si) __B,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
#ifdef __OPTIMIZE__
/* Scalar double add with explicit rounding control __R; upper element
   is taken from __A.  __R must be a compile-time constant.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_addsd_round ((__v2df) __A,
(__v2df) __B,
__R);
}
/* Scalar float add with explicit rounding control __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_addss_round ((__v4sf) __A,
(__v4sf) __B,
__R);
}
/* Scalar double subtract with explicit rounding control __R.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_subsd_round ((__v2df) __A,
(__v2df) __B,
__R);
}
/* Scalar float subtract with explicit rounding control __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_subss_round ((__v4sf) __A,
(__v4sf) __B,
__R);
}
#else
/* -O0 fallbacks.  Cast each operand like the inline functions above do
   (and like the other macro fallbacks in this file), so the macros
   accept __m128d/__m128 arguments and a constant rounding mode.  */
#define _mm_add_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_addsd_round((__v2df)(__m128d)(A), \
	(__v2df)(__m128d)(B), (int)(C))
#define _mm_add_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_addss_round((__v4sf)(__m128)(A), \
	(__v4sf)(__m128)(B), (int)(C))
#define _mm_sub_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_subsd_round((__v2df)(__m128d)(A), \
	(__v2df)(__m128d)(B), (int)(C))
#define _mm_sub_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_subss_round((__v4sf)(__m128)(A), \
	(__v4sf)(__m128)(B), (int)(C))
#endif
#ifdef __OPTIMIZE__
/* Bitwise ternary logic: each result bit is computed from the
   corresponding bits of __A, __B and __C via the 8-bit truth table
   __imm (vpternlogq).  __imm must be a compile-time constant.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi64 (__m512i __A, __m512i __B, __m512i __C,
const int __imm)
{
return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __C, __imm,
(__mmask8) -1);
}
/* Merge-masking form: 64-bit lanes whose bit in __U is clear keep the
   value from __A (__A doubles as source and destination).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi64 (__m512i __A, __mmask8 __U, __m512i __B,
__m512i __C, const int __imm)
{
return (__m512i) __builtin_ia32_pternlogq512_mask ((__v8di) __A,
(__v8di) __B,
(__v8di) __C, __imm,
(__mmask8) __U);
}
/* Zero-masking form: unselected 64-bit lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi64 (__mmask8 __U, __m512i __A, __m512i __B,
__m512i __C, const int __imm)
{
return (__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di) __A,
(__v8di) __B,
(__v8di) __C,
__imm, (__mmask8) __U);
}
/* 32-bit-lane variant of the ternary logic operation (vpternlogd).  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_ternarylogic_epi32 (__m512i __A, __m512i __B, __m512i __C,
const int __imm)
{
return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __C,
__imm, (__mmask16) -1);
}
/* Merge-masking form: unselected 32-bit lanes keep the value from __A.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_ternarylogic_epi32 (__m512i __A, __mmask16 __U, __m512i __B,
__m512i __C, const int __imm)
{
return (__m512i) __builtin_ia32_pternlogd512_mask ((__v16si) __A,
(__v16si) __B,
(__v16si) __C,
__imm, (__mmask16) __U);
}
/* Zero-masking form: unselected 32-bit lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_ternarylogic_epi32 (__mmask16 __U, __m512i __A, __m512i __B,
__m512i __C, const int __imm)
{
return (__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si) __A,
(__v16si) __B,
(__v16si) __C,
__imm, (__mmask16) __U);
}
#else
/* -O0 fallbacks: expand directly to the builtin so the constant truth
   table immediate reaches it without inlining.  */
#define _mm512_ternarylogic_epi64(A, B, C, I)			\
((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
(__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)-1))
#define _mm512_mask_ternarylogic_epi64(A, U, B, C, I)		\
((__m512i) __builtin_ia32_pternlogq512_mask ((__v8di)(__m512i)(A),	\
(__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_maskz_ternarylogic_epi64(U, A, B, C, I)		\
((__m512i) __builtin_ia32_pternlogq512_maskz ((__v8di)(__m512i)(A),	\
(__v8di)(__m512i)(B), (__v8di)(__m512i)(C), (int)(I), (__mmask8)(U)))
#define _mm512_ternarylogic_epi32(A, B, C, I)			\
((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
(__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
(__mmask16)-1))
#define _mm512_mask_ternarylogic_epi32(A, U, B, C, I)		\
((__m512i) __builtin_ia32_pternlogd512_mask ((__v16si)(__m512i)(A),	\
(__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
(__mmask16)(U)))
#define _mm512_maskz_ternarylogic_epi32(U, A, B, C, I)		\
((__m512i) __builtin_ia32_pternlogd512_maskz ((__v16si)(__m512i)(A),	\
(__v16si)(__m512i)(B), (__v16si)(__m512i)(C), (int)(I),		\
(__mmask16)(U)))
#endif
/* Approximate reciprocal (relative error <= 2^-14 per the vrcp14pd
   instruction) of each double element of __A.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U);
}
/* Approximate reciprocal of each float element of __A (vrcp14ps).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp14_ps (__m512 __A)
{
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U);
}
/* Approximate reciprocal of the low double of __B; upper element copied
   from __A.  Note the builtin takes (__B, __A) — operand order is
   deliberately swapped relative to the intrinsic signature.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
(__v2df) __A);
}
/* Approximate reciprocal of the low float of __B; upper elements copied
   from __A.  Same swapped builtin operand order as above.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rcp14_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
(__v4sf) __A);
}
/* Approximate reciprocal square root (relative error <= 2^-14 per the
   vrsqrt14pd instruction) of each double element of __A.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_pd (__m512d __A)
{
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
{
return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U);
}
/* Approximate reciprocal square root of each float element of __A
   (vrsqrt14ps).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt14_ps (__m512 __A)
{
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
{
return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U);
}
/* Approximate reciprocal square root of the low double of __B; upper
   element copied from __A (builtin operand order is swapped).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_sd (__m128d __A, __m128d __B)
{
return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
(__v2df) __A);
}
/* Approximate reciprocal square root of the low float of __B; upper
   elements copied from __A (builtin operand order is swapped).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_rsqrt14_ss (__m128 __A, __m128 __B)
{
return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
(__v4sf) __A);
}
#ifdef __OPTIMIZE__
/* Square root of each double element of __A with explicit rounding
   control __R (must be a compile-time constant).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_pd (__m512d __A, const int __R)
{
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
const int __R)
{
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
(__v8df) __W,
(__mmask8) __U, __R);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_pd (__mmask8 __U, __m512d __A, const int __R)
{
return (__m512d) __builtin_ia32_sqrtpd512_mask ((__v8df) __A,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
/* Square root of each float element of __A with rounding control __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sqrt_round_ps (__m512 __A, const int __R)
{
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sqrt_round_ps (__m512 __W, __mmask16 __U, __m512 __A, const int __R)
{
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
(__v16sf) __W,
(__mmask16) __U, __R);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sqrt_round_ps (__mmask16 __U, __m512 __A, const int __R)
{
return (__m512) __builtin_ia32_sqrtps512_mask ((__v16sf) __A,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
/* Square root of the low double of __B with rounding control __R; upper
   element copied from __A (builtin operand order is swapped).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_sqrtsd_round ((__v2df) __B,
(__v2df) __A,
__R);
}
/* Square root of the low float of __B with rounding control __R; upper
   elements copied from __A (builtin operand order is swapped).  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_sqrt_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_sqrtss_round ((__v4sf) __B,
(__v4sf) __A,
__R);
}
#else
/* -O0 fallbacks.  Two fixes relative to the original macros:
   1. _mm_sqrt_round_sd/_ss now pass (B, A) to the builtin, matching the
      inline functions above, which call __builtin_ia32_sqrtsd_round
      ((__v2df) __B, (__v2df) __A, __R) — the old (A, B) expansion gave
      a different result at -O0 than at -O.
   2. Operands are cast like every other macro fallback in this file so
      the macros accept the same argument types as the inline forms.  */
#define _mm512_sqrt_round_pd(A, C)            \
    (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
	(__v8df)_mm512_undefined_pd(), (__mmask8)-1, (int)(C))
#define _mm512_mask_sqrt_round_pd(W, U, A, C) \
    (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
	(__v8df)(__m512d)(W), (__mmask8)(U), (int)(C))
#define _mm512_maskz_sqrt_round_pd(U, A, C)   \
    (__m512d)__builtin_ia32_sqrtpd512_mask((__v8df)(__m512d)(A), \
	(__v8df)_mm512_setzero_pd(), (__mmask8)(U), (int)(C))
#define _mm512_sqrt_round_ps(A, C)            \
    (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
	(__v16sf)_mm512_undefined_ps(), (__mmask16)-1, (int)(C))
#define _mm512_mask_sqrt_round_ps(W, U, A, C) \
    (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
	(__v16sf)(__m512)(W), (__mmask16)(U), (int)(C))
#define _mm512_maskz_sqrt_round_ps(U, A, C)   \
    (__m512)__builtin_ia32_sqrtps512_mask((__v16sf)(__m512)(A), \
	(__v16sf)_mm512_setzero_ps(), (__mmask16)(U), (int)(C))
#define _mm_sqrt_round_sd(A, B, C)            \
    (__m128d)__builtin_ia32_sqrtsd_round((__v2df)(__m128d)(B), \
	(__v2df)(__m128d)(A), (int)(C))
#define _mm_sqrt_round_ss(A, B, C)            \
    (__m128)__builtin_ia32_sqrtss_round((__v4sf)(__m128)(B), \
	(__v4sf)(__m128)(A), (int)(C))
#endif
/* Sign-extend the sixteen bytes of __A to sixteen 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi8_epi32 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi8_epi32 (__mmask16 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbd512_mask ((__v16qi) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Sign-extend the low eight bytes of __A to eight 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi8_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi8_epi64 (__mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxbq512_mask ((__v16qi) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Sign-extend the sixteen 16-bit elements of __A to 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi16_epi32 (__m256i __A)
{
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
{
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi16_epi32 (__mmask16 __U, __m256i __A)
{
return (__m512i) __builtin_ia32_pmovsxwd512_mask ((__v16hi) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Sign-extend the eight 16-bit elements of __A to 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi16_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi16_epi64 (__mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovsxwq512_mask ((__v8hi) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Sign-extend the eight 32-bit elements of __X to 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepi32_epi64 (__m256i __X)
{
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepi32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
{
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepi32_epi64 (__mmask8 __U, __m256i __X)
{
return (__m512i) __builtin_ia32_pmovsxdq512_mask ((__v8si) __X,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Zero-extend the sixteen bytes of __A to sixteen 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu8_epi32 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu8_epi32 (__m512i __W, __mmask16 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu8_epi32 (__mmask16 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbd512_mask ((__v16qi) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Zero-extend the low eight bytes of __A to eight 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu8_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu8_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxbq512_mask ((__v16qi) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Zero-extend the sixteen 16-bit elements of __A to 32-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu16_epi32 (__m256i __A)
{
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
(__v16si)
_mm512_undefined_epi32 (),
(__mmask16) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu16_epi32 (__m512i __W, __mmask16 __U, __m256i __A)
{
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
(__v16si) __W,
(__mmask16) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu16_epi32 (__mmask16 __U, __m256i __A)
{
return (__m512i) __builtin_ia32_pmovzxwd512_mask ((__v16hi) __A,
(__v16si)
_mm512_setzero_si512 (),
(__mmask16) __U);
}
/* Zero-extend the eight 16-bit elements of __A to 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu16_epi64 (__m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu16_epi64 (__m512i __W, __mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu16_epi64 (__mmask8 __U, __m128i __A)
{
return (__m512i) __builtin_ia32_pmovzxwq512_mask ((__v8hi) __A,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
/* Zero-extend the eight 32-bit elements of __X to 64-bit elements.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_cvtepu32_epi64 (__m256i __X)
{
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
(__v8di)
_mm512_undefined_epi32 (),
(__mmask8) -1);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_cvtepu32_epi64 (__m512i __W, __mmask8 __U, __m256i __X)
{
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
(__v8di) __W,
(__mmask8) __U);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_cvtepu32_epi64 (__mmask8 __U, __m256i __X)
{
return (__m512i) __builtin_ia32_pmovzxdq512_mask ((__v8si) __X,
(__v8di)
_mm512_setzero_si512 (),
(__mmask8) __U);
}
#ifdef __OPTIMIZE__
/* Packed double add with explicit rounding control __R (must be a
   compile-time constant).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_pd (__m512d __A, __m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
__m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __W,
(__mmask8) __U, __R);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
const int __R)
{
return (__m512d) __builtin_ia32_addpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
/* Packed float add with explicit rounding control __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_add_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_add_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_add_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_addps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
/* Packed double subtract (__A - __B) with rounding control __R.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_pd (__m512d __A, __m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
__m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __W,
(__mmask8) __U, __R);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
const int __R)
{
return (__m512d) __builtin_ia32_subpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
/* Packed float subtract (__A - __B) with rounding control __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_sub_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_sub_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_sub_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_subps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
#else
/* -O0 fallbacks.  Operands are cast like the other macro fallbacks in
   this file so each macro accepts the same argument types as the
   corresponding inline function above; argument order is unchanged.  */
#define _mm512_add_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
	(__v8df)(__m512d)(B), (__v8df)_mm512_undefined_pd(), \
	(__mmask8)-1, (int)(C))
#define _mm512_mask_add_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
	(__v8df)(__m512d)(B), (__v8df)(__m512d)(W), (__mmask8)(U), (int)(C))
#define _mm512_maskz_add_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_addpd512_mask((__v8df)(__m512d)(A), \
	(__v8df)(__m512d)(B), (__v8df)_mm512_setzero_pd(), \
	(__mmask8)(U), (int)(C))
#define _mm512_add_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
	(__v16sf)(__m512)(B), (__v16sf)_mm512_undefined_ps(), \
	(__mmask16)-1, (int)(C))
#define _mm512_mask_add_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
	(__v16sf)(__m512)(B), (__v16sf)(__m512)(W), (__mmask16)(U), (int)(C))
#define _mm512_maskz_add_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_addps512_mask((__v16sf)(__m512)(A), \
	(__v16sf)(__m512)(B), (__v16sf)_mm512_setzero_ps(), \
	(__mmask16)(U), (int)(C))
#define _mm512_sub_round_pd(A, B, C)            \
    (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
	(__v8df)(__m512d)(B), (__v8df)_mm512_undefined_pd(), \
	(__mmask8)-1, (int)(C))
#define _mm512_mask_sub_round_pd(W, U, A, B, C) \
    (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
	(__v8df)(__m512d)(B), (__v8df)(__m512d)(W), (__mmask8)(U), (int)(C))
#define _mm512_maskz_sub_round_pd(U, A, B, C)   \
    (__m512d)__builtin_ia32_subpd512_mask((__v8df)(__m512d)(A), \
	(__v8df)(__m512d)(B), (__v8df)_mm512_setzero_pd(), \
	(__mmask8)(U), (int)(C))
#define _mm512_sub_round_ps(A, B, C)            \
    (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
	(__v16sf)(__m512)(B), (__v16sf)_mm512_undefined_ps(), \
	(__mmask16)-1, (int)(C))
#define _mm512_mask_sub_round_ps(W, U, A, B, C) \
    (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
	(__v16sf)(__m512)(B), (__v16sf)(__m512)(W), (__mmask16)(U), (int)(C))
#define _mm512_maskz_sub_round_ps(U, A, B, C)   \
    (__m512)__builtin_ia32_subps512_mask((__v16sf)(__m512)(A), \
	(__v16sf)(__m512)(B), (__v16sf)_mm512_setzero_ps(), \
	(__mmask16)(U), (int)(C))
#endif
#ifdef __OPTIMIZE__
/* Packed double multiply with explicit rounding control __R (must be a
   compile-time constant).  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_pd (__m512d __A, __m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
/* Merge-masking form: unselected lanes come from __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
__m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __W,
(__mmask8) __U, __R);
}
/* Zero-masking form: unselected lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
const int __R)
{
return (__m512d) __builtin_ia32_mulpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mul_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_mul_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_mul_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_mulps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
/* Divide packed double-precision elements __M / __V using the
   rounding-mode immediate __R; all lanes active.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_pd (__m512d __M, __m512d __V, const int __R)
{
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
(__v8df) __V,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
/* Merge-masked divide of packed doubles: lanes whose bit in __U is
   clear take the corresponding element of __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_pd (__m512d __W, __mmask8 __U, __m512d __M,
__m512d __V, const int __R)
{
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
(__v8df) __V,
(__v8df) __W,
(__mmask8) __U, __R);
}
/* Zero-masked divide of packed doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_pd (__mmask8 __U, __m512d __M, __m512d __V,
const int __R)
{
return (__m512d) __builtin_ia32_divpd512_mask ((__v8df) __M,
(__v8df) __V,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
/* Divide packed single-precision elements __A / __B with rounding
   mode __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_div_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
/* Merge-masked divide of packed floats (inactive lanes from __W).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_div_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
/* Zero-masked divide of packed floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_div_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_divps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
/* Scalar multiply of the low double of __A and __B with rounding mode
   __R; upper element handling follows the SD builtin (presumably copied
   from __A, per the usual *sd semantics — confirm against the ISA).  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_mulsd_round ((__v2df) __A,
(__v2df) __B,
__R);
}
/* Scalar multiply of the low float of __A and __B with rounding mode
   __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mul_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_mulss_round ((__v4sf) __A,
(__v4sf) __B,
__R);
}
/* Scalar divide of the low double: __A / __B with rounding mode __R.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_divsd_round ((__v2df) __A,
(__v2df) __B,
__R);
}
/* Scalar divide of the low float: __A / __B with rounding mode __R.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_div_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_divss_round ((__v4sf) __A,
(__v4sf) __B,
__R);
}
#else
/* Non-__OPTIMIZE__ macro forms of the rounding mul/div intrinsics.
   C is the rounding-mode immediate; _mask forms merge inactive lanes
   from W, _maskz forms zero them.  */
#define _mm512_mul_round_pd(A, B, C) \
(__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_mul_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_mulpd512_mask(A, B, W, U, C)
#define _mm512_maskz_mul_round_pd(U, A, B, C) \
(__m512d)__builtin_ia32_mulpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_mul_round_ps(A, B, C) \
(__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_mul_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_mulps512_mask(A, B, W, U, C)
#define _mm512_maskz_mul_round_ps(U, A, B, C) \
(__m512)__builtin_ia32_mulps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_div_round_pd(A, B, C) \
(__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_div_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_divpd512_mask(A, B, W, U, C)
#define _mm512_maskz_div_round_pd(U, A, B, C) \
(__m512d)__builtin_ia32_divpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_div_round_ps(A, B, C) \
(__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_div_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_divps512_mask(A, B, W, U, C)
#define _mm512_maskz_div_round_ps(U, A, B, C) \
(__m512)__builtin_ia32_divps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
/* Scalar (low-element) mul/div with rounding-mode immediate C.  */
#define _mm_mul_round_sd(A, B, C) \
(__m128d)__builtin_ia32_mulsd_round(A, B, C)
#define _mm_mul_round_ss(A, B, C) \
(__m128)__builtin_ia32_mulss_round(A, B, C)
#define _mm_div_round_sd(A, B, C) \
(__m128d)__builtin_ia32_divsd_round(A, B, C)
#define _mm_div_round_ss(A, B, C) \
(__m128)__builtin_ia32_divss_round(A, B, C)
#endif
#ifdef __OPTIMIZE__
/* Packed max of doubles; __R is the rounding/SAE immediate forwarded
   to the builtin; all lanes active.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_round_pd (__m512d __A, __m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
/* Merge-masked packed max of doubles: lanes whose bit in __U is clear
   take the corresponding element of __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
__m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __W,
(__mmask8) __U, __R);
}
/* Zero-masked packed max of doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
const int __R)
{
return (__m512d) __builtin_ia32_maxpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
/* Packed max of floats with rounding/SAE immediate __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_max_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
/* Merge-masked packed max of floats (inactive lanes from __W).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_max_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
/* Zero-masked packed max of floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_max_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_maxps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
/* Packed min of doubles; __R is the rounding/SAE immediate; all lanes
   active.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_round_pd (__m512d __A, __m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
/* Merge-masked packed min of doubles: lanes whose bit in __U is clear
   take the corresponding element of __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
__m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __W,
(__mmask8) __U, __R);
}
/* Zero-masked packed min of doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
const int __R)
{
return (__m512d) __builtin_ia32_minpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
/* Packed min of floats with rounding/SAE immediate __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_min_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
/* Merge-masked packed min of floats (inactive lanes from __W).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_min_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
/* Zero-masked packed min of floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_min_round_ps (__mmask16 __U, __m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_minps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
#else
/* Non-__OPTIMIZE__ macro forms of the rounding max/min intrinsics.
   R is the rounding/SAE immediate; _mask forms merge inactive lanes
   from W, _maskz forms zero them.  */
#define _mm512_max_round_pd(A, B, R) \
(__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
#define _mm512_mask_max_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_maxpd512_mask(A, B, W, U, R)
#define _mm512_maskz_max_round_pd(U, A, B, R) \
(__m512d)__builtin_ia32_maxpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
/* Bug fix: the pass-through operand of the single-precision variant
   must be _mm512_undefined_ps (), not _mm512_undefined_pd (), matching
   the __OPTIMIZE__ inline and every other _ps macro in this header.  */
#define _mm512_max_round_ps(A, B, R) \
(__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
#define _mm512_mask_max_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_maxps512_mask(A, B, W, U, R)
#define _mm512_maskz_max_round_ps(U, A, B, R) \
(__m512)__builtin_ia32_maxps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
#define _mm512_min_round_pd(A, B, R) \
(__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, R)
#define _mm512_mask_min_round_pd(W, U, A, B, R) \
(__m512d)__builtin_ia32_minpd512_mask(A, B, W, U, R)
#define _mm512_maskz_min_round_pd(U, A, B, R) \
(__m512d)__builtin_ia32_minpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, R)
#define _mm512_min_round_ps(A, B, R) \
(__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, R)
#define _mm512_mask_min_round_ps(W, U, A, B, R) \
(__m512)__builtin_ia32_minps512_mask(A, B, W, U, R)
#define _mm512_maskz_min_round_ps(U, A, B, R) \
(__m512)__builtin_ia32_minps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, R)
#endif
#ifdef __OPTIMIZE__
/* Scale packed doubles: per the VSCALEFPD builtin, each element of __A
   is scaled by a power of two derived from the corresponding element
   of __B, using rounding-mode immediate __R; all lanes active.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_round_pd (__m512d __A, __m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_undefined_pd (),
(__mmask8) -1, __R);
}
/* Merge-masked scalef of packed doubles: lanes whose bit in __U is
   clear take the corresponding element of __W.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_round_pd (__m512d __W, __mmask8 __U, __m512d __A,
__m512d __B, const int __R)
{
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __W,
(__mmask8) __U, __R);
}
/* Zero-masked scalef of packed doubles.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
const int __R)
{
return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df)
_mm512_setzero_pd (),
(__mmask8) __U, __R);
}
/* Scale packed floats by powers of two from __B, rounding mode __R.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_scalef_round_ps (__m512 __A, __m512 __B, const int __R)
{
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_undefined_ps (),
(__mmask16) -1, __R);
}
/* Merge-masked scalef of packed floats (inactive lanes from __W).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_scalef_round_ps (__m512 __W, __mmask16 __U, __m512 __A,
__m512 __B, const int __R)
{
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __W,
(__mmask16) __U, __R);
}
/* Zero-masked scalef of packed floats.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_scalef_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
const int __R)
{
return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf)
_mm512_setzero_ps (),
(__mmask16) __U, __R);
}
/* Scalar scalef of the low double of __A by the low element of __B.  */
extern __inline __m128d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_round_sd (__m128d __A, __m128d __B, const int __R)
{
return (__m128d) __builtin_ia32_scalefsd_round ((__v2df) __A,
(__v2df) __B,
__R);
}
/* Scalar scalef of the low float of __A by the low element of __B.  */
extern __inline __m128
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_scalef_round_ss (__m128 __A, __m128 __B, const int __R)
{
return (__m128) __builtin_ia32_scalefss_round ((__v4sf) __A,
(__v4sf) __B,
__R);
}
#else
/* Non-__OPTIMIZE__ macro forms of the scalef intrinsics; C is the
   rounding-mode immediate, W the merge source for _mask forms.  */
#define _mm512_scalef_round_pd(A, B, C) \
(__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
(__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
#define _mm512_maskz_scalef_round_pd(U, A, B, C) \
(__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_scalef_round_ps(A, B, C) \
(__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
(__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
#define _mm512_maskz_scalef_round_ps(U, A, B, C) \
(__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
/* Scalar scalef of the low element with rounding-mode immediate C.  */
#define _mm_scalef_round_sd(A, B, C) \
(__m128d)__builtin_ia32_scalefsd_round(A, B, C)
#define _mm_scalef_round_ss(A, B, C) \
(__m128)__builtin_ia32_scalefss_round(A, B, C)
#endif
#ifdef __OPTIMIZE__
/* Fused multiply-add of packed doubles: __A * __B + __C with rounding
   mode __R; all lanes active.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
{
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __C,
(__mmask8) -1, __R);
}
/* _mask variant: per the _mask builtin, inactive lanes keep the value
   of the first operand __A.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
__m512d __C, const int __R)
{
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __C,
(__mmask8) __U, __R);
}
/* _mask3 variant: per the _mask3 builtin, inactive lanes keep the
   value of the third operand __C.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_round_pd (__m512d __A, __m512d __B, __m512d __C,
__mmask8 __U, const int __R)
{
return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
(__v8df) __B,
(__v8df) __C,
(__mmask8) __U, __R);
}
/* _maskz variant: inactive lanes are zeroed.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
__m512d __C, const int __R)
{
return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
(__v8df) __B,
(__v8df) __C,
(__mmask8) __U, __R);
}
/* Fused multiply-add of packed floats: __A * __B + __C.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
{
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __C,
(__mmask16) -1, __R);
}
/* _mask variant: inactive lanes keep __A.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmadd_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
__m512 __C, const int __R)
{
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __C,
(__mmask16) __U, __R);
}
/* _mask3 variant: inactive lanes keep __C.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmadd_round_ps (__m512 __A, __m512 __B, __m512 __C,
__mmask16 __U, const int __R)
{
return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __C,
(__mmask16) __U, __R);
}
/* _maskz variant: inactive lanes are zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmadd_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
__m512 __C, const int __R)
{
return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __C,
(__mmask16) __U, __R);
}
/* Fused multiply-subtract of packed doubles: __A * __B - __C,
   implemented as fmadd with the addend __C negated.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
{
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
(__v8df) __B,
-(__v8df) __C,
(__mmask8) -1, __R);
}
/* _mask variant (inactive lanes keep __A); again fmadd with -__C.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
__m512d __C, const int __R)
{
return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
(__v8df) __B,
-(__v8df) __C,
(__mmask8) __U, __R);
}
/* _mask3 variant uses the dedicated vfmsub _mask3 builtin because __C
   must survive unnegated into inactive lanes.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_round_pd (__m512d __A, __m512d __B, __m512d __C,
__mmask8 __U, const int __R)
{
return (__m512d) __builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
(__v8df) __B,
(__v8df) __C,
(__mmask8) __U, __R);
}
/* _maskz variant: inactive lanes zeroed; fmadd with -__C.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_round_pd (__mmask8 __U, __m512d __A, __m512d __B,
__m512d __C, const int __R)
{
return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
(__v8df) __B,
-(__v8df) __C,
(__mmask8) __U, __R);
}
/* Fused multiply-subtract of packed floats: __A * __B - __C.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C, const int __R)
{
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
(__v16sf) __B,
-(__v16sf) __C,
(__mmask16) -1, __R);
}
/* _mask variant (inactive lanes keep __A).  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmsub_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
__m512 __C, const int __R)
{
return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
(__v16sf) __B,
-(__v16sf) __C,
(__mmask16) __U, __R);
}
/* _mask3 variant: dedicated vfmsub builtin, __C kept for inactive
   lanes.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask3_fmsub_round_ps (__m512 __A, __m512 __B, __m512 __C,
__mmask16 __U, const int __R)
{
return (__m512) __builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
(__v16sf) __B,
(__v16sf) __C,
(__mmask16) __U, __R);
}
/* _maskz variant: inactive lanes zeroed.  */
extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_fmsub_round_ps (__mmask16 __U, __m512 __A, __m512 __B,
__m512 __C, const int __R)
{
return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
(__v16sf) __B,
-(__v16sf) __C,
(__mmask16) __U, __R);
}
/* Fused multiply-alternating-add/subtract of packed doubles
   (vfmaddsub builtin) with rounding mode __R; all lanes active.  */
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_fmaddsub_round_pd (__m512d __A, __m512d __B, __m512d __C, const int __R)
{
return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
(__v8df) __B,
(__v8df) __C,
(__mmask8) -1, __R);
}
extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_fmaddsub_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
__m512d __C, const int __R)
{