| /* ARM NEON intrinsics include file. |
| |
| Copyright (C) 2006-2020 Free Software Foundation, Inc. |
| Contributed by CodeSourcery. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published |
| by the Free Software Foundation; either version 3, or (at your |
| option) any later version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
| License for more details. |
| |
| Under Section 7 of GPL version 3, you are granted additional |
| permissions described in the GCC Runtime Library Exception, version |
| 3.1, as published by the Free Software Foundation. |
| |
| You should have received a copy of the GNU General Public License and |
| a copy of the GCC Runtime Library Exception along with this program; |
| see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #ifndef _GCC_ARM_NEON_H |
| #define _GCC_ARM_NEON_H 1 |
| |
| #ifndef __ARM_FP |
| #error "NEON intrinsics not available with the soft-float ABI. Please use -mfloat-abi=softfp or -mfloat-abi=hard" |
| #else |
| |
| #pragma GCC push_options |
| #pragma GCC target ("fpu=neon") |
| |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| #include <arm_fp16.h> |
| #include <arm_bf16.h> |
| #include <stdint.h> |
| |
/* For big-endian, GCC's vector indices are reversed within each 64
   bits compared to the architectural lane indices used by Neon
   intrinsics.

   __ARM_NUM_LANES (__v) is the element count of vector __v.
   __arm_lane (__vec, __idx) and __arm_laneq (__vec, __idx) translate the
   lane index __idx for use with __vec; on little-endian they yield
   __idx unchanged.  Every macro parameter is parenthesized in the
   expansion so that expression arguments (e.g. "__i + 1") keep their
   intended grouping wherever the macro is used.  */
#ifdef __ARM_BIG_ENDIAN
#define __ARM_NUM_LANES(__v) (sizeof (__v) / sizeof ((__v)[0]))
#define __arm_lane(__vec, __idx) ((__idx) ^ (__ARM_NUM_LANES(__vec) - 1))
#define __arm_laneq(__vec, __idx) ((__idx) ^ (__ARM_NUM_LANES(__vec)/2 - 1))
#else
#define __arm_lane(__vec, __idx) (__idx)
#define __arm_laneq(__vec, __idx) (__idx)
#endif
| |
| typedef __simd64_int8_t int8x8_t; |
| typedef __simd64_int16_t int16x4_t; |
| typedef __simd64_int32_t int32x2_t; |
| typedef __builtin_neon_di int64x1_t; |
| #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) |
| typedef __fp16 float16_t; |
| typedef __simd64_float16_t float16x4_t; |
| #endif |
| typedef __simd64_float32_t float32x2_t; |
| typedef __simd64_poly8_t poly8x8_t; |
| typedef __simd64_poly16_t poly16x4_t; |
| #pragma GCC push_options |
| #pragma GCC target ("fpu=crypto-neon-fp-armv8") |
| typedef __builtin_neon_poly64 poly64x1_t; |
| #pragma GCC pop_options |
| typedef __simd64_uint8_t uint8x8_t; |
| typedef __simd64_uint16_t uint16x4_t; |
| typedef __simd64_uint32_t uint32x2_t; |
| typedef __builtin_neon_udi uint64x1_t; |
| |
| typedef __simd128_int8_t int8x16_t; |
| typedef __simd128_int16_t int16x8_t; |
| typedef __simd128_int32_t int32x4_t; |
| typedef __simd128_int64_t int64x2_t; |
| #if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE) |
| typedef __simd128_float16_t float16x8_t; |
| #endif |
| typedef __simd128_float32_t float32x4_t; |
| typedef __simd128_poly8_t poly8x16_t; |
| typedef __simd128_poly16_t poly16x8_t; |
| #pragma GCC push_options |
| #pragma GCC target ("fpu=crypto-neon-fp-armv8") |
| typedef __builtin_neon_poly64 poly64x2_t __attribute__ ((__vector_size__ (16))); |
| #pragma GCC pop_options |
| |
| typedef __simd128_uint8_t uint8x16_t; |
| typedef __simd128_uint16_t uint16x8_t; |
| typedef __simd128_uint32_t uint32x4_t; |
| typedef __simd128_uint64_t uint64x2_t; |
| |
| typedef float float32_t; |
| |
| typedef __simd128_bfloat16_t bfloat16x8_t; |
| typedef __simd64_bfloat16_t bfloat16x4_t; |
| |
| /* The Poly types are user visible and live in their own world, |
| keep them that way. */ |
| typedef __builtin_neon_poly8 poly8_t; |
| typedef __builtin_neon_poly16 poly16_t; |
| #pragma GCC push_options |
| #pragma GCC target ("fpu=crypto-neon-fp-armv8") |
| typedef __builtin_neon_poly64 poly64_t; |
| typedef __builtin_neon_poly128 poly128_t; |
| #pragma GCC pop_options |
| |
| typedef struct int8x8x2_t |
| { |
| int8x8_t val[2]; |
| } int8x8x2_t; |
| |
| typedef struct int8x16x2_t |
| { |
| int8x16_t val[2]; |
| } int8x16x2_t; |
| |
| typedef struct int16x4x2_t |
| { |
| int16x4_t val[2]; |
| } int16x4x2_t; |
| |
| typedef struct int16x8x2_t |
| { |
| int16x8_t val[2]; |
| } int16x8x2_t; |
| |
| typedef struct int32x2x2_t |
| { |
| int32x2_t val[2]; |
| } int32x2x2_t; |
| |
| typedef struct int32x4x2_t |
| { |
| int32x4_t val[2]; |
| } int32x4x2_t; |
| |
| typedef struct int64x1x2_t |
| { |
| int64x1_t val[2]; |
| } int64x1x2_t; |
| |
| typedef struct int64x2x2_t |
| { |
| int64x2_t val[2]; |
| } int64x2x2_t; |
| |
| typedef struct uint8x8x2_t |
| { |
| uint8x8_t val[2]; |
| } uint8x8x2_t; |
| |
| typedef struct uint8x16x2_t |
| { |
| uint8x16_t val[2]; |
| } uint8x16x2_t; |
| |
| typedef struct uint16x4x2_t |
| { |
| uint16x4_t val[2]; |
| } uint16x4x2_t; |
| |
| typedef struct uint16x8x2_t |
| { |
| uint16x8_t val[2]; |
| } uint16x8x2_t; |
| |
| typedef struct uint32x2x2_t |
| { |
| uint32x2_t val[2]; |
| } uint32x2x2_t; |
| |
| typedef struct uint32x4x2_t |
| { |
| uint32x4_t val[2]; |
| } uint32x4x2_t; |
| |
| typedef struct uint64x1x2_t |
| { |
| uint64x1_t val[2]; |
| } uint64x1x2_t; |
| |
| typedef struct uint64x2x2_t |
| { |
| uint64x2_t val[2]; |
| } uint64x2x2_t; |
| |
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Two float16x4_t values, held in val[0] and val[1].  */
struct float16x4x2_t
{
  float16x4_t val[2];
};
typedef struct float16x4x2_t float16x4x2_t;
#endif
| |
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Two float16x8_t values, held in val[0] and val[1].  */
struct float16x8x2_t
{
  float16x8_t val[2];
};
typedef struct float16x8x2_t float16x8x2_t;
#endif
| |
| typedef struct float32x2x2_t |
| { |
| float32x2_t val[2]; |
| } float32x2x2_t; |
| |
| typedef struct float32x4x2_t |
| { |
| float32x4_t val[2]; |
| } float32x4x2_t; |
| |
| typedef struct poly8x8x2_t |
| { |
| poly8x8_t val[2]; |
| } poly8x8x2_t; |
| |
| typedef struct poly8x16x2_t |
| { |
| poly8x16_t val[2]; |
| } poly8x16x2_t; |
| |
| typedef struct poly16x4x2_t |
| { |
| poly16x4_t val[2]; |
| } poly16x4x2_t; |
| |
| typedef struct poly16x8x2_t |
| { |
| poly16x8_t val[2]; |
| } poly16x8x2_t; |
| |
| #pragma GCC push_options |
| #pragma GCC target ("fpu=crypto-neon-fp-armv8") |
| typedef struct poly64x1x2_t |
| { |
| poly64x1_t val[2]; |
| } poly64x1x2_t; |
| |
| |
| typedef struct poly64x2x2_t |
| { |
| poly64x2_t val[2]; |
| } poly64x2x2_t; |
| #pragma GCC pop_options |
| |
| |
| typedef struct int8x8x3_t |
| { |
| int8x8_t val[3]; |
| } int8x8x3_t; |
| |
| typedef struct int8x16x3_t |
| { |
| int8x16_t val[3]; |
| } int8x16x3_t; |
| |
| typedef struct int16x4x3_t |
| { |
| int16x4_t val[3]; |
| } int16x4x3_t; |
| |
| typedef struct int16x8x3_t |
| { |
| int16x8_t val[3]; |
| } int16x8x3_t; |
| |
| typedef struct int32x2x3_t |
| { |
| int32x2_t val[3]; |
| } int32x2x3_t; |
| |
| typedef struct int32x4x3_t |
| { |
| int32x4_t val[3]; |
| } int32x4x3_t; |
| |
| typedef struct int64x1x3_t |
| { |
| int64x1_t val[3]; |
| } int64x1x3_t; |
| |
| typedef struct int64x2x3_t |
| { |
| int64x2_t val[3]; |
| } int64x2x3_t; |
| |
| typedef struct uint8x8x3_t |
| { |
| uint8x8_t val[3]; |
| } uint8x8x3_t; |
| |
| typedef struct uint8x16x3_t |
| { |
| uint8x16_t val[3]; |
| } uint8x16x3_t; |
| |
| typedef struct uint16x4x3_t |
| { |
| uint16x4_t val[3]; |
| } uint16x4x3_t; |
| |
| typedef struct uint16x8x3_t |
| { |
| uint16x8_t val[3]; |
| } uint16x8x3_t; |
| |
| typedef struct uint32x2x3_t |
| { |
| uint32x2_t val[3]; |
| } uint32x2x3_t; |
| |
| typedef struct uint32x4x3_t |
| { |
| uint32x4_t val[3]; |
| } uint32x4x3_t; |
| |
| typedef struct uint64x1x3_t |
| { |
| uint64x1_t val[3]; |
| } uint64x1x3_t; |
| |
| typedef struct uint64x2x3_t |
| { |
| uint64x2_t val[3]; |
| } uint64x2x3_t; |
| |
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Three float16x4_t values, held in val[0] through val[2].  */
struct float16x4x3_t
{
  float16x4_t val[3];
};
typedef struct float16x4x3_t float16x4x3_t;
#endif
| |
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Three float16x8_t values, held in val[0] through val[2].  */
struct float16x8x3_t
{
  float16x8_t val[3];
};
typedef struct float16x8x3_t float16x8x3_t;
#endif
| |
| typedef struct float32x2x3_t |
| { |
| float32x2_t val[3]; |
| } float32x2x3_t; |
| |
| typedef struct float32x4x3_t |
| { |
| float32x4_t val[3]; |
| } float32x4x3_t; |
| |
| typedef struct poly8x8x3_t |
| { |
| poly8x8_t val[3]; |
| } poly8x8x3_t; |
| |
| typedef struct poly8x16x3_t |
| { |
| poly8x16_t val[3]; |
| } poly8x16x3_t; |
| |
| typedef struct poly16x4x3_t |
| { |
| poly16x4_t val[3]; |
| } poly16x4x3_t; |
| |
| typedef struct poly16x8x3_t |
| { |
| poly16x8_t val[3]; |
| } poly16x8x3_t; |
| |
| #pragma GCC push_options |
| #pragma GCC target ("fpu=crypto-neon-fp-armv8") |
| typedef struct poly64x1x3_t |
| { |
| poly64x1_t val[3]; |
| } poly64x1x3_t; |
| |
| |
| typedef struct poly64x2x3_t |
| { |
| poly64x2_t val[3]; |
| } poly64x2x3_t; |
| #pragma GCC pop_options |
| |
| |
| typedef struct int8x8x4_t |
| { |
| int8x8_t val[4]; |
| } int8x8x4_t; |
| |
| typedef struct int8x16x4_t |
| { |
| int8x16_t val[4]; |
| } int8x16x4_t; |
| |
| typedef struct int16x4x4_t |
| { |
| int16x4_t val[4]; |
| } int16x4x4_t; |
| |
| typedef struct int16x8x4_t |
| { |
| int16x8_t val[4]; |
| } int16x8x4_t; |
| |
| typedef struct int32x2x4_t |
| { |
| int32x2_t val[4]; |
| } int32x2x4_t; |
| |
| typedef struct int32x4x4_t |
| { |
| int32x4_t val[4]; |
| } int32x4x4_t; |
| |
| typedef struct int64x1x4_t |
| { |
| int64x1_t val[4]; |
| } int64x1x4_t; |
| |
| typedef struct int64x2x4_t |
| { |
| int64x2_t val[4]; |
| } int64x2x4_t; |
| |
| typedef struct uint8x8x4_t |
| { |
| uint8x8_t val[4]; |
| } uint8x8x4_t; |
| |
| typedef struct uint8x16x4_t |
| { |
| uint8x16_t val[4]; |
| } uint8x16x4_t; |
| |
| typedef struct uint16x4x4_t |
| { |
| uint16x4_t val[4]; |
| } uint16x4x4_t; |
| |
| typedef struct uint16x8x4_t |
| { |
| uint16x8_t val[4]; |
| } uint16x8x4_t; |
| |
| typedef struct uint32x2x4_t |
| { |
| uint32x2_t val[4]; |
| } uint32x2x4_t; |
| |
| typedef struct uint32x4x4_t |
| { |
| uint32x4_t val[4]; |
| } uint32x4x4_t; |
| |
| typedef struct uint64x1x4_t |
| { |
| uint64x1_t val[4]; |
| } uint64x1x4_t; |
| |
| typedef struct uint64x2x4_t |
| { |
| uint64x2_t val[4]; |
| } uint64x2x4_t; |
| |
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Four float16x4_t values, held in val[0] through val[3].  */
struct float16x4x4_t
{
  float16x4_t val[4];
};
typedef struct float16x4x4_t float16x4x4_t;
#endif
| |
#if defined (__ARM_FP16_FORMAT_IEEE) || defined (__ARM_FP16_FORMAT_ALTERNATIVE)
/* Four float16x8_t values, held in val[0] through val[3].  */
struct float16x8x4_t
{
  float16x8_t val[4];
};
typedef struct float16x8x4_t float16x8x4_t;
#endif
| |
| typedef struct float32x2x4_t |
| { |
| float32x2_t val[4]; |
| } float32x2x4_t; |
| |
| typedef struct float32x4x4_t |
| { |
| float32x4_t val[4]; |
| } float32x4x4_t; |
| |
| typedef struct poly8x8x4_t |
| { |
| poly8x8_t val[4]; |
| } poly8x8x4_t; |
| |
| typedef struct poly8x16x4_t |
| { |
| poly8x16_t val[4]; |
| } poly8x16x4_t; |
| |
| typedef struct poly16x4x4_t |
| { |
| poly16x4_t val[4]; |
| } poly16x4x4_t; |
| |
| typedef struct poly16x8x4_t |
| { |
| poly16x8_t val[4]; |
| } poly16x8x4_t; |
| |
| #pragma GCC push_options |
| #pragma GCC target ("fpu=crypto-neon-fp-armv8") |
| typedef struct poly64x1x4_t |
| { |
| poly64x1_t val[4]; |
| } poly64x1x4_t; |
| |
| |
| typedef struct poly64x2x4_t |
| { |
| poly64x2_t val[4]; |
| } poly64x2x4_t; |
| #pragma GCC pop_options |
| |
| /* vadd */ |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vadd_s8 (int8x8_t __a, int8x8_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vadd_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vadd_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vadd_f32 (float32x2_t __a, float32x2_t __b) |
| { |
| #ifdef __FAST_MATH__ |
| return __a + __b; |
| #else |
| return (float32x2_t) __builtin_neon_vaddv2sf (__a, __b); |
| #endif |
| } |
| |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vadd_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vadd_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vadd_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline int64x1_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vadd_s64 (int64x1_t __a, int64x1_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline uint64x1_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vadd_u64 (uint64x1_t __a, uint64x1_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddq_s64 (int64x2_t __a, int64x2_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
| #ifdef __FAST_MATH__ |
| return __a + __b; |
| #else |
| return (float32x4_t) __builtin_neon_vaddv4sf (__a, __b); |
| #endif |
| } |
| |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddq_u64 (uint64x2_t __a, uint64x2_t __b) |
| { |
| return __a + __b; |
| } |
| |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddl_s8 (int8x8_t __a, int8x8_t __b) |
| { |
| return (int16x8_t)__builtin_neon_vaddlsv8qi (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddl_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| return (int32x4_t)__builtin_neon_vaddlsv4hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddl_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| return (int64x2_t)__builtin_neon_vaddlsv2si (__a, __b); |
| } |
| |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddl_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
| return (uint16x8_t)__builtin_neon_vaddluv8qi ((int8x8_t) __a, (int8x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddl_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
| return (uint32x4_t)__builtin_neon_vaddluv4hi ((int16x4_t) __a, (int16x4_t) __b); |
| } |
| |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddl_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
| return (uint64x2_t)__builtin_neon_vaddluv2si ((int32x2_t) __a, (int32x2_t) __b); |
| } |
| |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddw_s8 (int16x8_t __a, int8x8_t __b) |
| { |
| return (int16x8_t)__builtin_neon_vaddwsv8qi (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddw_s16 (int32x4_t __a, int16x4_t __b) |
| { |
| return (int32x4_t)__builtin_neon_vaddwsv4hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddw_s32 (int64x2_t __a, int32x2_t __b) |
| { |
| return (int64x2_t)__builtin_neon_vaddwsv2si (__a, __b); |
| } |
| |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddw_u8 (uint16x8_t __a, uint8x8_t __b) |
| { |
| return (uint16x8_t)__builtin_neon_vaddwuv8qi ((int16x8_t) __a, (int8x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddw_u16 (uint32x4_t __a, uint16x4_t __b) |
| { |
| return (uint32x4_t)__builtin_neon_vaddwuv4hi ((int32x4_t) __a, (int16x4_t) __b); |
| } |
| |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddw_u32 (uint64x2_t __a, uint32x2_t __b) |
| { |
| return (uint64x2_t)__builtin_neon_vaddwuv2si ((int64x2_t) __a, (int32x2_t) __b); |
| } |
| |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhadd_s8 (int8x8_t __a, int8x8_t __b) |
| { |
| return (int8x8_t)__builtin_neon_vhaddsv8qi (__a, __b); |
| } |
| |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhadd_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| return (int16x4_t)__builtin_neon_vhaddsv4hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhadd_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| return (int32x2_t)__builtin_neon_vhaddsv2si (__a, __b); |
| } |
| |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhadd_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
| return (uint8x8_t)__builtin_neon_vhadduv8qi ((int8x8_t) __a, (int8x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhadd_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
| return (uint16x4_t)__builtin_neon_vhadduv4hi ((int16x4_t) __a, (int16x4_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhadd_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
| return (uint32x2_t)__builtin_neon_vhadduv2si ((int32x2_t) __a, (int32x2_t) __b); |
| } |
| |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhaddq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
| return (int8x16_t)__builtin_neon_vhaddsv16qi (__a, __b); |
| } |
| |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhaddq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| return (int16x8_t)__builtin_neon_vhaddsv8hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhaddq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| return (int32x4_t)__builtin_neon_vhaddsv4si (__a, __b); |
| } |
| |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhaddq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
| return (uint8x16_t)__builtin_neon_vhadduv16qi ((int8x16_t) __a, (int8x16_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhaddq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
| return (uint16x8_t)__builtin_neon_vhadduv8hi ((int16x8_t) __a, (int16x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhaddq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
| return (uint32x4_t)__builtin_neon_vhadduv4si ((int32x4_t) __a, (int32x4_t) __b); |
| } |
| |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhadd_s8 (int8x8_t __a, int8x8_t __b) |
| { |
| return (int8x8_t)__builtin_neon_vrhaddsv8qi (__a, __b); |
| } |
| |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhadd_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| return (int16x4_t)__builtin_neon_vrhaddsv4hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhadd_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| return (int32x2_t)__builtin_neon_vrhaddsv2si (__a, __b); |
| } |
| |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhadd_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
| return (uint8x8_t)__builtin_neon_vrhadduv8qi ((int8x8_t) __a, (int8x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhadd_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
| return (uint16x4_t)__builtin_neon_vrhadduv4hi ((int16x4_t) __a, (int16x4_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhadd_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
| return (uint32x2_t)__builtin_neon_vrhadduv2si ((int32x2_t) __a, (int32x2_t) __b); |
| } |
| |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhaddq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
| return (int8x16_t)__builtin_neon_vrhaddsv16qi (__a, __b); |
| } |
| |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhaddq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| return (int16x8_t)__builtin_neon_vrhaddsv8hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhaddq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| return (int32x4_t)__builtin_neon_vrhaddsv4si (__a, __b); |
| } |
| |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhaddq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
| return (uint8x16_t)__builtin_neon_vrhadduv16qi ((int8x16_t) __a, (int8x16_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhaddq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
| return (uint16x8_t)__builtin_neon_vrhadduv8hi ((int16x8_t) __a, (int16x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrhaddq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
| return (uint32x4_t)__builtin_neon_vrhadduv4si ((int32x4_t) __a, (int32x4_t) __b); |
| } |
| |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqadd_s8 (int8x8_t __a, int8x8_t __b) |
| { |
| return (int8x8_t)__builtin_neon_vqaddsv8qi (__a, __b); |
| } |
| |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqadd_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| return (int16x4_t)__builtin_neon_vqaddsv4hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqadd_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| return (int32x2_t)__builtin_neon_vqaddsv2si (__a, __b); |
| } |
| |
| __extension__ extern __inline int64x1_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqadd_s64 (int64x1_t __a, int64x1_t __b) |
| { |
| return (int64x1_t)__builtin_neon_vqaddsdi (__a, __b); |
| } |
| |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqadd_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
| return (uint8x8_t)__builtin_neon_vqadduv8qi ((int8x8_t) __a, (int8x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqadd_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
| return (uint16x4_t)__builtin_neon_vqadduv4hi ((int16x4_t) __a, (int16x4_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqadd_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
| return (uint32x2_t)__builtin_neon_vqadduv2si ((int32x2_t) __a, (int32x2_t) __b); |
| } |
| |
| __extension__ extern __inline uint64x1_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqadd_u64 (uint64x1_t __a, uint64x1_t __b) |
| { |
| return (uint64x1_t)__builtin_neon_vqaddudi ((int64x1_t) __a, (int64x1_t) __b); |
| } |
| |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqaddq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
| return (int8x16_t)__builtin_neon_vqaddsv16qi (__a, __b); |
| } |
| |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqaddq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| return (int16x8_t)__builtin_neon_vqaddsv8hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqaddq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| return (int32x4_t)__builtin_neon_vqaddsv4si (__a, __b); |
| } |
| |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqaddq_s64 (int64x2_t __a, int64x2_t __b) |
| { |
| return (int64x2_t)__builtin_neon_vqaddsv2di (__a, __b); |
| } |
| |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqaddq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
| return (uint8x16_t)__builtin_neon_vqadduv16qi ((int8x16_t) __a, (int8x16_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqaddq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
| return (uint16x8_t)__builtin_neon_vqadduv8hi ((int16x8_t) __a, (int16x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqaddq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
| return (uint32x4_t)__builtin_neon_vqadduv4si ((int32x4_t) __a, (int32x4_t) __b); |
| } |
| |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqaddq_u64 (uint64x2_t __a, uint64x2_t __b) |
| { |
| return (uint64x2_t)__builtin_neon_vqadduv2di ((int64x2_t) __a, (int64x2_t) __b); |
| } |
| |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddhn_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| return (int8x8_t)__builtin_neon_vaddhnv8hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddhn_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| return (int16x4_t)__builtin_neon_vaddhnv4si (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddhn_s64 (int64x2_t __a, int64x2_t __b) |
| { |
| return (int32x2_t)__builtin_neon_vaddhnv2di (__a, __b); |
| } |
| |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddhn_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
| return (uint8x8_t)__builtin_neon_vaddhnv8hi ((int16x8_t) __a, (int16x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddhn_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
| return (uint16x4_t)__builtin_neon_vaddhnv4si ((int32x4_t) __a, (int32x4_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vaddhn_u64 (uint64x2_t __a, uint64x2_t __b) |
| { |
| return (uint32x2_t)__builtin_neon_vaddhnv2di ((int64x2_t) __a, (int64x2_t) __b); |
| } |
| |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vraddhn_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| return (int8x8_t)__builtin_neon_vraddhnv8hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vraddhn_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| return (int16x4_t)__builtin_neon_vraddhnv4si (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vraddhn_s64 (int64x2_t __a, int64x2_t __b) |
| { |
| return (int32x2_t)__builtin_neon_vraddhnv2di (__a, __b); |
| } |
| |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vraddhn_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
| return (uint8x8_t)__builtin_neon_vraddhnv8hi ((int16x8_t) __a, (int16x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vraddhn_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
| return (uint16x4_t)__builtin_neon_vraddhnv4si ((int32x4_t) __a, (int32x4_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vraddhn_u64 (uint64x2_t __a, uint64x2_t __b) |
| { |
| return (uint32x2_t)__builtin_neon_vraddhnv2di ((int64x2_t) __a, (int64x2_t) __b); |
| } |
| |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmul_s8 (int8x8_t __a, int8x8_t __b) |
| { |
| return __a * __b; |
| } |
| |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmul_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| return __a * __b; |
| } |
| |
| /* Product of __a and __b computed with the vector `*' operator.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmul_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| int32x2_t __res = __a * __b; |
| return __res; |
| } |
| |
| /* Uses __builtin_neon_vmulfv2sf unless __FAST_MATH__ is defined, in which case the plain `*' operator is used.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmul_f32 (float32x2_t __a, float32x2_t __b) |
| { |
| #ifndef __FAST_MATH__ |
| return (float32x2_t) __builtin_neon_vmulfv2sf (__a, __b); |
| #else |
| return __a * __b; |
| #endif |
| } |
| |
| /* Product of __a and __b computed with the vector `*' operator.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmul_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
| uint8x8_t __res = __a * __b; |
| return __res; |
| } |
| |
| /* Product of __a and __b computed with the vector `*' operator.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmul_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
| uint16x4_t __res = __a * __b; |
| return __res; |
| } |
| |
| /* Product of __a and __b computed with the vector `*' operator.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmul_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
| uint32x2_t __res = __a * __b; |
| return __res; |
| } |
| |
| /* Product of __a and __b computed with the vector `*' operator.  */ |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmulq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
| int8x16_t __res = __a * __b; |
| return __res; |
| } |
| |
| /* Product of __a and __b computed with the vector `*' operator.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmulq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| int16x8_t __res = __a * __b; |
| return __res; |
| } |
| |
| /* Product of __a and __b computed with the vector `*' operator.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmulq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| int32x4_t __res = __a * __b; |
| return __res; |
| } |
| |
| /* Uses __builtin_neon_vmulfv4sf unless __FAST_MATH__ is defined, in which case the plain `*' operator is used.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmulq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
| #ifndef __FAST_MATH__ |
| return (float32x4_t) __builtin_neon_vmulfv4sf (__a, __b); |
| #else |
| return __a * __b; |
| #endif |
| } |
| |
| /* Product of __a and __b computed with the vector `*' operator.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmulq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
| uint8x16_t __res = __a * __b; |
| return __res; |
| } |
| |
| /* Product of __a and __b computed with the vector `*' operator.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmulq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
| uint16x8_t __res = __a * __b; |
| return __res; |
| } |
| |
| /* Product of __a and __b computed with the vector `*' operator.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmulq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
| uint32x4_t __res = __a * __b; |
| return __res; |
| } |
| |
| /* Operands are cast to int8x8_t for __builtin_neon_vmulpv8qi; the result is cast to poly8x8_t.  */ |
| __extension__ extern __inline poly8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmul_p8 (poly8x8_t __a, poly8x8_t __b) |
| { |
| poly8x8_t __res = (poly8x8_t) __builtin_neon_vmulpv8qi ((int8x8_t) __a, (int8x8_t) __b); |
| return __res; |
| } |
| |
| /* Operands are cast to int8x16_t for __builtin_neon_vmulpv16qi; the result is cast to poly8x16_t.  */ |
| __extension__ extern __inline poly8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmulq_p8 (poly8x16_t __a, poly8x16_t __b) |
| { |
| poly8x16_t __res = (poly8x16_t) __builtin_neon_vmulpv16qi ((int8x16_t) __a, (int8x16_t) __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqdmulhv4hi (__a, __b), typed as int16x4_t.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqdmulh_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| int16x4_t __res = (int16x4_t) __builtin_neon_vqdmulhv4hi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqdmulhv2si (__a, __b), typed as int32x2_t.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqdmulh_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| int32x2_t __res = (int32x2_t) __builtin_neon_vqdmulhv2si (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqdmulhv8hi (__a, __b), typed as int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqdmulhq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vqdmulhv8hi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqdmulhv4si (__a, __b), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqdmulhq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vqdmulhv4si (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmulhv4hi (__a, __b), typed as int16x4_t.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmulh_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| int16x4_t __res = (int16x4_t) __builtin_neon_vqrdmulhv4hi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmulhv2si (__a, __b), typed as int32x2_t.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmulh_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| int32x2_t __res = (int32x2_t) __builtin_neon_vqrdmulhv2si (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmulhv8hi (__a, __b), typed as int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmulhq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vqrdmulhv8hi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmulhv4si (__a, __b), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmulhq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vqrdmulhv4si (__a, __b); |
| return __res; |
| } |
| |
| #ifdef __ARM_FEATURE_QRDMX |
| /* Result of __builtin_neon_vqrdmlahv4hi (__a, __b, __c); only compiled when __ARM_FEATURE_QRDMX is defined.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmlah_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) |
| { |
| int16x4_t __res = (int16x4_t) __builtin_neon_vqrdmlahv4hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmlahv2si (__a, __b, __c); only compiled when __ARM_FEATURE_QRDMX is defined.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmlah_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) |
| { |
| int32x2_t __res = (int32x2_t) __builtin_neon_vqrdmlahv2si (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmlahv8hi (__a, __b, __c); only compiled when __ARM_FEATURE_QRDMX is defined.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmlahq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vqrdmlahv8hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmlahv4si (__a, __b, __c); only compiled when __ARM_FEATURE_QRDMX is defined.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmlahq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vqrdmlahv4si (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmlshv4hi (__a, __b, __c); only compiled when __ARM_FEATURE_QRDMX is defined.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmlsh_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) |
| { |
| int16x4_t __res = (int16x4_t) __builtin_neon_vqrdmlshv4hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmlshv2si (__a, __b, __c); only compiled when __ARM_FEATURE_QRDMX is defined.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmlsh_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) |
| { |
| int32x2_t __res = (int32x2_t) __builtin_neon_vqrdmlshv2si (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmlshv8hi (__a, __b, __c); only compiled when __ARM_FEATURE_QRDMX is defined.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmlshq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vqrdmlshv8hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqrdmlshv4si (__a, __b, __c); only compiled when __ARM_FEATURE_QRDMX is defined.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqrdmlshq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vqrdmlshv4si (__a, __b, __c); |
| return __res; |
| } |
| #endif |
| |
| /* Result of __builtin_neon_vmullsv8qi (__a, __b), typed as int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmull_s8 (int8x8_t __a, int8x8_t __b) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vmullsv8qi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmullsv4hi (__a, __b), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmull_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vmullsv4hi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmullsv2si (__a, __b), typed as int64x2_t.  */ |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmull_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| int64x2_t __res = (int64x2_t) __builtin_neon_vmullsv2si (__a, __b); |
| return __res; |
| } |
| |
| /* Operands are cast to int8x8_t for __builtin_neon_vmulluv8qi; the result is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmull_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
| uint16x8_t __res = (uint16x8_t) __builtin_neon_vmulluv8qi ((int8x8_t) __a, (int8x8_t) __b); |
| return __res; |
| } |
| |
| /* Operands are cast to int16x4_t for __builtin_neon_vmulluv4hi; the result is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmull_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
| uint32x4_t __res = (uint32x4_t) __builtin_neon_vmulluv4hi ((int16x4_t) __a, (int16x4_t) __b); |
| return __res; |
| } |
| |
| /* Operands are cast to int32x2_t for __builtin_neon_vmulluv2si; the result is cast to uint64x2_t.  */ |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmull_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
| uint64x2_t __res = (uint64x2_t) __builtin_neon_vmulluv2si ((int32x2_t) __a, (int32x2_t) __b); |
| return __res; |
| } |
| |
| /* Operands are cast to int8x8_t for __builtin_neon_vmullpv8qi; the result is cast to poly16x8_t.  */ |
| __extension__ extern __inline poly16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmull_p8 (poly8x8_t __a, poly8x8_t __b) |
| { |
| poly16x8_t __res = (poly16x8_t) __builtin_neon_vmullpv8qi ((int8x8_t) __a, (int8x8_t) __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqdmullv4hi (__a, __b), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqdmull_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vqdmullv4hi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqdmullv2si (__a, __b), typed as int64x2_t.  */ |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqdmull_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| int64x2_t __res = (int64x2_t) __builtin_neon_vqdmullv2si (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlav8qi (__a, __b, __c), typed as int8x8_t.  */ |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmla_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) |
| { |
| int8x8_t __res = (int8x8_t) __builtin_neon_vmlav8qi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlav4hi (__a, __b, __c), typed as int16x4_t.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmla_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) |
| { |
| int16x4_t __res = (int16x4_t) __builtin_neon_vmlav4hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlav2si (__a, __b, __c), typed as int32x2_t.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmla_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) |
| { |
| int32x2_t __res = (int32x2_t) __builtin_neon_vmlav2si (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlav2sf (__a, __b, __c), typed as float32x2_t.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmla_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) |
| { |
| float32x2_t __res = (float32x2_t) __builtin_neon_vmlav2sf (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int8x8_t for __builtin_neon_vmlav8qi; the result is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmla_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) |
| { |
| uint8x8_t __res = (uint8x8_t) __builtin_neon_vmlav8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int16x4_t for __builtin_neon_vmlav4hi; the result is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmla_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) |
| { |
| uint16x4_t __res = (uint16x4_t) __builtin_neon_vmlav4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int32x2_t for __builtin_neon_vmlav2si; the result is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmla_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) |
| { |
| uint32x2_t __res = (uint32x2_t) __builtin_neon_vmlav2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlav16qi (__a, __b, __c), typed as int8x16_t.  */ |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlaq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) |
| { |
| int8x16_t __res = (int8x16_t) __builtin_neon_vmlav16qi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlav8hi (__a, __b, __c), typed as int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlaq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vmlav8hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlav4si (__a, __b, __c), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlaq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vmlav4si (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlav4sf (__a, __b, __c), typed as float32x4_t.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) |
| { |
| float32x4_t __res = (float32x4_t) __builtin_neon_vmlav4sf (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int8x16_t for __builtin_neon_vmlav16qi; the result is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlaq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) |
| { |
| uint8x16_t __res = (uint8x16_t) __builtin_neon_vmlav16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int16x8_t for __builtin_neon_vmlav8hi; the result is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlaq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) |
| { |
| uint16x8_t __res = (uint16x8_t) __builtin_neon_vmlav8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int32x4_t for __builtin_neon_vmlav4si; the result is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlaq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) |
| { |
| uint32x4_t __res = (uint32x4_t) __builtin_neon_vmlav4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlalsv8qi (__a, __b, __c), typed as int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlal_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vmlalsv8qi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlalsv4hi (__a, __b, __c), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vmlalsv4hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlalsv2si (__a, __b, __c), typed as int64x2_t.  */ |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) |
| { |
| int64x2_t __res = (int64x2_t) __builtin_neon_vmlalsv2si (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Operands are cast to signed vector types for __builtin_neon_vmlaluv8qi; the result is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlal_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) |
| { |
| uint16x8_t __res = (uint16x8_t) __builtin_neon_vmlaluv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to signed vector types for __builtin_neon_vmlaluv4hi; the result is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlal_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) |
| { |
| uint32x4_t __res = (uint32x4_t) __builtin_neon_vmlaluv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to signed vector types for __builtin_neon_vmlaluv2si; the result is cast to uint64x2_t.  */ |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlal_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) |
| { |
| uint64x2_t __res = (uint64x2_t) __builtin_neon_vmlaluv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqdmlalv4hi (__a, __b, __c), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqdmlal_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vqdmlalv4hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqdmlalv2si (__a, __b, __c), typed as int64x2_t.  */ |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqdmlal_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) |
| { |
| int64x2_t __res = (int64x2_t) __builtin_neon_vqdmlalv2si (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlsv8qi (__a, __b, __c), typed as int8x8_t.  */ |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmls_s8 (int8x8_t __a, int8x8_t __b, int8x8_t __c) |
| { |
| int8x8_t __res = (int8x8_t) __builtin_neon_vmlsv8qi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlsv4hi (__a, __b, __c), typed as int16x4_t.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmls_s16 (int16x4_t __a, int16x4_t __b, int16x4_t __c) |
| { |
| int16x4_t __res = (int16x4_t) __builtin_neon_vmlsv4hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlsv2si (__a, __b, __c), typed as int32x2_t.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmls_s32 (int32x2_t __a, int32x2_t __b, int32x2_t __c) |
| { |
| int32x2_t __res = (int32x2_t) __builtin_neon_vmlsv2si (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlsv2sf (__a, __b, __c), typed as float32x2_t.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmls_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) |
| { |
| float32x2_t __res = (float32x2_t) __builtin_neon_vmlsv2sf (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int8x8_t for __builtin_neon_vmlsv8qi; the result is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmls_u8 (uint8x8_t __a, uint8x8_t __b, uint8x8_t __c) |
| { |
| uint8x8_t __res = (uint8x8_t) __builtin_neon_vmlsv8qi ((int8x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int16x4_t for __builtin_neon_vmlsv4hi; the result is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmls_u16 (uint16x4_t __a, uint16x4_t __b, uint16x4_t __c) |
| { |
| uint16x4_t __res = (uint16x4_t) __builtin_neon_vmlsv4hi ((int16x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int32x2_t for __builtin_neon_vmlsv2si; the result is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmls_u32 (uint32x2_t __a, uint32x2_t __b, uint32x2_t __c) |
| { |
| uint32x2_t __res = (uint32x2_t) __builtin_neon_vmlsv2si ((int32x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlsv16qi (__a, __b, __c), typed as int8x16_t.  */ |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsq_s8 (int8x16_t __a, int8x16_t __b, int8x16_t __c) |
| { |
| int8x16_t __res = (int8x16_t) __builtin_neon_vmlsv16qi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlsv8hi (__a, __b, __c), typed as int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsq_s16 (int16x8_t __a, int16x8_t __b, int16x8_t __c) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vmlsv8hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlsv4si (__a, __b, __c), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsq_s32 (int32x4_t __a, int32x4_t __b, int32x4_t __c) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vmlsv4si (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlsv4sf (__a, __b, __c), typed as float32x4_t.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) |
| { |
| float32x4_t __res = (float32x4_t) __builtin_neon_vmlsv4sf (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int8x16_t for __builtin_neon_vmlsv16qi; the result is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsq_u8 (uint8x16_t __a, uint8x16_t __b, uint8x16_t __c) |
| { |
| uint8x16_t __res = (uint8x16_t) __builtin_neon_vmlsv16qi ((int8x16_t) __a, (int8x16_t) __b, (int8x16_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int16x8_t for __builtin_neon_vmlsv8hi; the result is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsq_u16 (uint16x8_t __a, uint16x8_t __b, uint16x8_t __c) |
| { |
| uint16x8_t __res = (uint16x8_t) __builtin_neon_vmlsv8hi ((int16x8_t) __a, (int16x8_t) __b, (int16x8_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to int32x4_t for __builtin_neon_vmlsv4si; the result is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsq_u32 (uint32x4_t __a, uint32x4_t __b, uint32x4_t __c) |
| { |
| uint32x4_t __res = (uint32x4_t) __builtin_neon_vmlsv4si ((int32x4_t) __a, (int32x4_t) __b, (int32x4_t) __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlslsv8qi (__a, __b, __c), typed as int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsl_s8 (int16x8_t __a, int8x8_t __b, int8x8_t __c) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vmlslsv8qi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlslsv4hi (__a, __b, __c), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vmlslsv4hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vmlslsv2si (__a, __b, __c), typed as int64x2_t.  */ |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) |
| { |
| int64x2_t __res = (int64x2_t) __builtin_neon_vmlslsv2si (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Operands are cast to signed vector types for __builtin_neon_vmlsluv8qi; the result is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsl_u8 (uint16x8_t __a, uint8x8_t __b, uint8x8_t __c) |
| { |
| uint16x8_t __res = (uint16x8_t) __builtin_neon_vmlsluv8qi ((int16x8_t) __a, (int8x8_t) __b, (int8x8_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to signed vector types for __builtin_neon_vmlsluv4hi; the result is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsl_u16 (uint32x4_t __a, uint16x4_t __b, uint16x4_t __c) |
| { |
| uint32x4_t __res = (uint32x4_t) __builtin_neon_vmlsluv4hi ((int32x4_t) __a, (int16x4_t) __b, (int16x4_t) __c); |
| return __res; |
| } |
| |
| /* Operands are cast to signed vector types for __builtin_neon_vmlsluv2si; the result is cast to uint64x2_t.  */ |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vmlsl_u32 (uint64x2_t __a, uint32x2_t __b, uint32x2_t __c) |
| { |
| uint64x2_t __res = (uint64x2_t) __builtin_neon_vmlsluv2si ((int64x2_t) __a, (int32x2_t) __b, (int32x2_t) __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqdmlslv4hi (__a, __b, __c), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqdmlsl_s16 (int32x4_t __a, int16x4_t __b, int16x4_t __c) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vqdmlslv4hi (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vqdmlslv2si (__a, __b, __c), typed as int64x2_t.  */ |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqdmlsl_s32 (int64x2_t __a, int32x2_t __b, int32x2_t __c) |
| { |
| int64x2_t __res = (int64x2_t) __builtin_neon_vqdmlslv2si (__a, __b, __c); |
| return __res; |
| } |
| |
| #pragma GCC push_options |
| #pragma GCC target ("fpu=neon-vfpv4") |
| /* Result of __builtin_neon_vfmav2sf (__a, __b, __c); defined under the enclosing fpu=neon-vfpv4 target pragma.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vfma_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) |
| { |
| float32x2_t __res = (float32x2_t) __builtin_neon_vfmav2sf (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vfmav4sf (__a, __b, __c); defined under the enclosing fpu=neon-vfpv4 target pragma.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vfmaq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) |
| { |
| float32x4_t __res = (float32x4_t) __builtin_neon_vfmav4sf (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vfmsv2sf (__a, __b, __c); defined under the enclosing fpu=neon-vfpv4 target pragma.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vfms_f32 (float32x2_t __a, float32x2_t __b, float32x2_t __c) |
| { |
| float32x2_t __res = (float32x2_t) __builtin_neon_vfmsv2sf (__a, __b, __c); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vfmsv4sf (__a, __b, __c); defined under the enclosing fpu=neon-vfpv4 target pragma.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vfmsq_f32 (float32x4_t __a, float32x4_t __b, float32x4_t __c) |
| { |
| float32x4_t __res = (float32x4_t) __builtin_neon_vfmsv4sf (__a, __b, __c); |
| return __res; |
| } |
| #pragma GCC pop_options |
| |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintnv2sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrndn_f32 (float32x2_t __a) |
| { |
| float32x2_t __res = (float32x2_t) __builtin_neon_vrintnv2sf (__a); |
| return __res; |
| } |
| |
| #endif |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintnv4sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrndnq_f32 (float32x4_t __a) |
| { |
| float32x4_t __res = (float32x4_t) __builtin_neon_vrintnv4sf (__a); |
| return __res; |
| } |
| |
| #endif |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintav2sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrnda_f32 (float32x2_t __a) |
| { |
| float32x2_t __res = (float32x2_t) __builtin_neon_vrintav2sf (__a); |
| return __res; |
| } |
| |
| #endif |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintav4sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrndaq_f32 (float32x4_t __a) |
| { |
| float32x4_t __res = (float32x4_t) __builtin_neon_vrintav4sf (__a); |
| return __res; |
| } |
| |
| #endif |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintpv2sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrndp_f32 (float32x2_t __a) |
| { |
| float32x2_t __res = (float32x2_t) __builtin_neon_vrintpv2sf (__a); |
| return __res; |
| } |
| |
| #endif |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintpv4sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrndpq_f32 (float32x4_t __a) |
| { |
| float32x4_t __res = (float32x4_t) __builtin_neon_vrintpv4sf (__a); |
| return __res; |
| } |
| |
| #endif |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintmv2sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrndm_f32 (float32x2_t __a) |
| { |
| float32x2_t __res = (float32x2_t) __builtin_neon_vrintmv2sf (__a); |
| return __res; |
| } |
| |
| #endif |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintmv4sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrndmq_f32 (float32x4_t __a) |
| { |
| float32x4_t __res = (float32x4_t) __builtin_neon_vrintmv4sf (__a); |
| return __res; |
| } |
| |
| #endif |
| |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintxv2sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrndx_f32 (float32x2_t __a) |
| { |
| float32x2_t __res = (float32x2_t) __builtin_neon_vrintxv2sf (__a); |
| return __res; |
| } |
| |
| #endif |
| |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintxv4sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrndxq_f32 (float32x4_t __a) |
| { |
| float32x4_t __res = (float32x4_t) __builtin_neon_vrintxv4sf (__a); |
| return __res; |
| } |
| |
| #endif |
| |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintzv2sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrnd_f32 (float32x2_t __a) |
| { |
| float32x2_t __res = (float32x2_t) __builtin_neon_vrintzv2sf (__a); |
| return __res; |
| } |
| |
| #endif |
| #if __ARM_ARCH >= 8 |
| /* Result of __builtin_neon_vrintzv4sf (__a); only compiled when __ARM_ARCH >= 8.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrndq_f32 (float32x4_t __a) |
| { |
| float32x4_t __res = (float32x4_t) __builtin_neon_vrintzv4sf (__a); |
| return __res; |
| } |
| |
| #endif |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsub_s8 (int8x8_t __a, int8x8_t __b) |
| { |
| int8x8_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsub_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| int16x4_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsub_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| int32x2_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Uses __builtin_neon_vsubv2sf unless __FAST_MATH__ is defined, in which case the plain `-' operator is used.  */ |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsub_f32 (float32x2_t __a, float32x2_t __b) |
| { |
| #ifndef __FAST_MATH__ |
| return (float32x2_t) __builtin_neon_vsubv2sf (__a, __b); |
| #else |
| return __a - __b; |
| #endif |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsub_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
| uint8x8_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsub_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
| uint16x4_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsub_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
| uint32x2_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the `-' operator.  */ |
| __extension__ extern __inline int64x1_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsub_s64 (int64x1_t __a, int64x1_t __b) |
| { |
| int64x1_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the `-' operator.  */ |
| __extension__ extern __inline uint64x1_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsub_u64 (uint64x1_t __a, uint64x1_t __b) |
| { |
| uint64x1_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
| int8x16_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| int16x8_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| int32x4_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubq_s64 (int64x2_t __a, int64x2_t __b) |
| { |
| int64x2_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Uses __builtin_neon_vsubv4sf unless __FAST_MATH__ is defined, in which case the plain `-' operator is used.  */ |
| __extension__ extern __inline float32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
| #ifndef __FAST_MATH__ |
| return (float32x4_t) __builtin_neon_vsubv4sf (__a, __b); |
| #else |
| return __a - __b; |
| #endif |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
| uint8x16_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
| uint16x8_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
| uint32x4_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Difference __a - __b computed with the vector `-' operator.  */ |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubq_u64 (uint64x2_t __a, uint64x2_t __b) |
| { |
| uint64x2_t __res = __a - __b; |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vsublsv8qi (__a, __b), typed as int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubl_s8 (int8x8_t __a, int8x8_t __b) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vsublsv8qi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vsublsv4hi (__a, __b), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubl_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vsublsv4hi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vsublsv2si (__a, __b), typed as int64x2_t.  */ |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubl_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| int64x2_t __res = (int64x2_t) __builtin_neon_vsublsv2si (__a, __b); |
| return __res; |
| } |
| |
| /* Operands are cast to int8x8_t for __builtin_neon_vsubluv8qi; the result is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubl_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
| uint16x8_t __res = (uint16x8_t) __builtin_neon_vsubluv8qi ((int8x8_t) __a, (int8x8_t) __b); |
| return __res; |
| } |
| |
| /* Operands are cast to int16x4_t for __builtin_neon_vsubluv4hi; the result is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubl_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
| uint32x4_t __res = (uint32x4_t) __builtin_neon_vsubluv4hi ((int16x4_t) __a, (int16x4_t) __b); |
| return __res; |
| } |
| |
| /* Operands are cast to int32x2_t for __builtin_neon_vsubluv2si; the result is cast to uint64x2_t.  */ |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubl_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
| uint64x2_t __res = (uint64x2_t) __builtin_neon_vsubluv2si ((int32x2_t) __a, (int32x2_t) __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vsubwsv8qi (__a, __b), typed as int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubw_s8 (int16x8_t __a, int8x8_t __b) |
| { |
| int16x8_t __res = (int16x8_t) __builtin_neon_vsubwsv8qi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vsubwsv4hi (__a, __b), typed as int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubw_s16 (int32x4_t __a, int16x4_t __b) |
| { |
| int32x4_t __res = (int32x4_t) __builtin_neon_vsubwsv4hi (__a, __b); |
| return __res; |
| } |
| |
| /* Result of __builtin_neon_vsubwsv2si (__a, __b), typed as int64x2_t.  */ |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubw_s32 (int64x2_t __a, int32x2_t __b) |
| { |
| int64x2_t __res = (int64x2_t) __builtin_neon_vsubwsv2si (__a, __b); |
| return __res; |
| } |
| |
| /* Operands are cast to signed vector types for __builtin_neon_vsubwuv8qi; the result is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubw_u8 (uint16x8_t __a, uint8x8_t __b) |
| { |
| uint16x8_t __res = (uint16x8_t) __builtin_neon_vsubwuv8qi ((int16x8_t) __a, (int8x8_t) __b); |
| return __res; |
| } |
| |
| /* Operands are cast to signed vector types for __builtin_neon_vsubwuv4hi; the result is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubw_u16 (uint32x4_t __a, uint16x4_t __b) |
| { |
| uint32x4_t __res = (uint32x4_t) __builtin_neon_vsubwuv4hi ((int32x4_t) __a, (int16x4_t) __b); |
| return __res; |
| } |
| |
| /* vsubw_u32: __a is cast to int64x2_t and __b to int32x2_t for the call |
|    to __builtin_neon_vsubwuv2si; its value is cast to uint64x2_t.  */ |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubw_u32 (uint64x2_t __a, uint32x2_t __b) |
| { |
|   int64x2_t __sa = (int64x2_t) __a; |
|   int32x2_t __sb = (int32x2_t) __b; |
|   return (uint64x2_t) __builtin_neon_vsubwuv2si (__sa, __sb); |
| } |
| |
| /* vhsub_s8: hands __a and __b unchanged to __builtin_neon_vhsubsv8qi; |
|    the value it yields is cast to int8x8_t.  */ |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsub_s8 (int8x8_t __a, int8x8_t __b) |
| { |
|   int8x8_t __res = (int8x8_t) __builtin_neon_vhsubsv8qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vhsub_s16: hands __a and __b unchanged to __builtin_neon_vhsubsv4hi; |
|    the value it yields is cast to int16x4_t.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsub_s16 (int16x4_t __a, int16x4_t __b) |
| { |
|   int16x4_t __res = (int16x4_t) __builtin_neon_vhsubsv4hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vhsub_s32: hands __a and __b unchanged to __builtin_neon_vhsubsv2si; |
|    the value it yields is cast to int32x2_t.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsub_s32 (int32x2_t __a, int32x2_t __b) |
| { |
|   int32x2_t __res = (int32x2_t) __builtin_neon_vhsubsv2si (__a, __b); |
|   return __res; |
| } |
| |
| /* vhsub_u8: both operands are cast to int8x8_t for the call to |
|    __builtin_neon_vhsubuv8qi; its value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsub_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
|   int8x8_t __sa = (int8x8_t) __a; |
|   int8x8_t __sb = (int8x8_t) __b; |
|   return (uint8x8_t) __builtin_neon_vhsubuv8qi (__sa, __sb); |
| } |
| |
| /* vhsub_u16: both operands are cast to int16x4_t for the call to |
|    __builtin_neon_vhsubuv4hi; its value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsub_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
|   int16x4_t __sa = (int16x4_t) __a; |
|   int16x4_t __sb = (int16x4_t) __b; |
|   return (uint16x4_t) __builtin_neon_vhsubuv4hi (__sa, __sb); |
| } |
| |
| /* vhsub_u32: both operands are cast to int32x2_t for the call to |
|    __builtin_neon_vhsubuv2si; its value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsub_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
|   int32x2_t __sa = (int32x2_t) __a; |
|   int32x2_t __sb = (int32x2_t) __b; |
|   return (uint32x2_t) __builtin_neon_vhsubuv2si (__sa, __sb); |
| } |
| |
| /* vhsubq_s8: hands __a and __b unchanged to __builtin_neon_vhsubsv16qi; |
|    the value it yields is cast to int8x16_t.  */ |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsubq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
|   int8x16_t __res = (int8x16_t) __builtin_neon_vhsubsv16qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vhsubq_s16: hands __a and __b unchanged to __builtin_neon_vhsubsv8hi; |
|    the value it yields is cast to int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsubq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
|   int16x8_t __res = (int16x8_t) __builtin_neon_vhsubsv8hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vhsubq_s32: hands __a and __b unchanged to __builtin_neon_vhsubsv4si; |
|    the value it yields is cast to int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsubq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
|   int32x4_t __res = (int32x4_t) __builtin_neon_vhsubsv4si (__a, __b); |
|   return __res; |
| } |
| |
| /* vhsubq_u8: both operands are cast to int8x16_t for the call to |
|    __builtin_neon_vhsubuv16qi; its value is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsubq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
|   int8x16_t __sa = (int8x16_t) __a; |
|   int8x16_t __sb = (int8x16_t) __b; |
|   return (uint8x16_t) __builtin_neon_vhsubuv16qi (__sa, __sb); |
| } |
| |
| /* vhsubq_u16: both operands are cast to int16x8_t for the call to |
|    __builtin_neon_vhsubuv8hi; its value is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsubq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
|   int16x8_t __sa = (int16x8_t) __a; |
|   int16x8_t __sb = (int16x8_t) __b; |
|   return (uint16x8_t) __builtin_neon_vhsubuv8hi (__sa, __sb); |
| } |
| |
| /* vhsubq_u32: both operands are cast to int32x4_t for the call to |
|    __builtin_neon_vhsubuv4si; its value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vhsubq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
|   int32x4_t __sa = (int32x4_t) __a; |
|   int32x4_t __sb = (int32x4_t) __b; |
|   return (uint32x4_t) __builtin_neon_vhsubuv4si (__sa, __sb); |
| } |
| |
| /* vqsub_s8: hands __a and __b unchanged to __builtin_neon_vqsubsv8qi; |
|    the value it yields is cast to int8x8_t.  */ |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsub_s8 (int8x8_t __a, int8x8_t __b) |
| { |
|   int8x8_t __res = (int8x8_t) __builtin_neon_vqsubsv8qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vqsub_s16: hands __a and __b unchanged to __builtin_neon_vqsubsv4hi; |
|    the value it yields is cast to int16x4_t.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsub_s16 (int16x4_t __a, int16x4_t __b) |
| { |
|   int16x4_t __res = (int16x4_t) __builtin_neon_vqsubsv4hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vqsub_s32: hands __a and __b unchanged to __builtin_neon_vqsubsv2si; |
|    the value it yields is cast to int32x2_t.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsub_s32 (int32x2_t __a, int32x2_t __b) |
| { |
|   int32x2_t __res = (int32x2_t) __builtin_neon_vqsubsv2si (__a, __b); |
|   return __res; |
| } |
| |
| /* vqsub_s64: hands __a and __b unchanged to __builtin_neon_vqsubsdi; |
|    the value it yields is cast to int64x1_t.  */ |
| __extension__ extern __inline int64x1_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsub_s64 (int64x1_t __a, int64x1_t __b) |
| { |
|   int64x1_t __res = (int64x1_t) __builtin_neon_vqsubsdi (__a, __b); |
|   return __res; |
| } |
| |
| /* vqsub_u8: both operands are cast to int8x8_t for the call to |
|    __builtin_neon_vqsubuv8qi; its value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsub_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
|   int8x8_t __sa = (int8x8_t) __a; |
|   int8x8_t __sb = (int8x8_t) __b; |
|   return (uint8x8_t) __builtin_neon_vqsubuv8qi (__sa, __sb); |
| } |
| |
| /* vqsub_u16: both operands are cast to int16x4_t for the call to |
|    __builtin_neon_vqsubuv4hi; its value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsub_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
|   int16x4_t __sa = (int16x4_t) __a; |
|   int16x4_t __sb = (int16x4_t) __b; |
|   return (uint16x4_t) __builtin_neon_vqsubuv4hi (__sa, __sb); |
| } |
| |
| /* vqsub_u32: both operands are cast to int32x2_t for the call to |
|    __builtin_neon_vqsubuv2si; its value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsub_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
|   int32x2_t __sa = (int32x2_t) __a; |
|   int32x2_t __sb = (int32x2_t) __b; |
|   return (uint32x2_t) __builtin_neon_vqsubuv2si (__sa, __sb); |
| } |
| |
| /* vqsub_u64: both operands are cast to int64x1_t for the call to |
|    __builtin_neon_vqsubudi; its value is cast to uint64x1_t.  */ |
| __extension__ extern __inline uint64x1_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsub_u64 (uint64x1_t __a, uint64x1_t __b) |
| { |
|   int64x1_t __sa = (int64x1_t) __a; |
|   int64x1_t __sb = (int64x1_t) __b; |
|   return (uint64x1_t) __builtin_neon_vqsubudi (__sa, __sb); |
| } |
| |
| /* vqsubq_s8: hands __a and __b unchanged to __builtin_neon_vqsubsv16qi; |
|    the value it yields is cast to int8x16_t.  */ |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsubq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
|   int8x16_t __res = (int8x16_t) __builtin_neon_vqsubsv16qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vqsubq_s16: hands __a and __b unchanged to __builtin_neon_vqsubsv8hi; |
|    the value it yields is cast to int16x8_t.  */ |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsubq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
|   int16x8_t __res = (int16x8_t) __builtin_neon_vqsubsv8hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vqsubq_s32: hands __a and __b unchanged to __builtin_neon_vqsubsv4si; |
|    the value it yields is cast to int32x4_t.  */ |
| __extension__ extern __inline int32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsubq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
|   int32x4_t __res = (int32x4_t) __builtin_neon_vqsubsv4si (__a, __b); |
|   return __res; |
| } |
| |
| /* vqsubq_s64: hands __a and __b unchanged to __builtin_neon_vqsubsv2di; |
|    the value it yields is cast to int64x2_t.  */ |
| __extension__ extern __inline int64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsubq_s64 (int64x2_t __a, int64x2_t __b) |
| { |
|   int64x2_t __res = (int64x2_t) __builtin_neon_vqsubsv2di (__a, __b); |
|   return __res; |
| } |
| |
| /* vqsubq_u8: both operands are cast to int8x16_t for the call to |
|    __builtin_neon_vqsubuv16qi; its value is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsubq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
|   int8x16_t __sa = (int8x16_t) __a; |
|   int8x16_t __sb = (int8x16_t) __b; |
|   return (uint8x16_t) __builtin_neon_vqsubuv16qi (__sa, __sb); |
| } |
| |
| /* vqsubq_u16: both operands are cast to int16x8_t for the call to |
|    __builtin_neon_vqsubuv8hi; its value is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsubq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
|   int16x8_t __sa = (int16x8_t) __a; |
|   int16x8_t __sb = (int16x8_t) __b; |
|   return (uint16x8_t) __builtin_neon_vqsubuv8hi (__sa, __sb); |
| } |
| |
| /* vqsubq_u32: both operands are cast to int32x4_t for the call to |
|    __builtin_neon_vqsubuv4si; its value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsubq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
|   int32x4_t __sa = (int32x4_t) __a; |
|   int32x4_t __sb = (int32x4_t) __b; |
|   return (uint32x4_t) __builtin_neon_vqsubuv4si (__sa, __sb); |
| } |
| |
| /* vqsubq_u64: both operands are cast to int64x2_t for the call to |
|    __builtin_neon_vqsubuv2di; its value is cast to uint64x2_t.  */ |
| __extension__ extern __inline uint64x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vqsubq_u64 (uint64x2_t __a, uint64x2_t __b) |
| { |
|   int64x2_t __sa = (int64x2_t) __a; |
|   int64x2_t __sb = (int64x2_t) __b; |
|   return (uint64x2_t) __builtin_neon_vqsubuv2di (__sa, __sb); |
| } |
| |
| /* vsubhn_s16: hands __a and __b unchanged to __builtin_neon_vsubhnv8hi; |
|    the value it yields is cast to int8x8_t.  */ |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubhn_s16 (int16x8_t __a, int16x8_t __b) |
| { |
|   int8x8_t __res = (int8x8_t) __builtin_neon_vsubhnv8hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vsubhn_s32: hands __a and __b unchanged to __builtin_neon_vsubhnv4si; |
|    the value it yields is cast to int16x4_t.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubhn_s32 (int32x4_t __a, int32x4_t __b) |
| { |
|   int16x4_t __res = (int16x4_t) __builtin_neon_vsubhnv4si (__a, __b); |
|   return __res; |
| } |
| |
| /* vsubhn_s64: hands __a and __b unchanged to __builtin_neon_vsubhnv2di; |
|    the value it yields is cast to int32x2_t.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubhn_s64 (int64x2_t __a, int64x2_t __b) |
| { |
|   int32x2_t __res = (int32x2_t) __builtin_neon_vsubhnv2di (__a, __b); |
|   return __res; |
| } |
| |
| /* vsubhn_u16: both operands are cast to int16x8_t for the call to |
|    __builtin_neon_vsubhnv8hi; its value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubhn_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
|   int16x8_t __sa = (int16x8_t) __a; |
|   int16x8_t __sb = (int16x8_t) __b; |
|   return (uint8x8_t) __builtin_neon_vsubhnv8hi (__sa, __sb); |
| } |
| |
| /* vsubhn_u32: both operands are cast to int32x4_t for the call to |
|    __builtin_neon_vsubhnv4si; its value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubhn_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
|   int32x4_t __sa = (int32x4_t) __a; |
|   int32x4_t __sb = (int32x4_t) __b; |
|   return (uint16x4_t) __builtin_neon_vsubhnv4si (__sa, __sb); |
| } |
| |
| /* vsubhn_u64: both operands are cast to int64x2_t for the call to |
|    __builtin_neon_vsubhnv2di; its value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vsubhn_u64 (uint64x2_t __a, uint64x2_t __b) |
| { |
|   int64x2_t __sa = (int64x2_t) __a; |
|   int64x2_t __sb = (int64x2_t) __b; |
|   return (uint32x2_t) __builtin_neon_vsubhnv2di (__sa, __sb); |
| } |
| |
| /* vrsubhn_s16: hands __a and __b unchanged to __builtin_neon_vrsubhnv8hi; |
|    the value it yields is cast to int8x8_t.  */ |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrsubhn_s16 (int16x8_t __a, int16x8_t __b) |
| { |
|   int8x8_t __res = (int8x8_t) __builtin_neon_vrsubhnv8hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vrsubhn_s32: hands __a and __b unchanged to __builtin_neon_vrsubhnv4si; |
|    the value it yields is cast to int16x4_t.  */ |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrsubhn_s32 (int32x4_t __a, int32x4_t __b) |
| { |
|   int16x4_t __res = (int16x4_t) __builtin_neon_vrsubhnv4si (__a, __b); |
|   return __res; |
| } |
| |
| /* vrsubhn_s64: hands __a and __b unchanged to __builtin_neon_vrsubhnv2di; |
|    the value it yields is cast to int32x2_t.  */ |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrsubhn_s64 (int64x2_t __a, int64x2_t __b) |
| { |
|   int32x2_t __res = (int32x2_t) __builtin_neon_vrsubhnv2di (__a, __b); |
|   return __res; |
| } |
| |
| /* vrsubhn_u16: both operands are cast to int16x8_t for the call to |
|    __builtin_neon_vrsubhnv8hi; its value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrsubhn_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
|   int16x8_t __sa = (int16x8_t) __a; |
|   int16x8_t __sb = (int16x8_t) __b; |
|   return (uint8x8_t) __builtin_neon_vrsubhnv8hi (__sa, __sb); |
| } |
| |
| /* vrsubhn_u32: both operands are cast to int32x4_t for the call to |
|    __builtin_neon_vrsubhnv4si; its value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrsubhn_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
|   int32x4_t __sa = (int32x4_t) __a; |
|   int32x4_t __sb = (int32x4_t) __b; |
|   return (uint16x4_t) __builtin_neon_vrsubhnv4si (__sa, __sb); |
| } |
| |
| /* vrsubhn_u64: both operands are cast to int64x2_t for the call to |
|    __builtin_neon_vrsubhnv2di; its value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vrsubhn_u64 (uint64x2_t __a, uint64x2_t __b) |
| { |
|   int64x2_t __sa = (int64x2_t) __a; |
|   int64x2_t __sb = (int64x2_t) __b; |
|   return (uint32x2_t) __builtin_neon_vrsubhnv2di (__sa, __sb); |
| } |
| |
| /* vceq_s8: hands __a and __b unchanged to __builtin_neon_vceqv8qi; |
|    the value it yields is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceq_s8 (int8x8_t __a, int8x8_t __b) |
| { |
|   uint8x8_t __res = (uint8x8_t) __builtin_neon_vceqv8qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vceq_s16: hands __a and __b unchanged to __builtin_neon_vceqv4hi; |
|    the value it yields is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceq_s16 (int16x4_t __a, int16x4_t __b) |
| { |
|   uint16x4_t __res = (uint16x4_t) __builtin_neon_vceqv4hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vceq_s32: hands __a and __b unchanged to __builtin_neon_vceqv2si; |
|    the value it yields is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceq_s32 (int32x2_t __a, int32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vceqv2si (__a, __b); |
|   return __res; |
| } |
| |
| /* vceq_f32: hands __a and __b unchanged to __builtin_neon_vceqv2sf; |
|    the value it yields is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceq_f32 (float32x2_t __a, float32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vceqv2sf (__a, __b); |
|   return __res; |
| } |
| |
| /* vceq_u8: both operands are cast to int8x8_t for the call to |
|    __builtin_neon_vceqv8qi; its value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceq_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
|   int8x8_t __sa = (int8x8_t) __a; |
|   int8x8_t __sb = (int8x8_t) __b; |
|   return (uint8x8_t) __builtin_neon_vceqv8qi (__sa, __sb); |
| } |
| |
| /* vceq_u16: both operands are cast to int16x4_t for the call to |
|    __builtin_neon_vceqv4hi; its value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceq_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
|   int16x4_t __sa = (int16x4_t) __a; |
|   int16x4_t __sb = (int16x4_t) __b; |
|   return (uint16x4_t) __builtin_neon_vceqv4hi (__sa, __sb); |
| } |
| |
| /* vceq_u32: both operands are cast to int32x2_t for the call to |
|    __builtin_neon_vceqv2si; its value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceq_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
|   int32x2_t __sa = (int32x2_t) __a; |
|   int32x2_t __sb = (int32x2_t) __b; |
|   return (uint32x2_t) __builtin_neon_vceqv2si (__sa, __sb); |
| } |
| |
| /* vceq_p8: both poly8x8_t operands are cast to int8x8_t for the call to |
|    __builtin_neon_vceqv8qi; its value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceq_p8 (poly8x8_t __a, poly8x8_t __b) |
| { |
|   int8x8_t __sa = (int8x8_t) __a; |
|   int8x8_t __sb = (int8x8_t) __b; |
|   return (uint8x8_t) __builtin_neon_vceqv8qi (__sa, __sb); |
| } |
| |
| /* vceqq_s8: hands __a and __b unchanged to __builtin_neon_vceqv16qi; |
|    the value it yields is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceqq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
|   uint8x16_t __res = (uint8x16_t) __builtin_neon_vceqv16qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vceqq_s16: hands __a and __b unchanged to __builtin_neon_vceqv8hi; |
|    the value it yields is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceqq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
|   uint16x8_t __res = (uint16x8_t) __builtin_neon_vceqv8hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vceqq_s32: hands __a and __b unchanged to __builtin_neon_vceqv4si; |
|    the value it yields is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceqq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vceqv4si (__a, __b); |
|   return __res; |
| } |
| |
| /* vceqq_f32: hands __a and __b unchanged to __builtin_neon_vceqv4sf; |
|    the value it yields is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceqq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vceqv4sf (__a, __b); |
|   return __res; |
| } |
| |
| /* vceqq_u8: both operands are cast to int8x16_t for the call to |
|    __builtin_neon_vceqv16qi; its value is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceqq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
|   int8x16_t __sa = (int8x16_t) __a; |
|   int8x16_t __sb = (int8x16_t) __b; |
|   return (uint8x16_t) __builtin_neon_vceqv16qi (__sa, __sb); |
| } |
| |
| /* vceqq_u16: both operands are cast to int16x8_t for the call to |
|    __builtin_neon_vceqv8hi; its value is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceqq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
|   int16x8_t __sa = (int16x8_t) __a; |
|   int16x8_t __sb = (int16x8_t) __b; |
|   return (uint16x8_t) __builtin_neon_vceqv8hi (__sa, __sb); |
| } |
| |
| /* vceqq_u32: both operands are cast to int32x4_t for the call to |
|    __builtin_neon_vceqv4si; its value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceqq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
|   int32x4_t __sa = (int32x4_t) __a; |
|   int32x4_t __sb = (int32x4_t) __b; |
|   return (uint32x4_t) __builtin_neon_vceqv4si (__sa, __sb); |
| } |
| |
| /* vceqq_p8: both poly8x16_t operands are cast to int8x16_t for the call |
|    to __builtin_neon_vceqv16qi; its value is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vceqq_p8 (poly8x16_t __a, poly8x16_t __b) |
| { |
|   int8x16_t __sa = (int8x16_t) __a; |
|   int8x16_t __sb = (int8x16_t) __b; |
|   return (uint8x16_t) __builtin_neon_vceqv16qi (__sa, __sb); |
| } |
| |
| /* vcge_s8: hands __a and __b unchanged to __builtin_neon_vcgev8qi; |
|    the value it yields is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcge_s8 (int8x8_t __a, int8x8_t __b) |
| { |
|   uint8x8_t __res = (uint8x8_t) __builtin_neon_vcgev8qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vcge_s16: hands __a and __b unchanged to __builtin_neon_vcgev4hi; |
|    the value it yields is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcge_s16 (int16x4_t __a, int16x4_t __b) |
| { |
|   uint16x4_t __res = (uint16x4_t) __builtin_neon_vcgev4hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vcge_s32: hands __a and __b unchanged to __builtin_neon_vcgev2si; |
|    the value it yields is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcge_s32 (int32x2_t __a, int32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcgev2si (__a, __b); |
|   return __res; |
| } |
| |
| /* vcge_f32: hands __a and __b unchanged to __builtin_neon_vcgev2sf; |
|    the value it yields is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcge_f32 (float32x2_t __a, float32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcgev2sf (__a, __b); |
|   return __res; |
| } |
| |
| /* vcge_u8: both operands are cast to int8x8_t for the call to |
|    __builtin_neon_vcgeuv8qi; its value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcge_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
|   int8x8_t __sa = (int8x8_t) __a; |
|   int8x8_t __sb = (int8x8_t) __b; |
|   return (uint8x8_t) __builtin_neon_vcgeuv8qi (__sa, __sb); |
| } |
| |
| /* vcge_u16: both operands are cast to int16x4_t for the call to |
|    __builtin_neon_vcgeuv4hi; its value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcge_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
|   int16x4_t __sa = (int16x4_t) __a; |
|   int16x4_t __sb = (int16x4_t) __b; |
|   return (uint16x4_t) __builtin_neon_vcgeuv4hi (__sa, __sb); |
| } |
| |
| /* vcge_u32: both operands are cast to int32x2_t for the call to |
|    __builtin_neon_vcgeuv2si; its value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcge_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
|   int32x2_t __sa = (int32x2_t) __a; |
|   int32x2_t __sb = (int32x2_t) __b; |
|   return (uint32x2_t) __builtin_neon_vcgeuv2si (__sa, __sb); |
| } |
| |
| /* vcgeq_s8: hands __a and __b unchanged to __builtin_neon_vcgev16qi; |
|    the value it yields is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgeq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
|   uint8x16_t __res = (uint8x16_t) __builtin_neon_vcgev16qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgeq_s16: hands __a and __b unchanged to __builtin_neon_vcgev8hi; |
|    the value it yields is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgeq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
|   uint16x8_t __res = (uint16x8_t) __builtin_neon_vcgev8hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgeq_s32: hands __a and __b unchanged to __builtin_neon_vcgev4si; |
|    the value it yields is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgeq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcgev4si (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgeq_f32: hands __a and __b unchanged to __builtin_neon_vcgev4sf; |
|    the value it yields is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgeq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcgev4sf (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgeq_u8: both operands are cast to int8x16_t for the call to |
|    __builtin_neon_vcgeuv16qi; its value is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgeq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
|   int8x16_t __sa = (int8x16_t) __a; |
|   int8x16_t __sb = (int8x16_t) __b; |
|   return (uint8x16_t) __builtin_neon_vcgeuv16qi (__sa, __sb); |
| } |
| |
| /* vcgeq_u16: both operands are cast to int16x8_t for the call to |
|    __builtin_neon_vcgeuv8hi; its value is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgeq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
|   int16x8_t __sa = (int16x8_t) __a; |
|   int16x8_t __sb = (int16x8_t) __b; |
|   return (uint16x8_t) __builtin_neon_vcgeuv8hi (__sa, __sb); |
| } |
| |
| /* vcgeq_u32: both operands are cast to int32x4_t for the call to |
|    __builtin_neon_vcgeuv4si; its value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgeq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
|   int32x4_t __sa = (int32x4_t) __a; |
|   int32x4_t __sb = (int32x4_t) __b; |
|   return (uint32x4_t) __builtin_neon_vcgeuv4si (__sa, __sb); |
| } |
| |
| /* vcle_s8: calls __builtin_neon_vcgev8qi with the operands exchanged |
|    (__b first, then __a); the value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcle_s8 (int8x8_t __a, int8x8_t __b) |
| { |
|   uint8x8_t __res = (uint8x8_t) __builtin_neon_vcgev8qi (__b, __a); |
|   return __res; |
| } |
| |
| /* vcle_s16: calls __builtin_neon_vcgev4hi with the operands exchanged |
|    (__b first, then __a); the value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcle_s16 (int16x4_t __a, int16x4_t __b) |
| { |
|   uint16x4_t __res = (uint16x4_t) __builtin_neon_vcgev4hi (__b, __a); |
|   return __res; |
| } |
| |
| /* vcle_s32: calls __builtin_neon_vcgev2si with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcle_s32 (int32x2_t __a, int32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcgev2si (__b, __a); |
|   return __res; |
| } |
| |
| /* vcle_f32: calls __builtin_neon_vcgev2sf with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcle_f32 (float32x2_t __a, float32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcgev2sf (__b, __a); |
|   return __res; |
| } |
| |
| /* vcle_u8: calls __builtin_neon_vcgeuv8qi with the operands exchanged |
|    (__b first, then __a), each cast to int8x8_t; the value is cast to |
|    uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcle_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
|   int8x8_t __first = (int8x8_t) __b; |
|   int8x8_t __second = (int8x8_t) __a; |
|   return (uint8x8_t) __builtin_neon_vcgeuv8qi (__first, __second); |
| } |
| |
| /* vcle_u16: calls __builtin_neon_vcgeuv4hi with the operands exchanged |
|    (__b first, then __a), each cast to int16x4_t; the value is cast to |
|    uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcle_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
|   int16x4_t __first = (int16x4_t) __b; |
|   int16x4_t __second = (int16x4_t) __a; |
|   return (uint16x4_t) __builtin_neon_vcgeuv4hi (__first, __second); |
| } |
| |
| /* vcle_u32: calls __builtin_neon_vcgeuv2si with the operands exchanged |
|    (__b first, then __a), each cast to int32x2_t; the value is cast to |
|    uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcle_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
|   int32x2_t __first = (int32x2_t) __b; |
|   int32x2_t __second = (int32x2_t) __a; |
|   return (uint32x2_t) __builtin_neon_vcgeuv2si (__first, __second); |
| } |
| |
| /* vcleq_s8: calls __builtin_neon_vcgev16qi with the operands exchanged |
|    (__b first, then __a); the value is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcleq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
|   uint8x16_t __res = (uint8x16_t) __builtin_neon_vcgev16qi (__b, __a); |
|   return __res; |
| } |
| |
| /* vcleq_s16: calls __builtin_neon_vcgev8hi with the operands exchanged |
|    (__b first, then __a); the value is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcleq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
|   uint16x8_t __res = (uint16x8_t) __builtin_neon_vcgev8hi (__b, __a); |
|   return __res; |
| } |
| |
| /* vcleq_s32: calls __builtin_neon_vcgev4si with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcleq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcgev4si (__b, __a); |
|   return __res; |
| } |
| |
| /* vcleq_f32: calls __builtin_neon_vcgev4sf with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcleq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcgev4sf (__b, __a); |
|   return __res; |
| } |
| |
| /* vcleq_u8: calls __builtin_neon_vcgeuv16qi with the operands exchanged |
|    (__b first, then __a), each cast to int8x16_t; the value is cast to |
|    uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcleq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
|   int8x16_t __first = (int8x16_t) __b; |
|   int8x16_t __second = (int8x16_t) __a; |
|   return (uint8x16_t) __builtin_neon_vcgeuv16qi (__first, __second); |
| } |
| |
| /* vcleq_u16: calls __builtin_neon_vcgeuv8hi with the operands exchanged |
|    (__b first, then __a), each cast to int16x8_t; the value is cast to |
|    uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcleq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
|   int16x8_t __first = (int16x8_t) __b; |
|   int16x8_t __second = (int16x8_t) __a; |
|   return (uint16x8_t) __builtin_neon_vcgeuv8hi (__first, __second); |
| } |
| |
| /* vcleq_u32: calls __builtin_neon_vcgeuv4si with the operands exchanged |
|    (__b first, then __a), each cast to int32x4_t; the value is cast to |
|    uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcleq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
|   int32x4_t __first = (int32x4_t) __b; |
|   int32x4_t __second = (int32x4_t) __a; |
|   return (uint32x4_t) __builtin_neon_vcgeuv4si (__first, __second); |
| } |
| |
| /* vcgt_s8: hands __a and __b unchanged to __builtin_neon_vcgtv8qi; |
|    the value it yields is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgt_s8 (int8x8_t __a, int8x8_t __b) |
| { |
|   uint8x8_t __res = (uint8x8_t) __builtin_neon_vcgtv8qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgt_s16: hands __a and __b unchanged to __builtin_neon_vcgtv4hi; |
|    the value it yields is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgt_s16 (int16x4_t __a, int16x4_t __b) |
| { |
|   uint16x4_t __res = (uint16x4_t) __builtin_neon_vcgtv4hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgt_s32: hands __a and __b unchanged to __builtin_neon_vcgtv2si; |
|    the value it yields is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgt_s32 (int32x2_t __a, int32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcgtv2si (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgt_f32: hands __a and __b unchanged to __builtin_neon_vcgtv2sf; |
|    the value it yields is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgt_f32 (float32x2_t __a, float32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcgtv2sf (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgt_u8: both operands are cast to int8x8_t for the call to |
|    __builtin_neon_vcgtuv8qi; its value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgt_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
|   int8x8_t __sa = (int8x8_t) __a; |
|   int8x8_t __sb = (int8x8_t) __b; |
|   return (uint8x8_t) __builtin_neon_vcgtuv8qi (__sa, __sb); |
| } |
| |
| /* vcgt_u16: both operands are cast to int16x4_t for the call to |
|    __builtin_neon_vcgtuv4hi; its value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgt_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
|   int16x4_t __sa = (int16x4_t) __a; |
|   int16x4_t __sb = (int16x4_t) __b; |
|   return (uint16x4_t) __builtin_neon_vcgtuv4hi (__sa, __sb); |
| } |
| |
| /* vcgt_u32: both operands are cast to int32x2_t for the call to |
|    __builtin_neon_vcgtuv2si; its value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgt_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
|   int32x2_t __sa = (int32x2_t) __a; |
|   int32x2_t __sb = (int32x2_t) __b; |
|   return (uint32x2_t) __builtin_neon_vcgtuv2si (__sa, __sb); |
| } |
| |
| /* vcgtq_s8: hands __a and __b unchanged to __builtin_neon_vcgtv16qi; |
|    the value it yields is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgtq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
|   uint8x16_t __res = (uint8x16_t) __builtin_neon_vcgtv16qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgtq_s16: hands __a and __b unchanged to __builtin_neon_vcgtv8hi; |
|    the value it yields is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgtq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
|   uint16x8_t __res = (uint16x8_t) __builtin_neon_vcgtv8hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgtq_s32: hands __a and __b unchanged to __builtin_neon_vcgtv4si; |
|    the value it yields is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgtq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcgtv4si (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgtq_f32: hands __a and __b unchanged to __builtin_neon_vcgtv4sf; |
|    the value it yields is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgtq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcgtv4sf (__a, __b); |
|   return __res; |
| } |
| |
| /* vcgtq_u8: both operands are cast to int8x16_t for the call to |
|    __builtin_neon_vcgtuv16qi; its value is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgtq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
|   int8x16_t __sa = (int8x16_t) __a; |
|   int8x16_t __sb = (int8x16_t) __b; |
|   return (uint8x16_t) __builtin_neon_vcgtuv16qi (__sa, __sb); |
| } |
| |
| /* vcgtq_u16: both operands are cast to int16x8_t for the call to |
|    __builtin_neon_vcgtuv8hi; its value is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgtq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
|   int16x8_t __sa = (int16x8_t) __a; |
|   int16x8_t __sb = (int16x8_t) __b; |
|   return (uint16x8_t) __builtin_neon_vcgtuv8hi (__sa, __sb); |
| } |
| |
| /* vcgtq_u32: both operands are cast to int32x4_t for the call to |
|    __builtin_neon_vcgtuv4si; its value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcgtq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
|   int32x4_t __sa = (int32x4_t) __a; |
|   int32x4_t __sb = (int32x4_t) __b; |
|   return (uint32x4_t) __builtin_neon_vcgtuv4si (__sa, __sb); |
| } |
| |
| /* vclt_s8: calls __builtin_neon_vcgtv8qi with the operands exchanged |
|    (__b first, then __a); the value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vclt_s8 (int8x8_t __a, int8x8_t __b) |
| { |
|   uint8x8_t __res = (uint8x8_t) __builtin_neon_vcgtv8qi (__b, __a); |
|   return __res; |
| } |
| |
| /* vclt_s16: calls __builtin_neon_vcgtv4hi with the operands exchanged |
|    (__b first, then __a); the value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vclt_s16 (int16x4_t __a, int16x4_t __b) |
| { |
|   uint16x4_t __res = (uint16x4_t) __builtin_neon_vcgtv4hi (__b, __a); |
|   return __res; |
| } |
| |
| /* vclt_s32: calls __builtin_neon_vcgtv2si with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vclt_s32 (int32x2_t __a, int32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcgtv2si (__b, __a); |
|   return __res; |
| } |
| |
| /* vclt_f32: calls __builtin_neon_vcgtv2sf with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vclt_f32 (float32x2_t __a, float32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcgtv2sf (__b, __a); |
|   return __res; |
| } |
| |
| /* vclt_u8: calls __builtin_neon_vcgtuv8qi with the operands exchanged |
|    (__b first, then __a), each cast to int8x8_t; the value is cast to |
|    uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vclt_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
|   int8x8_t __first = (int8x8_t) __b; |
|   int8x8_t __second = (int8x8_t) __a; |
|   return (uint8x8_t) __builtin_neon_vcgtuv8qi (__first, __second); |
| } |
| |
| /* vclt_u16: calls __builtin_neon_vcgtuv4hi with the operands exchanged |
|    (__b first, then __a), each cast to int16x4_t; the value is cast to |
|    uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vclt_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
|   int16x4_t __first = (int16x4_t) __b; |
|   int16x4_t __second = (int16x4_t) __a; |
|   return (uint16x4_t) __builtin_neon_vcgtuv4hi (__first, __second); |
| } |
| |
| /* vclt_u32: calls __builtin_neon_vcgtuv2si with the operands exchanged |
|    (__b first, then __a), each cast to int32x2_t; the value is cast to |
|    uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vclt_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
|   int32x2_t __first = (int32x2_t) __b; |
|   int32x2_t __second = (int32x2_t) __a; |
|   return (uint32x2_t) __builtin_neon_vcgtuv2si (__first, __second); |
| } |
| |
| /* vcltq_s8: calls __builtin_neon_vcgtv16qi with the operands exchanged |
|    (__b first, then __a); the value is cast to uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcltq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
|   uint8x16_t __res = (uint8x16_t) __builtin_neon_vcgtv16qi (__b, __a); |
|   return __res; |
| } |
| |
| /* vcltq_s16: calls __builtin_neon_vcgtv8hi with the operands exchanged |
|    (__b first, then __a); the value is cast to uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcltq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
|   uint16x8_t __res = (uint16x8_t) __builtin_neon_vcgtv8hi (__b, __a); |
|   return __res; |
| } |
| |
| /* vcltq_s32: calls __builtin_neon_vcgtv4si with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcltq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcgtv4si (__b, __a); |
|   return __res; |
| } |
| |
| /* vcltq_f32: calls __builtin_neon_vcgtv4sf with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcltq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcgtv4sf (__b, __a); |
|   return __res; |
| } |
| |
| /* vcltq_u8: calls __builtin_neon_vcgtuv16qi with the operands exchanged |
|    (__b first, then __a), each cast to int8x16_t; the value is cast to |
|    uint8x16_t.  */ |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcltq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
|   int8x16_t __first = (int8x16_t) __b; |
|   int8x16_t __second = (int8x16_t) __a; |
|   return (uint8x16_t) __builtin_neon_vcgtuv16qi (__first, __second); |
| } |
| |
| /* vcltq_u16: calls __builtin_neon_vcgtuv8hi with the operands exchanged |
|    (__b first, then __a), each cast to int16x8_t; the value is cast to |
|    uint16x8_t.  */ |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcltq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
|   int16x8_t __first = (int16x8_t) __b; |
|   int16x8_t __second = (int16x8_t) __a; |
|   return (uint16x8_t) __builtin_neon_vcgtuv8hi (__first, __second); |
| } |
| |
| /* vcltq_u32: calls __builtin_neon_vcgtuv4si with the operands exchanged |
|    (__b first, then __a), each cast to int32x4_t; the value is cast to |
|    uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcltq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
|   int32x4_t __first = (int32x4_t) __b; |
|   int32x4_t __second = (int32x4_t) __a; |
|   return (uint32x4_t) __builtin_neon_vcgtuv4si (__first, __second); |
| } |
| |
| /* vcage_f32: hands __a and __b unchanged to __builtin_neon_vcagev2sf; |
|    the value it yields is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcage_f32 (float32x2_t __a, float32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcagev2sf (__a, __b); |
|   return __res; |
| } |
| |
| /* vcageq_f32: hands __a and __b unchanged to __builtin_neon_vcagev4sf; |
|    the value it yields is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcageq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcagev4sf (__a, __b); |
|   return __res; |
| } |
| |
| /* vcale_f32: calls __builtin_neon_vcagev2sf with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcale_f32 (float32x2_t __a, float32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcagev2sf (__b, __a); |
|   return __res; |
| } |
| |
| /* vcaleq_f32: calls __builtin_neon_vcagev4sf with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcaleq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcagev4sf (__b, __a); |
|   return __res; |
| } |
| |
| /* vcagt_f32: hands __a and __b unchanged to __builtin_neon_vcagtv2sf; |
|    the value it yields is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcagt_f32 (float32x2_t __a, float32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcagtv2sf (__a, __b); |
|   return __res; |
| } |
| |
| /* vcagtq_f32: hands __a and __b unchanged to __builtin_neon_vcagtv4sf; |
|    the value it yields is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcagtq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcagtv4sf (__a, __b); |
|   return __res; |
| } |
| |
| /* vcalt_f32: calls __builtin_neon_vcagtv2sf with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcalt_f32 (float32x2_t __a, float32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vcagtv2sf (__b, __a); |
|   return __res; |
| } |
| |
| /* vcaltq_f32: calls __builtin_neon_vcagtv4sf with the operands exchanged |
|    (__b first, then __a); the value is cast to uint32x4_t.  */ |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vcaltq_f32 (float32x4_t __a, float32x4_t __b) |
| { |
|   uint32x4_t __res = (uint32x4_t) __builtin_neon_vcagtv4sf (__b, __a); |
|   return __res; |
| } |
| |
| /* vtst_s8: hands __a and __b unchanged to __builtin_neon_vtstv8qi; |
|    the value it yields is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtst_s8 (int8x8_t __a, int8x8_t __b) |
| { |
|   uint8x8_t __res = (uint8x8_t) __builtin_neon_vtstv8qi (__a, __b); |
|   return __res; |
| } |
| |
| /* vtst_s16: hands __a and __b unchanged to __builtin_neon_vtstv4hi; |
|    the value it yields is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtst_s16 (int16x4_t __a, int16x4_t __b) |
| { |
|   uint16x4_t __res = (uint16x4_t) __builtin_neon_vtstv4hi (__a, __b); |
|   return __res; |
| } |
| |
| /* vtst_s32: hands __a and __b unchanged to __builtin_neon_vtstv2si; |
|    the value it yields is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtst_s32 (int32x2_t __a, int32x2_t __b) |
| { |
|   uint32x2_t __res = (uint32x2_t) __builtin_neon_vtstv2si (__a, __b); |
|   return __res; |
| } |
| |
| /* vtst_u8: both operands are cast to int8x8_t for the call to |
|    __builtin_neon_vtstv8qi; its value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtst_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
|   int8x8_t __sa = (int8x8_t) __a; |
|   int8x8_t __sb = (int8x8_t) __b; |
|   return (uint8x8_t) __builtin_neon_vtstv8qi (__sa, __sb); |
| } |
| |
| /* vtst_u16: both operands are cast to int16x4_t for the call to |
|    __builtin_neon_vtstv4hi; its value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtst_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
|   int16x4_t __sa = (int16x4_t) __a; |
|   int16x4_t __sb = (int16x4_t) __b; |
|   return (uint16x4_t) __builtin_neon_vtstv4hi (__sa, __sb); |
| } |
| |
| /* vtst_u32: both operands are cast to int32x2_t for the call to |
|    __builtin_neon_vtstv2si; its value is cast to uint32x2_t.  */ |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtst_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
|   int32x2_t __sa = (int32x2_t) __a; |
|   int32x2_t __sb = (int32x2_t) __b; |
|   return (uint32x2_t) __builtin_neon_vtstv2si (__sa, __sb); |
| } |
| |
| /* vtst_p8: both poly8x8_t operands are cast to int8x8_t for the call to |
|    __builtin_neon_vtstv8qi; its value is cast to uint8x8_t.  */ |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtst_p8 (poly8x8_t __a, poly8x8_t __b) |
| { |
|   int8x8_t __sa = (int8x8_t) __a; |
|   int8x8_t __sb = (int8x8_t) __b; |
|   return (uint8x8_t) __builtin_neon_vtstv8qi (__sa, __sb); |
| } |
| |
| /* vtst_p16: both poly16x4_t operands are cast to int16x4_t for the call |
|    to __builtin_neon_vtstv4hi; its value is cast to uint16x4_t.  */ |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtst_p16 (poly16x4_t __a, poly16x4_t __b) |
| { |
|   int16x4_t __sa = (int16x4_t) __a; |
|   int16x4_t __sb = (int16x4_t) __b; |
|   return (uint16x4_t) __builtin_neon_vtstv4hi (__sa, __sb); |
| } |
| |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtstq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
| return (uint8x16_t)__builtin_neon_vtstv16qi (__a, __b); |
| } |
| |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtstq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| return (uint16x8_t)__builtin_neon_vtstv8hi (__a, __b); |
| } |
| |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtstq_s32 (int32x4_t __a, int32x4_t __b) |
| { |
| return (uint32x4_t)__builtin_neon_vtstv4si (__a, __b); |
| } |
| |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtstq_u8 (uint8x16_t __a, uint8x16_t __b) |
| { |
| return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtstq_u16 (uint16x8_t __a, uint16x8_t __b) |
| { |
| return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtstq_u32 (uint32x4_t __a, uint32x4_t __b) |
| { |
| return (uint32x4_t)__builtin_neon_vtstv4si ((int32x4_t) __a, (int32x4_t) __b); |
| } |
| |
| __extension__ extern __inline uint8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtstq_p8 (poly8x16_t __a, poly8x16_t __b) |
| { |
| return (uint8x16_t)__builtin_neon_vtstv16qi ((int8x16_t) __a, (int8x16_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vtstq_p16 (poly16x8_t __a, poly16x8_t __b) |
| { |
| return (uint16x8_t)__builtin_neon_vtstv8hi ((int16x8_t) __a, (int16x8_t) __b); |
| } |
| |
| __extension__ extern __inline int8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vabd_s8 (int8x8_t __a, int8x8_t __b) |
| { |
| return (int8x8_t)__builtin_neon_vabdsv8qi (__a, __b); |
| } |
| |
| __extension__ extern __inline int16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vabd_s16 (int16x4_t __a, int16x4_t __b) |
| { |
| return (int16x4_t)__builtin_neon_vabdsv4hi (__a, __b); |
| } |
| |
| __extension__ extern __inline int32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vabd_s32 (int32x2_t __a, int32x2_t __b) |
| { |
| return (int32x2_t)__builtin_neon_vabdsv2si (__a, __b); |
| } |
| |
| __extension__ extern __inline float32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vabd_f32 (float32x2_t __a, float32x2_t __b) |
| { |
| return (float32x2_t)__builtin_neon_vabdfv2sf (__a, __b); |
| } |
| |
| __extension__ extern __inline uint8x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vabd_u8 (uint8x8_t __a, uint8x8_t __b) |
| { |
| return (uint8x8_t)__builtin_neon_vabduv8qi ((int8x8_t) __a, (int8x8_t) __b); |
| } |
| |
| __extension__ extern __inline uint16x4_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vabd_u16 (uint16x4_t __a, uint16x4_t __b) |
| { |
| return (uint16x4_t)__builtin_neon_vabduv4hi ((int16x4_t) __a, (int16x4_t) __b); |
| } |
| |
| __extension__ extern __inline uint32x2_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vabd_u32 (uint32x2_t __a, uint32x2_t __b) |
| { |
| return (uint32x2_t)__builtin_neon_vabduv2si ((int32x2_t) __a, (int32x2_t) __b); |
| } |
| |
| __extension__ extern __inline int8x16_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vabdq_s8 (int8x16_t __a, int8x16_t __b) |
| { |
| return (int8x16_t)__builtin_neon_vabdsv16qi (__a, __b); |
| } |
| |
| __extension__ extern __inline int16x8_t |
| __attribute__ ((__always_inline__, __gnu_inline__, __artificial__)) |
| vabdq_s16 (int16x8_t __a, int16x8_t __b) |
| { |
| return (int16x8_t)
|