| // Implementation of <simd> -*- C++ -*- |
| |
| // Copyright The GNU Toolchain Authors. |
| // |
| // This file is part of the GNU ISO C++ Library. This library is free |
| // software; you can redistribute it and/or modify it under the |
| // terms of the GNU General Public License as published by the |
| // Free Software Foundation; either version 3, or (at your option) |
| // any later version. |
| |
| // This library is distributed in the hope that it will be useful, |
| // but WITHOUT ANY WARRANTY; without even the implied warranty of |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| // GNU General Public License for more details. |
| |
| // Under Section 7 of GPL version 3, you are granted additional |
| // permissions described in the GCC Runtime Library Exception, version |
| // 3.1, as published by the Free Software Foundation. |
| |
| // You should have received a copy of the GNU General Public License and |
| // a copy of the GCC Runtime Library Exception along with this program; |
| // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
| // <http://www.gnu.org/licenses/>. |
| |
| #ifndef _GLIBCXX_VEC_OPS_H |
| #define _GLIBCXX_VEC_OPS_H 1 |
| |
| #ifdef _GLIBCXX_SYSHDR |
| #pragma GCC system_header |
| #endif |
| |
| #if __cplusplus >= 202400L |
| |
| #include "simd_details.h" |
| |
| #include <bit> |
| #include <bits/utility.h> |
| |
| // psabi warnings are bogus because the ABI of the internal types never leaks into user code |
| #pragma GCC diagnostic push |
| #pragma GCC diagnostic ignored "-Wpsabi" |
| |
| namespace std _GLIBCXX_VISIBILITY(default) |
| { |
| _GLIBCXX_BEGIN_NAMESPACE_VERSION |
| namespace simd |
| { |
| /** @internal |
|  * Return true iff the signed integer @p __x is positive and a power of two. |
|  * |
|  * Non-positive values are rejected before the conversion to the unsigned type. Without this |
|  * check the minimum value of @p _Tp (only the sign bit set) would convert to an unsigned value |
|  * with exactly one bit set and would wrongly be reported as a power of two. |
|  */ |
| template <std::signed_integral _Tp> |
|   constexpr bool |
|   __signed_has_single_bit(_Tp __x) |
|   { return __x > 0 && __has_single_bit(make_unsigned_t<_Tp>(__x)); } |
| |
| /** |
| * Alias for a vector builtin with given value type and total sizeof. |
| * |
| * @tparam _Tp Element type of the vector; must satisfy __vectorizable. |
| * @tparam _Bytes Total size of the vector type in bytes; must be a power of two (enforced by |
| * the requires-clause). |
| * |
| * The alias is formed by applying the GNU `vector_size` attribute to @p _Tp. |
| */ |
| template <__vectorizable _Tp, size_t _Bytes> |
| requires (__has_single_bit(_Bytes)) |
| using __vec_builtin_type_bytes [[__gnu__::__vector_size__(_Bytes)]] = _Tp; |
| |
| /** |
| * Alias for a vector builtin with given value type @p _Tp and @p _Width. |
| * |
| * @tparam _Tp Element type of the vector; must satisfy __vectorizable. |
| * @tparam _Width Number of elements; must pass the __signed_has_single_bit check. |
| * |
| * The resulting vector type occupies `sizeof(_Tp) * _Width` bytes. |
| */ |
| template <__vectorizable _Tp, __simd_size_type _Width> |
| requires (__signed_has_single_bit(_Width)) |
| using __vec_builtin_type = __vec_builtin_type_bytes<_Tp, sizeof(_Tp) * _Width>; |
| |
| /** |
| * Constrain to any vector builtin with given value type and optional width. |
| * |
| * @p _Tp satisfies the concept if all of the following hold: |
| * - @p _Tp is neither a class, a pointer, nor an arithmetic type, |
| * - @p _ValueType is __vectorizable, |
| * - @p _Width is at least 1 and equals `sizeof(_Tp) / sizeof(_ValueType)`, |
| * - @p _Tp is the same type as the vector builtin of @p _ValueType with `sizeof(_Tp)` bytes, |
| * - an element of an lvalue of type @p _Tp can be assigned from a @p _ValueType via subscript. |
| * |
| * If @p _Width is not given it defaults to `sizeof(_Tp) / sizeof(_ValueType)`, i.e. any width |
| * of at least 1 is accepted. |
| */ |
| template <typename _Tp, typename _ValueType, |
| __simd_size_type _Width = sizeof(_Tp) / sizeof(_ValueType)> |
| concept __vec_builtin_of |
| = !is_class_v<_Tp> && !is_pointer_v<_Tp> && !is_arithmetic_v<_Tp> |
| && __vectorizable<_ValueType> |
| && _Width >= 1 && sizeof(_Tp) / sizeof(_ValueType) == _Width |
| && same_as<__vec_builtin_type_bytes<_ValueType, sizeof(_Tp)>, _Tp> |
| && requires(_Tp& __v, _ValueType __x) { __v[0] = __x; }; |
| |
| /** |
| * Constrain to any vector builtin. |
| * |
| * The value type is not specified by the user; it is taken from the type of subscripting a |
| * const @p _Tp (with cv-qualifiers and references removed) and then checked with |
| * __vec_builtin_of. |
| */ |
| template <typename _Tp> |
| concept __vec_builtin |
| = __vec_builtin_of<_Tp, remove_cvref_t<decltype(declval<const _Tp>()[0])>>; |
| |
| /** |
| * Alias for the value type of the given __vec_builtin type @p _Tp. |
| * |
| * This is the type of `v[0]` for a const object `v` of type @p _Tp, with cv-qualifiers and |
| * references removed. |
| */ |
| template <__vec_builtin _Tp> |
| using __vec_value_type = remove_cvref_t<decltype(declval<const _Tp>()[0])>; |
| |
| /** |
|  * Number of value_type elements in the given vector builtin or arithmetic type. |
|  * |
|  * The primary template yields 1 for every type that is not a vector builtin. The constrained |
|  * partial specialization computes the element count of a vector builtin as the size of the |
|  * vector divided by the size of one element. |
|  */ |
| template <typename _Tp> |
|   inline constexpr __simd_size_type __width_of = 1; |
| |
| template <__vec_builtin _TV> |
|   inline constexpr __simd_size_type __width_of<_TV> |
|     = sizeof(_TV) / sizeof(__vec_value_type<_TV>); |
| |
| /** |
| * Alias for a vector builtin with equal value type and new width @p _Np. |
| * |
| * @tparam _Np Number of elements of the resulting vector type. |
| * @tparam _TV Vector builtin that provides the value type. |
| */ |
| template <__simd_size_type _Np, __vec_builtin _TV> |
| using __resize_vec_builtin_t = __vec_builtin_type<__vec_value_type<_TV>, _Np>; |
| |
| /** |
|  * Alias for the vector builtin with the value type of @p _TV and half as many elements. |
|  * |
|  * Only available if @p _TV holds more than one element. |
|  */ |
| template <__vec_builtin _TV> |
|   requires (__width_of<_TV> >= 2) |
|   using __half_vec_builtin_t |
|     = __vec_builtin_type<__vec_value_type<_TV>, __width_of<_TV> / 2>; |
| |
| /** |
|  * Alias for the vector builtin with the value type of @p _TV and twice as many elements. |
|  */ |
| template <__vec_builtin _TV> |
|   using __double_vec_builtin_t |
|     = __vec_builtin_type<__vec_value_type<_TV>, __width_of<_TV> * 2>; |
| |
| /** @internal |
|  * Reinterpret @p __v as a vector builtin of the same total size with element type @p _Up. |
|  */ |
| template <typename _Up, __vec_builtin _TV> |
|   [[__gnu__::__always_inline__]] |
|   constexpr __vec_builtin_type_bytes<_Up, sizeof(_TV)> |
|   __vec_bit_cast(_TV __v) |
|   { |
|     using _UV = __vec_builtin_type_bytes<_Up, sizeof(_TV)>; |
|     return reinterpret_cast<_UV>(__v); |
|   } |
| |
| /** @internal |
|  * Mask constant of type @p _TV: the first @p _Np elements are -1, all remaining elements are 0. |
|  * |
|  * Requires a signed integral value type. |
|  */ |
| template <int _Np, __vec_builtin _TV> |
|   requires signed_integral<__vec_value_type<_TV>> |
|   static constexpr _TV _S_vec_implicit_mask |
|     = []<int... _Idx> (integer_sequence<int, _Idx...>) { |
|         return _TV{ (_Idx < _Np ? -1 : 0)... }; |
|       } (make_integer_sequence<int, __width_of<_TV>>()); |
| |
| /** |
| * Helper function to work around Clang not allowing v[i] in constant expressions. |
| * |
| * @param __v Vector builtin to read from. |
| * @param __i Index of the element to read. |
| * @return The element of @p __v at index @p __i. |
| * |
| * When compiled with Clang and evaluated as a constant expression, @p __v is bit-cast to an |
| * array of its elements and the array is indexed. In every other case the subscript operator of |
| * the vector builtin is used directly. |
| */ |
| template <__vec_builtin _TV> |
| [[__gnu__::__always_inline__]] |
| constexpr __vec_value_type<_TV> |
| __vec_get(_TV __v, int __i) |
| { |
| #ifdef _GLIBCXX_CLANG |
| if consteval |
| { |
| return __builtin_bit_cast(array<__vec_value_type<_TV>, __width_of<_TV>>, __v)[__i]; |
| } |
| else |
| #endif |
| { |
| return __v[__i]; |
| } |
| } |
| |
| /** |
| * Helper function to work around Clang and GCC not allowing assignment to v[i] in constant |
| * expressions. |
| * |
| * @param __v Vector builtin to modify (in place). |
| * @param __i Index of the element to overwrite. |
| * @param __x New value for the element at index @p __i. |
| * |
| * During constant evaluation the whole vector is replaced instead of assigning to one element: |
| * - Clang: bit-cast @p __v to an array, assign the array element, and bit-cast back. |
| * - otherwise: rebuild @p __v from a braced list that takes @p __x at index @p __i and the old |
| * element everywhere else. |
| * At runtime the element is assigned directly via the subscript operator. |
| */ |
| template <__vec_builtin _TV> |
| [[__gnu__::__always_inline__]] |
| constexpr void |
| __vec_set(_TV& __v, int __i, __vec_value_type<_TV> __x) |
| { |
| if consteval |
| { |
| #ifdef _GLIBCXX_CLANG |
| auto __arr = __builtin_bit_cast(array<__vec_value_type<_TV>, __width_of<_TV>>, __v); |
| __arr[__i] = __x; |
| __v = __builtin_bit_cast(_TV, __arr); |
| #else |
| constexpr auto [...__j] = _IotaArray<__width_of<_TV>>; |
| __v = _TV{(__i == __j ? __x : __v[__j])...}; |
| #endif |
| } |
| else |
| { |
| __v[__i] = __x; |
| } |
| } |
| |
| /** @internal |
| * Return vector builtin with all values from @p __a and @p __b. |
| * |
| * The result has twice as many elements as @p _TV. It is produced by a single |
| * __builtin_shufflevector over (@p __a, @p __b) with the indices from |
| * `_IotaArray<2 * width>` (presumably 0, 1, ..., 2 * width - 1, i.e. the elements of @p __a |
| * followed by the elements of @p __b -- confirm against the definition of _IotaArray). |
| */ |
| template <__vec_builtin _TV> |
| [[__gnu__::__always_inline__]] |
| constexpr __vec_builtin_type<__vec_value_type<_TV>, __width_of<_TV> * 2> |
| __vec_concat(_TV __a, _TV __b) |
| { |
| constexpr auto [...__is] = _IotaArray<__width_of<_TV> * 2>; |
| return __builtin_shufflevector(__a, __b, __is...); |
| } |
| |
| /** @internal |
| * Concatenate the first @p _N0 elements from @p __a with the first @p _N1 elements from @p __b |
| * with the elements from applying this function recursively to @p __rest. |
| * |
| * @pre _N0 <= __width_of<_TV0> && _N1 <= __width_of<_TV1> && _Ns <= __width_of<_TVs> && ... |
| * |
| * @return A vector builtin with the value type of @p _TV0 whose width is the sum of all given |
| * sizes rounded up to a power of two (__bit_ceil). |
| * |
| * Strategy: Aim for a power-of-2 tree concat. E.g. |
| * - cat(2, 2, 2, 2) -> cat(4, 2, 2) -> cat(4, 4) |
| * - cat(2, 2, 2, 2, 8) -> cat(4, 2, 2, 8) -> cat(4, 4, 8) -> cat(8, 8) |
| * |
| * This is a declaration only; the definition follows after the more constrained overload, which |
| * already calls this function. |
| */ |
| template <int _N0, int _N1, int... _Ns, __vec_builtin _TV0, __vec_builtin _TV1, |
| __vec_builtin... _TVs> |
| [[__gnu__::__always_inline__]] |
| constexpr __vec_builtin_type<__vec_value_type<_TV0>, |
| __bit_ceil(unsigned(_N0 + (_N1 + ... + _Ns)))> |
| __vec_concat_sized(const _TV0& __a, const _TV1& __b, const _TVs&... __rest); |
| |
| /** @internal |
|  * Overload for three or more inputs where @p _N0 is a power of two and not smaller than |
|  * `_N1 + _N2`: @p __b and @p __c are merged first, then the merged vector takes the place of |
|  * the second input in the next concatenation step. |
|  */ |
| template <int _N0, int _N1, int _N2, int... _Ns, __vec_builtin _TV0, __vec_builtin _TV1, |
|           __vec_builtin _TV2, __vec_builtin... _TVs> |
|   requires (__has_single_bit(unsigned(_N0))) && (_N0 >= (_N1 + _N2)) |
|   [[__gnu__::__always_inline__]] |
|   constexpr __vec_builtin_type<__vec_value_type<_TV0>, |
|                                __bit_ceil(unsigned(_N0 + _N1 + (_N2 + ... + _Ns)))> |
|   __vec_concat_sized(const _TV0& __a, const _TV1& __b, const _TV2& __c, const _TVs&... __rest) |
|   { |
|     // merge the two smaller inputs first ... |
|     const auto __bc = __vec_concat_sized<_N1, _N2>(__b, __c); |
|     // ... and continue with one input less |
|     return __vec_concat_sized<_N0, _N1 + _N2, _Ns...>(__a, __bc, __rest...); |
|   } |
| |
| /** @internal |
|  * General overload: concatenate the first @p _N0 elements of @p __a with the first @p _N1 |
|  * elements of @p __b and, if further inputs are present, continue with @p __rest. |
|  * |
|  * The intermediate vector is sized to the next power of two; surplus elements use the shuffle |
|  * index -1 ("don't care"). |
|  */ |
| template <int _N0, int _N1, int... _Ns, __vec_builtin _TV0, __vec_builtin _TV1, |
|           __vec_builtin... _TVs> |
|   [[__gnu__::__always_inline__]] |
|   constexpr __vec_builtin_type<__vec_value_type<_TV0>, |
|                                __bit_ceil(unsigned(_N0 + (_N1 + ... + _Ns)))> |
|   __vec_concat_sized(const _TV0& __a, const _TV1& __b, const _TVs&... __rest) |
|   { |
|     // Maps an index of the result to a shuffle index into the pair (__a, __b). |
|     constexpr auto __select = [](int __i) consteval { |
|       if (__i < _N0) |
|         return __i; // copy from __a |
|       if (__i < _N0 + _N1) |
|         return __i - _N0 + __width_of<_TV0>; // copy from __b; _N0 <= __width_of<_TV0> |
|       return -1; // can't index into __rest: don't care |
|     }; |
|     // The index list is rounded up because a power-of-2 vector needs to be generated: |
|     constexpr auto [...__idx] = _IotaArray<__bit_ceil(unsigned(_N0 + _N1)), int>; |
|     const auto __ab = __builtin_shufflevector(__a, __b, __select(__idx)...); |
|     if constexpr (sizeof...(__rest) != 0) |
|       return __vec_concat_sized<_N0 + _N1, _Ns...>(__ab, __rest...); |
|     else |
|       return __ab; |
|   } |
| |
| /** @internal |
|  * Return a vector builtin of half the width of @p _TV, shuffled out of @p __v with the indices |
|  * from `_IotaArray<width / 2>` (presumably the lower half of @p __v). |
|  */ |
| template <__vec_builtin _TV> |
|   [[__gnu__::__always_inline__]] |
|   constexpr __half_vec_builtin_t<_TV> |
|   __vec_split_lo(_TV __v) |
|   { |
|     constexpr int __half = __width_of<_TV> / 2; |
|     constexpr auto [...__idx] = _IotaArray<__half>; |
|     return __builtin_shufflevector(__v, __v, __idx...); |
|   } |
| |
| /** @internal |
|  * Return a vector builtin of half the width of @p _TV, shuffled out of @p __v with the indices |
|  * from `_IotaArray<width / 2>` offset by `width / 2` (presumably the upper half of @p __v). |
|  */ |
| template <__vec_builtin _TV> |
|   [[__gnu__::__always_inline__]] |
|   constexpr __half_vec_builtin_t<_TV> |
|   __vec_split_hi(_TV __v) |
|   { |
|     constexpr int __half = __width_of<_TV> / 2; |
|     constexpr auto [...__idx] = _IotaArray<__half>; |
|     return __builtin_shufflevector(__v, __v, (__half + __idx)...); |
|   } |
| |
| /** @internal |
| * Return @p __x zero-padded to @p _Bytes bytes. |
| * |
| * Use this function when you need two objects of the same size (e.g. for __vec_concat). |
| * |
| * Implementation, by size of the input: |
| * - `sizeof(_TV) == _Bytes`: @p __x is returned unchanged. |
| * - at most `sizeof(unsigned long long)`: the whole input is bit-cast to one integer of type |
| * `_UInt<sizeof(_TV)>`, which becomes the first element of an integer vector of @p _Bytes |
| * bytes (remaining elements are zero); that vector is bit-cast to the result type. |
| * - larger, but smaller than @p _Bytes: the input is doubled by concatenation with a |
| * value-initialized vector and the function recurses. |
| * - otherwise (a larger input that exceeds @p _Bytes): compilation fails via static_assert. |
| */ |
| template <size_t _Bytes, __vec_builtin _TV> |
| [[__gnu__::__always_inline__]] |
| constexpr auto |
| __vec_zero_pad_to(_TV __x) |
| { |
| if constexpr (sizeof(_TV) == _Bytes) |
| return __x; |
| else if constexpr (sizeof(_TV) <= sizeof(0ull)) |
| { |
| using _Up = _UInt<sizeof(_TV)>; |
| __vec_builtin_type_bytes<_Up, _Bytes> __tmp = {__builtin_bit_cast(_Up, __x)}; |
| return __builtin_bit_cast(__vec_builtin_type_bytes<__vec_value_type<_TV>, _Bytes>, __tmp); |
| } |
| else if constexpr (sizeof(_TV) < _Bytes) |
| return __vec_zero_pad_to<_Bytes>(__vec_concat(__x, _TV())); |
| else |
| static_assert(false); |
| } |
| |
| /** @internal |
| * Return @p __x zero-padded to a vector builtin with sizeof 16. The input must be smaller than |
| * 16 bytes (enforced via static_assert). |
| * |
| * Use this function instead of the above when you need to pad an argument for a SIMD builtin. |
| */ |
| template <__vec_builtin _TV> |
| [[__gnu__::__always_inline__]] |
| constexpr auto |
| __vec_zero_pad_to_16(_TV __x) |
| { |
| static_assert(sizeof(_TV) < 16); |
| return __vec_zero_pad_to<16>(__x); |
| } |
| |
| /* Return the result of __builtin_constant_p for @p __x, i.e. whether the compiler knows the |
| * value of the argument at compile time. |
| * |
| * The wrapper works around __builtin_constant_p returning false unless passed a variable |
| * (__builtin_constant_p(x[0]) is false while __is_const_known(x[0]) is true). |
| */ |
| template <typename _Tp> |
| [[__gnu__::__always_inline__]] |
| constexpr bool |
| __is_const_known(const _Tp& __x) |
| { |
| return __builtin_constant_p(__x); |
| } |
| |
| /** @internal |
|  * Return true if all of the (two or more) arguments are known at compile time. |
|  * |
|  * During constant evaluation the answer is unconditionally true. Otherwise every argument is |
|  * checked individually with the single-argument overload. |
|  */ |
| [[__gnu__::__always_inline__]] |
| constexpr bool |
| __is_const_known(const auto&... __xs) requires(sizeof...(__xs) >= 2) |
| { |
|   if !consteval |
|     { |
|       return (__is_const_known(__xs) && ...); |
|     } |
|   return true; |
| } |
| |
| /** @internal |
|  * Return true only if the result of `__x == __expect` is known at compile time and is true. |
|  */ |
| [[__gnu__::__always_inline__]] |
| constexpr bool |
| __is_const_known_equal_to(const auto& __x, const auto& __expect) |
| { |
|   if (!__is_const_known(__x == __expect)) |
|     return false; |
|   return static_cast<bool>(__x == __expect); |
| } |
| |
| #if _GLIBCXX_X86 |
| /* Declaration only (x86 targets): conversion helper that __vec_cast calls for _Float16 conversions; no definition is given at this point. */ template <__vec_builtin _UV, __vec_builtin _TV> |
| inline _UV |
| __x86_cvt_f16c(_TV __v); |
| #endif |
| |
| |
| /** @internal |
| * Simple wrapper around __builtin_convertvector to provide static_cast-like syntax. |
| * |
| * Works around GCC failing to use the F16C/AVX512F cvtps2ph/cvtph2ps instructions. |
| * |
| * @tparam _UV Destination vector builtin; must have the same width as @p _TV (static_assert). |
| * @tparam _TV Source vector builtin. |
| * @tparam _Traits Object whose `_M_have_f16c()` and `_M_have_avx512fp16()` members are queried. |
| * |
| * On x86 two special cases precede the generic conversion: |
| * - If F16C is available without AVX512-FP16, the source or destination value type is _Float16, |
| * and @p __v is not known at compile time, the conversion is delegated to __x86_cvt_f16c. |
| * - A conversion from a floating-point type to an integer type narrower than int, where the |
| * destination vector is smaller than the source vector, is done in two steps via an integer |
| * vector whose elements have `min(sizeof(int), sizeof(source element))` bytes. |
| */ |
| template <__vec_builtin _UV, __vec_builtin _TV, _ArchTraits _Traits = {}> |
| [[__gnu__::__always_inline__]] |
| constexpr _UV |
| __vec_cast(_TV __v) |
| { |
| static_assert(__width_of<_UV> == __width_of<_TV>); |
| #if _GLIBCXX_X86 |
| using _Up = __vec_value_type<_UV>; |
| using _Tp = __vec_value_type<_TV>; |
| constexpr bool __to_f16 = is_same_v<_Up, _Float16>; |
| constexpr bool __from_f16 = is_same_v<_Tp, _Float16>; |
| constexpr bool __needs_f16c = _Traits._M_have_f16c() && !_Traits._M_have_avx512fp16() |
| && (__to_f16 || __from_f16); |
| if (__needs_f16c && !__is_const_known(__v)) |
| { // Work around PR121688; the nested `if constexpr` discards the call when __needs_f16c is false |
| if constexpr (__needs_f16c) |
| return __x86_cvt_f16c<_UV>(__v); |
| } |
| if constexpr (is_floating_point_v<_Tp> && is_integral_v<_Up> |
| && sizeof(_UV) < sizeof(_TV) && sizeof(_Up) < sizeof(int)) |
| { |
| using _Ip = __integer_from<std::min(sizeof(int), sizeof(_Tp))>; |
| using _IV = __vec_builtin_type<_Ip, __width_of<_TV>>; |
| return __vec_cast<_UV>(__vec_cast<_IV>(__v)); |
| } |
| #endif |
| return __builtin_convertvector(__v, _UV); |
| } |
| |
| /** @internal |
|  * Convenience overload of __vec_cast: convert @p __v to a vector builtin with element type |
|  * @p _Up and the same number of elements as @p _TV. |
|  * |
|  * Forwards to the __vec_cast overload that takes the destination vector type. |
|  */ |
| template <__vectorizable _Up, __vec_builtin _TV> |
|   [[__gnu__::__always_inline__]] |
|   constexpr __vec_builtin_type<_Up, __width_of<_TV>> |
|   __vec_cast(_TV __v) |
|   { |
|     using _UV = __vec_builtin_type<_Up, __width_of<_TV>>; |
|     return __vec_cast<_UV>(__v); |
|   } |
| |
| /** @internal |
| * Convert the mask vector @p __k to the vector builtin type @p _UV, like __vec_cast, but with an |
| * additional precondition on possible values of the argument. |
| * |
| * The value types of both @p _UV and @p _TV must be signed integers (static_assert). |
| * |
| * Precondition: __k[i] is either 0 or -1 for all i. |
| */ |
| template <__vec_builtin _UV, __vec_builtin _TV> |
| [[__gnu__::__always_inline__]] |
| constexpr _UV |
| __vec_mask_cast(_TV __k) |
| { |
| static_assert(signed_integral<__vec_value_type<_UV>>); |
| static_assert(signed_integral<__vec_value_type<_TV>>); |
| // TODO: __builtin_convertvector cannot be optimal because it doesn't consider input and |
| // output can only be 0 or -1. |
| return __builtin_convertvector(__k, _UV); |
| } |
| |
| /** @internal |
|  * Element-wise bitwise XOR of @p __a and @p __b. |
|  * |
|  * For a floating-point value type the operands are bit-cast to a vector of equally sized |
|  * integers, combined there, and the result is bit-cast back to @p _TV. |
|  */ |
| template <__vec_builtin _TV> |
|   [[__gnu__::__always_inline__]] |
|   constexpr _TV |
|   __vec_xor(_TV __a, _TV __b) |
|   { |
|     using _Tp = __vec_value_type<_TV>; |
|     if constexpr (!is_floating_point_v<_Tp>) |
|       return __a ^ __b; |
|     else |
|       { |
|         using _IV = __vec_builtin_type<__integer_from<sizeof(_Tp)>, __width_of<_TV>>; |
|         const _IV __abits = __builtin_bit_cast(_IV, __a); |
|         const _IV __bbits = __builtin_bit_cast(_IV, __b); |
|         return __builtin_bit_cast(_TV, __abits ^ __bbits); |
|       } |
|   } |
| |
| /** @internal |
|  * Element-wise bitwise OR of @p __a and @p __b. |
|  * |
|  * For a floating-point value type the operands are bit-cast to a vector of equally sized |
|  * integers, combined there, and the result is bit-cast back to @p _TV. |
|  */ |
| template <__vec_builtin _TV> |
|   [[__gnu__::__always_inline__]] |
|   constexpr _TV |
|   __vec_or(_TV __a, _TV __b) |
|   { |
|     using _Tp = __vec_value_type<_TV>; |
|     if constexpr (!is_floating_point_v<_Tp>) |
|       return __a | __b; |
|     else |
|       { |
|         using _IV = __vec_builtin_type<__integer_from<sizeof(_Tp)>, __width_of<_TV>>; |
|         const _IV __abits = __builtin_bit_cast(_IV, __a); |
|         const _IV __bbits = __builtin_bit_cast(_IV, __b); |
|         return __builtin_bit_cast(_TV, __abits | __bbits); |
|       } |
|   } |
| |
| /** @internal |
|  * Element-wise bitwise AND of @p __a and @p __b. |
|  * |
|  * For a floating-point value type the operands are bit-cast to a vector of equally sized |
|  * integers, combined there, and the result is bit-cast back to @p _TV. |
|  */ |
| template <__vec_builtin _TV> |
|   [[__gnu__::__always_inline__]] |
|   constexpr _TV |
|   __vec_and(_TV __a, _TV __b) |
|   { |
|     using _Tp = __vec_value_type<_TV>; |
|     if constexpr (!is_floating_point_v<_Tp>) |
|       return __a & __b; |
|     else |
|       { |
|         using _IV = __vec_builtin_type<__integer_from<sizeof(_Tp)>, __width_of<_TV>>; |
|         const _IV __abits = __builtin_bit_cast(_IV, __a); |
|         const _IV __bbits = __builtin_bit_cast(_IV, __b); |
|         return __builtin_bit_cast(_TV, __abits & __bbits); |
|       } |
|   } |
| |
| /** @internal |
|  * Returns the bit-wise and of not @p __a and @p __b, i.e. `~__a & __b`. |
|  * |
|  * Both operands are always bit-cast to a vector of integers with the element size of @p _TV; |
|  * the result is bit-cast back to @p _TV. |
|  * |
|  * Use __vec_and(__vec_not(__a), __b) unless an andnot instruction is necessary for optimization. |
|  * |
|  * @see __vec_andnot in simd_x86.h |
|  */ |
| template <__vec_builtin _TV> |
|   [[__gnu__::__always_inline__]] |
|   constexpr _TV |
|   __vec_andnot(_TV __a, _TV __b) |
|   { |
|     using _IV |
|       = __vec_builtin_type<__integer_from<sizeof(__vec_value_type<_TV>)>, __width_of<_TV>>; |
|     const _IV __not_a = ~__builtin_bit_cast(_IV, __a); |
|     const _IV __bbits = __builtin_bit_cast(_IV, __b); |
|     return __builtin_bit_cast(_TV, __not_a & __bbits); |
|   } |
| |
| /** @internal |
|  * Element-wise bitwise complement of @p __a. |
|  * |
|  * For a floating-point value type the complement is computed on a bit-cast integer vector of |
|  * the same total size and the result is bit-cast back to @p _TV. |
|  */ |
| template <__vec_builtin _TV> |
|   [[__gnu__::__always_inline__]] |
|   constexpr _TV |
|   __vec_not(_TV __a) |
|   { |
|     using _Tp = __vec_value_type<_TV>; |
|     if constexpr (!is_floating_point_v<_Tp>) |
|       return ~__a; |
|     else |
|       { |
|         using _IV = __vec_builtin_type_bytes<__integer_from<sizeof(_Tp)>, sizeof(_TV)>; |
|         return __builtin_bit_cast(_TV, ~__builtin_bit_cast(_IV, __a)); |
|       } |
|   } |
| |
| /** |
| * An object of given type where only the sign bits are 1. |
| * |
| * Only available for vector builtins with a floating-point value type. The value is computed as |
| * the bitwise XOR of a vector of +1 and a vector of -1 (two values whose representations are |
| * expected to differ in the sign bit only). |
| */ |
| template <__vec_builtin _V> |
| requires std::floating_point<__vec_value_type<_V>> |
| constexpr _V _S_signmask = __vec_xor(_V() + 1, _V() - 1); |
| |
| template <__vec_builtin _TV, int _Np = __width_of<_TV>, |
| typename = make_integer_sequence<int, _Np>> |
| struct _VecOps; |
| /** @internal |
| * Static helper operations on the vector builtin @p _TV that act on its first @p _Np elements. |
| * |
| * The index pack @p _Is is matched against the defaulted third template argument of the primary |
| * template (`make_integer_sequence<int, _Np>`), i.e. it holds 0, 1, ..., _Np - 1. |
| * @p _Np must not exceed the width of @p _TV (static_assert). |
| */ |
| template <__vec_builtin _TV, int _Np, int... _Is> |
| struct _VecOps<_TV, _Np, integer_sequence<int, _Is...>> |
| { |
| static_assert(_Np <= __width_of<_TV>); |
| /** Value type of @p _TV. */ |
| using _Tp = __vec_value_type<_TV>; |
| /** |
| * Vector builtin with half the width of @p _TV. If @p _Np is less than 2, the half of the |
| * doubled @p _TV is used instead (presumably to keep the alias well-formed in that case). |
| */ |
| using _HV = __half_vec_builtin_t<__conditional_t<_Np >= 2, _TV, __double_vec_builtin_t<_TV>>>; |
| /** |
| * Return a vector with @p __init at the even indices and a value-initialized element at the |
| * odd indices. |
| */ |
| [[__gnu__::__always_inline__]] |
| static constexpr _TV |
| _S_broadcast_to_even(_Tp __init) |
| { return _TV {((_Is & 1) == 0 ? __init : _Tp())...}; } |
| /** |
| * Return a vector with @p __init at the odd indices and a value-initialized element at the |
| * even indices. |
| */ |
| [[__gnu__::__always_inline__]] |
| static constexpr _TV |
| _S_broadcast_to_odd(_Tp __init) |
| { return _TV {((_Is & 1) == 1 ? __init : _Tp())...}; } |
| /** Return true if each of the first @p _Np elements of @p __k is nonzero. */ |
| [[__gnu__::__always_inline__]] |
| static constexpr bool |
| _S_all_of(_TV __k) noexcept |
| { return (... && (__k[_Is] != 0)); } |
| /** Return true if at least one of the first @p _Np elements of @p __k is nonzero. */ |
| [[__gnu__::__always_inline__]] |
| static constexpr bool |
| _S_any_of(_TV __k) noexcept |
| { return (... || (__k[_Is] != 0)); } |
| /** Return true if each of the first @p _Np elements of @p __k is zero. */ |
| [[__gnu__::__always_inline__]] |
| static constexpr bool |
| _S_none_of(_TV __k) noexcept |
| { return (... && (__k[_Is] == 0)); } |
| /** |
| * Shuffle the elements `_Offset::value + 0, 1, ..., _Np - 1` out of @p __x. The second shuffle |
| * operand is a value-initialized vector of the type of @p __x. The value type of @p __x must |
| * be the value type of @p _TV (static_assert). |
| */ |
| template <typename _Offset = integral_constant<int, 0>> |
| [[__gnu__::__always_inline__]] |
| static constexpr _TV |
| _S_extract(__vec_builtin auto __x, _Offset = {}) |
| { |
| static_assert(is_same_v<__vec_value_type<_TV>, __vec_value_type<decltype(__x)>>); |
| return __builtin_shufflevector(__x, decltype(__x)(), (_Is + _Offset::value)...); |
| } |
| |
| // swap neighboring elements (element i is taken from index i ^ 1) |
| [[__gnu__::__always_inline__]] |
| static constexpr _TV |
| _S_swap_neighbors(_TV __x) |
| { return __builtin_shufflevector(__x, __x, (_Is ^ 1)...); } |
| |
| // duplicate even indexed elements, dropping the odd ones (element i is taken from index i & ~1) |
| [[__gnu__::__always_inline__]] |
| static constexpr _TV |
| _S_dup_even(_TV __x) |
| { return __builtin_shufflevector(__x, __x, (_Is & ~1)...); } |
| |
| // duplicate odd indexed elements, dropping the even ones (element i is taken from index i | 1) |
| [[__gnu__::__always_inline__]] |
| static constexpr _TV |
| _S_dup_odd(_TV __x) |
| { return __builtin_shufflevector(__x, __x, (_Is | 1)...); } |
| /** |
| * Overwrite the even indexed elements of @p __x with the elements of @p __y in order |
| * (x[2 * i] = y[i]); the odd indexed elements are kept. With Clang, @p __y is first widened |
| * via __vec_concat(__y, __y) (presumably because both shuffle operands need the same type). |
| */ |
| [[__gnu__::__always_inline__]] |
| static constexpr void |
| _S_overwrite_even_elements(_TV& __x, _HV __y) requires (_Np > 1) |
| { |
| constexpr __simd_size_type __n = __width_of<_TV>; |
| __x = __builtin_shufflevector(__x, |
| #ifdef _GLIBCXX_CLANG |
| __vec_concat(__y, __y), |
| #else |
| __y, |
| #endif |
| ((_Is & 1) == 0 ? __n + _Is / 2 : _Is)...); |
| } |
| /** |
| * Overwrite the even indexed elements of @p __xl with the elements from the lower half of |
| * @p __y and the even indexed elements of @p __xh with the elements from the upper half of |
| * @p __y; the odd indexed elements are kept. |
| */ |
| [[__gnu__::__always_inline__]] |
| static constexpr void |
| _S_overwrite_even_elements(_TV& __xl, _TV& __xh, _TV __y) |
| { |
| constexpr __simd_size_type __nl = __width_of<_TV>; |
| constexpr __simd_size_type __nh = __nl * 3 / 2; |
| __xl = __builtin_shufflevector(__xl, __y, ((_Is & 1) == 0 ? __nl + _Is / 2 : _Is)...); |
| __xh = __builtin_shufflevector(__xh, __y, ((_Is & 1) == 0 ? __nh + _Is / 2 : _Is)...); |
| } |
| /** |
| * Overwrite the odd indexed elements of @p __x with the elements of @p __y in order |
| * (x[2 * i + 1] = y[i]); the even indexed elements are kept. With Clang, @p __y is first |
| * widened via __vec_concat(__y, __y) (presumably because both shuffle operands need the same |
| * type). |
| */ |
| [[__gnu__::__always_inline__]] |
| static constexpr void |
| _S_overwrite_odd_elements(_TV& __x, _HV __y) requires (_Np > 1) |
| { |
| constexpr __simd_size_type __n = __width_of<_TV>; |
| __x = __builtin_shufflevector(__x, |
| #ifdef _GLIBCXX_CLANG |
| __vec_concat(__y, __y), |
| #else |
| __y, |
| #endif |
| ((_Is & 1) == 1 ? __n + _Is / 2 : _Is)...); |
| } |
| /** |
| * Overwrite the odd indexed elements of @p __xl with the elements from the lower half of |
| * @p __y and the odd indexed elements of @p __xh with the elements from the upper half of |
| * @p __y; the even indexed elements are kept. |
| */ |
| [[__gnu__::__always_inline__]] |
| static constexpr void |
| _S_overwrite_odd_elements(_TV& __xl, _TV& __xh, _TV __y) |
| { |
| constexpr __simd_size_type __nl = __width_of<_TV>; |
| constexpr __simd_size_type __nh = __nl * 3 / 2; |
| __xl = __builtin_shufflevector(__xl, __y, ((_Is & 1) == 1 ? __nl + _Is / 2 : _Is)...); |
| __xh = __builtin_shufflevector(__xh, __y, ((_Is & 1) == 1 ? __nh + _Is / 2 : _Is)...); |
| } |
| |
| // true if all of the first _Np elements are known to be equal to __ref at compile time |
| [[__gnu__::__always_inline__]] |
| static constexpr bool |
| _S_is_const_known_equal_to(_TV __x, _Tp __ref) |
| { return (__is_const_known_equal_to(__x[_Is], __ref) && ...); } |
| |
| }; |
| } // namespace simd |
| _GLIBCXX_END_NAMESPACE_VERSION |
| } // namespace std |
| |
| #pragma GCC diagnostic pop |
| #endif // C++26 |
| #endif // _GLIBCXX_VEC_OPS_H |