blob: 8d48b8c55c8c2efcdffbfb73f993bea15701b25b [file] [log] [blame]
// { dg-do run { target c++26 } }
// { dg-require-effective-target x86 }
#include "test_setup.h"
#include <numeric>
template <typename T, std::size_t N, std::size_t Alignment>
class alignas(Alignment) aligned_array
: public std::array<T, N>
{};
template <typename V>
struct Tests
{
using T = typename V::value_type;
using M = typename V::mask_type;
static_assert(simd::alignment_v<V> <= 256);
ADD_TEST(load_zeros) {
std::tuple {aligned_array<T, V::size * 2, 256> {}, aligned_array<int, V::size * 2, 256> {}},
[](auto& t, auto mem, auto ints) {
t.verify_equal(simd::unchecked_load<V>(mem), V());
t.verify_equal(simd::partial_load<V>(mem), V());
t.verify_equal(simd::unchecked_load<V>(mem, simd::flag_aligned), V());
t.verify_equal(simd::partial_load<V>(mem, simd::flag_aligned), V());
t.verify_equal(simd::unchecked_load<V>(mem, simd::flag_overaligned<256>), V());
t.verify_equal(simd::partial_load<V>(mem, simd::flag_overaligned<256>), V());
t.verify_equal(simd::unchecked_load<V>(mem.begin() + 1, mem.end()), V());
t.verify_equal(simd::partial_load<V>(mem.begin() + 1, mem.end()), V());
t.verify_equal(simd::partial_load<V>(mem.begin() + 1, mem.begin() + 1), V());
t.verify_equal(simd::partial_load<V>(mem.begin() + 1, mem.begin() + 2), V());
t.verify_equal(simd::unchecked_load<V>(ints, simd::flag_convert), V());
t.verify_equal(simd::partial_load<V>(ints, simd::flag_convert), V());
t.verify_equal(simd::unchecked_load<V>(mem, M(true)), V());
t.verify_equal(simd::unchecked_load<V>(mem, M(false)), V());
t.verify_equal(simd::partial_load<V>(mem, M(true)), V());
t.verify_equal(simd::partial_load<V>(mem, M(false)), V());
}
};
static constexpr V ref = test_iota<V, 1, 0>;
static constexpr V ref1 = V([](int i) { return i == 0 ? T(1): T(); });
template <typename U>
static constexpr auto
make_iota_array()
{
aligned_array<U, V::size * 2, simd::alignment_v<V, U>> arr = {};
U init = 0;
for (auto& x : arr) x = (init += U(1));
return arr;
}
ADD_TEST(load_iotas, requires {T() + T(1);}) {
std::tuple {make_iota_array<T>(), make_iota_array<int>()},
[](auto& t, auto mem, auto ints) {
t.verify_equal(simd::unchecked_load<V>(mem), ref);
t.verify_equal(simd::partial_load<V>(mem), ref);
t.verify_equal(simd::unchecked_load<V>(mem.begin() + 1, mem.end()), ref + T(1));
t.verify_equal(simd::partial_load<V>(mem.begin() + 1, mem.end()), ref + T(1));
t.verify_equal(simd::partial_load<V>(mem.begin(), mem.begin() + 1), ref1);
t.verify_equal(simd::unchecked_load<V>(mem, simd::flag_aligned), ref);
t.verify_equal(simd::partial_load<V>(mem, simd::flag_aligned), ref);
t.verify_equal(simd::unchecked_load<V>(ints, simd::flag_convert), ref);
t.verify_equal(simd::partial_load<V>(ints, simd::flag_convert), ref);
t.verify_equal(simd::partial_load<V>(
ints.begin(), ints.begin(), simd::flag_convert), V());
t.verify_equal(simd::partial_load<V>(
ints.begin(), ints.begin() + 1, simd::flag_convert), ref1);
t.verify_equal(simd::unchecked_load<V>(mem, M(true)), ref);
t.verify_equal(simd::unchecked_load<V>(mem, M(false)), V());
t.verify_equal(simd::partial_load<V>(mem, M(true)), ref);
t.verify_equal(simd::partial_load<V>(mem, M(false)), V());
}
};
static constexpr M alternating = M([](int i) { return 1 == (i & 1); });
static constexpr V ref_k = select(alternating, ref, T());
static constexpr V ref_2 = select(M([](int i) { return i < 2; }), ref, T());
static constexpr V ref_k_2 = select(M([](int i) { return i < 2; }), ref_k, T());
ADD_TEST(masked_loads) {
std::tuple {make_iota_array<T>(), make_iota_array<int>(), alternating, M(true), M(false)},
[](auto& t, auto mem, auto ints, M k, M tr, M fa) {
t.verify_equal(simd::unchecked_load<V>(mem, tr), ref);
t.verify_equal(simd::unchecked_load<V>(mem, fa), V());
t.verify_equal(simd::unchecked_load<V>(mem, k), ref_k);
t.verify_equal(simd::unchecked_load<V>(ints, tr, simd::flag_convert), ref);
t.verify_equal(simd::unchecked_load<V>(ints, fa, simd::flag_convert), V());
t.verify_equal(simd::unchecked_load<V>(ints, k, simd::flag_convert), ref_k);
t.verify_equal(simd::partial_load<V>(mem, tr), ref);
t.verify_equal(simd::partial_load<V>(mem, fa), V());
t.verify_equal(simd::partial_load<V>(mem, k), ref_k);
t.verify_equal(simd::partial_load<V>(mem.begin(), mem.begin() + 2, tr), ref_2);
t.verify_equal(simd::partial_load<V>(mem.begin(), mem.begin() + 2, fa), V());
t.verify_equal(simd::partial_load<V>(mem.begin(), mem.begin() + 2, k), ref_k_2);
t.verify_equal(simd::partial_load<V>(ints.begin(), ints.begin() + 2, tr,
simd::flag_convert), ref_2);
t.verify_equal(simd::partial_load<V>(ints.begin(), ints.begin() + 2, fa,
simd::flag_convert), V());
t.verify_equal(simd::partial_load<V>(ints.begin(), ints.begin() + 2, k,
simd::flag_convert), ref_k_2);
}
};
};
#include "create_tests.h"