blob: 3abb65f4591c7ab70dd5167d201d218065d0be6f [file] [log] [blame]
/* { dg-do compile { target c++11 } } */
/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */
// V<T> names a GNU vector type that is 16 bytes wide and whose elements
// have type T (so the element count is 16 / sizeof(T)).
template <class T>
using V [[gnu::vector_size(16)]] = T;
// Builds a V<T> from the M bytes found at p.
//
// T  element type of the returned 16-byte vector.
// M  number of bytes to copy; defaults to the full vector size.  No check
//    is made here that M fits inside the vector, so callers must keep
//    M <= sizeof(V<T>).
// p  source address; M bytes are read from it.
//
// Returns the vector whose first M bytes of storage come from p and whose
// remaining bytes are zero.
template <class T, unsigned M = sizeof(V<T>)>
V<T> load(const void *p) {
// Start from an all-zero vector so bytes not overwritten below stay zero.
V<T> r = {};
// Overwrite only the leading M bytes of the vector's storage.
__builtin_memcpy(&r, p, M);
return r;
}
// Explicit instantiations of load<T, M> for partial (8-byte and 4-byte)
// copies into a 16-byte vector.  There are nine active instantiations
// below, which is the count the scan directives at the end of this file
// expect.
// NOTE(review): the trailing "good"/"bad" tags presumably record the code
// quality seen for each case when the test was written -- confirm against
// the originating bug report before relying on them.

// 8-byte copies.
// movq or movsd
template V<char> load<char, 8>(const void *); // bad
template V<short> load<short, 8>(const void *); // bad
template V<int> load<int, 8>(const void *); // bad
template V<long> load<long, 8>(const void *); // good
// the following is disabled because V2SF isn't a supported mode
// template V<float> load<float, 8>(const void *); // bad
template V<double> load<double, 8>(const void *); // good (movsd?)
// 4-byte copies.
// movd or movss
template V<char> load<char, 4>(const void *); // bad
template V<short> load<short, 4>(const void *); // bad
template V<int> load<int, 4>(const void *); // good
template V<float> load<float, 4>(const void *); // good
/* We should end up with one load and one insert for each function. */
/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */