gcc/testsuite/g++.target/i386/pr90424-2.C - gcc - Git at Google

 /* { dg-do compile { target c++11 } } */
 /* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */

 template <class T>
 using V [[gnu::vector_size(16)]] = T;

 template <class T, unsigned M = sizeof(V<T>)>
 V<T> load(const void *p) {
   V<T> r = {};
   __builtin_memcpy(&r, p, M);
   return r;
 }

 // movq or movsd
 template V<char> load<char, 8>(const void *);     // bad
 template V<short> load<short, 8>(const void *);   // bad
 template V<int> load<int, 8>(const void *);       // bad
 template V<long> load<long, 8>(const void *);     // good
 // the following is disabled because V2SF isn't a supported mode
 // template V<float> load<float, 8>(const void *);   // bad
 template V<double> load<double, 8>(const void *); // good (movsd?)

 // movd or movss
 template V<char> load<char, 4>(const void *);   // bad
 template V<short> load<short, 4>(const void *); // bad
 template V<int> load<int, 4>(const void *);     // good
 template V<float> load<float, 4>(const void *); // good

 /* We should end up with one load and one insert for each function.  */
 /* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
 /* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */
	/* { dg-do compile { target c++11 } } */
	/* { dg-options "-O2 -msse2 -fdump-tree-optimized" } */

	template <class T>
	using V [[gnu::vector_size(16)]] = T;

	template <class T, unsigned M = sizeof(V<T>)>
	V<T> load(const void *p) {
	V<T> r = {};
	__builtin_memcpy(&r, p, M);
	return r;
	}

	// movq or movsd
	template V<char> load<char, 8>(const void *); // bad
	template V<short> load<short, 8>(const void *); // bad
	template V<int> load<int, 8>(const void *); // bad
	template V<long> load<long, 8>(const void *); // good
	// the following is disabled because V2SF isn't a supported mode
	// template V<float> load<float, 8>(const void *); // bad
	template V<double> load<double, 8>(const void *); // good (movsd?)

	// movd or movss
	template V<char> load<char, 4>(const void *); // bad
	template V<short> load<short, 4>(const void *); // bad
	template V<int> load<int, 4>(const void *); // good
	template V<float> load<float, 4>(const void *); // good

	/* We should end up with one load and one insert for each function. */
	/* { dg-final { scan-tree-dump-times "BIT_INSERT_EXPR" 9 "optimized" } } */
	/* { dg-final { scan-tree-dump-times "MEM" 9 "optimized" } } */