| // { dg-do compile } |
| // { dg-additional-options "-ffast-math -fopenmp-simd" } |
| // { dg-additional-options "-msse2" { target x86_64-*-* i?86-*-* } } |
| |
// Allocates `bytes` bytes aligned to a 128-byte boundary.
//
// Returns the new buffer, or a null pointer when the allocation fails.
// The buffer comes from posix_memalign, so it is released with free.
inline void* my_alloc (__SIZE_TYPE__ bytes)
{
  void* ptr = 0;
  // posix_memalign takes (memptr, alignment, size) and returns 0 on success.
  // On failure the contents of ptr are not relied upon.
  if (__builtin_posix_memalign (&ptr, 128, bytes) != 0)
    return 0;
  return ptr;
}
// Releases a buffer by forwarding the pointer unchanged to __builtin_free.
inline void my_free (void* memory)
{
  __builtin_free (memory);
}
// Global table of 100 weights; Ss::foo reads W[j] once per outer iteration.
// The object has static storage duration, so every element starts at 0.0f.
float W[100] = {};
| |
| template <typename T> |
| class Vec |
| { |
| const int isize; |
| T* data; |
| |
| public: |
| |
| Vec (int n) : isize (n) {data = (T*)my_alloc (isize*sizeof (T));} |
| ~Vec () {my_free(data);} |
| |
| Vec& operator = (const Vec& other) |
| { |
| if (this != &other) |
| __builtin_memcpy (data, other.data, isize*sizeof (T)); |
| return *this; |
| } |
| |
| T& operator [] (int i) {return data[i];} |
| const T& operator [] (int i) const {return data[i];} |
| T& at (int i) {return data[i];} |
| const T& at (int i) const {return data[i];} |
| |
| operator T* () {return data;} |
| int size () const {return isize;} |
| }; |
| |
| template <typename T> |
| class Cl |
| { |
| public: |
| |
| Cl (int n, int m); |
| const int N, M; |
| Vec<T> v_x, v_y; |
| Vec<int> v_i; |
| Vec<float> v_z; |
| }; |
| |
| struct Ss |
| { |
| const int S_n, S_m; |
| Cl<float> v1; |
| float* C1; |
| float* C2; |
| Ss (int n1, int n2): S_n(n1), S_m(n2), v1(n1, n2) |
| { |
| C1 = new float[n1 * 3]; |
| C2 = new float[n2 * 4]; |
| } |
| |
| ~Ss () { delete C1; delete C2;} |
| void foo (int n); |
| }; |
| void Ss::foo (int n) |
| { |
| float w; |
| for (int j = 0; j < n; j++) |
| { |
| w = W[j]; |
| #pragma omp simd |
| for (int i = 0; i < S_n; i++) |
| { |
| float w1 = C2[S_n + i] * w; |
| v1.v_i[i] += (int)w1; |
| C1[S_n + i] += w1; |
| } |
| } |
| } |
| |
| // { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" { xfail *-*-* } } } |