| /* { dg-do compile } */ |
| /* { dg-additional-options "-O3" } */ |
| /* { dg-require-effective-target vect_double } */ |
| /* Test kernel: fixed-size multiply-accumulate over three double arrays. |
| For each l_n in [0, 9) it clears C[l_n*10 + l_m] and then adds |
| A[l_k*20 + l_m] * B[l_n*20 + l_k] into C[l_n*10 + l_m] for every |
| l_k in [0, 17) and l_m in [0, 10).  A and B are only read; C is |
| overwritten.  Highest indices touched: C[89], A[329] and B[176]. |
| All three pointers are __restrict__-qualified, i.e. the caller promises |
| the arrays do not alias.  The exact loop shape is the input to the |
| slp1 dump scan at the end of this file, so do not restructure it. */ |
| void |
| gemm (const double* __restrict__ A, const double* __restrict__ B, |
| double* __restrict__ C) |
| { |
| unsigned int l_m = 0; |
| unsigned int l_n = 0; |
| unsigned int l_k = 0; |
| /* Each l_n iteration produces one group of ten consecutive C elements. */ |
| for ( l_n = 0; l_n < 9; l_n++ ) { |
| /* Clear the ten C elements for this l_n before accumulating into them. |
| Use -O3 so this loop is unrolled completely early. */ |
| for ( l_m = 0; l_m < 10; l_m++ ) { C[(l_n*10)+l_m] = 0.0; } |
| for ( l_k = 0; l_k < 17; l_k++ ) { |
| /* Add A[l_k*20 + l_m] * B[l_n*20 + l_k] into each of the ten C |
| elements; the B operand does not depend on l_m. |
| Use -O3 so this loop is unrolled completely early. */ |
| for ( l_m = 0; l_m < 10; l_m++ ) { |
| C[(l_n*10)+l_m] += A[(l_k*20)+l_m] * B[(l_n*20)+l_k]; |
| } |
| } |
| } |
| } |
| |
| /* Exact scanning is difficult but we expect all loads and stores |
| and computations to be vectorized. */ |
| /* { dg-final { scan-tree-dump "optimized: basic block" "slp1" } } */ |