| ! { dg-do compile } |
| ! { dg-options "-O2 -fpeel-loops -finline-functions -fipa-cp-clone -fdump-ipa-inline-details" } |
| |
| module TensorProducts |
| use, intrinsic :: iso_fortran_env |
| |
| implicit none |
| |
| integer, parameter :: dp = real64 ! KIND for double precision |
| |
| type Vect3D |
| real(dp) :: x, y, z |
| end type |
| |
| contains |
| |
| type(Vect3D) pure function MySum(array) |
| type(Vect3D), intent(in) :: array(:,:) |
| |
| mysum = Vect3D(sum(array%x), sum(array%y), sum(array%z)) |
| end function |
| |
| pure subroutine GenerateGrid(N, M, width, height, centre, P) |
| integer, intent(in) :: N, M |
| real(dp), intent(in) :: width, height |
| type(Vect3D), intent(in) :: centre |
| type(Vect3D), intent(out) :: P(N, M) |
| real(dp) :: x(N), y(M) |
| integer :: i, j |
| |
| x = ([( i, i = 0, N-1 )] * width/(N-1)) - (width / 2) + centre%x |
| y = ([( j, j = 0, M-1 )] * height/(M-1)) - (height / 2) + centre%y |
| do concurrent (i = 1:N) |
| do concurrent (j = 1:M) |
| P(i, j) = Vect3D(x(i), y(j), centre%z) |
| end do |
| end do |
| P(2:3,2:3)%z = P(2:3,2:3)%z + 1.0_dp*reshape([2,1,1,-2], [2,2]) |
| end subroutine |
| |
| type(Vect3D) pure function TP_SUM(NU, D, NV) result(tensorproduct) |
| ! (NU) D (NV)^T, row * matrix * column |
| ! TODO (#6): TensorProduct: Investigate whether using DO loops triggers a temporary array. |
| ! copied from Surfaces |
| real(dp), intent(in) :: NU(4), NV(4) |
| type(Vect3D), intent(in) :: D(4,4) |
| integer :: i, j |
| type(Vect3D) :: P(4,4) |
| |
| do concurrent (i = 1:4) |
| do concurrent (j = 1:4) |
| P(i,j)%x = NU(i) * D(i,j)%x * NV(j) |
| P(i,j)%y = NU(i) * D(i,j)%y * NV(j) |
| P(i,j)%z = NU(i) * D(i,j)%z * NV(j) |
| end do |
| end do |
| tensorproduct = MySum(P) |
| end function |
| |
| subroutine RandomSeed() |
| integer :: seed_size, clock, i |
| integer, allocatable, save :: seed(:) |
| |
| if (.not. allocated(seed)) then |
| call random_seed(size=seed_size) |
| allocate(seed(seed_size)) |
| call system_clock(count=clock) |
| seed = clock + 37 * [( i -1, i = 1, seed_size )] |
| call random_seed(put=seed) |
| end if |
| end subroutine |
| |
| subroutine RunTPTests() |
| type(Vect3D) :: tp, P(4,4) |
| integer, parameter :: i_max = 10000000 |
| real(dp) :: NU(4,i_max), NV(4,i_max) |
| real(dp) :: sum |
| real :: t(2) |
| integer :: i |
| |
| ! print *, 'This code variant uses explicit %x, %y and %z to represent the contents of Type(Vect3D).' |
| call GenerateGrid(4, 4, 20.0_dp, 20.0_dp, Vect3D(0.0_dp,0.0_dp,20.0_dp), P) |
| call RandomSeed() |
| ! call cpu_time(t(1)) |
| do i = 1, 4 |
| call random_number(NU(i,:)) |
| call random_number(NV(i,:)) |
| end do |
| ! call cpu_time(t(2)) |
| ! print *, 'Random Numbers, time: ', t(2)-t(1) |
| sum = 0.0 |
| call cpu_time(t(1)) |
| do i = 1, i_max |
| tp = TP_SUM(NU(:,i), P(1:4,1:4), NV(:,i)) |
| sum = sum + tp%x |
| end do |
| call cpu_time(t(2)) |
| print *, 'Using SUM, time: ', t(2)-t(1) |
| print *, 'sum =', sum |
| end subroutine |
| |
| end module |
| |
| program Main |
| use TensorProducts |
| |
| implicit none |
| |
| call RunTPTests() |
| end program |
| ! See PR88711. Inliner is currently not able to figure out that inlining tp_sum is a good idea. |
| ! { dg-final { scan-ipa-dump "Inlined tp_sum/\[0-9\]+ into runtptests/\[0-9\]+" "inline" { xfail *-*-* } } } |