| ! { dg-do compile } |
| ! { dg-options "-fcoarray=lib" } |
| ! |
| ! PR fortran/64771 |
| ! |
| ! Contributed by Alessandro Fanfarill |
| ! |
| ! Reduced version of the full NAS CG benchmark |
| ! |
| |
| !-------------------------------------------------------------------------! |
| ! ! |
| ! N A S P A R A L L E L B E N C H M A R K S 3.3 ! |
| ! ! |
| ! C G ! |
| ! ! |
| !-------------------------------------------------------------------------! |
| ! ! |
| ! This benchmark is part of the NAS Parallel Benchmark 3.3 suite. ! |
| ! It is described in NAS Technical Reports 95-020 and 02-007 ! |
| ! ! |
| ! Permission to use, copy, distribute and modify this software ! |
| ! for any purpose with or without fee is hereby granted. We ! |
| ! request, however, that all derived work reference the NAS ! |
| ! Parallel Benchmarks 3.3. This software is provided "as is" ! |
| ! without express or implied warranty. ! |
| ! ! |
| ! Information on NPB 3.3, including the technical report, the ! |
| ! original specifications, source code, results and information ! |
| ! on how to submit new results, is available at: ! |
| ! ! |
| ! http://www.nas.nasa.gov/Software/NPB/ ! |
| ! ! |
| ! Send comments or suggestions to npb@nas.nasa.gov ! |
| ! ! |
| ! NAS Parallel Benchmarks Group ! |
| ! NASA Ames Research Center ! |
| ! Mail Stop: T27A-1 ! |
| ! Moffett Field, CA 94035-1000 ! |
| ! ! |
| ! E-mail: npb@nas.nasa.gov ! |
| ! Fax: (650) 604-3957 ! |
| ! ! |
| !-------------------------------------------------------------------------! |
| |
| |
| c--------------------------------------------------------------------- |
| c |
| c Authors: M. Yarrow |
| c C. Kuszmaul |
| c R. F. Van der Wijngaart |
| c H. Jin |
| c |
| c--------------------------------------------------------------------- |
| |
| |
| c--------------------------------------------------------------------- |
| c--------------------------------------------------------------------- |
| program cg |
| c--------------------------------------------------------------------- |
| c--------------------------------------------------------------------- |
| implicit none |
| |
| integer na, nonzer, niter |
| double precision shift, rcond |
| parameter( na=75000, |
| > nonzer=13, |
| > niter=75, |
| > shift=60., |
| > rcond=1.0d-1 ) |
| |
| |
| |
| integer num_proc_rows, num_proc_cols |
| parameter( num_proc_rows = 2, num_proc_cols = 2) |
| integer num_procs |
| parameter( num_procs = num_proc_cols * num_proc_rows ) |
| |
| integer nz |
| parameter( nz = na*(nonzer+1)/num_procs*(nonzer+1)+nonzer |
| > + na*(nonzer+2+num_procs/256)/num_proc_cols ) |
| |
| common / partit_size / naa, nzz, |
| > npcols, nprows, |
| > proc_col, proc_row, |
| > firstrow, |
| > lastrow, |
| > firstcol, |
| > lastcol, |
| > exch_proc, |
| > exch_recv_length, |
| > send_start, |
| > send_len |
| integer naa, nzz, |
| > npcols, nprows, |
| > proc_col, proc_row, |
| > firstrow, |
| > lastrow, |
| > firstcol, |
| > lastcol, |
| > exch_proc, |
| > exch_recv_length, |
| > send_start, |
| > send_len |
| |
| |
| common / main_int_mem / colidx, rowstr, |
| > iv, arow, acol |
| integer colidx(nz), rowstr(na+1), |
| > iv(2*na+1), arow(nz), acol(nz) |
| |
| |
| c--------------------------------- |
| c Coarray Decalarations |
| c--------------------------------- |
| double precision v(na+1)[0:*], aelt(nz)[0:*], a(nz)[0:*], |
| > x(na/num_proc_rows+2)[0:*], |
| > z(na/num_proc_rows+2)[0:*], |
| > p(na/num_proc_rows+2)[0:*], |
| > q(na/num_proc_rows+2)[0:*], |
| > r(na/num_proc_rows+2)[0:*], |
| > w(na/num_proc_rows+2)[0:*] |
| |
| |
| common /urando/ amult, tran |
| double precision amult, tran |
| |
| |
| |
| integer l2npcols |
| integer reduce_exch_proc(num_proc_cols) |
| integer reduce_send_starts(num_proc_cols) |
| integer reduce_send_lengths(num_proc_cols) |
| integer reduce_recv_lengths(num_proc_cols) |
| integer reduce_rrecv_starts(num_proc_cols) |
| c--------------------------------- |
| c Coarray Decalarations |
| c--------------------------------- |
| integer reduce_recv_starts(num_proc_cols)[0:*] |
| |
| integer i, j, k, it, me, nprocs, root |
| |
| double precision zeta, randlc |
| external randlc |
| double precision rnorm |
| c--------------------------------- |
| c Coarray Decalarations |
| c--------------------------------- |
| double precision norm_temp1(2)[0:*], norm_temp2(2)[0:*] |
| |
| double precision t, tmax, mflops |
| double precision u(1), umax(1) |
| external timer_read |
| double precision timer_read |
| character class |
| logical verified |
| double precision zeta_verify_value, epsilon, err |
| |
| c--------------------------------------------------------------------- |
| c Explicit interface for conj_grad, due to coarray args |
| c--------------------------------------------------------------------- |
| interface |
| |
| subroutine conj_grad ( colidx, |
| > rowstr, |
| > x, |
| > z, |
| > a, |
| > p, |
| > q, |
| > r, |
| > w, |
| > rnorm, |
| > l2npcols, |
| > reduce_exch_proc, |
| > reduce_send_starts, |
| > reduce_send_lengths, |
| > reduce_recv_starts, |
| > reduce_recv_lengths, |
| > reduce_rrecv_starts ) |
| |
| common / partit_size / naa, nzz, |
| > npcols, nprows, |
| > proc_col, proc_row, |
| > firstrow, |
| > lastrow, |
| > firstcol, |
| > lastcol, |
| > exch_proc, |
| > exch_recv_length, |
| > send_start, |
| > send_len |
| |
| integer naa, nzz, |
| > npcols, nprows, |
| > proc_col, proc_row, |
| > firstrow, |
| > lastrow, |
| > firstcol, |
| > lastcol, |
| > exch_proc, |
| > exch_recv_length, |
| > send_start, |
| > send_len |
| |
| double precision x(*), |
| > z(*), |
| > a(nzz) |
| integer colidx(nzz), rowstr(naa+1) |
| |
| double precision p(*), |
| > q(*)[0:*], |
| > r(*)[0:*], |
| > w(*)[0:*] ! used as work temporary |
| |
| integer l2npcols |
| integer reduce_exch_proc(l2npcols) |
| integer reduce_send_starts(l2npcols) |
| integer reduce_send_lengths(l2npcols) |
| integer reduce_recv_starts(l2npcols)[0:*] |
| integer reduce_recv_lengths(l2npcols) |
| integer reduce_rrecv_starts(l2npcols) |
| |
| double precision rnorm |
| |
| end subroutine |
| |
| end interface |
| |
| c--------------------------------------------------------------------- |
| c The call to the conjugate gradient routine: |
| c--------------------------------------------------------------------- |
| call conj_grad ( colidx, |
| > rowstr, |
| > x, |
| > z, |
| > a, |
| > p, |
| > q, |
| > r, |
| > w, |
| > rnorm, |
| > l2npcols, |
| > reduce_exch_proc, |
| > reduce_send_starts, |
| > reduce_send_lengths, |
| > reduce_recv_starts, |
| > reduce_recv_lengths, |
| > reduce_rrecv_starts ) |
| |
| |
| sync all |
| |
| end ! end main |
| |
| c--------------------------------------------------------------------- |
| c--------------------------------------------------------------------- |
| subroutine conj_grad ( colidx, |
| > rowstr, |
| > x, |
| > z, |
| > a, |
| > p, |
| > q, |
| > r, |
| > w, |
| > rnorm, |
| > l2npcols, |
| > reduce_exch_proc, |
| > reduce_send_starts, |
| > reduce_send_lengths, |
| > reduce_recv_starts, |
| > reduce_recv_lengths, |
| > reduce_rrecv_starts ) |
| c--------------------------------------------------------------------- |
| c--------------------------------------------------------------------- |
| |
| c--------------------------------------------------------------------- |
| c Floaging point arrays here are named as in NPB1 spec discussion of |
| c CG algorithm |
| c--------------------------------------------------------------------- |
| |
| implicit none |
| |
| c include 'cafnpb.h' |
| |
| common / partit_size / naa, nzz, |
| > npcols, nprows, |
| > proc_col, proc_row, |
| > firstrow, |
| > lastrow, |
| > firstcol, |
| > lastcol, |
| > exch_proc, |
| > exch_recv_length, |
| > send_start, |
| > send_len |
| integer naa, nzz, |
| > npcols, nprows, |
| > proc_col, proc_row, |
| > firstrow, |
| > lastrow, |
| > firstcol, |
| > lastcol, |
| > exch_proc, |
| > exch_recv_length, |
| > send_start, |
| > send_len |
| |
| |
| |
| double precision x(*), |
| > z(*), |
| > a(nzz) |
| integer colidx(nzz), rowstr(naa+1) |
| |
| double precision p(*), |
| > q(*)[0:*], |
| > r(*)[0:*], |
| > w(*)[0:*] ! used as work temporary |
| |
| integer l2npcols |
| integer reduce_exch_proc(l2npcols) |
| integer reduce_send_starts(l2npcols) |
| integer reduce_send_lengths(l2npcols) |
| integer reduce_recv_starts(l2npcols)[0:*] |
| integer reduce_recv_lengths(l2npcols) |
| integer reduce_rrecv_starts(l2npcols) |
| |
| integer recv_start_idx, recv_end_idx, send_start_idx, |
| > send_end_idx, recv_length |
| |
| integer i, j, k, ierr |
| integer cgit, cgitmax |
| |
| double precision, save :: d[0:*], rho[0:*] |
| double precision sum, rho0, alpha, beta, rnorm |
| |
| external timer_read |
| double precision timer_read |
| |
| data cgitmax / 25 / |
| |
| |
| return |
| end ! end of routine conj_grad |
| |