| /* { dg-do run { target openacc_nvidia_accel_selected } } |
| { dg-skip-if "" { *-*-* } { "*" } { "-O2" } } */ |
| /* { dg-additional-options "-foffload=-fdump-rtl-mach" } */ |
| /* Run an OpenACC parallel region holding a worker loop with an empty body; always returns 0.  */ |
| int |
| main (void) |
| { |
| int v1; /* Induction variable of the worker loop below.  */ |
| /* The two pragmas below apply to the for loop that follows them.  */ |
| #pragma acc parallel |
| #pragma acc loop worker |
| for (v1 = 0; v1 < 20; v1 += 2) |
| ; /* Empty body: the loop performs no work.  */ |
| |
| return 0; |
| } |
| |
| /* Todo: Both bar.syncs can be removed. |
| At the moment we generate this dead code in between forked and joining: |
| |
| mov.u32 %r28, %ntid.y; |
| mov.u32 %r29, %tid.y; |
| add.u32 %r30, %r29, %r29; |
| setp.gt.s32 %r31, %r30, 19; |
| @%r31 bra $L2; |
| add.u32 %r25, %r28, %r28; |
| mov.u32 %r24, %r30; |
| $L3: |
| add.u32 %r24, %r24, %r25; |
| setp.le.s32 %r33, %r24, 19; |
| @%r33 bra $L3; |
| $L2: |
| |
| so the loop is not recognized as an empty loop (which we detect by checking whether |
| joining immediately follows forked). */ |
| /* { dg-final { scan-offload-rtl-dump-times "nvptx_barsync" 2 "mach" } } */ |