libgomp/testsuite/libgomp.oacc-c-c++-common/loop-red-gwv-1.c - gcc - Git at Google

 /* This code uses nvptx inline assembly guarded with acc_on_device, which is
    not optimized away at -O0, and then confuses the target assembler.
    { dg-skip-if "" { *-*-* } { "-O0" } { "" } } */

 #include <stdio.h>

 /* Trip count shared by both loops in main: 17 iterations more than the
    32 * 32 * 32 product of the gang, worker and vector sizes requested on
    the parallel directive.  */
 #define N (32 * 32 * 32 + 17)
 /* Sum one value per iteration over N iterations twice -- in an OpenACC
    parallel loop with a '+' reduction into T, and in a plain host loop into
    H -- then compare the sums.  Returns 0 if they agree; otherwise prints
    both in hex and returns 1.  */
 int main ()
 {
   int ix;
   int ondev = 0;
   /* Accumulate unsigned: once the device branch is taken the (g << 16)
      terms alone sum past INT_MAX for this N, and overflowing a signed
      int is undefined.  Unsigned arithmetic wraps identically on both
      sides of the comparison, and is also the type "%x" expects.  */
   unsigned t = 0, h = 0;

   /* ondev is listed in copy() so that the flag set inside the region is
      the value the host loop below tests.  */
 #pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev)
   {
 #pragma acc loop gang worker vector reduction(+:t)
     for (unsigned ix = 0; ix < N; ix++)
       {
 	/* Default contribution when the device branch is not taken.  */
 	int val = ix;

 	/* NOTE(review): 5 is presumably the nvptx device-type code, since
 	   the asm below is PTX -- confirm against gomp-constants.h.  */
 	if (__builtin_acc_on_device (5))
 	  {
 	    int g = 0, w = 0, v = 0;

 	    /* Read the PTX special registers %ctaid.x, %tid.y and %tid.x.
 	       Presumably these are the executing thread's gang, worker and
 	       vector indices (assumption from the variable names).  */
 	    __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
 	    __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
 	    __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
 	    /* Pack the three indices into one value: g above bit 16, w in
 	       bits 8 and up, v in the low bits.  */
 	    val = (g << 16) | (w << 8) | v;
 	    ondev = 1;
 	  }
 	t += val;
       }
   }

   /* Recompute on the host what each iteration is expected to have
      contributed.  */
   for (ix = 0; ix < N; ix++)
     {
       int val = ix;
       if(ondev)
 	{
 	  /* N divided by 32*32*32, rounded up.  */
 	  int chunk_size = (N + 32*32*32 - 1) / (32*32*32);

 	  /* NOTE(review): these formulas encode an assumed mapping from
 	     iteration number to (g, w, v); they must mirror how the
 	     compiler actually partitions the loop.  */
 	  int g = ix / (chunk_size * 32 * 32);
 	  int w = ix / 32 % 32;
 	  int v = ix % 32;

 	  val = (g << 16) | (w << 8) | v;
 	}
       h += val;
     }
   if (t != h)
     {
       printf ("t=%x expected %x\n", t, h);
       return 1;
     }

   return 0;
 }
	/* This code uses nvptx inline assembly guarded with acc_on_device, which is
	not optimized away at -O0, and then confuses the target assembler.
	{ dg-skip-if "" { *-*-* } { "-O0" } { "" } } */

	#include <stdio.h>

	/* Trip count shared by both loops in main: 17 iterations more than the
	   32*32*32 product of the gang, worker and vector sizes requested on
	   the parallel directive.  */
	#define N (32*32*32+17)
	/* Sum one value per iteration over N iterations twice -- in an OpenACC
	   parallel loop with a '+' reduction into T, and in a plain host loop
	   into H -- then compare the sums.  Returns 0 if they agree; otherwise
	   prints both in hex and returns 1.  */
	int main ()
	{
	  int ix;
	  int ondev = 0;
	  /* Accumulate unsigned: once the device branch is taken the (g << 16)
	     terms alone sum past INT_MAX for this N, and overflowing a signed
	     int is undefined.  Unsigned arithmetic wraps identically on both
	     sides of the comparison, and is also the type "%x" expects.  */
	  unsigned t = 0, h = 0;

	  /* ondev is listed in copy() so that the flag set inside the region
	     is the value the host loop below tests.  */
	#pragma acc parallel num_gangs(32) num_workers(32) vector_length(32) copy(ondev)
	  {
	#pragma acc loop gang worker vector reduction(+:t)
	    for (unsigned ix = 0; ix < N; ix++)
	      {
	        /* Default contribution when the device branch is not taken.  */
	        int val = ix;

	        /* NOTE(review): 5 is presumably the nvptx device-type code,
	           since the asm below is PTX -- confirm against
	           gomp-constants.h.  */
	        if (__builtin_acc_on_device (5))
	          {
	            int g = 0, w = 0, v = 0;

	            /* Read the PTX special registers %ctaid.x, %tid.y and
	               %tid.x.  Presumably these are the executing thread's
	               gang, worker and vector indices (assumption from the
	               variable names).  */
	            __asm__ volatile ("mov.u32 %0,%%ctaid.x;" : "=r" (g));
	            __asm__ volatile ("mov.u32 %0,%%tid.y;" : "=r" (w));
	            __asm__ volatile ("mov.u32 %0,%%tid.x;" : "=r" (v));
	            /* Pack the three indices into one value: g above bit 16,
	               w in bits 8 and up, v in the low bits.  */
	            val = (g << 16) | (w << 8) | v;
	            ondev = 1;
	          }
	        t += val;
	      }
	  }

	  /* Recompute on the host what each iteration is expected to have
	     contributed.  */
	  for (ix = 0; ix < N; ix++)
	    {
	      int val = ix;
	      if(ondev)
	        {
	          /* N divided by 32*32*32 (the product of num_gangs,
	             num_workers and vector_length above), rounded up.  */
	          int chunk_size = (N + 32*32*32 - 1) / (32*32*32);

	          /* NOTE(review): these formulas encode an assumed mapping
	             from iteration number to (g, w, v); they must mirror how
	             the compiler actually partitions the loop.  */
	          int g = ix / (chunk_size * 32 * 32);
	          int w = ix / 32 % 32;
	          int v = ix % 32;

	          val = (g << 16) | (w << 8) | v;
	        }
	      h += val;
	    }
	  if (t != h)
	    {
	      printf ("t=%x expected %x\n", t, h);
	      return 1;
	    }

	  return 0;
	}