/* { dg-do run { target lp64 } } */
/* This is a test exercising peeling for alignment for a negative step
   vector loop.  We force Atom tuning here because, unlike most other
   architectures, it has a higher cost for unaligned than for aligned
   accesses.  */
/* { dg-options "-O3 -march=x86-64 -mtune=atom -fdump-tree-vect-details -save-temps" } */
float a[1024], b[1024];
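/* foo1 through foo4 start at 507 down to 504, so with 16-byte vectors
   of four floats the first access should cover each of the four
   possible misalignments.  */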
void __attribute__((noipa)) foo1 ()
{
for (int i = 507; i > 1; --i)
a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo2 ()
{
for (int i = 506; i > 1; --i)
a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo3 ()
{
for (int i = 505; i > 1; --i)
a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo4 ()
{
for (int i = 504; i > 1; --i)
a[i] = b[i] * 2.;
}
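/* The same loop with a runtime start index, so the amount of peeling
   needed for alignment is not known at compile time.  */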
void __attribute__((noipa)) foo5 (int start)
{
for (int i = start; i > 1; --i)
a[i] = b[i] * 2.;
}
int main()
{
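/* The asm with a memory clobber keeps the initialization and check
   loops from being vectorized themselves, so only the foo* loops
   contribute vector code.  */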
for (int i = 2; i < 508; ++i)
{
__asm__ volatile ("" : : : "memory");
b[i] = i;
}
foo1 ();
for (int i = 2; i < 508; ++i)
if (a[i] != 2*i)
__builtin_abort ();
for (int i = 2; i < 507; ++i)
{
__asm__ volatile ("" : : : "memory");
b[i] = i;
}
foo2 ();
for (int i = 2; i < 507; ++i)
if (a[i] != 2*i)
__builtin_abort ();
for (int i = 2; i < 506; ++i)
{
__asm__ volatile ("" : : : "memory");
b[i] = i;
}
foo3 ();
for (int i = 2; i < 506; ++i)
if (a[i] != 2*i)
__builtin_abort ();
for (int i = 2; i < 505; ++i)
{
__asm__ volatile ("" : : : "memory");
b[i] = i;
}
foo4 ();
for (int i = 2; i < 505; ++i)
if (a[i] != 2*i)
__builtin_abort ();
for (int i = 2; i < 506; ++i)
{
__asm__ volatile ("" : : : "memory");
b[i] = i;
}
foo5 (505);
for (int i = 2; i < 506; ++i)
if (a[i] != 2*i)
__builtin_abort ();
}
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" } } */
/* Verify all vector accesses are emitted as aligned. */
/* { dg-final { scan-assembler-not "movup" } } */