/* { dg-do run { target lp64 } } */
/* This is a test exercising peeling for alignment for a negative step
   vector loop.  We force Atom tuning here because, unlike most other
   architectures, it has a higher cost for unaligned than for aligned
   accesses.  */
/* { dg-options "-O3 -march=x86-64 -mtune=atom -fdump-tree-vect-details -save-temps" } */
float a[1024], b[1024];
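/* foo1 through foo4 start at 507 down to 504, so with 16-byte vectors
   of four floats the first access should cover each of the four
   possible misalignments.  */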
void __attribute__((noipa)) foo1 ()
{
for (int i = 507; i > 1; --i)
a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo2 ()
{
for (int i = 506; i > 1; --i)
a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo3 ()
{
for (int i = 505; i > 1; --i)
a[i] = b[i] * 2.;
}
void __attribute__((noipa)) foo4 ()
{
for (int i = 504; i > 1; --i)
a[i] = b[i] * 2.;
}
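/* The same loop with a runtime start index, so the amount of peeling
   needed for alignment is not known at compile time.  */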
void __attribute__((noipa)) foo5 (int start)
{
for (int i = start; i > 1; --i)
a[i] = b[i] * 2.;
}
int main()
{
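/* The asm with a memory clobber keeps the initialization and check
   loops from being vectorized themselves, so only the foo* loops
   contribute vector code.  */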
for (int i = 2; i < 508; ++i)
{
__asm__ volatile ("" : : : "memory");
b[i] = i;
}
foo1 ();
for (int i = 2; i < 508; ++i)
if (a[i] != 2*i)
__builtin_abort ();
for (int i = 2; i < 507; ++i)
{
__asm__ volatile ("" : : : "memory");
b[i] = i;
}
foo2 ();
for (int i = 2; i < 507; ++i)
if (a[i] != 2*i)
__builtin_abort ();
for (int i = 2; i < 506; ++i)
{
__asm__ volatile ("" : : : "memory");
b[i] = i;
}
foo3 ();
for (int i = 2; i < 506; ++i)
if (a[i] != 2*i)
__builtin_abort ();
for (int i = 2; i < 505; ++i)
{
__asm__ volatile ("" : : : "memory");
b[i] = i;
}
foo4 ();
for (int i = 2; i < 505; ++i)
if (a[i] != 2*i)
__builtin_abort ();
for (int i = 2; i < 506; ++i)
{
__asm__ volatile ("" : : : "memory");
b[i] = i;
}
foo5 (505);
for (int i = 2; i < 506; ++i)
if (a[i] != 2*i)
__builtin_abort ();
}
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 4 "vect" } } */
/* Verify all vector accesses are emitted as aligned. */
/* { dg-final { scan-assembler-not "movup" } } */