/* blob: 53a61542e6cc90179f1049eaf9c228c2e36b8720 [file] [log] [blame] */
/* PR target/97642 */
/* { dg-do run { target *-*-linux* } } */
/* { dg-options "-O2 -mavx512dq -mavx512vl -mavx512bw" } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target avx512dq } */
/* { dg-require-effective-target avx512bw } */
#include <assert.h>
#include <immintrin.h>
#include <stdint.h>
#include <sys/mman.h>
#define N 5
// Load 32-bit elements from arr with the masked unaligned load intrinsic,
// using the compile-time constant mask (1 << N) - 1 (the low N elements).
// Original note: faults with GCC because of its usage of vpblendd
// (this is the reproducer for PR target/97642).
__m256i __attribute__((noinline)) mask_load(uint32_t * arr) {
// NOTE(review): tmp is passed uninitialized as the pass-through operand of
// the masked load; presumably deliberate for this reproducer -- confirm
// before initializing it, since that may change the generated code.
__m256i tmp;
return _mm256_mask_loadu_epi32(tmp, (1 << N) - 1, arr);
}
// Hand-written variant of the load: starts from an all-zero register and
// issues vpblendd with the immediate (1 << N) - 1 and a memory operand at arr.
// Original note: this faults.
// NOTE(review): presumably the fault comes from the memory operand being
// read in full (32 bytes) regardless of the blend immediate -- confirm
// against the instruction reference.
__m256i __attribute__((noinline)) blend_load_asm(uint32_t * arr) {
__m256i tmp = _mm256_set1_epi64x(0);
// Operands: tmp is a read-write vector register ("+x"), arr is held in a
// general register ("r"), and the blend mask m is an immediate ("i").
asm volatile("vpblendd %[m], (%[arr]), %[tmp], %[tmp]\n\t"
: [ tmp ] "+x"(tmp)
: [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1))
:);
return tmp;
}
// Hand-written masked load used as a reference: places the constant mask
// (1 << N) - 1 in mask register k1 and performs a vmovdqu32 from arr under
// that mask with zeroing ({z}).
// Original note: this does not fault.
__m256i __attribute__((noinline)) mask_load_asm(uint32_t * arr) {
// NOTE(review): tmp is uninitialized yet bound as read-write ("+x");
// presumably a write-only "=x" would suffice given the {z} form -- confirm.
__m256i tmp;
// Sequence: immediate mask -> %al, %eax -> %k1 (kmovb), then the masked
// load. "eax" and "k1" are listed as clobbers because the asm overwrites them.
asm volatile(
"movb %[m], %%al\n\t"
"kmovb %%eax, %%k1\n\t"
"vmovdqu32 (%[arr]), %[tmp] %{%%k1} %{z%}\n\t"
: [ tmp ] "+x"(tmp)
: [ arr ] "r"(arr), [ m ] "i"(((1 << N) - 1))
: "eax", "k1");
return tmp;
}
// Store 32-bit elements of v to arr with the masked unaligned store
// intrinsic, using the compile-time constant mask (1 << N) - 1 (the low N
// elements). Kept noinline so the store stays a separate call in main.
void __attribute__((noinline)) mask_store(uint32_t * arr, __m256i v) {
  // The intrinsic yields void. ISO C does not permit `return expr;` in a
  // function returning void (GCC only accepts it as an extension), so the
  // call is written as a plain statement.
  _mm256_mask_storeu_epi32(arr, (1 << N) - 1, v);
}
#define NPAGES (2)
#define END_OF_PAGE (1024 - N)
#ifndef LOAD_METHOD
#define LOAD_METHOD mask_load // mask_load_asm does not fault
#endif
// Test driver.
//
// Maps NPAGES anonymous pages, turns the page that follows the data into a
// PROT_NONE guard page, and then round-trips N 32-bit elements that end
// exactly at the page boundary through LOAD_METHOD and mask_store. A load
// that touches memory beyond the N selected elements hits the guard page.
//
// Returns 0 when the CPU lacks any of the required AVX-512 features (the
// test is skipped) or when the round-trip completes; aborts if the mapping
// or the guard page cannot be set up.
int
main(void) {
  if (!(__builtin_cpu_supports ("avx512dq")
        && __builtin_cpu_supports ("avx512vl")
        && __builtin_cpu_supports ("avx512bw")))
    return 0;

  uint32_t *addr =
    (uint32_t *)mmap(NULL, NPAGES * 4096, PROT_READ | PROT_WRITE,
                     MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  // mmap reports failure with MAP_FAILED, not NULL; without a valid mapping
  // the test cannot exercise anything, so fail loudly.
  if (addr == MAP_FAILED)
    __builtin_abort ();

  for (uint32_t i = 0; i < NPAGES; i += 2) {
    // Offsets are in uint32_t units: 1024 elements per 4096-byte page.
    uint32_t page_offset = 1024 * i + END_OF_PAGE;
    uint32_t next_page_offset = 1024 * (i + 1);

    // Install the guard page unconditionally. The call must not live inside
    // assert(): with NDEBUG defined it would be compiled out and the test
    // would pass without ever protecting the following page.
    if (mprotect(addr + next_page_offset, 4096, PROT_NONE) != 0)
      __builtin_abort ();

    mask_store(addr + page_offset, LOAD_METHOD(addr + page_offset));
  }

  return 0;
}