| /* { dg-do run } */ |
| /* { dg-options "-O2 -ftree-loop-distribution -fdump-tree-ldist-details -mzarch -march=z13" } */ |
| /* { dg-final { scan-tree-dump-times "generated rawmemchrQI" 2 "ldist" } } */ |
| /* { dg-final { scan-tree-dump-times "generated rawmemchrHI" 2 "ldist" } } */ |
| /* { dg-final { scan-tree-dump-times "generated rawmemchrSI" 2 "ldist" } } */ |
| |
| #include <string.h> |
| #include <assert.h> |
| #include <stdint.h> |
| #include <stdlib.h> |
| |
| #define rawmemchrT(T, pattern) \ |
| __attribute__((noinline,noclone)) \ |
| T* rawmemchr_##T (T *s) \ |
| { \ |
| while (*s != pattern) \ |
| ++s; \ |
| return s; \ |
| } |
| |
| rawmemchrT(int8_t, (int8_t)0xde) |
| rawmemchrT(uint8_t, 0xde) |
| rawmemchrT(int16_t, (int16_t)0xdead) |
| rawmemchrT(uint16_t, 0xdead) |
| rawmemchrT(int32_t, (int32_t)0xdeadbeef) |
| rawmemchrT(uint32_t, 0xdeadbeef) |
| |
| #define runT(T, pattern) \ |
| void run_##T () \ |
| { \ |
| T *buf = malloc (4096 * 2 * sizeof(T)); \ |
| assert (buf != NULL); \ |
| memset (buf, 0xa, 4096 * 2 * sizeof(T)); \ |
| /* ensure q is 4096-byte aligned */ \ |
| T *q = (T*)((unsigned char *)buf \ |
| + (4096 - ((uintptr_t)buf & 4095))); \ |
| T *p; \ |
| /* unaligned + block boundary + 1st load */ \ |
| p = (T *) ((uintptr_t)q - 8); \ |
| p[2] = pattern; \ |
| assert ((rawmemchr_##T (&p[0]) == &p[2])); \ |
| p[2] = (T) 0xaaaaaaaa; \ |
| /* unaligned + block boundary + 2nd load */ \ |
| p = (T *) ((uintptr_t)q - 8); \ |
| p[6] = pattern; \ |
| assert ((rawmemchr_##T (&p[0]) == &p[6])); \ |
| p[6] = (T) 0xaaaaaaaa; \ |
| /* unaligned + 1st load */ \ |
| q[5] = pattern; \ |
| assert ((rawmemchr_##T (&q[2]) == &q[5])); \ |
| q[5] = (T) 0xaaaaaaaa; \ |
| /* unaligned + 2nd load */ \ |
| q[14] = pattern; \ |
| assert ((rawmemchr_##T (&q[2]) == &q[14])); \ |
| q[14] = (T) 0xaaaaaaaa; \ |
| /* unaligned + 3rd load */ \ |
| q[19] = pattern; \ |
| assert ((rawmemchr_##T (&q[2]) == &q[19])); \ |
| q[19] = (T) 0xaaaaaaaa; \ |
| /* unaligned + 4th load */ \ |
| q[25] = pattern; \ |
| assert ((rawmemchr_##T (&q[2]) == &q[25])); \ |
| q[25] = (T) 0xaaaaaaaa; \ |
| /* aligned + 1st load */ \ |
| q[5] = pattern; \ |
| assert ((rawmemchr_##T (&q[0]) == &q[5])); \ |
| q[5] = (T) 0xaaaaaaaa; \ |
| /* aligned + 2nd load */ \ |
| q[14] = pattern; \ |
| assert ((rawmemchr_##T (&q[0]) == &q[14])); \ |
| q[14] = (T) 0xaaaaaaaa; \ |
| /* aligned + 3rd load */ \ |
| q[19] = pattern; \ |
| assert ((rawmemchr_##T (&q[0]) == &q[19])); \ |
| q[19] = (T) 0xaaaaaaaa; \ |
| /* aligned + 4th load */ \ |
| q[25] = pattern; \ |
| assert ((rawmemchr_##T (&q[0]) == &q[25])); \ |
| q[25] = (T) 0xaaaaaaaa; \ |
| free (buf); \ |
| } |
| |
| runT(int8_t, (int8_t)0xde) |
| runT(uint8_t, 0xde) |
| runT(int16_t, (int16_t)0xdead) |
| runT(uint16_t, 0xdead) |
| runT(int32_t, (int32_t)0xdeadbeef) |
| runT(uint32_t, 0xdeadbeef) |
| |
| int main (void) |
| { |
| run_uint8_t (); |
| run_int8_t (); |
| run_uint16_t (); |
| run_int16_t (); |
| run_uint32_t (); |
| run_int32_t (); |
| return 0; |
| } |