# mach: aarch64

# Check the store single N-element structure from one lane instructions:
# st1, st2, st3, st4.
# Check the addressing modes: no offset, post-index immediate offset,
# post-index register offset.

# Shared test-suite definitions.  start, pass and fail used later in this
# file are not defined here; presumably they are macros from this include.
        .include "testutils.inc"

        .data
        .align 4                        // 2^4 = 16-byte alignment on AArch64 gas
# Source pattern: eight words whose bytes, in little-endian memory order,
# are the values 1, 2, ..., 32.
input:
        .word 0x04030201
        .word 0x08070605
        .word 0x0c0b0a09
        .word 0x100f0e0d
        .word 0x14131211
        .word 0x18171615
        .word 0x1c1b1a19
        .word 0x201f1e1d
# Zero-filled 64-byte destination buffer for the stores under test.
output:
        .zero 64

# Test entry.  x0 = address of input, x1 = address of output; neither
# register is written again in this file.
        start
        adrp x0, input
        add x0, x0, :lo12:input
        adrp x1, output
        add x1, x1, :lo12:output

# Load the source vectors, using x2 as a scratch cursor over input:
#   v0 = v2 = input bytes 1..16, v1 = v3 = input bytes 17..32.
# v2/v3 deliberately duplicate v0/v1 so the st3/st4 register lists have
# distinct registers with known contents.
        mov x2, x0
        ldr q0, [x2], 16
        ldr q1, [x2]
        mov x2, x0
        ldr q2, [x2], 16
        ldr q3, [x2]

# st1: store single lanes of v0 through all three addressing modes.
# x2 walks the output buffer (x1 = its base); x3/x4 hold the register
# post-index offsets.  The five stores are contiguous and rebuild input
# bytes 1..16 in output[0..15], so the byte sum must be 1+2+...+16 = 136.
        mov x2, x1
        mov x3, #1
        mov x4, #4
        st1 {v0.b}[0], [x2], 1          // byte 1,      post-index immediate
        st1 {v0.b}[1], [x2], x3         // byte 2,      post-index register
        st1 {v0.h}[1], [x2], 2          // bytes 3..4,  post-index immediate
        st1 {v0.s}[1], [x2], x4         // bytes 5..8,  post-index register
        st1 {v0.d}[1], [x2]             // bytes 9..16, no offset
        ldr q4, [x1]
        addv b4, v4.16b                 // b4 = sum of the 16 bytes, modulo 256
        mov x5, v4.d[0]
        cmp x5, #136
        bne .Lfailure

# st2: store one lane from each of v0 and v1 per instruction, 32 bytes in
# all, again cycling through the three addressing modes.
#   output[0..15]  = v0 bytes 1..8, v1 bytes 17..24
#                    -> sum 36 + 164 = 200
#   output[16..31] = 9..12, 25..28, 13..14, 29..30, 15, 31, 16, 32
#                    -> sum 328, i.e. 72 modulo 256
        mov x2, x1
        mov x3, #16
        mov x4, #4
        st2 {v0.d, v1.d}[0], [x2], x3   // 16 bytes, post-index register
        st2 {v0.s, v1.s}[2], [x2], 8    // 8 bytes,  post-index immediate
        st2 {v0.h, v1.h}[6], [x2], x4   // 4 bytes,  post-index register
        st2 {v0.b, v1.b}[14], [x2], 2   // 2 bytes,  post-index immediate
        st2 {v0.b, v1.b}[15], [x2]      // 2 bytes,  no offset
# Read both 16-byte halves back and reduce each to a one-byte checksum.
        mov x2, x1
        ldr q4, [x2], 16
        ldr q5, [x2]
        addv b4, v4.16b
        addv b5, v5.16b
        mov x5, v4.d[0]
        mov x6, v5.d[0]
        cmp x5, #200
        bne .Lfailure
        cmp x6, #72
        bne .Lfailure

# st3: store 32-bit lanes 0..3 of v0, v1, v2; each store writes 12 bytes,
# 48 bytes in all.  Byte sums of the words written, in memory order:
#   lane 0: 10, 74, 10      lane 1: 26, 90, 26
#   lane 2: 42, 106, 42     lane 3: 58, 122, 58
# Expected checksum of each 16-byte slice (modulo 256):
#   output[0..15]  = 10 + 74 + 10 + 26   = 120
#   output[16..31] = 90 + 26 + 42 + 106  = 264 -> 8
#   output[32..47] = 42 + 58 + 122 + 58  = 280 -> 24
        mov x2, x1
        mov x3, #12
        st3 {v0.s, v1.s, v2.s}[0], [x2], 12     // post-index immediate
        st3 {v0.s, v1.s, v2.s}[1], [x2], x3     // post-index register
        st3 {v0.s, v1.s, v2.s}[2], [x2], 12     // post-index immediate
        st3 {v0.s, v1.s, v2.s}[3], [x2]         // no offset
# Read the three 16-byte slices back and reduce each to a one-byte checksum.
        mov x2, x1
        ldr q4, [x2], 16
        ldr q5, [x2], 16
        ldr q6, [x2]
        addv b4, v4.16b
        addv b5, v5.16b
        addv b6, v6.16b
        mov x4, v4.d[0]
        mov x5, v5.d[0]
        mov x6, v6.d[0]
        cmp x4, #120
        bne .Lfailure
        cmp x5, #8
        bne .Lfailure
        cmp x6, #24
        bne .Lfailure

# st4: store 32-bit lanes 0..3 of v0..v3; each store writes one 16-byte
# row {v0.s[n], v1.s[n], v2.s[n], v3.s[n]}, 64 bytes in all.  Because
# v2 = v0 and v3 = v1, the expected row checksums (modulo 256) are:
#   lane 0: 2 * (10 + 74)  = 168
#   lane 1: 2 * (26 + 90)  = 232
#   lane 2: 2 * (42 + 106) = 296 -> 40
#   lane 3: 2 * (58 + 122) = 360 -> 104
        mov x2, x1
        mov x3, #16
        st4 {v0.s, v1.s, v2.s, v3.s}[0], [x2], 16       // post-index immediate
        st4 {v0.s, v1.s, v2.s, v3.s}[1], [x2], x3       // post-index register
        st4 {v0.s, v1.s, v2.s, v3.s}[2], [x2], 16       // post-index immediate
        st4 {v0.s, v1.s, v2.s, v3.s}[3], [x2]           // no offset
# Read the four rows back and reduce each to a one-byte checksum.
        mov x2, x1
        ldr q4, [x2], 16
        ldr q5, [x2], 16
        ldr q6, [x2], 16
        ldr q7, [x2]
        addv b4, v4.16b
        addv b5, v5.16b
        addv b6, v6.16b
        addv b7, v7.16b
        mov x4, v4.d[0]
        mov x5, v5.d[0]
        mov x6, v6.d[0]
        mov x7, v7.d[0]
        cmp x4, #168
        bne .Lfailure
        cmp x5, #232
        bne .Lfailure
        cmp x6, #40
        bne .Lfailure
        cmp x7, #104
        bne .Lfailure

# Every checksum matched.
# NOTE(review): pass is assumed not to return; otherwise control would fall
# through into .Lfailure -- confirm against testutils.inc.
        pass
# Common target for every failed comparison above.
.Lfailure:
        fail