| .syntax unified |
| // Check argument encoding by having different arguments. |
| // We use 20 and 11 since their binary encoding is 10100 and 01011 |
| // respectively which ensures that we distinguish between the D/M/N bit |
| // encoding the first or last bit of the argument. |
| // q registers are encoded as double their actual number. |
| vdot.bf16 d0, d20, d11 |
| vdot d11.bf16, d0.bf16, d20.bf16 |
| |
| .macro conversion_type_specifier_check insn, dest, source |
| \insn\().bf16.f32 \dest, \source |
| \insn \dest\().bf16, \source\().f32 |
| \insn \dest\().bf16, \source\().f32 |
| .endm |
| conversion_type_specifier_check vcvtt,s0,s0 |
| conversion_type_specifier_check vcvtb,s0,s0 |
| conversion_type_specifier_check vcvt,d0,q0 |
| |
| |
| // Here we follow the same encoding sequence as above. |
| // Since the 'M' bit encodes the index and the last register is encoded in 4 |
| // bits that argument has a different number. |
| vdot.bf16 d11, d0, d4[1] |
| vdot d0.bf16, d20.bf16, d11.bf16[0] |
| |
| // vmmla only works on q registers. |
| // These registers are encoded as double the number given in the mnemonic. |
| // Hence we choose different numbers to ensure a similar bit pattern as above. |
| // 10 & 5 produce the bit patterns 10100 & 01010 |
| vmmla.bf16 q10, q5, q0 |
| vmmla q5.bf16, q0.bf16, q10.bf16 |
| |
| vfmat.bf16 q10, q11, q0 |
| vfmat.bf16 q10, q11, d0[3] |
| vfmat.bf16 q10, q11, d0[0] |
| |
| vfmab.bf16 q10, q11, q0 |
| vfmab.bf16 q10, q11, d0[3] |
| vfmab.bf16 q10, q11, d0[0] |
| |
| // vcvt |
| // - no condition allowed in arm |
| // - no condition allowed in thumb outside IT block |
| // - Condition *allowed* in thumb in IT block |
| // - different encoding between thumb and arm |
| vcvt.bf16.f32 d20, q5 |
| vcvt.bf16.f32 d11, q10 |
| |
| // Only works for thumb mode. |
| .ifdef COMPILING_FOR_THUMB |
| it ne |
| vcvtne.bf16.f32 d0, q0 |
| .endif |