;; Machine description for AArch64 AdvSIMD architecture. ;; Copyright (C) 2011-2015 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. ;; ;; GCC is free software; you can redistribute it and/or modify it ;; under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; ;; GCC is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with GCC; see the file COPYING3. If not see ;; http://www.gnu.org/licenses/.
;; Vector move expander for VALL modes.  When the destination is a MEM,
;; operand 1 is first forced into a register.
(define_expand “mov” [(set (match_operand:VALL 0 “nonimmediate_operand” "") (match_operand:VALL 1 “general_operand” ""))] “TARGET_SIMD” " if (GET_CODE (operands[0]) == MEM) operands[1] = force_reg (mode, operands[1]); " )
;; Misaligned vector move expander for VALL modes.  If neither operand
;; satisfies register_operand, operand 1 is forced into a register.
(define_expand “movmisalign” [(set (match_operand:VALL 0 “nonimmediate_operand” "") (match_operand:VALL 1 “general_operand” ""))] “TARGET_SIMD” { /* This pattern is not permitted to fail during expansion: if both arguments are non-registers (e.g. memory := constant, which can be created by the auto-vectorizer), force operand 1 into a register. */ if (!register_operand (operands[0], mode) && !register_operand (operands[1], mode)) operands[1] = force_reg (mode, operands[1]); })
;; Duplicate scalar operand 1 into every lane of a VDQ_I vector.
;; Two alternatives, matching the constraint pairs (=w, r) and (=w, w);
;; each alternative of the "@" template is on its own line.
(define_insn “aarch64_simd_dup”
  [(set (match_operand:VDQ_I 0 “register_operand” “=w, w”)
        (vec_duplicate:VDQ_I
          (match_operand: 1 “register_operand” “r, w”)))]
  “TARGET_SIMD”
  “@
   dup\t%0., %1
   dup\t%0., %1.[0]”
  [(set_attr “type” “neon_from_gp, neon_dup”)]
)
;; Duplicate scalar operand 1 (constraint "w", lane 0) into every lane of a
;; VDQF vector.
(define_insn “aarch64_simd_dup” [(set (match_operand:VDQF 0 “register_operand” “=w”) (vec_duplicate:VDQF (match_operand: 1 “register_operand” “w”)))] “TARGET_SIMD” “dup\t%0., %1.[0]” [(set_attr “type” “neon_dup”)] )
;; Duplicate one lane of VALL operand 1 into every lane of the result.
;; The lane index (operand 2) is remapped through ENDIAN_LANE_N before the
;; template is returned.
(define_insn “aarch64_dup_lane” [(set (match_operand:VALL 0 “register_operand” “=w”) (vec_duplicate:VALL (vec_select: (match_operand:VALL 1 “register_operand” “w”) (parallel [(match_operand:SI 2 “immediate_operand” “i”)]) )))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); return “dup\t%0., %1.[%2]”; } [(set_attr “type” “neon_dup”)] )
;; As the lane-duplicate pattern, but the source vector (operand 1) is in
;; the <VSWAP_WIDTH> mode, and the lane index is remapped using that mode.
(define_insn “aarch64_dup_lane_<vswap_width_name>” [(set (match_operand:VALL 0 “register_operand” “=w”) (vec_duplicate:VALL (vec_select: (match_operand:<VSWAP_WIDTH> 1 “register_operand” “w”) (parallel [(match_operand:SI 2 “immediate_operand” “i”)]) )))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, INTVAL (operands[2]))); return “dup\t%0., %1.[%2]”; } [(set_attr “type” “neon_dup”)] )
;; Move pattern for VD modes; requires at least one operand to be a register.
;; Alternatives (dest <- src constraint): w<-m ldr, m<-w str, w<-w orr,
;; r<-w umov, w<-r fmov, r<-r mov, w<-Dn via
;; aarch64_output_simd_mov_immediate with width 64.
(define_insn “*aarch64_simd_mov” [(set (match_operand:VD 0 “nonimmediate_operand” “=w, m, w, ?r, ?w, ?r, w”) (match_operand:VD 1 “general_operand” “m, w, w, w, r, r, Dn”))] “TARGET_SIMD && (register_operand (operands[0], mode) || register_operand (operands[1], mode))” { switch (which_alternative) { case 0: return “ldr\t%d0, %1”; case 1: return “str\t%d1, %0”; case 2: return “orr\t%0., %1., %1.”; case 3: return “umov\t%0, %1.d[0]”; case 4: return “fmov\t%d0, %1”; case 5: return “mov\t%0, %1”; case 6: return aarch64_output_simd_mov_immediate (operands[1], mode, 64); default: gcc_unreachable (); } } [(set_attr “type” “neon_load1_1reg, neon_store1_1reg,
neon_logic, neon_to_gp, f_mcr,
mov_reg, neon_move”)] )
;; Move pattern for VQ modes; requires at least one operand to be a register.
;; Alternatives 0-2 are ldr / str / orr, alternative 6 is an immediate move
;; (width 128).  Alternatives 3-5 (those involving an "r" register) return
;; "#" and have length 8; presumably they are handled by the define_splits
;; that follow -- TODO confirm.
(define_insn “*aarch64_simd_mov” [(set (match_operand:VQ 0 “nonimmediate_operand” “=w, m, w, ?r, ?w, ?r, w”) (match_operand:VQ 1 “general_operand” “m, w, w, w, r, r, Dn”))] “TARGET_SIMD && (register_operand (operands[0], mode) || register_operand (operands[1], mode))” { switch (which_alternative) { case 0: return “ldr\t%q0, %1”; case 1: return “str\t%q1, %0”; case 2: return “orr\t%0., %1., %1.”; case 3: case 4: case 5: return “#”; case 6: return aarch64_output_simd_mov_immediate (operands[1], mode, 128); default: gcc_unreachable (); } } [(set_attr “type” “neon_load1_1reg, neon_store1_1reg,
neon_logic, multiple, multiple, multiple,
neon_move”) (set_attr “length” “4,4,4,8,8,8,4”)] )
;; After reload, a VQ register move whose source and destination are both
;; GP registers is emitted via aarch64_simd_emit_reg_reg_move (DImode, 2).
(define_split [(set (match_operand:VQ 0 “register_operand” "") (match_operand:VQ 1 “register_operand” ""))] “TARGET_SIMD && reload_completed && GP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))” [(const_int 0)] { aarch64_simd_emit_reg_reg_move (operands, DImode, 2); DONE; })
;; After reload, a VQ register move between an FP register and a GP register
;; (in either direction) is handed to aarch64_split_simd_move.
(define_split [(set (match_operand:VQ 0 “register_operand” "") (match_operand:VQ 1 “register_operand” ""))] “TARGET_SIMD && reload_completed && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1]))) || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))” [(const_int 0)] { aarch64_split_simd_move (operands[0], operands[1]); DONE; })
;; Expand a VQ move as two half-width moves.  If the source is a GP register,
;; its low and high <VHALF> parts are written into the destination with
;; move_lo_quad / move_hi_quad.  Otherwise the low and high halves of the
;; source are extracted into the low and high parts of the destination with
;; the mov_from_low / mov_from_high patterns.
(define_expand “aarch64_split_simd_mov” [(set (match_operand:VQ 0) (match_operand:VQ 1))] “TARGET_SIMD” { rtx dst = operands[0]; rtx src = operands[1];
if (GP_REGNUM_P (REGNO (src))) { rtx src_low_part = gen_lowpart (<VHALF>mode, src); rtx src_high_part = gen_highpart (<VHALF>mode, src); emit_insn (gen_move_lo_quad_<mode> (dst, src_low_part)); emit_insn (gen_move_hi_quad_<mode> (dst, src_high_part)); } else { rtx dst_low_part = gen_lowpart (<VHALF>mode, dst); rtx dst_high_part = gen_highpart (<VHALF>mode, dst); rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); emit_insn (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo)); emit_insn (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi)); } DONE;
} )
;; After reload: copy the low half of VQ operand 1 (selected by a
;; vect_par_cnst_lo_half parallel) into an "r" register, using umov from d[0].
(define_insn “aarch64_simd_mov_from_low” [(set (match_operand: 0 “register_operand” “=r”) (vec_select: (match_operand:VQ 1 “register_operand” “w”) (match_operand:VQ 2 “vect_par_cnst_lo_half” "")))] “TARGET_SIMD && reload_completed” “umov\t%0, %1.d[0]” [(set_attr “type” “neon_to_gp”) (set_attr “length” “4”) ])
;; After reload: copy the high half of VQ operand 1 (selected by a
;; vect_par_cnst_hi_half parallel) into an "r" register, using umov from d[1].
(define_insn “aarch64_simd_mov_from_high” [(set (match_operand: 0 “register_operand” “=r”) (vec_select: (match_operand:VQ 1 “register_operand” “w”) (match_operand:VQ 2 “vect_par_cnst_hi_half” "")))] “TARGET_SIMD && reload_completed” “umov\t%0, %1.d[1]” [(set_attr “type” “neon_to_gp”) (set_attr “length” “4”) ])
;; Vector OR-NOT: (~operand 1) | operand 2.  Note the template lists %2
;; before %1.
(define_insn “orn3” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”)) (match_operand:VDQ_I 2 “register_operand” “w”)))] “TARGET_SIMD” “orn\t%0., %2., %1.” [(set_attr “type” “neon_logic”)] )
;; Vector bit clear: (~operand 1) & operand 2.  Note the template lists %2
;; before %1.
(define_insn “bic3” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”)) (match_operand:VDQ_I 2 “register_operand” “w”)))] “TARGET_SIMD” “bic\t%0., %2., %1.” [(set_attr “type” “neon_logic”)] )
;; Vector integer addition (VDQ_I modes): operand 1 + operand 2.
(define_insn “add3” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (plus:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “register_operand” “w”)))] “TARGET_SIMD” “add\t%0., %1., %2.” [(set_attr “type” “neon_add”)] )
;; Vector integer subtraction (VDQ_I modes): operand 1 - operand 2.
(define_insn “sub3” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (minus:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “register_operand” “w”)))] “TARGET_SIMD” “sub\t%0., %1., %2.” [(set_attr “type” “neon_sub”)] )
;; Vector integer multiplication (VDQ_BHSI modes): operand 1 * operand 2.
;; NOTE(review): the "type" value ends in an underscore; a mode-dependent
;; suffix appears to be missing -- confirm against the upstream file.
(define_insn “mul3” [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w”) (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 “register_operand” “w”) (match_operand:VDQ_BHSI 2 “register_operand” “w”)))] “TARGET_SIMD” “mul\t%0., %1., %2.” [(set_attr “type” “neon_mul_”)] )
;; Vector byte swap (bswap rtx) for VDQHSD modes, emitted with a rev
;; mnemonic.
(define_insn “bswap2” [(set (match_operand:VDQHSD 0 “register_operand” “=w”) (bswap:VDQHSD (match_operand:VDQHSD 1 “register_operand” “w”)))] “TARGET_SIMD” “rev\t%0., %1.” [(set_attr “type” “neon_rev”)] )
;; RBIT on VB modes, represented as UNSPEC_RBIT of operand 1.
(define_insn “aarch64_rbit” [(set (match_operand:VB 0 “register_operand” “=w”) (unspec:VB [(match_operand:VB 1 “register_operand” “w”)] UNSPEC_RBIT))] “TARGET_SIMD” “rbit\t%0., %1.” [(set_attr “type” “neon_rbit”)] )
;; Count trailing zeros for VS modes as a three-step sequence: bswap of
;; operand 1 into operand 0, rbit applied in place to a subreg view of
;; operand 0, then clz of operand 0 in place.
;; NOTE(review): `VS:VSI2QImode' and `gen_aarch64_rbitVS:vsi2qi' are not
;; valid C identifiers; iterator brackets appear to have been lost --
;; restore from the upstream file.
(define_expand “ctz2” [(set (match_operand:VS 0 “register_operand”) (ctz:VS (match_operand:VS 1 “register_operand”)))] “TARGET_SIMD” { emit_insn (gen_bswap2 (operands[0], operands[1])); rtx op0_castsi2qi = simplify_gen_subreg(VS:VSI2QImode, operands[0], mode, 0); emit_insn (gen_aarch64_rbitVS:vsi2qi (op0_castsi2qi, op0_castsi2qi)); emit_insn (gen_clz2 (operands[0], operands[0])); DONE; } )
;; Multiply VMUL operand 3 by one lane of operand 1 duplicated across the
;; vector.  The lane index (operand 2) is remapped through ENDIAN_LANE_N.
;; NOTE(review): the "type" string looks garbled -- confirm against upstream.
(define_insn “*aarch64_mul3_elt” [(set (match_operand:VMUL 0 “register_operand” “=w”) (mult:VMUL (vec_duplicate:VMUL (vec_select: (match_operand:VMUL 1 “register_operand” “<h_con>”) (parallel [(match_operand:SI 2 “immediate_operand”)]))) (match_operand:VMUL 3 “register_operand” “w”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); return “mul\t%0., %3., %1.[%2]”; } [(set_attr “type” “neonmul_scalar”)] )
;; As the multiply-by-lane pattern, but the lane is selected from a vector
;; in the <VSWAP_WIDTH> mode; the lane index is remapped using that mode.
;; NOTE(review): the "type" string looks garbled -- confirm against upstream.
(define_insn “*aarch64_mul3_elt_<vswap_width_name>” [(set (match_operand:VMUL_CHANGE_NLANES 0 “register_operand” “=w”) (mult:VMUL_CHANGE_NLANES (vec_duplicate:VMUL_CHANGE_NLANES (vec_select: (match_operand:<VSWAP_WIDTH> 1 “register_operand” “<h_con>”) (parallel [(match_operand:SI 2 “immediate_operand”)]))) (match_operand:VMUL_CHANGE_NLANES 3 “register_operand” “w”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, INTVAL (operands[2]))); return “mul\t%0., %3., %1.[%2]”; } [(set_attr “type” “neonmul_scalar”)] )
;; Multiply V2DF operand 1 by DF scalar operand 2 duplicated across the
;; vector; the scalar is referenced as lane d[0] of its register.
(define_insn “*aarch64_mul3_elt_to_128df” [(set (match_operand:V2DF 0 “register_operand” “=w”) (mult:V2DF (vec_duplicate:V2DF (match_operand:DF 2 “register_operand” “w”)) (match_operand:V2DF 1 “register_operand” “w”)))] “TARGET_SIMD” “fmul\t%0.2d, %1.2d, %2.d[0]” [(set_attr “type” “neon_fp_mul_d_scalar_q”)] )
;; Multiply DF operand 3 by one lane of V2DF operand 1, giving a DF result.
;; The lane index is remapped through ENDIAN_LANE_N (V2DFmode, ...).
;; NOTE(review): the result mode is DF but the template uses the .2d
;; arrangement for %0 and %3 -- confirm this is intended.
(define_insn “*aarch64_mul3_elt_to_64v2df” [(set (match_operand:DF 0 “register_operand” “=w”) (mult:DF (vec_select:DF (match_operand:V2DF 1 “register_operand” “w”) (parallel [(match_operand:SI 2 “immediate_operand”)])) (match_operand:DF 3 “register_operand” “w”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); return “fmul\t%0.2d, %3.2d, %1.d[%2]”; } [(set_attr “type” “neon_fp_mul_d_scalar_q”)] )
;; Vector integer negation (VDQ_I modes).
(define_insn “neg2” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (neg:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”)))] “TARGET_SIMD” “neg\t%0., %1.” [(set_attr “type” “neon_neg”)] )
;; Vector integer absolute value (VDQ_I modes), expressed with the abs rtx
;; code.
(define_insn “abs2” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (abs:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”)))] “TARGET_SIMD” “abs\t%0., %1.” [(set_attr “type” “neon_abs”)] )
;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
;; It is therefore expressed as UNSPEC_ABS rather than with the abs rtx code.
(define_insn “aarch64_abs”
  [(set (match_operand:VSDQ_I_DI 0 “register_operand” “=w”)
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 “register_operand” “w”)]
          UNSPEC_ABS))]
  “TARGET_SIMD”
  “abs\t%0, %1”
  [(set_attr “type” “neon_abs”)]
)
;; Integer absolute difference (VDQ_BHSI modes): abs (operand 1 - operand 2),
;; emitted as sabd.
(define_insn “abd_3” [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w”) (abs:VDQ_BHSI (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 “register_operand” “w”) (match_operand:VDQ_BHSI 2 “register_operand” “w”))))] “TARGET_SIMD” “sabd\t%0., %1., %2.” [(set_attr “type” “neon_abd”)] )
;; Integer absolute difference and accumulate (VDQ_BHSI modes):
;; abs (operand 1 - operand 2) + operand 3, where operand 3 is tied to the
;; output register (constraint "0").  Emitted as saba.
(define_insn “aba_3” [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w”) (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 “register_operand” “w”) (match_operand:VDQ_BHSI 2 “register_operand” “w”))) (match_operand:VDQ_BHSI 3 “register_operand” “0”)))] “TARGET_SIMD” “saba\t%0., %1., %2.” [(set_attr “type” “neon_arith_acc”)] )
;; Floating-point absolute difference for VDQF vectors:
;; abs (operand 1 - operand 2), emitted as fabd.
(define_insn “fabd_3” [(set (match_operand:VDQF 0 “register_operand” “=w”) (abs:VDQF (minus:VDQF (match_operand:VDQF 1 “register_operand” “w”) (match_operand:VDQF 2 “register_operand” “w”))))] “TARGET_SIMD” “fabd\t%0., %1., %2.” [(set_attr “type” “neon_fp_abd_”)] )
;; Scalar (GPF modes) floating-point absolute difference:
;; abs (operand 1 - operand 2), emitted as fabd.
(define_insn “*fabd_scalar3” [(set (match_operand:GPF 0 “register_operand” “=w”) (abs:GPF (minus:GPF (match_operand:GPF 1 “register_operand” “w”) (match_operand:GPF 2 “register_operand” “w”))))] “TARGET_SIMD” “fabd\t%0, %1, %2” [(set_attr “type” “neon_fp_abd_”)] )
;; Vector bitwise AND (VDQ_I modes).
(define_insn “and3” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (and:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “register_operand” “w”)))] “TARGET_SIMD” “and\t%0., %1., %2.” [(set_attr “type” “neon_logic”)] )
;; Vector bitwise inclusive OR (VDQ_I modes), emitted as orr.
(define_insn “ior3” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (ior:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “register_operand” “w”)))] “TARGET_SIMD” “orr\t%0., %1., %2.” [(set_attr “type” “neon_logic”)] )
;; Vector bitwise exclusive OR (VDQ_I modes), emitted as eor.
(define_insn “xor3” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (xor:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “register_operand” “w”)))] “TARGET_SIMD” “eor\t%0., %1., %2.” [(set_attr “type” “neon_logic”)] )
;; Vector bitwise complement (VDQ_I modes), emitted as not.
(define_insn “one_cmpl2” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (not:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”)))] “TARGET_SIMD” “not\t%0., %1.” [(set_attr “type” “neon_logic”)] )
;; Insert scalar operand 1 into one lane of VDQ_BHSI operand 3, which is tied
;; to the output.  Operand 2 is a one-hot lane mask: it is converted to a
;; lane number with exact_log2, remapped through ENDIAN_LANE_N, and turned
;; back into a mask (printed with %p2).  Alternatives by operand 1
;; constraint: "r" -> ins from %w1, "w" -> ins from lane 0, "Utv" -> ld1.
(define_insn “aarch64_simd_vec_set” [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w,w,w”) (vec_merge:VDQ_BHSI (vec_duplicate:VDQ_BHSI (match_operand: 1 “aarch64_simd_general_operand” “r,w,Utv”)) (match_operand:VDQ_BHSI 3 “register_operand” “0,0,0”) (match_operand:SI 2 “immediate_operand” “i,i,i”)))] “TARGET_SIMD” { int elt = ENDIAN_LANE_N (mode, exact_log2 (INTVAL (operands[2]))); operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); switch (which_alternative) { case 0: return “ins\t%0.[%p2], %w1”; case 1: return “ins\t%0.[%p2], %1.[0]”; case 2: return “ld1\t{%0.}[%p2], %1”; default: gcc_unreachable (); } } [(set_attr “type” “neon_from_gp, neon_ins, neon_load1_1reg”)] )
;; Logical right shift of a VDQ_I vector by an immediate vector
;; (aarch64_simd_rshift_imm, constraint "Dr"), emitted as ushr.
(define_insn “aarch64_simd_lshr” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (lshiftrt:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “aarch64_simd_rshift_imm” “Dr”)))] “TARGET_SIMD” “ushr\t%0., %1., %2” [(set_attr “type” “neon_shift_imm”)] )
;; Arithmetic right shift of a VDQ_I vector by an immediate vector
;; (aarch64_simd_rshift_imm, constraint "Dr"), emitted as sshr.
(define_insn “aarch64_simd_ashr” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (ashiftrt:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “aarch64_simd_rshift_imm” “Dr”)))] “TARGET_SIMD” “sshr\t%0., %1., %2” [(set_attr “type” “neon_shift_imm”)] )
;; Left shift of a VDQ_I vector by an immediate vector
;; (aarch64_simd_lshift_imm, constraint "Dl"), emitted as shl.
(define_insn “aarch64_simd_imm_shl” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (ashift:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “aarch64_simd_lshift_imm” “Dl”)))] “TARGET_SIMD” “shl\t%0., %1., %2” [(set_attr “type” “neon_shift_imm”)] )
;; Left shift of a VDQ_I vector by a vector of per-lane amounts held in a
;; register, expressed with the ashift rtx code and emitted as sshl.
(define_insn “aarch64_simd_reg_sshl” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (ashift:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “register_operand” “w”)))] “TARGET_SIMD” “sshl\t%0., %1., %2.” [(set_attr “type” “neon_shift_reg”)] )
;; Register-amount shift emitted as ushl, represented as
;; UNSPEC_ASHIFT_UNSIGNED of operands 1 and 2.
(define_insn “aarch64_simd_reg_shl_unsigned” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (unspec:VDQ_I [(match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “register_operand” “w”)] UNSPEC_ASHIFT_UNSIGNED))] “TARGET_SIMD” “ushl\t%0., %1., %2.” [(set_attr “type” “neon_shift_reg”)] )
;; Register-amount shift emitted as sshl, represented as
;; UNSPEC_ASHIFT_SIGNED of operands 1 and 2.
(define_insn “aarch64_simd_reg_shl_signed” [(set (match_operand:VDQ_I 0 “register_operand” “=w”) (unspec:VDQ_I [(match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “register_operand” “w”)] UNSPEC_ASHIFT_SIGNED))] “TARGET_SIMD” “sshl\t%0., %1., %2.” [(set_attr “type” “neon_shift_reg”)] )
;; Left shift of a VDQ_I vector by an SImode amount.
;; A constant amount in [0, bit_width) uses the immediate pattern
;; (aarch64_simd_imm_shl) with a duplicated constant vector.  Any other
;; constant, and any MEM operand, is forced into an SImode register.
;; A register amount is duplicated into a fresh vector register and used
;; with aarch64_simd_reg_sshl.  Anything else FAILs.
(define_expand “ashl3” [(match_operand:VDQ_I 0 “register_operand” "") (match_operand:VDQ_I 1 “register_operand” "") (match_operand:SI 2 “general_operand” "")] “TARGET_SIMD” { int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; int shift_amount;
if (CONST_INT_P (operands[2])) { shift_amount = INTVAL (operands[2]); if (shift_amount >= 0 && shift_amount < bit_width) { rtx tmp = aarch64_simd_gen_const_vector_dup (mode, shift_amount); emit_insn (gen_aarch64_simd_imm_shl (operands[0], operands[1], tmp)); DONE; } else { operands[2] = force_reg (SImode, operands[2]); } } else if (MEM_P (operands[2])) { operands[2] = force_reg (SImode, operands[2]); }
if (REG_P (operands[2])) { rtx tmp = gen_reg_rtx (mode); emit_insn (gen_aarch64_simd_dup (tmp, convert_to_mode (mode, operands[2], 0))); emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], tmp)); DONE; } else FAIL; } )
;; Logical right shift of a VDQ_I vector by an SImode amount.
;; A constant amount in (0, bit_width] uses the immediate pattern
;; (aarch64_simd_lshr).  Any other constant, and any MEM operand, is forced
;; into an SImode register.  A register amount is negated (negsi2),
;; duplicated into a vector register, and used with
;; aarch64_simd_reg_shl_unsigned.  Anything else FAILs.
(define_expand “lshr3” [(match_operand:VDQ_I 0 “register_operand” "") (match_operand:VDQ_I 1 “register_operand” "") (match_operand:SI 2 “general_operand” "")] “TARGET_SIMD” { int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; int shift_amount;
if (CONST_INT_P (operands[2])) { shift_amount = INTVAL (operands[2]); if (shift_amount > 0 && shift_amount <= bit_width) { rtx tmp = aarch64_simd_gen_const_vector_dup (mode, shift_amount); emit_insn (gen_aarch64_simd_lshr (operands[0], operands[1], tmp)); DONE; } else operands[2] = force_reg (SImode, operands[2]); } else if (MEM_P (operands[2])) { operands[2] = force_reg (SImode, operands[2]); }
if (REG_P (operands[2])) { rtx tmp = gen_reg_rtx (SImode); rtx tmp1 = gen_reg_rtx (mode); emit_insn (gen_negsi2 (tmp, operands[2])); emit_insn (gen_aarch64_simd_dup (tmp1, convert_to_mode (mode, tmp, 0))); emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], operands[1], tmp1)); DONE; } else FAIL; } )
;; Arithmetic right shift of a VDQ_I vector by an SImode amount.
;; A constant amount in (0, bit_width] uses the immediate pattern
;; (aarch64_simd_ashr).  Any other constant, and any MEM operand, is forced
;; into an SImode register.  A register amount is negated (negsi2),
;; duplicated into a vector register, and used with
;; aarch64_simd_reg_shl_signed.  Anything else FAILs.
(define_expand “ashr3” [(match_operand:VDQ_I 0 “register_operand” "") (match_operand:VDQ_I 1 “register_operand” "") (match_operand:SI 2 “general_operand” "")] “TARGET_SIMD” { int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; int shift_amount;
if (CONST_INT_P (operands[2])) { shift_amount = INTVAL (operands[2]); if (shift_amount > 0 && shift_amount <= bit_width) { rtx tmp = aarch64_simd_gen_const_vector_dup (mode, shift_amount); emit_insn (gen_aarch64_simd_ashr (operands[0], operands[1], tmp)); DONE; } else operands[2] = force_reg (SImode, operands[2]); } else if (MEM_P (operands[2])) { operands[2] = force_reg (SImode, operands[2]); }
if (REG_P (operands[2])) { rtx tmp = gen_reg_rtx (SImode); rtx tmp1 = gen_reg_rtx (mode); emit_insn (gen_negsi2 (tmp, operands[2])); emit_insn (gen_aarch64_simd_dup (tmp1, convert_to_mode (mode, tmp, 0))); emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], operands[1], tmp1)); DONE; } else FAIL; } )
;; Left shift of a VDQ_I vector by a vector of per-lane amounts; forwards
;; directly to aarch64_simd_reg_sshl.
(define_expand “vashl3” [(match_operand:VDQ_I 0 “register_operand” "") (match_operand:VDQ_I 1 “register_operand” "") (match_operand:VDQ_I 2 “register_operand” "")] “TARGET_SIMD” { emit_insn (gen_aarch64_simd_reg_sshl (operands[0], operands[1], operands[2])); DONE; })
;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
;; Arithmetic right shift by a vector of per-lane amounts: the amounts are
;; negated into a fresh register and passed to the signed register-shift
;; pattern.
(define_expand “vashr3”
  [(match_operand:VDQ_BHSI 0 “register_operand” "")
   (match_operand:VDQ_BHSI 1 “register_operand” "")
   (match_operand:VDQ_BHSI 2 “register_operand” "")]
  “TARGET_SIMD”
{
  rtx neg = gen_reg_rtx (mode);
  emit_insn (gen_neg2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl_signed (operands[0], operands[1], neg));
  DONE;
})
;; DI vector shift
;; Arithmetic right shift of a DI value by an immediate accepted by
;; aarch64_shift_imm64_di.  An amount of 64 is rewritten to 63 before the
;; standard ashrdi3 pattern is used.
(define_expand “aarch64_ashr_simddi”
  [(match_operand:DI 0 “register_operand” “=w”)
   (match_operand:DI 1 “register_operand” “w”)
   (match_operand:SI 2 “aarch64_shift_imm64_di” "")]
  “TARGET_SIMD”
  {
    /* An arithmetic shift right by 64 fills the result with copies of the
       sign bit, just like asr by 63 - however the standard pattern does not
       handle a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)
;; Logical right shift of a VDQ_BHSI vector by a vector of per-lane amounts:
;; the amounts are negated into a fresh register and passed to the unsigned
;; register-shift pattern.
(define_expand “vlshr3”
  [(match_operand:VDQ_BHSI 0 “register_operand” "")
   (match_operand:VDQ_BHSI 1 “register_operand” "")
   (match_operand:VDQ_BHSI 2 “register_operand” "")]
  “TARGET_SIMD”
{
  rtx neg = gen_reg_rtx (mode);
  emit_insn (gen_neg2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl_unsigned (operands[0], operands[1], neg));
  DONE;
})
;; Logical right shift of a DI value by an immediate accepted by
;; aarch64_shift_imm64_di.  An amount of 64 stores zero in the destination;
;; any other amount uses lshrdi3.
(define_expand “aarch64_lshr_simddi” [(match_operand:DI 0 “register_operand” “=w”) (match_operand:DI 1 “register_operand” “w”) (match_operand:SI 2 “aarch64_shift_imm64_di” "")] “TARGET_SIMD” { if (INTVAL (operands[2]) == 64) emit_move_insn (operands[0], const0_rtx); else emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2])); DONE; } )
;; vec_set expander for VDQ_BHSI modes.  The lane index in operand 2 is
;; converted to a one-hot mask (1 << index) and passed to
;; aarch64_simd_vec_set, with operand 0 also supplied as the vector being
;; updated.
(define_expand “vec_set” [(match_operand:VDQ_BHSI 0 “register_operand”) (match_operand: 1 “register_operand”) (match_operand:SI 2 “immediate_operand”)] “TARGET_SIMD” { HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); emit_insn (gen_aarch64_simd_vec_set (operands[0], operands[1], GEN_INT (elem), operands[0])); DONE; } )
;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
;; Whole-vector shift (UNSPEC_VEC_SHR) of a VD register by immediate
;; operand 2: shl on the D register for big-endian, ushr otherwise.
(define_insn “vec_shr_”
  [(set (match_operand:VD 0 “register_operand” “=w”)
        (unspec:VD [(match_operand:VD 1 “register_operand” “w”)
                    (match_operand:SI 2 “immediate_operand” “i”)]
                   UNSPEC_VEC_SHR))]
  “TARGET_SIMD”
  {
    if (BYTES_BIG_ENDIAN)
      return “shl %d0, %d1, %2”;
    else
      return “ushr %d0, %d1, %2”;
  }
  [(set_attr “type” “neon_shift_imm”)]
)
;; Insert DI operand 1 into one lane of V2DI operand 3 (tied to the output).
;; Operand 2 is a one-hot lane mask, remapped through ENDIAN_LANE_N in the
;; same way as the other vec_set insns.  Alternatives by operand 1
;; constraint: "r" -> ins from %1, "w" -> ins from %1.d[0].
(define_insn “aarch64_simd_vec_setv2di” [(set (match_operand:V2DI 0 “register_operand” “=w,w”) (vec_merge:V2DI (vec_duplicate:V2DI (match_operand:DI 1 “register_operand” “r,w”)) (match_operand:V2DI 3 “register_operand” “0,0”) (match_operand:SI 2 “immediate_operand” “i,i”)))] “TARGET_SIMD” { int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2]))); operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); switch (which_alternative) { case 0: return “ins\t%0.d[%p2], %1”; case 1: return “ins\t%0.d[%p2], %1.d[0]”; default: gcc_unreachable (); } } [(set_attr “type” “neon_from_gp, neon_ins_q”)] )
;; vec_set expander for V2DI.  The lane index in operand 2 is converted to a
;; one-hot mask (1 << index) and passed to aarch64_simd_vec_setv2di.
(define_expand “vec_setv2di” [(match_operand:V2DI 0 “register_operand”) (match_operand:DI 1 “register_operand”) (match_operand:SI 2 “immediate_operand”)] “TARGET_SIMD” { HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1], GEN_INT (elem), operands[0])); DONE; } )
;; Insert scalar operand 1 (constraint "w") into one lane of VDQF operand 3,
;; which is tied to the output.  Operand 2 is a one-hot lane mask, remapped
;; through ENDIAN_LANE_N before the ins template is returned.
(define_insn “aarch64_simd_vec_set” [(set (match_operand:VDQF 0 “register_operand” “=w”) (vec_merge:VDQF (vec_duplicate:VDQF (match_operand: 1 “register_operand” “w”)) (match_operand:VDQF 3 “register_operand” “0”) (match_operand:SI 2 “immediate_operand” “i”)))] “TARGET_SIMD” { int elt = ENDIAN_LANE_N (mode, exact_log2 (INTVAL (operands[2])));
operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt); return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
} [(set_attr “type” “neon_ins”)] )
;; vec_set expander for VDQF modes.  The lane index in operand 2 is converted
;; to a one-hot mask (1 << index) and passed to aarch64_simd_vec_set.
(define_expand “vec_set” [(match_operand:VDQF 0 “register_operand” “+w”) (match_operand: 1 “register_operand” “w”) (match_operand:SI 2 “immediate_operand” "")] “TARGET_SIMD” { HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); emit_insn (gen_aarch64_simd_vec_set (operands[0], operands[1], GEN_INT (elem), operands[0])); DONE; } )
;; Multiply-accumulate (VDQ_BHSI modes): operand 2 * operand 3 + operand 1,
;; with operand 1 tied to the output register.
(define_insn “aarch64_mla” [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w”) (plus:VDQ_BHSI (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 “register_operand” “w”) (match_operand:VDQ_BHSI 3 “register_operand” “w”)) (match_operand:VDQ_BHSI 1 “register_operand” “0”)))] “TARGET_SIMD” “mla\t%0., %2., %3.” [(set_attr “type” “neon_mla_”)] )
;; Multiply-accumulate by element (VDQHS modes): a lane of operand 1,
;; duplicated, times operand 3, plus operand 4 (tied to the output).
;; The lane index is remapped through ENDIAN_LANE_N.
(define_insn “*aarch64_mla_elt” [(set (match_operand:VDQHS 0 “register_operand” “=w”) (plus:VDQHS (mult:VDQHS (vec_duplicate:VDQHS (vec_select: (match_operand:VDQHS 1 “register_operand” “<h_con>”) (parallel [(match_operand:SI 2 “immediate_operand”)]))) (match_operand:VDQHS 3 “register_operand” “w”)) (match_operand:VDQHS 4 “register_operand” “0”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); return “mla\t%0., %3., %1.[%2]”; } [(set_attr “type” “neon_mla__scalar”)] )
;; As the multiply-accumulate-by-element pattern, but the lane is selected
;; from a vector in the <VSWAP_WIDTH> mode; the index is remapped using that
;; mode.
(define_insn “*aarch64_mla_elt_<vswap_width_name>” [(set (match_operand:VDQHS 0 “register_operand” “=w”) (plus:VDQHS (mult:VDQHS (vec_duplicate:VDQHS (vec_select: (match_operand:<VSWAP_WIDTH> 1 “register_operand” “<h_con>”) (parallel [(match_operand:SI 2 “immediate_operand”)]))) (match_operand:VDQHS 3 “register_operand” “w”)) (match_operand:VDQHS 4 “register_operand” “0”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, INTVAL (operands[2]))); return “mla\t%0., %3., %1.[%2]”; } [(set_attr “type” “neon_mla__scalar”)] )
;; Multiply-subtract (VDQ_BHSI modes): operand 1 - operand 2 * operand 3,
;; with operand 1 tied to the output register.
(define_insn “aarch64_mls” [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w”) (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 “register_operand” “0”) (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 “register_operand” “w”) (match_operand:VDQ_BHSI 3 “register_operand” “w”))))] “TARGET_SIMD” “mls\t%0., %2., %3.” [(set_attr “type” “neon_mla_”)] )
;; Multiply-subtract by element (VDQHS modes): operand 4 (tied to the output)
;; minus a duplicated lane of operand 1 times operand 3.
;; The lane index is remapped through ENDIAN_LANE_N.
(define_insn “*aarch64_mls_elt” [(set (match_operand:VDQHS 0 “register_operand” “=w”) (minus:VDQHS (match_operand:VDQHS 4 “register_operand” “0”) (mult:VDQHS (vec_duplicate:VDQHS (vec_select: (match_operand:VDQHS 1 “register_operand” “<h_con>”) (parallel [(match_operand:SI 2 “immediate_operand”)]))) (match_operand:VDQHS 3 “register_operand” “w”))))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); return “mls\t%0., %3., %1.[%2]”; } [(set_attr “type” “neon_mla__scalar”)] )
;; As the multiply-subtract-by-element pattern, but the lane is selected
;; from a vector in the <VSWAP_WIDTH> mode; the index is remapped using that
;; mode.
(define_insn “*aarch64_mls_elt_<vswap_width_name>” [(set (match_operand:VDQHS 0 “register_operand” “=w”) (minus:VDQHS (match_operand:VDQHS 4 “register_operand” “0”) (mult:VDQHS (vec_duplicate:VDQHS (vec_select: (match_operand:<VSWAP_WIDTH> 1 “register_operand” “<h_con>”) (parallel [(match_operand:SI 2 “immediate_operand”)]))) (match_operand:VDQHS 3 “register_operand” “w”))))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, INTVAL (operands[2]))); return “mls\t%0., %3., %1.[%2]”; } [(set_attr “type” “neon_mla__scalar”)] )
;; Max/Min operations.
;; Element-wise MAXMIN of two VDQ_BHSI vectors.
;; NOTE(review): the pattern name and the mnemonic at the start of the
;; output template appear to have lost their iterator parts -- restore from
;; the upstream file.
(define_insn “3”
  [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w”)
        (MAXMIN:VDQ_BHSI
          (match_operand:VDQ_BHSI 1 “register_operand” “w”)
          (match_operand:VDQ_BHSI 2 “register_operand” “w”)))]
  “TARGET_SIMD”
  “\t%0., %1., %2.”
  [(set_attr “type” “neon_minmax”)]
)
;; MAXMIN expander for V2DI.  The rtx code is mapped to a comparison
;; (UMIN->LTU, SMIN->LT, UMAX->GTU, SMAX->GT); that comparison of operands 1
;; and 2 is then passed to aarch64_vcond_internalv2div2di, with operands 1
;; and 2 also supplied as the two values to choose between.
;; NOTE(review): `switch ()' has no controlling expression; the iterator
;; text appears to have been lost -- restore from the upstream file.
(define_expand “v2di3” [(set (match_operand:V2DI 0 “register_operand” "") (MAXMIN:V2DI (match_operand:V2DI 1 “register_operand” "") (match_operand:V2DI 2 “register_operand” "")))] “TARGET_SIMD” { enum rtx_code cmp_operator; rtx cmp_fmt;
switch () { case UMIN: cmp_operator = LTU; break; case SMIN: cmp_operator = LT; break; case UMAX: cmp_operator = GTU; break; case SMAX: cmp_operator = GT; break; default: gcc_unreachable (); }
cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]); emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1], operands[2], cmp_fmt, operands[1], operands[2])); DONE; })
;; Pairwise Integer Max/Min operations.
;; Represented as a MAXMINV unspec of two VDQ_BHSI operands.
(define_insn “aarch64_<maxmin_uns>p”
  [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w”)
        (unspec:VDQ_BHSI
          [(match_operand:VDQ_BHSI 1 “register_operand” “w”)
           (match_operand:VDQ_BHSI 2 “register_operand” “w”)]
          MAXMINV))]
  “TARGET_SIMD”
  “<maxmin_uns_op>p\t%0., %1., %2.”
  [(set_attr “type” “neon_minmax”)]
)
;; Pairwise FP Max/Min operations.
;; Represented as an FMAXMINV unspec of two VDQF operands.
(define_insn “aarch64_<maxmin_uns>p”
  [(set (match_operand:VDQF 0 “register_operand” “=w”)
        (unspec:VDQF
          [(match_operand:VDQF 1 “register_operand” “w”)
           (match_operand:VDQF 2 “register_operand” “w”)]
          FMAXMINV))]
  “TARGET_SIMD”
  “<maxmin_uns_op>p\t%0., %1., %2.”
  [(set_attr “type” “neon_minmax”)]
)
;; vec_concat gives a new vector with the low elements from operand 1, and ;; the high elements from operand 2. That is to say, given op1 = { a, b } ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }. ;; This means that the RTL descriptions of the patterns below ;; need to change depending on endianness.
;; Move to the low architectural bits of the register. ;; On little-endian this is { operand, zeroes } ;; On big-endian this is { zeroes, operand }
;; Little-endian form for VQ_NO2E modes: the result is the concatenation
;; { operand 1, zeroes }.  Three alternatives; the "@" template has one line
;; per alternative, and the "fp" attribute uses `*' for the alternatives it
;; does not set, as the "simd" attribute does.
(define_insn “move_lo_quad_internal_”
  [(set (match_operand:VQ_NO2E 0 “register_operand” “=w,w,w”)
        (vec_concat:VQ_NO2E
          (match_operand: 1 “register_operand” “w,r,r”)
          (vec_duplicate: (const_int 0))))]
  “TARGET_SIMD && !BYTES_BIG_ENDIAN”
  “@
   dup\t%d0, %1.d[0]
   fmov\t%d0, %1
   dup\t%d0, %1”
  [(set_attr “type” “neon_dup,f_mcr,neon_dup”)
   (set_attr “simd” “yes,*,yes”)
   (set_attr “fp” “*,yes,*”)
   (set_attr “length” “4”)]
)
;; Little-endian form for VQ_2E modes: the result is the concatenation
;; { operand 1, 0 }.  Three alternatives; the "@" template has one line per
;; alternative, and the "fp" attribute uses `*' for the alternatives it does
;; not set, as the "simd" attribute does.
(define_insn “move_lo_quad_internal_”
  [(set (match_operand:VQ_2E 0 “register_operand” “=w,w,w”)
        (vec_concat:VQ_2E
          (match_operand: 1 “register_operand” “w,r,r”)
          (const_int 0)))]
  “TARGET_SIMD && !BYTES_BIG_ENDIAN”
  “@
   dup\t%d0, %1.d[0]
   fmov\t%d0, %1
   dup\t%d0, %1”
  [(set_attr “type” “neon_dup,f_mcr,neon_dup”)
   (set_attr “simd” “yes,*,yes”)
   (set_attr “fp” “*,yes,*”)
   (set_attr “length” “4”)]
)
;; Big-endian form for VQ_NO2E modes: the result is the concatenation
;; { zeroes, operand 1 }.  Three alternatives; the "@" template has one line
;; per alternative, and the "fp" attribute uses `*' for the alternatives it
;; does not set, as the "simd" attribute does.
(define_insn “move_lo_quad_internal_be_”
  [(set (match_operand:VQ_NO2E 0 “register_operand” “=w,w,w”)
        (vec_concat:VQ_NO2E
          (vec_duplicate: (const_int 0))
          (match_operand: 1 “register_operand” “w,r,r”)))]
  “TARGET_SIMD && BYTES_BIG_ENDIAN”
  “@
   dup\t%d0, %1.d[0]
   fmov\t%d0, %1
   dup\t%d0, %1”
  [(set_attr “type” “neon_dup,f_mcr,neon_dup”)
   (set_attr “simd” “yes,*,yes”)
   (set_attr “fp” “*,yes,*”)
   (set_attr “length” “4”)]
)
;; Big-endian form for VQ_2E modes: the result is the concatenation
;; { 0, operand 1 }.  Three alternatives; the "@" template has one line per
;; alternative, and the "fp" attribute uses `*' for the alternatives it does
;; not set, as the "simd" attribute does.
(define_insn “move_lo_quad_internal_be_”
  [(set (match_operand:VQ_2E 0 “register_operand” “=w,w,w”)
        (vec_concat:VQ_2E
          (const_int 0)
          (match_operand: 1 “register_operand” “w,r,r”)))]
  “TARGET_SIMD && BYTES_BIG_ENDIAN”
  “@
   dup\t%d0, %1.d[0]
   fmov\t%d0, %1
   dup\t%d0, %1”
  [(set_attr “type” “neon_dup,f_mcr,neon_dup”)
   (set_attr “simd” “yes,*,yes”)
   (set_attr “fp” “*,yes,*”)
   (set_attr “length” “4”)]
)
;; Expander that picks the big-endian or little-endian move_lo_quad_internal
;; pattern according to BYTES_BIG_ENDIAN.
(define_expand “move_lo_quad_” [(match_operand:VQ 0 “register_operand”) (match_operand:VQ 1 “register_operand”)] “TARGET_SIMD” { if (BYTES_BIG_ENDIAN) emit_insn (gen_move_lo_quad_internal_be_ (operands[0], operands[1])); else emit_insn (gen_move_lo_quad_internal_ (operands[0], operands[1])); DONE; } )
;; Move operand1 to the high architectural bits of the register, keeping ;; the low architectural bits of operand2. ;; For little-endian this is { operand2, operand1 } ;; For big-endian this is { operand1, operand2 }
;; Little-endian form: the result is the concatenation of the low half of
;; operand 0 (read-write, selected by a vect_par_cnst_lo_half parallel) and
;; operand 1.  Two alternatives, for operand 1 in a "w" or an "r" register;
;; the "@" template has one line per alternative.
(define_insn “aarch64_simd_move_hi_quad_”
  [(set (match_operand:VQ 0 “register_operand” “+w,w”)
        (vec_concat:VQ
          (vec_select: (match_dup 0)
                       (match_operand:VQ 2 “vect_par_cnst_lo_half” ""))
          (match_operand: 1 “register_operand” “w,r”)))]
  “TARGET_SIMD && !BYTES_BIG_ENDIAN”
  “@
   ins\t%0.d[1], %1.d[0]
   ins\t%0.d[1], %1”
  [(set_attr “type” “neon_ins”)]
)
;; Big-endian form: the result is the concatenation of operand 1 and the
;; low half of operand 0 (read-write, selected by a vect_par_cnst_lo_half
;; parallel).  Two alternatives, for operand 1 in a "w" or an "r" register;
;; the "@" template has one line per alternative.
(define_insn “aarch64_simd_move_hi_quad_be_”
  [(set (match_operand:VQ 0 “register_operand” “+w,w”)
        (vec_concat:VQ
          (match_operand: 1 “register_operand” “w,r”)
          (vec_select: (match_dup 0)
                       (match_operand:VQ 2 “vect_par_cnst_lo_half” ""))))]
  “TARGET_SIMD && BYTES_BIG_ENDIAN”
  “@
   ins\t%0.d[1], %1.d[0]
   ins\t%0.d[1], %1”
  [(set_attr “type” “neon_ins”)]
)
;; Expander that builds the low-half selector with
;; aarch64_simd_vect_par_cnst_half (mode, false) and emits the big-endian or
;; little-endian move_hi_quad insn according to BYTES_BIG_ENDIAN.
(define_expand “move_hi_quad_” [(match_operand:VQ 0 “register_operand” "") (match_operand: 1 “register_operand” "")] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, false); if (BYTES_BIG_ENDIAN) emit_insn (gen_aarch64_simd_move_hi_quad_be_ (operands[0], operands[1], p)); else emit_insn (gen_aarch64_simd_move_hi_quad_ (operands[0], operands[1], p)); DONE; })
;; Narrowing operations.
;; For doubles.
;; Truncate each element of VQN operand 1 into the narrower result vector,
;; emitted as xtn.
(define_insn “aarch64_simd_vec_pack_trunc_”
  [(set (match_operand: 0 “register_operand” “=w”)
        (truncate: (match_operand:VQN 1 “register_operand” “w”)))]
  “TARGET_SIMD”
  “xtn\t%0., %1.”
  [(set_attr “type” “neon_shift_imm_narrow_q”)]
)
;; Pack-and-truncate expander for two VDN operands.  The two inputs are
;; combined into one temporary register with move_lo_quad / move_hi_quad
;; (which input goes low and which high is swapped for big-endian), and the
;; temporary is then truncated with aarch64_simd_vec_pack_trunc.
(define_expand “vec_pack_trunc_” [(match_operand: 0 “register_operand” "") (match_operand:VDN 1 “register_operand” "") (match_operand:VDN 2 “register_operand” "")] “TARGET_SIMD” { rtx tempreg = gen_reg_rtx (mode); int lo = BYTES_BIG_ENDIAN ? 2 : 1; int hi = BYTES_BIG_ENDIAN ? 1 : 2;
emit_insn (gen_move_lo_quad_ (tempreg, operands[lo])); emit_insn (gen_move_hi_quad_ (tempreg, operands[hi])); emit_insn (gen_aarch64_simd_vec_pack_trunc_ (operands[0], tempreg)); DONE; })
;; For quads.
;; Pack-and-truncate for two VQN operands: the result is the concatenation
;; of the truncated operand 1 and the truncated operand 2, emitted as an
;; xtn / xtn2 pair (length 8).  For big-endian the roles of operands 1 and 2
;; in the two instructions are swapped.  The output is earlyclobber ("=&w").
(define_insn “vec_pack_trunc_” [(set (match_operand: 0 “register_operand” “=&w”) (vec_concat: (truncate: (match_operand:VQN 1 “register_operand” “w”)) (truncate: (match_operand:VQN 2 “register_operand” “w”))))] “TARGET_SIMD” { if (BYTES_BIG_ENDIAN) return “xtn\t%0., %2.;xtn2\t%0., %1.”; else return “xtn\t%0., %1.;xtn2\t%0., %2.”; } [(set_attr “type” “multiple”) (set_attr “length” “8”)] )
;; Widening operations.
;; Widen (ANY_EXTEND) the low half of VQW operand 1, selected by a
;; vect_par_cnst_lo_half parallel; emitted as a shll-family instruction with
;; a shift of 0.
(define_insn “aarch64_simd_vec_unpacklo” [(set (match_operand: 0 “register_operand” “=w”) (ANY_EXTEND: (vec_select: (match_operand:VQW 1 “register_operand” “w”) (match_operand:VQW 2 “vect_par_cnst_lo_half” "") )))] “TARGET_SIMD” “shll\t%0., %1., 0” [(set_attr “type” “neon_shift_imm_long”)] )
;; Widen (ANY_EXTEND) the half of VQW operand 1 selected by the
;; vect_par_cnst_hi_half parallel in operand 2; emitted as the "2" form of
;; shift-left-long by 0.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “aarch64_simd_vec_unpackhi” [(set (match_operand: 0 “register_operand” “=w”) (ANY_EXTEND: (vec_select: (match_operand:VQW 1 “register_operand” “w”) (match_operand:VQW 2 “vect_par_cnst_hi_half” "") )))] “TARGET_SIMD” “shll2\t%0., %1., 0” [(set_attr “type” “neon_shift_imm_long”)] )
;; Expander for the high-half unpack: builds the half parallel with
;; aarch64_simd_vect_par_cnst_half (..., true) and emits the
;; aarch64_simd_vec_unpackhi insn with it as the third operand.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_expand “vec_unpackhi” [(match_operand: 0 “register_operand” "") (ANY_EXTEND: (match_operand:VQW 1 “register_operand”))] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_simd_vec_unpackhi (operands[0], operands[1], p)); DONE; } )
;; Expander for the low-half unpack: builds the half parallel with
;; aarch64_simd_vect_par_cnst_half (..., false) and emits the
;; aarch64_simd_vec_unpacklo insn with it as the third operand.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_expand “vec_unpacklo” [(match_operand: 0 “register_operand” "") (ANY_EXTEND: (match_operand:VQW 1 “register_operand” ""))] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, false); emit_insn (gen_aarch64_simd_vec_unpacklo (operands[0], operands[1], p)); DONE; } )
;; Widening arithmetic.
;; Widening multiply-accumulate on the low halves.  Operands 2 and 4 are each
;; half-selected by the vect_par_cnst_lo_half parallel (operand 3, reused via
;; match_dup), extended, multiplied, and added to operand 1, which is tied to
;; the destination ("0").  Emitted as MLAL.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_mlal_lo” [(set (match_operand: 0 “register_operand” “=w”) (plus: (mult: (ANY_EXTEND: (vec_select: (match_operand:VQW 2 “register_operand” “w”) (match_operand:VQW 3 “vect_par_cnst_lo_half” ""))) (ANY_EXTEND: (vec_select: (match_operand:VQW 4 “register_operand” “w”) (match_dup 3)))) (match_operand: 1 “register_operand” “0”)))] “TARGET_SIMD” “mlal\t%0., %2., %4.” [(set_attr “type” “neon_mla__long”)] )
;; Widening multiply-accumulate on the high halves.  Same shape as the low
;; variant but operand 3 must satisfy vect_par_cnst_hi_half; the accumulator
;; (operand 1) is tied to the destination.  Emitted as MLAL2.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_mlal_hi” [(set (match_operand: 0 “register_operand” “=w”) (plus: (mult: (ANY_EXTEND: (vec_select: (match_operand:VQW 2 “register_operand” “w”) (match_operand:VQW 3 “vect_par_cnst_hi_half” ""))) (ANY_EXTEND: (vec_select: (match_operand:VQW 4 “register_operand” “w”) (match_dup 3)))) (match_operand: 1 “register_operand” “0”)))] “TARGET_SIMD” “mlal2\t%0., %2., %4.” [(set_attr “type” “neon_mla__long”)] )
;; Widening multiply-subtract on the low halves: the extended product of the
;; lo-half selections of operands 2 and 4 is subtracted from operand 1, which
;; is tied to the destination.  Emitted as MLSL.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_mlsl_lo” [(set (match_operand: 0 “register_operand” “=w”) (minus: (match_operand: 1 “register_operand” “0”) (mult: (ANY_EXTEND: (vec_select: (match_operand:VQW 2 “register_operand” “w”) (match_operand:VQW 3 “vect_par_cnst_lo_half” ""))) (ANY_EXTEND: (vec_select: (match_operand:VQW 4 “register_operand” “w”) (match_dup 3))))))] “TARGET_SIMD” “mlsl\t%0., %2., %4.” [(set_attr “type” “neon_mla__long”)] )
;; Widening multiply-subtract on the high halves: the extended product of the
;; hi-half selections of operands 2 and 4 is subtracted from operand 1, which
;; is tied to the destination.  Emitted as MLSL2.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_mlsl_hi” [(set (match_operand: 0 “register_operand” “=w”) (minus: (match_operand: 1 “register_operand” “0”) (mult: (ANY_EXTEND: (vec_select: (match_operand:VQW 2 “register_operand” “w”) (match_operand:VQW 3 “vect_par_cnst_hi_half” ""))) (ANY_EXTEND: (vec_select: (match_operand:VQW 4 “register_operand” “w”) (match_dup 3))))))] “TARGET_SIMD” “mlsl2\t%0., %2., %4.” [(set_attr “type” “neon_mla__long”)] )
;; Widening multiply-accumulate on whole VD_BHSI vectors (no half selection):
;; operands 1 and 2 are extended and multiplied, then added to operand 3,
;; which is tied to the destination.  Emitted as MLAL.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_mlal” [(set (match_operand: 0 “register_operand” “=w”) (plus: (mult: (ANY_EXTEND: (match_operand:VD_BHSI 1 “register_operand” “w”)) (ANY_EXTEND: (match_operand:VD_BHSI 2 “register_operand” “w”))) (match_operand: 3 “register_operand” “0”)))] “TARGET_SIMD” “mlal\t%0., %1., %2.” [(set_attr “type” “neon_mla__long”)] )
;; Widening multiply-subtract on whole VD_BHSI vectors: the extended product
;; of operands 2 and 3 is subtracted from operand 1, which is tied to the
;; destination.  Emitted as MLSL.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_mlsl” [(set (match_operand: 0 “register_operand” “=w”) (minus: (match_operand: 1 “register_operand” “0”) (mult: (ANY_EXTEND: (match_operand:VD_BHSI 2 “register_operand” “w”)) (ANY_EXTEND: (match_operand:VD_BHSI 3 “register_operand” “w”)))))] “TARGET_SIMD” “mlsl\t%0., %2., %3.” [(set_attr “type” “neon_mla__long”)] )
;; Widening multiply of the low halves: operands 1 and 2 are half-selected by
;; the vect_par_cnst_lo_half parallel in operand 3, extended and multiplied.
;; Emitted as MULL.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “aarch64_simd_vec_mult_lo_” [(set (match_operand: 0 “register_operand” “=w”) (mult: (ANY_EXTEND: (vec_select: (match_operand:VQW 1 “register_operand” “w”) (match_operand:VQW 3 “vect_par_cnst_lo_half” ""))) (ANY_EXTEND: (vec_select: (match_operand:VQW 2 “register_operand” “w”) (match_dup 3)))))] “TARGET_SIMD” “mull\t%0., %1., %2.” [(set_attr “type” “neon_mul__long”)] )
;; Expander for the low-half widening multiply: builds the half parallel with
;; aarch64_simd_vect_par_cnst_half (..., false) and emits
;; aarch64_simd_vec_mult_lo_ with it as the fourth operand.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_expand “vec_widen_mult_lo_” [(match_operand: 0 “register_operand” "") (ANY_EXTEND: (match_operand:VQW 1 “register_operand” "")) (ANY_EXTEND: (match_operand:VQW 2 “register_operand” ""))] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, false); emit_insn (gen_aarch64_simd_vec_mult_lo_ (operands[0], operands[1], operands[2], p)); DONE; } )
;; Widening multiply of the high halves: operands 1 and 2 are half-selected
;; by the vect_par_cnst_hi_half parallel in operand 3, extended and
;; multiplied.  Emitted as MULL2.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “aarch64_simd_vec_mult_hi_” [(set (match_operand: 0 “register_operand” “=w”) (mult: (ANY_EXTEND: (vec_select: (match_operand:VQW 1 “register_operand” “w”) (match_operand:VQW 3 “vect_par_cnst_hi_half” ""))) (ANY_EXTEND: (vec_select: (match_operand:VQW 2 “register_operand” “w”) (match_dup 3)))))] “TARGET_SIMD” “mull2\t%0., %1., %2.” [(set_attr “type” “neon_mul__long”)] )
;; Expander for the high-half widening multiply: builds the half parallel
;; with aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_simd_vec_mult_hi_ with it as the fourth operand.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_expand “vec_widen_mult_hi_” [(match_operand: 0 “register_operand” "") (ANY_EXTEND: (match_operand:VQW 1 “register_operand” "")) (ANY_EXTEND: (match_operand:VQW 2 “register_operand” ""))] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_simd_vec_mult_hi_ (operands[0], operands[1], operands[2], p)); DONE;
} )
;; FP vector operations.
;; AArch64 AdvSIMD supports single-precision (32-bit) and
;; double-precision (64-bit) floating-point data types and arithmetic as
;; defined by the IEEE 754-2008 standard.  This makes them vectorizable
;; without the need for -ffast-math or -funsafe-math-optimizations.
;;
;; Floating-point operations can raise an exception.  Vectorizing such
;; operations is safe for the reasons explained below.
;;
;; ARMv8 permits an extension to enable trapped floating-point
;; exception handling, however this is an optional feature.  In the
;; event of a floating-point exception being raised by vectorised
;; code then:
;; 1.  If trapped floating-point exceptions are available, then a trap
;;     will be taken when any lane raises an enabled exception.  A trap
;;     handler may determine which lane raised the exception.
;; 2.  Alternatively a sticky exception flag is set in the
;;     floating-point status register (FPSR).  Software may explicitly
;;     test the exception flags, in which case the tests will either
;;     prevent vectorisation, allowing precise identification of the
;;     failing operation, or if tested outside of vectorisable regions
;;     then the specific operation and lane are not of interest.
;; FP arithmetic operations.
;; Vector floating-point addition over the VDQF modes; emitted as FADD.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “add3” [(set (match_operand:VDQF 0 “register_operand” “=w”) (plus:VDQF (match_operand:VDQF 1 “register_operand” “w”) (match_operand:VDQF 2 “register_operand” “w”)))] “TARGET_SIMD” “fadd\t%0., %1., %2.” [(set_attr “type” “neon_fp_addsub_”)] )
;; Vector floating-point subtraction (operand 1 minus operand 2) over the
;; VDQF modes; emitted as FSUB.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “sub3” [(set (match_operand:VDQF 0 “register_operand” “=w”) (minus:VDQF (match_operand:VDQF 1 “register_operand” “w”) (match_operand:VDQF 2 “register_operand” “w”)))] “TARGET_SIMD” “fsub\t%0., %1., %2.” [(set_attr “type” “neon_fp_addsub_”)] )
;; Vector floating-point multiplication over the VDQF modes; emitted as FMUL.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “mul3” [(set (match_operand:VDQF 0 “register_operand” “=w”) (mult:VDQF (match_operand:VDQF 1 “register_operand” “w”) (match_operand:VDQF 2 “register_operand” “w”)))] “TARGET_SIMD” “fmul\t%0., %1., %2.” [(set_attr “type” “neon_fp_mul_”)] )
;; Vector floating-point division (operand 1 divided by operand 2) over the
;; VDQF modes; emitted as FDIV.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “div3” [(set (match_operand:VDQF 0 “register_operand” “=w”) (div:VDQF (match_operand:VDQF 1 “register_operand” “w”) (match_operand:VDQF 2 “register_operand” “w”)))] “TARGET_SIMD” “fdiv\t%0., %1., %2.” [(set_attr “type” “neon_fp_div_”)] )
;; Vector floating-point negation over the VDQF modes; emitted as FNEG.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “neg2” [(set (match_operand:VDQF 0 “register_operand” “=w”) (neg:VDQF (match_operand:VDQF 1 “register_operand” “w”)))] “TARGET_SIMD” “fneg\t%0., %1.” [(set_attr “type” “neon_fp_neg_”)] )
;; Vector floating-point absolute value over the VDQF modes; emitted as FABS.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “abs2” [(set (match_operand:VDQF 0 “register_operand” “=w”) (abs:VDQF (match_operand:VDQF 1 “register_operand” “w”)))] “TARGET_SIMD” “fabs\t%0., %1.” [(set_attr “type” “neon_fp_abs_”)] )
;; Vector fused multiply-add: operand 1 * operand 2 + operand 3, with the
;; addend (operand 3) tied to the destination ("0").  Emitted as FMLA.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “fma4” [(set (match_operand:VDQF 0 “register_operand” “=w”) (fma:VDQF (match_operand:VDQF 1 “register_operand” “w”) (match_operand:VDQF 2 “register_operand” “w”) (match_operand:VDQF 3 “register_operand” “0”)))] “TARGET_SIMD” “fmla\t%0., %1., %2.” [(set_attr “type” “neon_fp_mla_”)] )
;; Fused multiply-add by element: lane operand 2 of vector operand 1 is
;; broadcast (vec_duplicate of vec_select), multiplied by operand 3 and added
;; to operand 4, which is tied to the destination.  The lane number is
;; remapped with ENDIAN_LANE_N before it is printed in the FMLA template.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_fma4_elt” [(set (match_operand:VDQF 0 “register_operand” “=w”) (fma:VDQF (vec_duplicate:VDQF (vec_select: (match_operand:VDQF 1 “register_operand” “<h_con>”) (parallel [(match_operand:SI 2 “immediate_operand”)]))) (match_operand:VDQF 3 “register_operand” “w”) (match_operand:VDQF 4 “register_operand” “0”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); return “fmla\t%0., %3., %1.[%2]”; } [(set_attr “type” “neon_fp_mla__scalar”)] )
;; Fused multiply-add by element where the lane source (operand 1) has the
;; <VSWAP_WIDTH> mode rather than the mode of the other VDQSF operands.  The
;; lane number is remapped with ENDIAN_LANE_N for the <VSWAP_WIDTH> mode
;; before printing; the addend (operand 4) is tied to the destination.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_fma4_elt_<vswap_width_name>” [(set (match_operand:VDQSF 0 “register_operand” “=w”) (fma:VDQSF (vec_duplicate:VDQSF (vec_select: (match_operand:<VSWAP_WIDTH> 1 “register_operand” “<h_con>”) (parallel [(match_operand:SI 2 “immediate_operand”)]))) (match_operand:VDQSF 3 “register_operand” “w”) (match_operand:VDQSF 4 “register_operand” “0”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, INTVAL (operands[2]))); return “fmla\t%0., %3., %1.[%2]”; } [(set_attr “type” “neon_fp_mla__scalar”)] )
;; V2DF fused multiply-add where one multiplicand is a DF scalar (operand 1)
;; broadcast with vec_duplicate.  The template reads the scalar as lane 0 of
;; its register (%1.2d[0]); the addend (operand 3) is tied to the destination.
(define_insn “*aarch64_fma4_elt_to_128df” [(set (match_operand:V2DF 0 “register_operand” “=w”) (fma:V2DF (vec_duplicate:V2DF (match_operand:DF 1 “register_operand” “w”)) (match_operand:V2DF 2 “register_operand” “w”) (match_operand:V2DF 3 “register_operand” “0”)))] “TARGET_SIMD” “fmla\t%0.2d, %2.2d, %1.2d[0]” [(set_attr “type” “neon_fp_mla_d_scalar_q”)] )
;; Scalar DF fused multiply-add where one multiplicand is lane operand 2 of a
;; V2DF register (operand 1).  The lane number is remapped with ENDIAN_LANE_N
;; for V2DFmode; the addend (operand 4) is tied to the destination.  The
;; template prints the by-element FMLA with .2d arrangements even though the
;; RTL result is DF.
(define_insn “*aarch64_fma4_elt_to_64v2df” [(set (match_operand:DF 0 “register_operand” “=w”) (fma:DF (vec_select:DF (match_operand:V2DF 1 “register_operand” “w”) (parallel [(match_operand:SI 2 “immediate_operand”)])) (match_operand:DF 3 “register_operand” “w”) (match_operand:DF 4 “register_operand” “0”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); return “fmla\t%0.2d, %3.2d, %1.2d[%2]”; } [(set_attr “type” “neon_fp_mla_d_scalar_q”)] )
;; Vector fused multiply-subtract: operand 1 * (-operand 2) + operand 3, with
;; the addend (operand 3) tied to the destination.  Emitted as FMLS.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “fnma4” [(set (match_operand:VDQF 0 “register_operand” “=w”) (fma:VDQF (match_operand:VDQF 1 “register_operand” “w”) (neg:VDQF (match_operand:VDQF 2 “register_operand” “w”)) (match_operand:VDQF 3 “register_operand” “0”)))] “TARGET_SIMD” “fmls\t%0., %1., %2.” [(set_attr “type” “neon_fp_mla_”)] )
;; Fused multiply-subtract by element: the negated operand 3 is multiplied by
;; the broadcast lane (operand 2) of operand 1 and added to operand 4, which
;; is tied to the destination.  The lane number is remapped with
;; ENDIAN_LANE_N before it is printed in the FMLS template.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_fnma4_elt” [(set (match_operand:VDQF 0 “register_operand” “=w”) (fma:VDQF (neg:VDQF (match_operand:VDQF 3 “register_operand” “w”)) (vec_duplicate:VDQF (vec_select: (match_operand:VDQF 1 “register_operand” “<h_con>”) (parallel [(match_operand:SI 2 “immediate_operand”)]))) (match_operand:VDQF 4 “register_operand” “0”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); return “fmls\t%0., %3., %1.[%2]”; } [(set_attr “type” “neon_fp_mla__scalar”)] )
;; Fused multiply-subtract by element where the lane source (operand 1) has
;; the <VSWAP_WIDTH> mode rather than the mode of the other VDQSF operands.
;; The lane number is remapped with ENDIAN_LANE_N for the <VSWAP_WIDTH> mode;
;; the addend (operand 4) is tied to the destination.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_fnma4_elt_<vswap_width_name>” [(set (match_operand:VDQSF 0 “register_operand” “=w”) (fma:VDQSF (neg:VDQSF (match_operand:VDQSF 3 “register_operand” “w”)) (vec_duplicate:VDQSF (vec_select: (match_operand:<VSWAP_WIDTH> 1 “register_operand” “<h_con>”) (parallel [(match_operand:SI 2 “immediate_operand”)]))) (match_operand:VDQSF 4 “register_operand” “0”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, INTVAL (operands[2]))); return “fmls\t%0., %3., %1.[%2]”; } [(set_attr “type” “neon_fp_mla__scalar”)] )
;; V2DF fused multiply-subtract where the negated vector operand 2 is
;; multiplied by a broadcast DF scalar (operand 1).  The template reads the
;; scalar as lane 0 (%1.2d[0]); the addend (operand 3) is tied to the
;; destination.
(define_insn “*aarch64_fnma4_elt_to_128df” [(set (match_operand:V2DF 0 “register_operand” “=w”) (fma:V2DF (neg:V2DF (match_operand:V2DF 2 “register_operand” “w”)) (vec_duplicate:V2DF (match_operand:DF 1 “register_operand” “w”)) (match_operand:V2DF 3 “register_operand” “0”)))] “TARGET_SIMD” “fmls\t%0.2d, %2.2d, %1.2d[0]” [(set_attr “type” “neon_fp_mla_d_scalar_q”)] )
;; Scalar DF fused multiply-subtract: lane operand 2 of V2DF operand 1 is
;; multiplied by the negated DF operand 3 and added to operand 4, which is
;; tied to the destination.  The lane number is remapped with ENDIAN_LANE_N
;; for V2DFmode before printing the by-element FMLS.
(define_insn “*aarch64_fnma4_elt_to_64v2df” [(set (match_operand:DF 0 “register_operand” “=w”) (fma:DF (vec_select:DF (match_operand:V2DF 1 “register_operand” “w”) (parallel [(match_operand:SI 2 “immediate_operand”)])) (neg:DF (match_operand:DF 3 “register_operand” “w”)) (match_operand:DF 4 “register_operand” “0”)))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); return “fmls\t%0.2d, %3.2d, %1.2d[%2]”; } [(set_attr “type” “neon_fp_mla_d_scalar_q”)] )
;; Vector versions of the floating-point frint patterns.
;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
;; One insn per FRINT unspec; the mnemonic is "frint" plus <frint_suffix>.
;; The definition must start on its own line: a ';' comment runs to end of
;; line, so sharing the line with the comment above would comment it out.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “<frint_pattern>2”
  [(set (match_operand:VDQF 0 “register_operand” “=w”)
        (unspec:VDQF [(match_operand:VDQF 1 “register_operand” “w”)]
                     FRINT))]
  “TARGET_SIMD”
  “frint<frint_suffix>\t%0., %1.”
  [(set_attr “type” “neon_fp_round_”)]
)
;; Vector versions of the fcvt standard patterns.
;; Expands to lbtrunc, lround, lceil, lfloor
;; Float-to-integer conversion (FIXUORS) of an FCVT-rounded VDQF input into
;; the <FCVT_TARGET> integer mode; the mnemonic is "fcvt" plus <frint_suffix>.
;; The definition must start on its own line: a ';' comment runs to end of
;; line, so sharing the line with the comment above would comment it out.
;; NOTE(review): the name contains a bare "VDQF:mode" and the template has
;; empty arrangement suffixes, so iterator attributes look stripped -- confirm.
(define_insn “l<fcvt_pattern><su_optab>VDQF:mode<fcvt_target>2”
  [(set (match_operand:<FCVT_TARGET> 0 “register_operand” “=w”)
        (FIXUORS:<FCVT_TARGET>
          (unspec:<FCVT_TARGET>
            [(match_operand:VDQF 1 “register_operand” “w”)]
            FCVT)))]
  “TARGET_SIMD”
  “fcvt<frint_suffix>\t%0., %1.”
  [(set_attr “type” “neon_fp_to_int_”)]
)
;; Expander whose RTL template is FIXUORS applied to an UNSPEC_FRINTZ of the
;; VDQF input.  The C body is empty, so the template is emitted as written.
;; NOTE(review): the name starts with a bare "VDQF:mode"; iterator attributes
;; look stripped from this text -- confirm.
(define_expand “VDQF:mode<fcvt_target>2” [(set (match_operand:<FCVT_TARGET> 0 “register_operand”) (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> [(match_operand:VDQF 1 “register_operand”)] UNSPEC_FRINTZ)))] “TARGET_SIMD” {})
;; Expander for the <fix_trunc_optab> names with the same RTL template:
;; FIXUORS applied to an UNSPEC_FRINTZ of the VDQF input.  The C body is
;; empty, so the template is emitted as written.
;; NOTE(review): the name contains a bare "VDQF:mode"; iterator attributes
;; look stripped from this text -- confirm.
(define_expand “<fix_trunc_optab>VDQF:mode<fcvt_target>2” [(set (match_operand:<FCVT_TARGET> 0 “register_operand”) (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET> [(match_operand:VDQF 1 “register_operand”)] UNSPEC_FRINTZ)))] “TARGET_SIMD” {})
;; Expander producing an UNSPEC_FRINTZ of the VDQF input in the same mode.
;; The C body is empty, so the template is emitted as written.
;; NOTE(review): the name contains a bare "VDQF:mode"; iterator attributes
;; look stripped from this text -- confirm.
(define_expand “ftruncVDQF:mode2” [(set (match_operand:VDQF 0 “register_operand”) (unspec:VDQF [(match_operand:VDQF 1 “register_operand”)] UNSPEC_FRINTZ))] “TARGET_SIMD” {})
;; Integer-to-float conversion (FLOATUORS) from the <FCVT_TARGET> integer
;; mode to VDQF; the mnemonic is <su_optab> followed by "cvtf".
;; NOTE(review): the name contains a bare "VDQF:mode"; iterator attributes
;; look stripped from this text -- confirm.
(define_insn “<fcvt_target>VDQF:mode2” [(set (match_operand:VDQF 0 “register_operand” “=w”) (FLOATUORS:VDQF (match_operand:<FCVT_TARGET> 1 “register_operand” “w”)))] “TARGET_SIMD” “<su_optab>cvtf\t%0., %1.” [(set_attr “type” “neon_int_to_fp_”)] )
;; Conversions between vectors of floats and doubles. ;; Contains a mix of patterns to match standard pattern names ;; and those for intrinsics.
;; Float widening operations.
;; Widen lanes 0 and 1 of a V4SF register to V2DF; emitted as FCVTL on the
;; .2s view of the source.
(define_insn “vec_unpacks_lo_v4sf” [(set (match_operand:V2DF 0 “register_operand” “=w”) (float_extend:V2DF (vec_select:V2SF (match_operand:V4SF 1 “register_operand” “w”) (parallel [(const_int 0) (const_int 1)]) )))] “TARGET_SIMD” “fcvtl\t%0.2d, %1.2s” [(set_attr “type” “neon_fp_cvt_widen_s”)] )
;; Widen a whole V2SF register to V2DF; emitted as FCVTL.
(define_insn “aarch64_float_extend_lo_v2df” [(set (match_operand:V2DF 0 “register_operand” “=w”) (float_extend:V2DF (match_operand:V2SF 1 “register_operand” “w”)))] “TARGET_SIMD” “fcvtl\t%0.2d, %1.2s” [(set_attr “type” “neon_fp_cvt_widen_s”)] )
;; Widen lanes 2 and 3 of a V4SF register to V2DF; emitted as FCVTL2 on the
;; .4s view of the source.
(define_insn “vec_unpacks_hi_v4sf” [(set (match_operand:V2DF 0 “register_operand” “=w”) (float_extend:V2DF (vec_select:V2SF (match_operand:V4SF 1 “register_operand” “w”) (parallel [(const_int 2) (const_int 3)]) )))] “TARGET_SIMD” “fcvtl2\t%0.2d, %1.4s” [(set_attr “type” “neon_fp_cvt_widen_s”)] )
;; Float narrowing operations.
;; Narrow a V2DF register to V2SF; emitted as FCVTN.
(define_insn “aarch64_float_truncate_lo_v2sf” [(set (match_operand:V2SF 0 “register_operand” “=w”) (float_truncate:V2SF (match_operand:V2DF 1 “register_operand” “w”)))] “TARGET_SIMD” “fcvtn\t%0.2s, %1.2d” [(set_attr “type” “neon_fp_cvt_narrow_d_q”)] )
;; Build a V4SF as the concatenation of an existing V2SF (operand 1, tied to
;; the destination with "0") and the narrowed V2DF operand 2; emitted as
;; FCVTN2.
(define_insn “aarch64_float_truncate_hi_v4sf” [(set (match_operand:V4SF 0 “register_operand” “=w”) (vec_concat:V4SF (match_operand:V2SF 1 “register_operand” “0”) (float_truncate:V2SF (match_operand:V2DF 2 “register_operand” “w”))))] “TARGET_SIMD” “fcvtn2\t%0.4s, %2.2d” [(set_attr “type” “neon_fp_cvt_narrow_d_q”)] )
;; Pack two V2DF vectors into one V4SF.  One input is narrowed into a V2SF
;; temporary with aarch64_float_truncate_lo_v2sf, then the other is narrowed
;; and concatenated with aarch64_float_truncate_hi_v4sf.  The lo/hi operand
;; indices are swapped when BYTES_BIG_ENDIAN.
(define_expand “vec_pack_trunc_v2df” [(set (match_operand:V4SF 0 “register_operand”) (vec_concat:V4SF (float_truncate:V2SF (match_operand:V2DF 1 “register_operand”)) (float_truncate:V2SF (match_operand:V2DF 2 “register_operand”)) ))] “TARGET_SIMD” { rtx tmp = gen_reg_rtx (V2SFmode); int lo = BYTES_BIG_ENDIAN ? 2 : 1; int hi = BYTES_BIG_ENDIAN ? 1 : 2;
emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo])); emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0], tmp, operands[hi])); DONE;
} )
;; Pack two DF scalars into one V2SF vector.  The two doubles are first
;; gathered into a 128-bit temporary with the v2df move_lo_quad/move_hi_quad
;; expanders (operand order swapped when BYTES_BIG_ENDIAN), and the temporary
;; is then narrowed with aarch64_float_truncate_lo_v2sf.
(define_expand “vec_pack_trunc_df”
  [(set (match_operand:V2SF 0 “register_operand”)
        (vec_concat:V2SF
          (float_truncate:SF (match_operand:DF 1 “register_operand”))
          (float_truncate:SF (match_operand:DF 2 “register_operand”))))]
  “TARGET_SIMD”
  {
    /* The temporary holds the two doubles *before* narrowing: it is the
       destination of the v2df quad moves and the V2DF source operand of
       aarch64_float_truncate_lo_v2sf, so it must be V2DFmode.  Creating it
       in V2SFmode produces insns whose operand modes do not match.  */
    rtx tmp = gen_reg_rtx (V2DFmode);
    int lo = BYTES_BIG_ENDIAN ? 2 : 1;
    int hi = BYTES_BIG_ENDIAN ? 1 : 2;

    emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
    emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
    emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
    DONE;
  }
)
;; FP Max/Min ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An ;; expression like: ;; a = (b < c) ? b : c; ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled ;; either explicitly or indirectly via -ffast-math. ;; ;; MIN_EXPR and MAX_EXPR eventually map to ‘smin’ and ‘smax’ in RTL. ;; The ‘smax’ and ‘smin’ RTL standard pattern names do not specify which ;; operand will be returned when both operands are zero (i.e. they may not ;; honour signed zeroes), or when either operand is NaN. Therefore GCC ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring ;; NaNs.
;; Element-wise floating-point max/min for the FMAXMIN RTL codes over the
;; VDQF modes.
;; NOTE(review): the name is just "3" and the mnemonic reads "fnm", so
;; iterator attributes look stripped from this text -- confirm.
(define_insn “3” [(set (match_operand:VDQF 0 “register_operand” “=w”) (FMAXMIN:VDQF (match_operand:VDQF 1 “register_operand” “w”) (match_operand:VDQF 2 “register_operand” “w”)))] “TARGET_SIMD” “fnm\t%0., %1., %2.” [(set_attr “type” “neon_fp_minmax_”)] )
;; Element-wise floating-point max/min expressed as an FMAXMIN_UNS unspec of
;; two VDQF operands; the mnemonic comes from <maxmin_uns_op>.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “<maxmin_uns>3” [(set (match_operand:VDQF 0 “register_operand” “=w”) (unspec:VDQF [(match_operand:VDQF 1 “register_operand” “w”) (match_operand:VDQF 2 “register_operand” “w”)] FMAXMIN_UNS))] “TARGET_SIMD” “<maxmin_uns_op>\t%0., %1., %2.” [(set_attr “type” “neon_fp_minmax_”)] )
;; ‘across lanes’ add.
;; Integer (VDQ_I) add-across-lanes to a scalar.  Emits
;; aarch64_reduc_plus_internal into a scratch vector register, then extracts
;; lane ENDIAN_LANE_N (mode, 0) of the scratch into operand 0 with
;; aarch64_get_lane.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_expand “reduc_plus_scal_” [(match_operand: 0 “register_operand” “=w”) (unspec:VDQ_I [(match_operand:VDQ_I 1 “register_operand” “w”)] UNSPEC_ADDV)] “TARGET_SIMD” { rtx elt = GEN_INT (ENDIAN_LANE_N (mode, 0)); rtx scratch = gen_reg_rtx (mode); emit_insn (gen_aarch64_reduc_plus_internal (scratch, operands[1])); emit_insn (gen_aarch64_get_lane (operands[0], scratch, elt)); DONE; } )
;; Two-element floating-point (V2F) add-across-lanes to a scalar.  Emits
;; aarch64_reduc_plus_internal into a scratch vector register, then extracts
;; lane ENDIAN_LANE_N (mode, 0) of the scratch into operand 0 with
;; aarch64_get_lane.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_expand “reduc_plus_scal_” [(match_operand: 0 “register_operand” “=w”) (match_operand:V2F 1 “register_operand” “w”)] “TARGET_SIMD” { rtx elt = GEN_INT (ENDIAN_LANE_N (mode, 0)); rtx scratch = gen_reg_rtx (mode); emit_insn (gen_aarch64_reduc_plus_internal (scratch, operands[1])); emit_insn (gen_aarch64_get_lane (operands[0], scratch, elt)); DONE; } )
;; Integer add-across-lanes for the VDQV modes, expressed as an UNSPEC_ADDV
;; of the source vector.
;; The mnemonic suffix comes from the iterator attribute <VDQV:vp>.  The
;; reference must keep its angle brackets, as the other iterator-qualified
;; references in this file do (e.g. <VDQF:mode>); without them the literal
;; text "addVDQV:vp" would be emitted as the instruction name.
;; NOTE(review): the name and the operand suffixes also look like they have
;; lost iterator attributes (e.g. <mode>) -- confirm.
(define_insn “aarch64_reduc_plus_internal”
  [(set (match_operand:VDQV 0 “register_operand” “=w”)
        (unspec:VDQV [(match_operand:VDQV 1 “register_operand” “w”)]
                     UNSPEC_ADDV))]
  “TARGET_SIMD”
  “add<VDQV:vp>\t%0, %1.”
  [(set_attr “type” “neon_reduc_add”)]
)
;; V2SI add-across-lanes (UNSPEC_ADDV); emitted as a pairwise ADDP with the
;; source register given as both inputs.
(define_insn “aarch64_reduc_plus_internalv2si” [(set (match_operand:V2SI 0 “register_operand” “=w”) (unspec:V2SI [(match_operand:V2SI 1 “register_operand” “w”)] UNSPEC_ADDV))] “TARGET_SIMD” “addp\t%0.2s, %1.2s, %1.2s” [(set_attr “type” “neon_reduc_add”)] )
;; Two-element floating-point (V2F) add-across-lanes, expressed as an
;; UNSPEC_FADDV of the source vector; emitted as FADDP.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “aarch64_reduc_plus_internal” [(set (match_operand:V2F 0 “register_operand” “=w”) (unspec:V2F [(match_operand:V2F 1 “register_operand” “w”)] UNSPEC_FADDV))] “TARGET_SIMD” “faddp\t%0, %1.” [(set_attr “type” “neon_fp_reduc_add_”)] )
;; One pairwise floating-point add step on V4SF (UNSPEC_FADDV); emitted as
;; FADDP with the source register given as both inputs.
(define_insn “aarch64_addpv4sf” [(set (match_operand:V4SF 0 “register_operand” “=w”) (unspec:V4SF [(match_operand:V4SF 1 “register_operand” “w”)] UNSPEC_FADDV))] “TARGET_SIMD” “faddp\t%0.4s, %1.4s, %1.4s” [(set_attr “type” “neon_fp_reduc_add_s_q”)] )
;; V4SF add-across-lanes to an SF scalar.  Applies aarch64_addpv4sf twice
;; (source -> scratch, then scratch -> scratch) and extracts lane
;; ENDIAN_LANE_N (V4SFmode, 0) of the scratch into operand 0.
(define_expand “reduc_plus_scal_v4sf” [(set (match_operand:SF 0 “register_operand”) (unspec:V4SF [(match_operand:V4SF 1 “register_operand”)] UNSPEC_FADDV))] “TARGET_SIMD” { rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0)); rtx scratch = gen_reg_rtx (V4SFmode); emit_insn (gen_aarch64_addpv4sf (scratch, operands[1])); emit_insn (gen_aarch64_addpv4sf (scratch, scratch)); emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt)); DONE; })
;; Per-element clrsb (count of redundant leading sign bits) over the
;; VDQ_BHSI modes; emitted as CLS.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “clrsb2” [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w”) (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 “register_operand” “w”)))] “TARGET_SIMD” “cls\t%0., %1.” [(set_attr “type” “neon_cls”)] )
;; Per-element count-leading-zeros over the VDQ_BHSI modes; emitted as CLZ.
;; The scheduling type attribute is "neon_cls", the same as for clrsb.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “clz2” [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w”) (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 “register_operand” “w”)))] “TARGET_SIMD” “clz\t%0., %1.” [(set_attr “type” “neon_cls”)] )
;; Per-element population count over the VB modes; emitted as CNT.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “popcount2” [(set (match_operand:VB 0 “register_operand” “=w”) (popcount:VB (match_operand:VB 1 “register_operand” “w”)))] “TARGET_SIMD” “cnt\t%0., %1.” [(set_attr “type” “neon_cnt”)] )
;; ‘across lanes’ max and min ops.
;; Template for outputting a scalar, so we can create builtins which can be
;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code.  (This is FP smax/smin).
;; Reduces the VDQF source into a scratch vector with the
;; aarch64_reduc_<maxmin_uns>_internal insn, then extracts lane
;; ENDIAN_LANE_N (mode, 0) of the scratch into operand 0.
;; The definition must start on its own line (a ';' comment runs to end of
;; line), and the generator call must spell the underscore after "reduc" so
;; that it matches the aarch64_reduc_<maxmin_uns>_internal insn name.
;; NOTE(review): this expander's own name reads "reduc<maxmin_uns>scal"
;; while the integer sibling is "reduc_<maxmin_uns>scal"; underscores and
;; iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_expand "reduc<maxmin_uns>scal"
  [(match_operand: 0 “register_operand”)
   (unspec:VDQF [(match_operand:VDQF 1 “register_operand”)]
                FMAXMINV)]
  “TARGET_SIMD”
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (mode, 0));
    rtx scratch = gen_reg_rtx (mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal (scratch,
                                                        operands[1]));
    emit_insn (gen_aarch64_get_lane (operands[0], scratch, elt));
    DONE;
  }
)
;; Likewise for integer cases, signed and unsigned.
;; Reduces the VDQ_BHSI source into a scratch vector with the
;; aarch64_reduc_<maxmin_uns>_internal insn, then extracts lane
;; ENDIAN_LANE_N (mode, 0) of the scratch into operand 0.
;; The definition must start on its own line: a ';' comment runs to end of
;; line, so sharing the line with the comment above would comment it out.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_expand “reduc_<maxmin_uns>scal”
  [(match_operand: 0 “register_operand”)
   (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 “register_operand”)]
                    MAXMINV)]
  “TARGET_SIMD”
  {
    rtx elt = GEN_INT (ENDIAN_LANE_N (mode, 0));
    rtx scratch = gen_reg_rtx (mode);
    emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal (scratch,
                                                        operands[1]));
    emit_insn (gen_aarch64_get_lane (operands[0], scratch, elt));
    DONE;
  }
)
;; Integer max/min across lanes for the VDQV_S modes, expressed as a MAXMINV
;; unspec; the mnemonic is <maxmin_uns_op> followed by "v".
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “aarch64_reduc_<maxmin_uns>_internal” [(set (match_operand:VDQV_S 0 “register_operand” “=w”) (unspec:VDQV_S [(match_operand:VDQV_S 1 “register_operand” “w”)] MAXMINV))] “TARGET_SIMD” “<maxmin_uns_op>v\t%0, %1.” [(set_attr “type” “neon_reduc_minmax”)] )
;; V2SI max/min across lanes (MAXMINV unspec); emitted as the pairwise form
;; (<maxmin_uns_op> followed by "p") with the source register given as both
;; inputs.
(define_insn “aarch64_reduc_<maxmin_uns>_internalv2si” [(set (match_operand:V2SI 0 “register_operand” “=w”) (unspec:V2SI [(match_operand:V2SI 1 “register_operand” “w”)] MAXMINV))] “TARGET_SIMD” “<maxmin_uns_op>p\t%0.2s, %1.2s, %1.2s” [(set_attr “type” “neon_reduc_minmax”)] )
;; Floating-point max/min across lanes for the VDQF modes, expressed as an
;; FMAXMINV unspec; the mnemonic comes from <maxmin_uns_op>.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “aarch64_reduc_<maxmin_uns>_internal” [(set (match_operand:VDQF 0 “register_operand” “=w”) (unspec:VDQF [(match_operand:VDQF 1 “register_operand” “w”)] FMAXMINV))] “TARGET_SIMD” “<maxmin_uns_op>\t%0, %1.” [(set_attr “type” “neon_fp_reduc_minmax_”)] )
;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
;; allocation.
;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
;; to select.
;;
;; Thus our BSL is of the form:
;;   op0 = bsl (mask, op2, op3)
;; We can use any of:
;;
;;   if (op0 = mask)
;;     bsl mask, op2, op3
;;   if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
;;     bit op0, op2, mask
;;   if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
;;     bif op0, op3, mask
;;
;; This pattern is expanded to by the aarch64_simd_bsl expander.
;; Some forms of straight-line code may generate the equivalent form
;; in *aarch64_simd_bsl_alt.
;; Bitwise select written as ((op3 ^ op2) & op1) ^ op3: bits of operand 2
;; where the mask (operand 1) is set, bits of operand 3 elsewhere.
;; The three constraint alternatives tie the destination to a different
;; input each:
;;   alt 0: destination = mask (operand 1)  -> BSL
;;   alt 1: destination = operand 3         -> BIT (insert op2 where mask set)
;;   alt 2: destination = operand 2         -> BIF (insert op3 where mask clear)
;; An "@" output template is a series of templates, one per alternative and
;; each on its own line, so the three instructions must be separated by
;; newlines rather than share a line.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “aarch64_simd_bsl_internal”
  [(set (match_operand:VSDQ_I_DI 0 “register_operand” “=w,w,w”)
        (xor:VSDQ_I_DI
          (and:VSDQ_I_DI
            (xor:VSDQ_I_DI
              (match_operand:<V_cmp_result> 3 “register_operand” “w,0,w”)
              (match_operand:VSDQ_I_DI 2 “register_operand” “w,w,0”))
            (match_operand:VSDQ_I_DI 1 “register_operand” “0,w,w”))
          (match_dup:<V_cmp_result> 3)
        ))]
  “TARGET_SIMD”
  “@
   bsl\t%0., %2., %3.
   bit\t%0., %2., %1.
   bif\t%0., %3., %1.”
  [(set_attr “type” “neon_bsl”)]
)
;; We need this form in addition to the above pattern to match the case ;; when combine tries merging three insns such that the second operand of ;; the outer XOR matches the second operand of the inner XOR rather than ;; the first. The two are equivalent but since recog doesn't try all ;; permutations of commutative operations, we have to have a separate pattern.
;; Alternative spelling of the bitwise select, ((op3 ^ op2) & op1) ^ op2:
;; bits of operand 3 where the mask (operand 1) is set, bits of operand 2
;; elsewhere.  The outer XOR repeats operand 2 instead of operand 3.
;; The three constraint alternatives tie the destination to a different
;; input each:
;;   alt 0: destination = mask (operand 1)  -> BSL
;;   alt 1: destination = operand 2         -> BIT (insert op3 where mask set)
;;   alt 2: destination = operand 3         -> BIF (insert op2 where mask clear)
;; An "@" output template is a series of templates, one per alternative and
;; each on its own line, so the three instructions must be separated by
;; newlines rather than share a line.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_insn “*aarch64_simd_bsl_alt”
  [(set (match_operand:VSDQ_I_DI 0 “register_operand” “=w,w,w”)
        (xor:VSDQ_I_DI
          (and:VSDQ_I_DI
            (xor:VSDQ_I_DI
              (match_operand:VSDQ_I_DI 3 “register_operand” “w,w,0”)
              (match_operand:VSDQ_I_DI 2 “register_operand” “w,0,w”))
            (match_operand:VSDQ_I_DI 1 “register_operand” “0,w,w”))
          (match_dup:VSDQ_I_DI 2)))]
  “TARGET_SIMD”
  “@
   bsl\t%0., %3., %2.
   bit\t%0., %3., %1.
   bif\t%0., %2., %1.”
  [(set_attr “type” “neon_bsl”)]
)
;; Expander for bitwise select on any VALLDIF mode; operand 1 is the mask in
;; the <V_cmp_result> mode.  For floating-point modes operands 2 and 3 are
;; re-viewed in <V_cmp_result> mode with gen_lowpart and the result is built
;; in a fresh <V_cmp_result> temporary, which is then moved back into
;; operand 0 through gen_lowpart.  Otherwise the internal insn writes
;; operand 0 directly.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_expand “aarch64_simd_bsl” [(match_operand:VALLDIF 0 “register_operand”) (match_operand:<V_cmp_result> 1 “register_operand”) (match_operand:VALLDIF 2 “register_operand”) (match_operand:VALLDIF 3 “register_operand”)] “TARGET_SIMD” { /* We can't alias operands together if they have different modes. */ rtx tmp = operands[0]; if (FLOAT_MODE_P (mode)) { operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]); operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]); tmp = gen_reg_rtx (<V_cmp_result>mode); } operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]); emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp, operands[1], operands[2], operands[3])); if (tmp != operands[0]) emit_move_insn (operands[0], gen_lowpart (mode, tmp));
DONE; })
;; Integer vector conditional select:
;;   operand 0 = (operand 4 CMP operand 5) ? operand 1 : operand 2
;; where CMP is the code of operator 3.  A lane mask is produced by one of
;; the cm* comparison insns; it is either moved straight into operand 0 (for
;; the "-1 : 0" selection) or used as the mask of aarch64_simd_bsl.
;; The C comments below need proper "/*" and "*/" delimiters: with a
;; terminator missing, the operand-swap code and the "switch (code)" header
;; end up inside a comment.
;; NOTE(review): the pattern name looks like it has lost its <mode> suffixes;
;; it is left unchanged here because other patterns call it by this name.
(define_expand “aarch64_vcond_internal”
  [(set (match_operand:VDQ_I 0 “register_operand”)
        (if_then_else:VDQ_I
          (match_operator 3 “comparison_operator”
            [(match_operand:VDQ_I 4 “register_operand”)
             (match_operand:VDQ_I 5 “nonmemory_operand”)])
          (match_operand:VDQ_I 1 “nonmemory_operand”)
          (match_operand:VDQ_I 2 “nonmemory_operand”)))]
  “TARGET_SIMD”
{
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx mask = gen_reg_rtx (<MODE>mode);
  enum rtx_code code = GET_CODE (operands[3]);

  /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
     and desirable for other comparisons if it results in FOO ? -1 : 0
     (this allows direct use of the comparison result without a bsl).  */
  if (code == NE
      || (code != EQ
          && op1 == CONST0_RTX (<V_cmp_result>mode)
          && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
    {
      op1 = operands[2];
      op2 = operands[1];
      /* The selected values were swapped, so invert the comparison.  */
      switch (code)
        {
        case LE: code = GT; break;
        case LT: code = GE; break;
        case GE: code = LT; break;
        case GT: code = LE; break;
        /* No case EQ.  */
        case NE: code = EQ; break;
        case LTU: code = GEU; break;
        case LEU: code = GTU; break;
        case GTU: code = LEU; break;
        case GEU: code = LTU; break;
        default: gcc_unreachable ();
        }
    }

  /* Make sure we can handle the last operand.  */
  switch (code)
    {
    case NE:
      /* Normalized to EQ above.  */
      gcc_unreachable ();

    case LE:
    case LT:
    case GE:
    case GT:
    case EQ:
      /* These instructions have a form taking an immediate zero.  */
      if (operands[5] == CONST0_RTX (<MODE>mode))
        break;
      /* Fall through, as may need to load into register.  */
    default:
      if (!REG_P (operands[5]))
        operands[5] = force_reg (<MODE>mode, operands[5]);
      break;
    }

  /* Emit the comparison that builds the mask.  The unsigned "less" forms
     are emitted as the matching "greater" insn with the operands swapped.  */
  switch (code)
    {
    case LT:
      emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
      break;

    case GE:
      emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
      break;

    case LE:
      emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
      break;

    case GT:
      emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
      break;

    case LTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
      break;

    case GEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
      break;

    case LEU:
      emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
      break;

    case GTU:
      emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
      break;

    /* NE has been normalized to EQ above.  */
    case EQ:
      emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
      break;

    default:
      gcc_unreachable ();
    }

  /* If we have (a = (b CMP c) ? -1 : 0);
     Then we can simply move the generated mask.  */
  if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
      && op2 == CONST0_RTX (<V_cmp_result>mode))
    emit_move_insn (operands[0], mask);
  else
    {
      if (!REG_P (op1))
        op1 = force_reg (<MODE>mode, op1);
      if (!REG_P (op2))
        op2 = force_reg (<MODE>mode, op2);
      emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
                                             op1, op2));
    }

  DONE;
})
;; Conditional select driven by a floating-point comparison:
;;   operand 0 = (operand 4 CMP operand 5) ? operand 1 : operand 2
;; where operands 4/5 are VDQF and operands 0/1/2 are VDQF_COND.  A lane mask
;; is built from the FCM-style comparison insns (cmge/cmgt/cmeq and the
;; compare-with-zero forms cmlt/cmle); unordered-aware codes are handled by
;; emitting the opposite comparison and swapping the two selected values.
;; The C comments below need proper "/*" and "*/" delimiters: with a
;; terminator missing, case labels and the zero-form handling end up inside
;; a comment.  Iterator references are written <VDQF:...> as in the
;; surviving uses further down.
;; NOTE(review): the pattern name still contains a bare "VDQF:mode"; it is
;; left unchanged here because other patterns call it by this name.
(define_expand “aarch64_vcond_internal<VDQF_COND:mode>VDQF:mode”
  [(set (match_operand:VDQF_COND 0 “register_operand”)
        (if_then_else:VDQF
          (match_operator 3 “comparison_operator”
            [(match_operand:VDQF 4 “register_operand”)
             (match_operand:VDQF 5 “nonmemory_operand”)])
          (match_operand:VDQF_COND 1 “nonmemory_operand”)
          (match_operand:VDQF_COND 2 “nonmemory_operand”)))]
  “TARGET_SIMD”
{
  int inverse = 0;
  int use_zero_form = 0;
  int swap_bsl_operands = 0;
  rtx op1 = operands[1];
  rtx op2 = operands[2];
  rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
  rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);

  rtx (*base_comparison) (rtx, rtx, rtx);
  rtx (*complementary_comparison) (rtx, rtx, rtx);

  /* Decide whether the compare-with-zero form can be used; otherwise make
     sure the second comparison operand is in a register.  */
  switch (GET_CODE (operands[3]))
    {
    case GE:
    case GT:
    case LE:
    case LT:
    case EQ:
      if (operands[5] == CONST0_RTX (<VDQF:MODE>mode))
        {
          use_zero_form = 1;
          break;
        }
      /* Fall through.  */
    default:
      if (!REG_P (operands[5]))
        operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
    }

  /* Pick the comparison generators.  INVERSE marks the "less" codes, which
     are emitted with the operands swapped.  */
  switch (GET_CODE (operands[3]))
    {
    case LT:
    case UNLT:
      inverse = 1;
      /* Fall through.  */
    case GE:
    case UNGE:
    case ORDERED:
    case UNORDERED:
      base_comparison = gen_aarch64_cmge<VDQF:mode>;
      complementary_comparison = gen_aarch64_cmgt<VDQF:mode>;
      break;
    case LE:
    case UNLE:
      inverse = 1;
      /* Fall through.  */
    case GT:
    case UNGT:
      base_comparison = gen_aarch64_cmgt<VDQF:mode>;
      complementary_comparison = gen_aarch64_cmge<VDQF:mode>;
      break;
    case EQ:
    case NE:
    case UNEQ:
      base_comparison = gen_aarch64_cmeq<VDQF:mode>;
      complementary_comparison = gen_aarch64_cmeq<VDQF:mode>;
      break;
    default:
      gcc_unreachable ();
    }

  switch (GET_CODE (operands[3]))
    {
    case LT:
    case LE:
    case GT:
    case GE:
    case EQ:
      /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
         As a LT b <=> b GT a && a LE b <=> b GE a.  Our transformations are:
         a GE b -> a GE b
         a GT b -> a GT b
         a LE b -> b GE a
         a LT b -> b GT a
         a EQ b -> a EQ b
         Note that there also exist direct comparison against 0 forms,
         so catch those as a special case.  */
      if (use_zero_form)
        {
          inverse = 0;
          switch (GET_CODE (operands[3]))
            {
            case LT:
              base_comparison = gen_aarch64_cmlt<VDQF:mode>;
              break;
            case LE:
              base_comparison = gen_aarch64_cmle<VDQF:mode>;
              break;
            default:
              /* Do nothing, other zero form cases already have the correct
                 base_comparison.  */
              break;
            }
        }

      if (!inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complementary_comparison (mask, operands[5], operands[4]));
      break;
    case UNLT:
    case UNLE:
    case UNGT:
    case UNGE:
    case NE:
      /* FCM returns false for lanes which are unordered, so if we use
         the inverse of the comparison we actually want to emit, then
         swap the operands to BSL, we will end up with the correct result.
         Note that a NE NaN and NaN NE b are true for all a, b.

         Our transformations are:
         a UNGE b -> !(b GT a)
         a UNGT b -> !(b GE a)
         a UNLE b -> !(a GT b)
         a UNLT b -> !(a GE b)
         a NE b -> !(a EQ b)  */
      if (inverse)
        emit_insn (base_comparison (mask, operands[4], operands[5]));
      else
        emit_insn (complementary_comparison (mask, operands[5], operands[4]));

      swap_bsl_operands = 1;
      break;
    case UNEQ:
      /* We check (a > b || b > a).  Combining these comparisons gives us
         true iff (a != b && a ORDERED b); swapping the operands to BSL
         will then give us (a == b || a UNORDERED b) as intended.  */
      emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
      emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
      emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
      swap_bsl_operands = 1;
      break;
    case UNORDERED:
      /* Operands are ORDERED iff (a > b || b >= a).
         Swapping the operands to BSL will give the UNORDERED case.  */
      swap_bsl_operands = 1;
      /* Fall through.  */
    case ORDERED:
      emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
      emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
      emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
      break;
    default:
      gcc_unreachable ();
    }

  if (swap_bsl_operands)
    {
      op1 = operands[2];
      op2 = operands[1];
    }

  /* If we have (a = (b CMP c) ? -1 : 0);
     Then we can simply move the generated mask.  */
  if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
      && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
    emit_move_insn (operands[0], mask);
  else
    {
      if (!REG_P (op1))
        op1 = force_reg (<VDQF_COND:MODE>mode, op1);
      if (!REG_P (op2))
        op2 = force_reg (<VDQF_COND:MODE>mode, op2);
      emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
                                                       op1, op2));
    }

  DONE;
})
;; vcond expander for the VALL modes: forwards all six operands unchanged to
;; the aarch64_vcond_internal expander.
;; NOTE(review): iterator attributes (e.g. <mode>) look stripped from this text -- confirm.
(define_expand “vcond” [(set (match_operand:VALL 0 “register_operand”) (if_then_else:VALL (match_operator 3 “comparison_operator” [(match_operand:VALL 4 “register_operand”) (match_operand:VALL 5 “nonmemory_operand”)]) (match_operand:VALL 1 “nonmemory_operand”) (match_operand:VALL 2 “nonmemory_operand”)))] “TARGET_SIMD” { emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], operands[2], operands[3], operands[4], operands[5])); DONE; })
;; Conditional select whose result and arms (operands 0, 1, 2) are in the
;; <V_cmp_result> mode while the compared operands 4 and 5 are VDQF.
;; Forwards every operand to gen_aarch64_vcond_internal<v_cmp_result>.
(define_expand “vcond<v_cmp_result>” [(set (match_operand:<V_cmp_result> 0 “register_operand”) (if_then_else:<V_cmp_result> (match_operator 3 “comparison_operator” [(match_operand:VDQF 4 “register_operand”) (match_operand:VDQF 5 “nonmemory_operand”)]) (match_operand:<V_cmp_result> 1 “nonmemory_operand”) (match_operand:<V_cmp_result> 2 “nonmemory_operand”)))] “TARGET_SIMD” { emit_insn (gen_aarch64_vcond_internal<v_cmp_result> ( operands[0], operands[1], operands[2], operands[3], operands[4], operands[5])); DONE; })
;; Conditional select over the integer VDQ_I modes under the "vcondu" name.
;; As written it has the same operand layout as "vcond" and forwards all six
;; operands to gen_aarch64_vcond_internal.
(define_expand “vcondu” [(set (match_operand:VDQ_I 0 “register_operand”) (if_then_else:VDQ_I (match_operator 3 “comparison_operator” [(match_operand:VDQ_I 4 “register_operand”) (match_operand:VDQ_I 5 “nonmemory_operand”)]) (match_operand:VDQ_I 1 “nonmemory_operand”) (match_operand:VDQ_I 2 “nonmemory_operand”)))] “TARGET_SIMD” { emit_insn (gen_aarch64_vcond_internal (operands[0], operands[1], operands[2], operands[3], operands[4], operands[5])); DONE; })
;; Patterns for AArch64 SIMD Intrinsics.
;; Lane extraction with sign extension to general purpose register.
;; Operand 1 is the VDQQH source vector and operand 2 the immediate lane
;; index, which is remapped with ENDIAN_LANE_N just before the smov is
;; printed.  The GPI destination is written with a general-register ("=r")
;; constraint.
(define_insn “*aarch64_get_lane_extendGPI:modeVDQQH:mode” [(set (match_operand:GPI 0 “register_operand” “=r”) (sign_extend:GPI (vec_select: (match_operand:VDQQH 1 “register_operand” “w”) (parallel [(match_operand:SI 2 “immediate_operand” “i”)]))))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); return “smov\t%GPI:w0, %1.VDQQH:Vetype[%2]”; } [(set_attr “type” “neon_to_gp”)] )
;; Lane extraction with zero extension to an SImode general register.
;; The lane index (operand 2) is remapped with ENDIAN_LANE_N before the umov
;; template, which names the destination through %w0, is returned.
(define_insn “*aarch64_get_lane_zero_extendsi” [(set (match_operand:SI 0 “register_operand” “=r”) (zero_extend:SI (vec_select: (match_operand:VDQQH 1 “register_operand” “w”) (parallel [(match_operand:SI 2 “immediate_operand” “i”)]))))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); return “umov\t%w0, %1.[%2]”; } [(set_attr “type” “neon_to_gp”)] )
;; Lane extraction of a value, neither sign nor zero extension
;; is guaranteed so upper bits should be considered undefined.
;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Three alternatives, selected by the destination constraint:
;;   0 ("r")   - umov to a general register,
;;   1 ("w")   - dup to a SIMD register,
;;   2 ("Utv") - st1 single-lane store to memory.
;; Any other alternative number is treated as unreachable.
(define_insn “aarch64_get_lane” [(set (match_operand: 0 “aarch64_simd_nonimmediate_operand” “=r, w, Utv”) (vec_select: (match_operand:VALL 1 “register_operand” “w, w, w”) (parallel [(match_operand:SI 2 “immediate_operand” “i, i, i”)])))] “TARGET_SIMD” { operands[2] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[2]))); switch (which_alternative) { case 0: return “umov\t%0, %1.[%2]”; case 1: return “dup\t%0, %1.[%2]”; case 2: return “st1\t{%1.}[%2], %0”; default: gcc_unreachable (); } } [(set_attr “type” “neon_to_gp, neon_dup, neon_store1_one_lane”)] )
;; In this insn, operand 1 should be low, and operand 2 the high part of the ;; dest vector.
;; Little-endian only (!BYTES_BIG_ENDIAN): concatenate register operand 1
;; with the zero constant operand 2 ("Dz").  A single 8b-arrangement mov of
;; operand 1 is emitted; no separate instruction materialises the zero half.
;; The destination is marked earlyclobber ("=&w").
(define_insn “*aarch64_combinez” [(set (match_operand: 0 “register_operand” “=&w”) (vec_concat: (match_operand:VD_BHSI 1 “register_operand” “w”) (match_operand:VD_BHSI 2 “aarch64_simd_imm_zero” “Dz”)))] “TARGET_SIMD && !BYTES_BIG_ENDIAN” “mov\t%0.8b, %1.8b” [(set_attr “type” “neon_move”)] )
;; Big-endian counterpart of the zero-combine pattern: the vec_concat lists
;; the zero constant (operand 2) first and the register (operand 1) second,
;; and the condition requires BYTES_BIG_ENDIAN.  The emitted mov is the same.
(define_insn “*aarch64_combinez_be” [(set (match_operand: 0 “register_operand” “=&w”) (vec_concat: (match_operand:VD_BHSI 2 “aarch64_simd_imm_zero” “Dz”) (match_operand:VD_BHSI 1 “register_operand” “w”)))] “TARGET_SIMD && BYTES_BIG_ENDIAN” “mov\t%0.8b, %1.8b” [(set_attr “type” “neon_move”)] )
;; Combine two VDC vectors (operands 1 and 2) into operand 0 by emitting
;; aarch64_combine_internal.  On big-endian targets the two inputs are
;; handed over in the opposite order.
(define_expand “aarch64_combine”
  [(match_operand: 0 “register_operand”)
   (match_operand:VDC 1 “register_operand”)
   (match_operand:VDC 2 “register_operand”)]
  “TARGET_SIMD”
{
  /* Choose the argument order once, up front: big-endian exchanges the
     two source operands.  */
  rtx first = BYTES_BIG_ENDIAN ? operands[2] : operands[1];
  rtx second = BYTES_BIG_ENDIAN ? operands[1] : operands[2];

  emit_insn (gen_aarch64_combine_internal (operands[0], first, second));
  DONE;
}
)
;; vec_concat of two VDC registers into an earlyclobber destination.  The
;; insn has no direct assembly form ("#"); once reload has completed it is
;; split by aarch64_split_simd_combine.  On big-endian targets operands 1
;; and 2 are passed to the splitter in exchanged order.
(define_insn_and_split “aarch64_combine_internal”
  [(set (match_operand: 0 “register_operand” “=&w”)
        (vec_concat: (match_operand:VDC 1 “register_operand” “w”)
                     (match_operand:VDC 2 “register_operand” “w”)))]
  “TARGET_SIMD”
  “#”
  “&& reload_completed”
  [(const_int 0)]
{
  /* Pick the two sources in the order the splitter expects.  */
  rtx first = operands[BYTES_BIG_ENDIAN ? 2 : 1];
  rtx second = operands[BYTES_BIG_ENDIAN ? 1 : 2];

  aarch64_split_simd_combine (operands[0], first, second);
  DONE;
}
  [(set_attr “type” “multiple”)]
)
;; Build operand 0 from two VDC halves with two emitted insns: operand 1 is
;; written through gen_move_lo_quad_ and operand 2 through
;; gen_move_hi_quad_.  Unlike aarch64_combine, no endian-dependent operand
;; swap is performed here.
(define_expand “aarch64_simd_combine” [(match_operand: 0 “register_operand”) (match_operand:VDC 1 “register_operand”) (match_operand:VDC 2 “register_operand”)] “TARGET_SIMD” { emit_insn (gen_move_lo_quad_ (operands[0], operands[1])); emit_insn (gen_move_hi_quad_ (operands[0], operands[2])); DONE; } [(set_attr “type” “multiple”)] )
;; [su]addl / [su]subl: widening add/subtract (long), including the
;; high-half "2" forms.
;; High-half widening add/subtract.  Operand 3 is a vect_par_cnst_hi_half
;; selector applied to both VQW sources (the second use is a match_dup);
;; each selected half is extended with ANY_EXTEND before the ADDSUB
;; operation.  Emits the "2" form of the instruction.
(define_insn “aarch64_<ANY_EXTEND:su>ADDSUB:optabl_hi_internal” [(set (match_operand: 0 “register_operand” “=w”) (ADDSUB: (ANY_EXTEND: (vec_select: (match_operand:VQW 1 “register_operand” “w”) (match_operand:VQW 3 “vect_par_cnst_hi_half” ""))) (ANY_EXTEND: (vec_select: (match_operand:VQW 2 “register_operand” “w”) (match_dup 3)))))] “TARGET_SIMD” “<ANY_EXTEND:su>ADDSUB:optabl2\t%0., %1., %2.” [(set_attr “type” “neon_ADDSUB:optab_long”)] )
;; Low-half widening add/subtract.  Same shape as the _hi_internal pattern
;; but operand 3 must satisfy vect_par_cnst_lo_half, and the non-"2" form of
;; the instruction is emitted.
(define_insn “aarch64_<ANY_EXTEND:su>ADDSUB:optabl_lo_internal” [(set (match_operand: 0 “register_operand” “=w”) (ADDSUB: (ANY_EXTEND: (vec_select: (match_operand:VQW 1 “register_operand” “w”) (match_operand:VQW 3 “vect_par_cnst_lo_half” ""))) (ANY_EXTEND: (vec_select: (match_operand:VQW 2 “register_operand” “w”) (match_dup 3)))))] “TARGET_SIMD” “<ANY_EXTEND:su>ADDSUB:optabl\t%0., %1., %2.” [(set_attr “type” “neon_ADDSUB:optab_long”)] )
;; Expander for saddl2: builds the high-half selector with
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_saddl_hi_internal on operands 0-2 plus that selector.
(define_expand “aarch64_saddl2” [(match_operand: 0 “register_operand” “=w”) (match_operand:VQW 1 “register_operand” “w”) (match_operand:VQW 2 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_saddl_hi_internal (operands[0], operands[1], operands[2], p)); DONE; })
;; Expander for uaddl2: builds the high-half selector with
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_uaddl_hi_internal on operands 0-2 plus that selector.
(define_expand “aarch64_uaddl2” [(match_operand: 0 “register_operand” “=w”) (match_operand:VQW 1 “register_operand” “w”) (match_operand:VQW 2 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_uaddl_hi_internal (operands[0], operands[1], operands[2], p)); DONE; })
;; Expander for ssubl2: builds the high-half selector with
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_ssubl_hi_internal on operands 0-2 plus that selector.
(define_expand “aarch64_ssubl2” [(match_operand: 0 “register_operand” “=w”) (match_operand:VQW 1 “register_operand” “w”) (match_operand:VQW 2 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_ssubl_hi_internal (operands[0], operands[1], operands[2], p)); DONE; })
;; Expander for usubl2: builds the high-half selector with
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_usubl_hi_internal on operands 0-2 plus that selector.
(define_expand “aarch64_usubl2” [(match_operand: 0 “register_operand” “=w”) (match_operand:VQW 1 “register_operand” “w”) (match_operand:VQW 2 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_usubl_hi_internal (operands[0], operands[1], operands[2], p)); DONE; })
;; Widening add/subtract on whole VD_BHSI vectors: both sources are extended
;; with ANY_EXTEND and then combined with ADDSUB.  No half selection is
;; involved, so the non-"2" instruction form is emitted.
(define_insn “aarch64_<ANY_EXTEND:su>ADDSUB:optabl” [(set (match_operand: 0 “register_operand” “=w”) (ADDSUB: (ANY_EXTEND: (match_operand:VD_BHSI 1 “register_operand” “w”)) (ANY_EXTEND: (match_operand:VD_BHSI 2 “register_operand” “w”))))] “TARGET_SIMD” “<ANY_EXTEND:su>ADDSUB:optabl\t%0., %1., %2.” [(set_attr “type” “neon_ADDSUB:optab_long”)] )
;; [su]addw / [su]subw: widening add/subtract (wide), including the
;; high-half "2" forms.
;; "Wide" add/subtract: operand 1 is used as-is while only operand 2
;; (VD_BHSI) goes through ANY_EXTEND before the ADDSUB operation.
(define_insn “aarch64_<ANY_EXTEND:su>ADDSUB:optabw” [(set (match_operand: 0 “register_operand” “=w”) (ADDSUB: (match_operand: 1 “register_operand” “w”) (ANY_EXTEND: (match_operand:VD_BHSI 2 “register_operand” “w”))))] “TARGET_SIMD” “<ANY_EXTEND:su>ADDSUB:optabw\t%0., %1., %2.” [(set_attr “type” “neon_ADDSUB:optab_widen”)] )
;; High-half "wide" add/subtract: operand 1 is used as-is; the half of VQW
;; operand 2 chosen by the vect_par_cnst_hi_half selector (operand 3) is
;; extended with ANY_EXTEND.  Emits the "w2" instruction form.
(define_insn “aarch64_<ANY_EXTEND:su>ADDSUB:optabw2_internal” [(set (match_operand: 0 “register_operand” “=w”) (ADDSUB: (match_operand: 1 “register_operand” “w”) (ANY_EXTEND: (vec_select: (match_operand:VQW 2 “register_operand” “w”) (match_operand:VQW 3 “vect_par_cnst_hi_half” "")))))] “TARGET_SIMD” “<ANY_EXTEND:su>ADDSUB:optabw2\t%0., %1., %2.” [(set_attr “type” “neon_ADDSUB:optab_widen”)] )
;; Expander for saddw2: creates the high-half selector via
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_saddw2_internal with it as the fourth operand.
(define_expand “aarch64_saddw2” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQW 2 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_saddw2_internal (operands[0], operands[1], operands[2], p)); DONE; })
;; Expander for uaddw2: creates the high-half selector via
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_uaddw2_internal with it as the fourth operand.
(define_expand “aarch64_uaddw2” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQW 2 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_uaddw2_internal (operands[0], operands[1], operands[2], p)); DONE; })
;; Expander for ssubw2: creates the high-half selector via
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_ssubw2_internal with it as the fourth operand.
(define_expand “aarch64_ssubw2” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQW 2 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_ssubw2_internal (operands[0], operands[1], operands[2], p)); DONE; })
;; Expander for usubw2: creates the high-half selector via
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_usubw2_internal with it as the fourth operand.
(define_expand “aarch64_usubw2” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQW 2 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_usubw2_internal (operands[0], operands[1], operands[2], p)); DONE; })
;; Halving add/subtract (the HADDSUB unspecs).
;; Two-source operation over VDQ_BHSI expressed as an unspec drawn from the
;; HADDSUB iterator; all three operands are SIMD registers of the same mode.
;; NOTE(review): the pattern name and mnemonic look truncated (iterator
;; placeholders appear to be missing) - confirm against the upstream source.
(define_insn “aarch64_h” [(set (match_operand:VDQ_BHSI 0 “register_operand” “=w”) (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 “register_operand” “w”) (match_operand:VDQ_BHSI 2 “register_operand” “w”)] HADDSUB))] “TARGET_SIMD” “h\t%0., %1., %2.” [(set_attr “type” “neon__halve”)] )
;; Add/subtract returning the narrowed high half (the ADDSUBHN and
;; ADDSUBHN2 unspecs).
;; Two VQN sources combined by an ADDSUBHN unspec into a destination of a
;; different (derived) mode.
;; NOTE(review): name and mnemonic look truncated - confirm upstream.
(define_insn “aarch64_hn” [(set (match_operand: 0 “register_operand” “=w”) (unspec: [(match_operand:VQN 1 “register_operand” “w”) (match_operand:VQN 2 “register_operand” “w”)] ADDSUBHN))] “TARGET_SIMD” “hn\t%0., %1., %2.” [(set_attr “type” “neon__halve_narrow_q”)] )
;; "2" variant of the narrowing pattern (ADDSUBHN2 unspecs).  Operand 1 is
;; tied to the destination (constraint "0"), so the template prints only the
;; destination and the two VQN sources (operands 2 and 3).
(define_insn “aarch64_hn2” [(set (match_operand: 0 “register_operand” “=w”) (unspec: [(match_operand: 1 “register_operand” “0”) (match_operand:VQN 2 “register_operand” “w”) (match_operand:VQN 3 “register_operand” “w”)] ADDSUBHN2))] “TARGET_SIMD” “hn2\t%0., %2., %3.” [(set_attr “type” “neon__halve_narrow_q”)] )
;; pmul.
;; pmul on the VB modes, modelled as UNSPEC_PMUL of two register operands.
(define_insn “aarch64_pmul” [(set (match_operand:VB 0 “register_operand” “=w”) (unspec:VB [(match_operand:VB 1 “register_operand” “w”) (match_operand:VB 2 “register_operand” “w”)] UNSPEC_PMUL))] “TARGET_SIMD” “pmul\t%0., %1., %2.” [(set_attr “type” “neon_mul_”)] )
;; Saturating binary operations (the BINQOPS codes).
;; Binary operation from the BINQOPS code iterator applied to two VSDQ_I
;; register operands; the mnemonic comes from <su_optab>.
(define_insn “aarch64_<su_optab>” [(set (match_operand:VSDQ_I 0 “register_operand” “=w”) (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 “register_operand” “w”) (match_operand:VSDQ_I 2 “register_operand” “w”)))] “TARGET_SIMD” “<su_optab>\t%0, %1, %2” [(set_attr “type” “neon_”)] )
;; suqadd and usqadd
;; Accumulating form (USSUQADD unspecs): operand 1 is tied to the
;; destination (constraint "0"), so the template prints only %0 and the
;; second source %2.
(define_insn “aarch64_qadd” [(set (match_operand:VSDQ_I 0 “register_operand” “=w”) (unspec:VSDQ_I [(match_operand:VSDQ_I 1 “register_operand” “0”) (match_operand:VSDQ_I 2 “register_operand” “w”)] USSUQADD))] “TARGET_SIMD” “qadd\t%0, %2” [(set_attr “type” “neon_qadd”)] )
;; sqmovun
;; Single-source pattern on VSQN_HSDI modelled as UNSPEC_SQXTUN; emits
;; sqxtun from operand 1 into operand 0.
(define_insn “aarch64_sqmovun” [(set (match_operand: 0 “register_operand” “=w”) (unspec: [(match_operand:VSQN_HSDI 1 “register_operand” “w”)] UNSPEC_SQXTUN))] “TARGET_SIMD” “sqxtun\t%0, %1” [(set_attr “type” “neon_sat_shift_imm_narrow_q”)] )
;; sqmovn and uqmovn
;; Single-source pattern on VSQN_HSDI using the SUQMOVN unspec iterator.
;; NOTE(review): the mnemonic reads "qxtn" here; a signedness prefix
;; placeholder appears to be missing - confirm against the upstream source.
(define_insn “aarch64_qmovn” [(set (match_operand: 0 “register_operand” “=w”) (unspec: [(match_operand:VSQN_HSDI 1 “register_operand” “w”)] SUQMOVN))] “TARGET_SIMD” “qxtn\t%0, %1” [(set_attr “type” “neon_sat_shift_imm_narrow_q”)] )
;; Saturating unary operations (the UNQOPS codes).
;; Unary operation from the UNQOPS code iterator on a VSDQ_I register.
;; NOTE(review): name and mnemonic look truncated - confirm upstream.
(define_insn “aarch64_s” [(set (match_operand:VSDQ_I 0 “register_operand” “=w”) (UNQOPS:VSDQ_I (match_operand:VSDQ_I 1 “register_operand” “w”)))] “TARGET_SIMD” “s\t%0, %1” [(set_attr “type” “neon_”)] )
;; sqdmulh.
;; Register-register sqdmulh on VSDQ_HSI, modelled with the VQDMULH unspec
;; iterator.
(define_insn “aarch64_sqdmulh” [(set (match_operand:VSDQ_HSI 0 “register_operand” “=w”) (unspec:VSDQ_HSI [(match_operand:VSDQ_HSI 1 “register_operand” “w”) (match_operand:VSDQ_HSI 2 “register_operand” “w”)] VQDMULH))] “TARGET_SIMD” “sqdmulh\t%0, %1, %2” [(set_attr “type” “neon_sat_mul_”)] )
;; sqdmulh_lane
;; Vector (VDQHS) by-lane form: the second multiplicand is one lane of
;; operand 2 picked by immediate operand 3.  The lane number is remapped
;; with ENDIAN_LANE_N only when the assembly is printed.
(define_insn “aarch64_sqdmulh_lane” [(set (match_operand:VDQHS 0 “register_operand” “=w”) (unspec:VDQHS [(match_operand:VDQHS 1 “register_operand” “w”) (vec_select: (match_operand: 2 “register_operand” “”) (parallel [(match_operand:SI 3 “immediate_operand” “i”)]))] VQDMULH))] “TARGET_SIMD” “* operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return "sqdmulh\t%0., %1., %2.[%3]";” [(set_attr “type” “neon_sat_mul__scalar”)] )
;; "laneq" counterpart of the vector by-lane pattern: same RTL shape, with
;; the lane (operand 3) taken from operand 2 and remapped with
;; ENDIAN_LANE_N at output time.
;; NOTE(review): the mode of operand 2, which presumably distinguishes this
;; from the _lane pattern, is not visible here - confirm upstream.
(define_insn “aarch64_sqdmulh_laneq” [(set (match_operand:VDQHS 0 “register_operand” “=w”) (unspec:VDQHS [(match_operand:VDQHS 1 “register_operand” “w”) (vec_select: (match_operand: 2 “register_operand” “”) (parallel [(match_operand:SI 3 “immediate_operand” “i”)]))] VQDMULH))] “TARGET_SIMD” “* operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return "sqdmulh\t%0., %1., %2.[%3]";” [(set_attr “type” “neon_sat_mul__scalar”)] )
;; Scalar (SD_HSI) by-lane form: operand 1 is a scalar and the second
;; multiplicand is the lane of operand 2 chosen by operand 3, remapped with
;; ENDIAN_LANE_N before printing.
(define_insn “aarch64_sqdmulh_lane” [(set (match_operand:SD_HSI 0 “register_operand” “=w”) (unspec:SD_HSI [(match_operand:SD_HSI 1 “register_operand” “w”) (vec_select: (match_operand: 2 “register_operand” “”) (parallel [(match_operand:SI 3 “immediate_operand” “i”)]))] VQDMULH))] “TARGET_SIMD” “* operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return "sqdmulh\t%0, %1, %2.[%3]";” [(set_attr “type” “neon_sat_mul__scalar”)] )
;; Scalar (SD_HSI) "laneq" form; same shape as the scalar _lane pattern,
;; with the lane index remapped through ENDIAN_LANE_N at output time.
(define_insn “aarch64_sqdmulh_laneq” [(set (match_operand:SD_HSI 0 “register_operand” “=w”) (unspec:SD_HSI [(match_operand:SD_HSI 1 “register_operand” “w”) (vec_select: (match_operand: 2 “register_operand” “”) (parallel [(match_operand:SI 3 “immediate_operand” “i”)]))] VQDMULH))] “TARGET_SIMD” “* operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return "sqdmulh\t%0, %1, %2.[%3]";” [(set_attr “type” “neon_sat_mul__scalar”)] )
;; vqdml[sa]l
;; Multiply-accumulate long: operands 2 and 3 (VSD_HSI) are sign-extended,
;; multiplied, and shifted left by one with ss_ashift; the result is
;; combined with accumulator operand 1 by an SBINQOPS code.  Operand 1 is
;; tied to the destination (constraint "0").
(define_insn “aarch64_sqdmlSBINQOPS:asl” [(set (match_operand: 0 “register_operand” “=w”) (SBINQOPS: (match_operand: 1 “register_operand” “0”) (ss_ashift: (mult: (sign_extend: (match_operand:VSD_HSI 2 “register_operand” “w”)) (sign_extend: (match_operand:VSD_HSI 3 “register_operand” “w”))) (const_int 1))))] “TARGET_SIMD” “sqdmlSBINQOPS:asl\t%0, %2, %3” [(set_attr “type” “neon_sat_mla__long”)] )
;; vqdml[sa]l_lane
;; Vector (VD_HSI) by-lane multiply-accumulate long: the second multiplicand
;; is a vec_duplicate of the lane of operand 3 selected by operand 4.  The
;; lane index is remapped with ENDIAN_LANE_N when the assembly is printed;
;; the accumulator (operand 1) is tied to the destination.
(define_insn “aarch64_sqdmlSBINQOPS:asl_lane” [(set (match_operand: 0 “register_operand” “=w”) (SBINQOPS: (match_operand: 1 “register_operand” “0”) (ss_ashift: (mult: (sign_extend: (match_operand:VD_HSI 2 “register_operand” “w”)) (sign_extend: (vec_duplicate:VD_HSI (vec_select: (match_operand: 3 “register_operand” “”) (parallel [(match_operand:SI 4 “immediate_operand” “i”)]))) )) (const_int 1))))] “TARGET_SIMD” { operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); return “sqdmlSBINQOPS:asl\t%0, %2, %3.[%4]”; } [(set_attr “type” “neon_sat_mla__scalar_long”)] )
;; Vector (VD_HSI) "laneq" multiply-accumulate long: same RTL shape as the
;; _lane pattern, with the lane index (operand 4) remapped through
;; ENDIAN_LANE_N at output time.
(define_insn “aarch64_sqdmlSBINQOPS:asl_laneq” [(set (match_operand: 0 “register_operand” “=w”) (SBINQOPS: (match_operand: 1 “register_operand” “0”) (ss_ashift: (mult: (sign_extend: (match_operand:VD_HSI 2 “register_operand” “w”)) (sign_extend: (vec_duplicate:VD_HSI (vec_select: (match_operand: 3 “register_operand” “”) (parallel [(match_operand:SI 4 “immediate_operand” “i”)]))) )) (const_int 1))))] “TARGET_SIMD” { operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); return “sqdmlSBINQOPS:asl\t%0, %2, %3.[%4]”; } [(set_attr “type” “neon_sat_mla__scalar_long”)] )
;; Scalar (SD_HSI) by-lane multiply-accumulate long: the selected lane of
;; operand 3 is sign-extended directly (no vec_duplicate) and multiplied by
;; scalar operand 2.  Lane index remapped with ENDIAN_LANE_N at output time.
(define_insn “aarch64_sqdmlSBINQOPS:asl_lane” [(set (match_operand: 0 “register_operand” “=w”) (SBINQOPS: (match_operand: 1 “register_operand” “0”) (ss_ashift: (mult: (sign_extend: (match_operand:SD_HSI 2 “register_operand” “w”)) (sign_extend: (vec_select: (match_operand: 3 “register_operand” “”) (parallel [(match_operand:SI 4 “immediate_operand” “i”)]))) ) (const_int 1))))] “TARGET_SIMD” { operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); return “sqdmlSBINQOPS:asl\t%0, %2, %3.[%4]”; } [(set_attr “type” “neon_sat_mla__scalar_long”)] )
;; Scalar (SD_HSI) "laneq" multiply-accumulate long; same shape as the
;; scalar _lane pattern, lane index remapped with ENDIAN_LANE_N on output.
(define_insn “aarch64_sqdmlSBINQOPS:asl_laneq” [(set (match_operand: 0 “register_operand” “=w”) (SBINQOPS: (match_operand: 1 “register_operand” “0”) (ss_ashift: (mult: (sign_extend: (match_operand:SD_HSI 2 “register_operand” “w”)) (sign_extend: (vec_select: (match_operand: 3 “register_operand” “”) (parallel [(match_operand:SI 4 “immediate_operand” “i”)]))) ) (const_int 1))))] “TARGET_SIMD” { operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); return “sqdmlSBINQOPS:asl\t%0, %2, %3.[%4]”; } [(set_attr “type” “neon_sat_mla__scalar_long”)] )
;; vqdml[sa]l_n
;; "_n" form: the second multiplicand is a vec_duplicate of register
;; operand 3, printed in the template as lane [0] of that register.
;; The accumulator (operand 1) is tied to the destination.
(define_insn “aarch64_sqdmlSBINQOPS:asl_n” [(set (match_operand: 0 “register_operand” “=w”) (SBINQOPS: (match_operand: 1 “register_operand” “0”) (ss_ashift: (mult: (sign_extend: (match_operand:VD_HSI 2 “register_operand” “w”)) (sign_extend: (vec_duplicate:VD_HSI (match_operand: 3 “register_operand” “”)))) (const_int 1))))] “TARGET_SIMD” “sqdmlSBINQOPS:asl\t%0, %2, %3.[0]” [(set_attr “type” “neon_sat_mla__scalar_long”)] )
;; sqdml[as]l2
;; High-half multiply-accumulate long: the halves of VQ_HSI operands 2 and 3
;; chosen by the vect_par_cnst_hi_half selector (operand 4, reused through
;; match_dup) are sign-extended and multiplied.  Emits the "2" form.
(define_insn “aarch64_sqdmlSBINQOPS:asl2_internal” [(set (match_operand: 0 “register_operand” “=w”) (SBINQOPS: (match_operand: 1 “register_operand” “0”) (ss_ashift: (mult: (sign_extend: (vec_select: (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand:VQ_HSI 4 “vect_par_cnst_hi_half” ""))) (sign_extend: (vec_select: (match_operand:VQ_HSI 3 “register_operand” “w”) (match_dup 4)))) (const_int 1))))] “TARGET_SIMD” “sqdmlSBINQOPS:asl2\t%0, %2, %3” [(set_attr “type” “neon_sat_mla__scalar_long”)] )
;; Expander for sqdmlal2: builds the high-half selector with
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_sqdmlal2_internal with it as the final operand.
(define_expand “aarch64_sqdmlal2” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand:VQ_HSI 3 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmlal2_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; })
;; Expander for sqdmlsl2: builds the high-half selector with
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_sqdmlsl2_internal with it as the final operand.
(define_expand “aarch64_sqdmlsl2” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand:VQ_HSI 3 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmlsl2_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; })
;; vqdml[sa]l2_lane
;; High-half by-lane multiply-accumulate long: the high half of operand 2
;; (selector operand 5) is multiplied by a duplicate of the lane of
;; operand 3 picked by operand 4.  Lane index remapped with ENDIAN_LANE_N
;; at output time; accumulator operand 1 is tied to the destination.
(define_insn “aarch64_sqdmlSBINQOPS:asl2_lane_internal” [(set (match_operand: 0 “register_operand” “=w”) (SBINQOPS: (match_operand: 1 “register_operand” “0”) (ss_ashift: (mult: (sign_extend: (vec_select: (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand:VQ_HSI 5 “vect_par_cnst_hi_half” ""))) (sign_extend: (vec_duplicate: (vec_select: (match_operand: 3 “register_operand” “”) (parallel [(match_operand:SI 4 “immediate_operand” “i”)]) )))) (const_int 1))))] “TARGET_SIMD” { operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); return “sqdmlSBINQOPS:asl2\t%0, %2, %3.[%4]”; } [(set_attr “type” “neon_sat_mla__scalar_long”)] )
;; "laneq" counterpart of the high-half by-lane multiply-accumulate long;
;; identical RTL shape, lane index remapped with ENDIAN_LANE_N on output.
(define_insn “aarch64_sqdmlSBINQOPS:asl2_laneq_internal” [(set (match_operand: 0 “register_operand” “=w”) (SBINQOPS: (match_operand: 1 “register_operand” “0”) (ss_ashift: (mult: (sign_extend: (vec_select: (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand:VQ_HSI 5 “vect_par_cnst_hi_half” ""))) (sign_extend: (vec_duplicate: (vec_select: (match_operand: 3 “register_operand” “”) (parallel [(match_operand:SI 4 “immediate_operand” “i”)]) )))) (const_int 1))))] “TARGET_SIMD” { operands[4] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[4]))); return “sqdmlSBINQOPS:asl2\t%0, %2, %3.[%4]”; } [(set_attr “type” “neon_sat_mla__scalar_long”)] )
;; Expander for sqdmlal2 by lane: builds the high-half selector and emits
;; aarch64_sqdmlal2_lane_internal with operands 0-4 followed by it.
(define_expand “aarch64_sqdmlal2_lane” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand: 3 “register_operand” “”) (match_operand:SI 4 “immediate_operand” “i”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmlal2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; })
;; Expander for sqdmlal2 by laneq: builds the high-half selector and emits
;; aarch64_sqdmlal2_laneq_internal with operands 0-4 followed by it.
(define_expand “aarch64_sqdmlal2_laneq” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand: 3 “register_operand” “”) (match_operand:SI 4 “immediate_operand” “i”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmlal2_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; })
;; Expander for sqdmlsl2 by lane: builds the high-half selector and emits
;; aarch64_sqdmlsl2_lane_internal with operands 0-4 followed by it.
(define_expand “aarch64_sqdmlsl2_lane” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand: 3 “register_operand” “”) (match_operand:SI 4 “immediate_operand” “i”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmlsl2_lane_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; })
;; Expander for sqdmlsl2 by laneq: builds the high-half selector and emits
;; aarch64_sqdmlsl2_laneq_internal with operands 0-4 followed by it.
(define_expand “aarch64_sqdmlsl2_laneq” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand: 3 “register_operand” “”) (match_operand:SI 4 “immediate_operand” “i”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmlsl2_laneq_internal (operands[0], operands[1], operands[2], operands[3], operands[4], p)); DONE; })
;; High-half "_n" multiply-accumulate long: the high half of operand 2
;; (selector operand 4) is multiplied by a vec_duplicate of register
;; operand 3, printed as lane [0].  Accumulator operand 1 is tied to the
;; destination.
(define_insn “aarch64_sqdmlSBINQOPS:asl2_n_internal” [(set (match_operand: 0 “register_operand” “=w”) (SBINQOPS: (match_operand: 1 “register_operand” “0”) (ss_ashift: (mult: (sign_extend: (vec_select: (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand:VQ_HSI 4 “vect_par_cnst_hi_half” ""))) (sign_extend: (vec_duplicate: (match_operand: 3 “register_operand” “”)))) (const_int 1))))] “TARGET_SIMD” “sqdmlSBINQOPS:asl2\t%0, %2, %3.[0]” [(set_attr “type” “neon_sat_mla__scalar_long”)] )
;; Expander for sqdmlal2_n: builds the high-half selector and emits
;; aarch64_sqdmlal2_n_internal with operands 0-3 followed by it.
(define_expand “aarch64_sqdmlal2_n” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand: 3 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmlal2_n_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; })
;; Expander for sqdmlsl2_n: builds the high-half selector and emits
;; aarch64_sqdmlsl2_n_internal with operands 0-3 followed by it.
(define_expand “aarch64_sqdmlsl2_n” [(match_operand: 0 “register_operand” “=w”) (match_operand: 1 “register_operand” “w”) (match_operand:VQ_HSI 2 “register_operand” “w”) (match_operand: 3 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmlsl2_n_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; })
;; vqdmull
;; sqdmull: both VSD_HSI sources are sign-extended, multiplied, and the
;; product is shifted left by one with ss_ashift.  No accumulator.
(define_insn “aarch64_sqdmull” [(set (match_operand: 0 “register_operand” “=w”) (ss_ashift: (mult: (sign_extend: (match_operand:VSD_HSI 1 “register_operand” “w”)) (sign_extend: (match_operand:VSD_HSI 2 “register_operand” “w”))) (const_int 1)))] “TARGET_SIMD” “sqdmull\t%0, %1, %2” [(set_attr “type” “neon_sat_mul__long”)] )
;; vqdmull_lane
;; Vector (VD_HSI) by-lane sqdmull: the second multiplicand is a
;; vec_duplicate of the lane of operand 2 chosen by operand 3.  The lane
;; index is remapped with ENDIAN_LANE_N when the assembly is printed.
(define_insn “aarch64_sqdmull_lane” [(set (match_operand: 0 “register_operand” “=w”) (ss_ashift: (mult: (sign_extend: (match_operand:VD_HSI 1 “register_operand” “w”)) (sign_extend: (vec_duplicate:VD_HSI (vec_select: (match_operand: 2 “register_operand” “”) (parallel [(match_operand:SI 3 “immediate_operand” “i”)]))) )) (const_int 1)))] “TARGET_SIMD” { operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return “sqdmull\t%0, %1, %2.[%3]”; } [(set_attr “type” “neon_sat_mul__scalar_long”)] )
;; Vector (VD_HSI) "laneq" sqdmull; same RTL shape as the _lane pattern,
;; lane index remapped with ENDIAN_LANE_N on output.
(define_insn “aarch64_sqdmull_laneq” [(set (match_operand: 0 “register_operand” “=w”) (ss_ashift: (mult: (sign_extend: (match_operand:VD_HSI 1 “register_operand” “w”)) (sign_extend: (vec_duplicate:VD_HSI (vec_select: (match_operand: 2 “register_operand” “”) (parallel [(match_operand:SI 3 “immediate_operand” “i”)]))) )) (const_int 1)))] “TARGET_SIMD” { operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return “sqdmull\t%0, %1, %2.[%3]”; } [(set_attr “type” “neon_sat_mul__scalar_long”)] )
;; Scalar (SD_HSI) by-lane sqdmull: the selected lane of operand 2 is
;; sign-extended directly (no vec_duplicate).  Lane index remapped with
;; ENDIAN_LANE_N on output.
(define_insn “aarch64_sqdmull_lane” [(set (match_operand: 0 “register_operand” “=w”) (ss_ashift: (mult: (sign_extend: (match_operand:SD_HSI 1 “register_operand” “w”)) (sign_extend: (vec_select: (match_operand: 2 “register_operand” “”) (parallel [(match_operand:SI 3 “immediate_operand” “i”)])) )) (const_int 1)))] “TARGET_SIMD” { operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return “sqdmull\t%0, %1, %2.[%3]”; } [(set_attr “type” “neon_sat_mul__scalar_long”)] )
;; Scalar (SD_HSI) "laneq" sqdmull; same shape as the scalar _lane pattern,
;; lane index remapped with ENDIAN_LANE_N on output.
(define_insn “aarch64_sqdmull_laneq” [(set (match_operand: 0 “register_operand” “=w”) (ss_ashift: (mult: (sign_extend: (match_operand:SD_HSI 1 “register_operand” “w”)) (sign_extend: (vec_select: (match_operand: 2 “register_operand” “”) (parallel [(match_operand:SI 3 “immediate_operand” “i”)])) )) (const_int 1)))] “TARGET_SIMD” { operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return “sqdmull\t%0, %1, %2.[%3]”; } [(set_attr “type” “neon_sat_mul__scalar_long”)] )
;; vqdmull_n
;; "_n" sqdmull: the second multiplicand is a vec_duplicate of register
;; operand 2, printed in the template as lane [0] of that register.
(define_insn “aarch64_sqdmull_n” [(set (match_operand: 0 “register_operand” “=w”) (ss_ashift: (mult: (sign_extend: (match_operand:VD_HSI 1 “register_operand” “w”)) (sign_extend: (vec_duplicate:VD_HSI (match_operand: 2 “register_operand” “”))) ) (const_int 1)))] “TARGET_SIMD” “sqdmull\t%0, %1, %2.[0]” [(set_attr “type” “neon_sat_mul__scalar_long”)] )
;; vqdmull2
;; High-half sqdmull: the halves of VQ_HSI operands 1 and 2 chosen by the
;; vect_par_cnst_hi_half selector (operand 3, reused via match_dup) are
;; sign-extended, multiplied and shifted left by one.  Emits sqdmull2.
(define_insn “aarch64_sqdmull2_internal” [(set (match_operand: 0 “register_operand” “=w”) (ss_ashift: (mult: (sign_extend: (vec_select: (match_operand:VQ_HSI 1 “register_operand” “w”) (match_operand:VQ_HSI 3 “vect_par_cnst_hi_half” ""))) (sign_extend: (vec_select: (match_operand:VQ_HSI 2 “register_operand” “w”) (match_dup 3))) ) (const_int 1)))] “TARGET_SIMD” “sqdmull2\t%0, %1, %2” [(set_attr “type” “neon_sat_mul__scalar_long”)] )
;; Expander for sqdmull2: builds the high-half selector with
;; aarch64_simd_vect_par_cnst_half (..., true) and emits
;; aarch64_sqdmull2_internal with it as the final operand.
(define_expand “aarch64_sqdmull2” [(match_operand: 0 “register_operand” “=w”) (match_operand:VQ_HSI 1 “register_operand” “w”) (match_operand:VQ_HSI 2 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmull2_internal (operands[0], operands[1], operands[2], p)); DONE; })
;; vqdmull2_lane
;; High-half by-lane sqdmull: the high half of operand 1 (selector
;; operand 4) is multiplied by a duplicate of the lane of operand 2 picked
;; by operand 3.  Lane index remapped with ENDIAN_LANE_N on output.
(define_insn “aarch64_sqdmull2_lane_internal” [(set (match_operand: 0 “register_operand” “=w”) (ss_ashift: (mult: (sign_extend: (vec_select: (match_operand:VQ_HSI 1 “register_operand” “w”) (match_operand:VQ_HSI 4 “vect_par_cnst_hi_half” ""))) (sign_extend: (vec_duplicate: (vec_select: (match_operand: 2 “register_operand” “”) (parallel [(match_operand:SI 3 “immediate_operand” “i”)]))) )) (const_int 1)))] “TARGET_SIMD” { operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return “sqdmull2\t%0, %1, %2.[%3]”; } [(set_attr “type” “neon_sat_mul__scalar_long”)] )
;; "laneq" counterpart of the high-half by-lane sqdmull; identical RTL
;; shape, lane index remapped with ENDIAN_LANE_N on output.
(define_insn “aarch64_sqdmull2_laneq_internal” [(set (match_operand: 0 “register_operand” “=w”) (ss_ashift: (mult: (sign_extend: (vec_select: (match_operand:VQ_HSI 1 “register_operand” “w”) (match_operand:VQ_HSI 4 “vect_par_cnst_hi_half” ""))) (sign_extend: (vec_duplicate: (vec_select: (match_operand: 2 “register_operand” “”) (parallel [(match_operand:SI 3 “immediate_operand” “i”)]))) )) (const_int 1)))] “TARGET_SIMD” { operands[3] = GEN_INT (ENDIAN_LANE_N (mode, INTVAL (operands[3]))); return “sqdmull2\t%0, %1, %2.[%3]”; } [(set_attr “type” “neon_sat_mul__scalar_long”)] )
;; Expander for sqdmull2 by lane: builds the high-half selector and emits
;; aarch64_sqdmull2_lane_internal with operands 0-3 followed by it.
(define_expand “aarch64_sqdmull2_lane” [(match_operand: 0 “register_operand” “=w”) (match_operand:VQ_HSI 1 “register_operand” “w”) (match_operand: 2 “register_operand” “”) (match_operand:SI 3 “immediate_operand” “i”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmull2_lane_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; })
;; Expander for sqdmull2 by laneq: builds the high-half selector and emits
;; aarch64_sqdmull2_laneq_internal with operands 0-3 followed by it.
(define_expand “aarch64_sqdmull2_laneq” [(match_operand: 0 “register_operand” “=w”) (match_operand:VQ_HSI 1 “register_operand” “w”) (match_operand: 2 “register_operand” “”) (match_operand:SI 3 “immediate_operand” “i”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmull2_laneq_internal (operands[0], operands[1], operands[2], operands[3], p)); DONE; })
;; vqdmull2_n
;; High-half "_n" sqdmull: the high half of operand 1 (selector operand 3)
;; is multiplied by a vec_duplicate of register operand 2, printed as
;; lane [0].
(define_insn “aarch64_sqdmull2_n_internal” [(set (match_operand: 0 “register_operand” “=w”) (ss_ashift: (mult: (sign_extend: (vec_select: (match_operand:VQ_HSI 1 “register_operand” “w”) (match_operand:VQ_HSI 3 “vect_par_cnst_hi_half” ""))) (sign_extend: (vec_duplicate: (match_operand: 2 “register_operand” “”))) ) (const_int 1)))] “TARGET_SIMD” “sqdmull2\t%0, %1, %2.[0]” [(set_attr “type” “neon_sat_mul__scalar_long”)] )
;; Expander for sqdmull2_n: builds the high-half selector and emits
;; aarch64_sqdmull2_n_internal with operands 0-2 followed by it.
(define_expand “aarch64_sqdmull2_n” [(match_operand: 0 “register_operand” “=w”) (match_operand:VQ_HSI 1 “register_operand” “w”) (match_operand: 2 “register_operand” “w”)] “TARGET_SIMD” { rtx p = aarch64_simd_vect_par_cnst_half (mode, true); emit_insn (gen_aarch64_sqdmull2_n_internal (operands[0], operands[1], operands[2], p)); DONE; })
;; vshl
;; Register-controlled shift over VSDQ_I_DI, modelled with the VSHL unspec
;; iterator.  Operands 1 and 2 are both SIMD registers of the same mode as
;; the destination (operand 2 presumably holds the shift amounts - confirm).
;; The stray ';' that used to follow the output template has been removed:
;; ';' starts a comment in a machine description, so it served no purpose
;; and hid whatever followed it on the same line.
(define_insn “aarch64_shl”
  [(set (match_operand:VSDQ_I_DI 0 “register_operand” “=w”)
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 “register_operand” “w”)
           (match_operand:VSDQ_I_DI 2 “register_operand” “w”)]
          VSHL))]
  “TARGET_SIMD”
  “shl\t%0, %1, %2”
  [(set_attr “type” “neon_shift_reg”)]
)
;; vqshl
;; Register-controlled shift over VSDQ_I, modelled with the VQSHL unspec
;; iterator.  Operands 1 and 2 are both SIMD registers of the same mode as
;; the destination (operand 2 presumably holds the shift amounts - confirm).
;; The stray ';' that used to follow the output template has been removed:
;; ';' starts a comment in a machine description, so it served no purpose
;; and hid whatever followed it on the same line.
(define_insn “aarch64_qshl”
  [(set (match_operand:VSDQ_I 0 “register_operand” “=w”)
        (unspec:VSDQ_I
          [(match_operand:VSDQ_I 1 “register_operand” “w”)
           (match_operand:VSDQ_I 2 “register_operand” “w”)]
          VQSHL))]
  “TARGET_SIMD”
  “qshl\t%0, %1, %2”
  [(set_attr “type” “neon_sat_shift_reg”)]
)
;; vshll_n
;; Shift-left-long by immediate on VD_BHSI (VSHLL unspecs).  The output code
;; computes the element width in bits and compares the shift amount
;; (operand 2) against it to choose the template.
;; NOTE(review): as written both branches return the same string;
;; presumably a mnemonic prefix placeholder was lost from one of them -
;; confirm against the upstream source.
(define_insn “aarch64_shll_n” [(set (match_operand: 0 “register_operand” “=w”) (unspec: [(match_operand:VD_BHSI 1 “register_operand” “w”) (match_operand:SI 2 “aarch64_simd_shift_imm_bitsize_<ve_mode>” “i”)] VSHLL))] “TARGET_SIMD” “* int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; if (INTVAL (operands[2]) == bit_width) { return "shll\t%0., %1., %2"; } else { return "shll\t%0., %1., %2"; }” [(set_attr “type” “neon_shift_imm_long”)] )
;; vshll_high_n
;; High-half ("2") shift-left-long by immediate on VQW (VSHLL unspecs).
;; Operand 2 is accepted by the generic immediate_operand predicate here,
;; unlike aarch64_shll_n which uses a bitsize-specific predicate.
;; NOTE(review): as written both branches return the same string;
;; presumably a mnemonic prefix placeholder was lost from one of them -
;; confirm against the upstream source.
(define_insn “aarch64_shll2_n” [(set (match_operand: 0 “register_operand” “=w”) (unspec: [(match_operand:VQW 1 “register_operand” “w”) (match_operand:SI 2 “immediate_operand” “i”)] VSHLL))] “TARGET_SIMD” “* int bit_width = GET_MODE_UNIT_SIZE (mode) * BITS_PER_UNIT; if (INTVAL (operands[2]) == bit_width) { return "shll2\t%0., %1., %2"; } else { return "shll2\t%0., %1., %2"; }” [(set_attr “type” “neon_shift_imm_long”)] )
;; vrshr_n
;; Shift right by immediate over VSDQ_I_DI (VRSHR_N unspecs).  Operand 2
;; must satisfy the mode-specific aarch64_simd_shift_imm_offset_<ve_mode>
;; predicate.
(define_insn “aarch64_shr_n” [(set (match_operand:VSDQ_I_DI 0 “register_operand” “=w”) (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 “register_operand” “w”) (match_operand:SI 2 “aarch64_simd_shift_imm_offset_<ve_mode>” “i”)] VRSHR_N))] “TARGET_SIMD” “shr\t%0, %1, %2” [(set_attr “type” “neon_sat_shift_imm”)] )
;; v(r)sra_n
;; Shift-right-and-accumulate by immediate (VSRA unspecs).  Operand 1 is
;; tied to the destination (constraint "0"), so the template prints only the
;; destination, the shifted source (operand 2) and the immediate (operand 3).
(define_insn “aarch64_sra_n” [(set (match_operand:VSDQ_I_DI 0 “register_operand” “=w”) (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 “register_operand” “0”) (match_operand:VSDQ_I_DI 2 “register_operand” “w”) (match_operand:SI 3 “aarch64_simd_shift_imm_offset_<ve_mode>” “i”)] VSRA))] “TARGET_SIMD” “sra\t%0, %2, %3” [(set_attr “type” “neon_shift_acc”)] )
;; vs[lr]i_n (the VSLRI unspecs)
;; Immediate-shift pattern using the VSLRI unspec iterator.  Operand 1 is
;; tied to the destination (constraint "0"); the template prints the
;; destination, source operand 2 and the immediate operand 3.
;; NOTE(review): name and mnemonic look truncated - confirm upstream.
(define_insn “aarch64_si_n” [(set (match_operand:VSDQ_I_DI 0 “register_operand” “=w”) (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 “register_operand” “0”) (match_operand:VSDQ_I_DI 2 “register_operand” “w”) (match_operand:SI 3 “aarch64_simd_shift_imm_<ve_mode>” “i”)] VSLRI))] “TARGET_SIMD” “si\t%0, %2, %3” [(set_attr “type” “neon_shift_imm”)] )
;; vqshl(u)
;; Shift left by immediate over VSDQ_I using the VQSHL_N unspec iterator;
;; operand 2 must satisfy aarch64_simd_shift_imm_<ve_mode>.
(define_insn “aarch64_qshl_n” [(set (match_operand:VSDQ_I 0 “register_operand” “=w”) (unspec:VSDQ_I [(match_operand:VSDQ_I 1 “register_operand” “w”) (match_operand:SI 2 “aarch64_simd_shift_imm_<ve_mode>” “i”)] VQSHL_N))] “TARGET_SIMD” “qshl\t%0, %1, %2” [(set_attr “type” “neon_sat_shift_imm”)] )
;; vq(r)shr(u)n_n
;; Shift-right-narrow by immediate on VSQN_HSDI using the VQSHRN_N unspec
;; iterator; operand 2 must satisfy aarch64_simd_shift_imm_offset_<ve_mode>.
(define_insn “aarch64_qshrn_n” [(set (match_operand: 0 “register_operand” “=w”) (unspec: [(match_operand:VSQN_HSDI 1 “register_operand” “w”) (match_operand:SI 2 “aarch64_simd_shift_imm_offset_<ve_mode>” “i”)] VQSHRN_N))] “TARGET_SIMD” “qshrn\t%0, %1, %2” [(set_attr “type” “neon_sat_shift_imm_narrow_q”)] )
;; cm(eq|ge|gt|lt|le)
;; Note, we have constraints for Dz and Z as different expanders
;; have different ideas of what should be passed to this pattern.
;; Vector integer compare over the COMPARISONS codes; the comparison is
;; wrapped in neg and produced in <V_cmp_result> mode.  Alternative 0
;; compares two registers, alternative 1 (constraint ZDz) compares against
;; zero and prints "#0".
(define_insn “aarch64_cm” [(set (match_operand:<V_cmp_result> 0 “register_operand” “=w,w”) (neg:<V_cmp_result> (COMPARISONS:<V_cmp_result> (match_operand:VDQ_I 1 “register_operand” “w,w”) (match_operand:VDQ_I 2 “aarch64_simd_reg_or_zero” “w,ZDz”) )))] “TARGET_SIMD” “@ cm<n_optab>\t%0, %<cmp_1>, %<cmp_2> cm\t%0, %1, #0” [(set_attr “type” “neon_compare, neon_compare_zero”)] )
;; DImode compare over the COMPARISONS codes that may live in either the
;; vector ("w") or the general ("r") register file.  The insn clobbers
;; CC_REGNUM and is always split after reload:
;;   * if operands 0 and 1 are general registers, emit a flag-setting
;;     compare followed by gen_cstoredi_neg;
;;   * otherwise fall through to the replacement pattern given below, which
;;     is the same set without the CC clobber.
;; The C comments must be properly terminated: with the closing "*/" of the
;; first comment missing, the whole general-register path is commented out.
(define_insn_and_split “aarch64_cmdi”
  [(set (match_operand:DI 0 “register_operand” “=w,w,r”)
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 “register_operand” “w,w,r”)
            (match_operand:DI 2 “aarch64_simd_reg_or_zero” “w,ZDz,r”)
          )))
   (clobber (reg:CC CC_REGNUM))]
  “TARGET_SIMD”
  “#”
  “reload_completed”
  [(set (match_operand:DI 0 “register_operand”)
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 “register_operand”)
            (match_operand:DI 2 “aarch64_simd_reg_or_zero”)
          )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
        && GP_REGNUM_P (REGNO (operands[1])))
      {
        /* <CMP> is the rtx code of the comparison this instance of the
           pattern was generated for.  */
        machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
        rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
        rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
        emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
        DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr “type” “neon_compare, neon_compare_zero, multiple”)]
)
;; Vector-register-only DImode compare without a CC clobber.  Its condition
;; requires reload_completed, so it only matches after reload.  Operands are
;; printed with the %d modifier; alternative 1 compares against zero.
(define_insn “*aarch64_cmdi” [(set (match_operand:DI 0 “register_operand” “=w,w”) (neg:DI (COMPARISONS:DI (match_operand:DI 1 “register_operand” “w,w”) (match_operand:DI 2 “aarch64_simd_reg_or_zero” “w,ZDz”) )))] “TARGET_SIMD && reload_completed” “@ cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2> cm\t%d0, %d1, #0” [(set_attr “type” “neon_compare, neon_compare_zero”)] )
;; cm(hs|hi)
;; Vector integer compare over the UCOMPARISONS codes, wrapped in neg and
;; produced in <V_cmp_result> mode.  Register-register only: there is no
;; compare-against-zero alternative here.
(define_insn “aarch64_cm” [(set (match_operand:<V_cmp_result> 0 “register_operand” “=w”) (neg:<V_cmp_result> (UCOMPARISONS:<V_cmp_result> (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “register_operand” “w”) )))] “TARGET_SIMD” “cm<n_optab>\t%0, %<cmp_1>, %<cmp_2>” [(set_attr “type” “neon_compare”)] )
;; DImode compare over the UCOMPARISONS codes, in either the vector ("w") or
;; the general ("r") register file.  The insn clobbers CC_REGNUM and is
;; always split after reload:
;;   * if operands 0 and 1 are general registers, emit a CCmode compare
;;     followed by gen_cstoredi_neg;
;;   * otherwise fall through to the replacement pattern given below, which
;;     is the same set without the CC clobber.
;; The C comments must be properly terminated: with the closing "*/" of the
;; first comment missing, the whole general-register path is commented out.
(define_insn_and_split “aarch64_cmdi”
  [(set (match_operand:DI 0 “register_operand” “=w,r”)
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 “register_operand” “w,r”)
            (match_operand:DI 2 “aarch64_simd_reg_or_zero” “w,r”)
          )))
   (clobber (reg:CC CC_REGNUM))]
  “TARGET_SIMD”
  “#”
  “reload_completed”
  [(set (match_operand:DI 0 “register_operand”)
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 “register_operand”)
            (match_operand:DI 2 “aarch64_simd_reg_or_zero”)
          )))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
        && GP_REGNUM_P (REGNO (operands[1])))
      {
        /* <CMP> is the rtx code of the comparison this instance of the
           pattern was generated for.  */
        machine_mode mode = CCmode;
        rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
        rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
        emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
        DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr “type” “neon_compare,multiple”)]
)
;; Vector-register-only DImode compare over UCOMPARISONS, without a CC
;; clobber.  Its condition requires reload_completed.  Operands are printed
;; with the %d modifier.
(define_insn “*aarch64_cmdi” [(set (match_operand:DI 0 “register_operand” “=w”) (neg:DI (UCOMPARISONS:DI (match_operand:DI 1 “register_operand” “w”) (match_operand:DI 2 “aarch64_simd_reg_or_zero” “w”) )))] “TARGET_SIMD && reload_completed” “cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>” [(set_attr “type” “neon_compare”)] )
;; cmtst
;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst, ;; we don't have any insns using ne, and aarch64_vcond_internal outputs ;; not (neg (eq (and x y) 0)) ;; which is rewritten by simplify_rtx as ;; plus (eq (and x y) 0) -1.
;; Vector cmtst.  Matches the form plus (eq (and x y) 0) -1: operand 3 must
;; be the zero vector and operand 4 the all-minus-one vector; only operands
;; 1 and 2 appear in the emitted instruction.
(define_insn “aarch64_cmtst” [(set (match_operand:<V_cmp_result> 0 “register_operand” “=w”) (plus:<V_cmp_result> (eq:<V_cmp_result> (and:VDQ_I (match_operand:VDQ_I 1 “register_operand” “w”) (match_operand:VDQ_I 2 “register_operand” “w”)) (match_operand:VDQ_I 3 “aarch64_simd_imm_zero”)) (match_operand:<V_cmp_result> 4 “aarch64_simd_imm_minus_one”))) ] “TARGET_SIMD” “cmtst\t%0, %1, %2” [(set_attr “type” “neon_tst”)] )
;; DImode cmtst, expressed as neg (ne (and x y) 0), in either the vector
;; ("w") or the general ("r") register file.  The insn clobbers CC_REGNUM
;; and is always split after reload:
;;   * if operands 0 and 1 are general registers, build the AND, compare it
;;     against zero with NE and emit gen_cstoredi_neg;
;;   * otherwise fall through to the replacement pattern given below, which
;;     is the same set without the CC clobber.
;; The C comments must be properly terminated: with the closing "*/" of the
;; first comment missing, the whole general-register path is commented out.
(define_insn_and_split “aarch64_cmtstdi”
  [(set (match_operand:DI 0 “register_operand” “=w,r”)
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 “register_operand” “w,r”)
              (match_operand:DI 2 “register_operand” “w,r”))
            (const_int 0))))
   (clobber (reg:CC CC_REGNUM))]
  “TARGET_SIMD”
  “#”
  “reload_completed”
  [(set (match_operand:DI 0 “register_operand”)
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 “register_operand”)
              (match_operand:DI 2 “register_operand”))
            (const_int 0))))]
  {
    /* If we are in the general purpose register file,
       we split to a sequence of comparison and store.  */
    if (GP_REGNUM_P (REGNO (operands[0]))
        && GP_REGNUM_P (REGNO (operands[1])))
      {
        rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
        machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
        rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
        rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
        emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
        DONE;
      }
    /* Otherwise, we expand to a similar pattern which does not
       clobber CC_REGNUM.  */
  }
  [(set_attr “type” “neon_tst,multiple”)]
)
;; Vector-register-only DImode cmtst without a CC clobber; operands are
;; printed with the %d modifier.
;; NOTE(review): unlike the *aarch64_cmdi patterns, the condition here does
;; not include reload_completed -- confirm that this is intentional.
(define_insn “*aarch64_cmtstdi” [(set (match_operand:DI 0 “register_operand” “=w”) (neg:DI (ne:DI (and:DI (match_operand:DI 1 “register_operand” “w”) (match_operand:DI 2 “register_operand” “w”)) (const_int 0))))] “TARGET_SIMD” “cmtst\t%d0, %d1, %d2” [(set_attr “type” “neon_tst”)] )
;; fcm(eq|ge|gt|le|lt)
;; Floating-point compare over the COMPARISONS codes on VALLF modes, wrapped
;; in neg and produced in <V_cmp_result> mode.  Alternative 0 compares two
;; registers; alternative 1 (constraint YDz) compares against zero and
;; prints a literal 0.
(define_insn “aarch64_cm” [(set (match_operand:<V_cmp_result> 0 “register_operand” “=w,w”) (neg:<V_cmp_result> (COMPARISONS:<V_cmp_result> (match_operand:VALLF 1 “register_operand” “w,w”) (match_operand:VALLF 2 “aarch64_simd_reg_or_zero” “w,YDz”) )))] “TARGET_SIMD” “@ fcm<n_optab>\t%0, %<cmp_1>, %<cmp_2> fcm\t%0, %1, 0” [(set_attr “type” “neon_fp_compare_”)] )
;; fac(ge|gt) ;; Note we can also handle what would be fac(le|lt) by ;; generating fac(ge|gt).
;; Floating-point absolute compare: both VALLF inputs are wrapped in abs
;; before being compared with one of the FAC_COMPARISONS codes; the result
;; is wrapped in neg and produced in <V_cmp_result> mode.
(define_insn “*aarch64_fac” [(set (match_operand:<V_cmp_result> 0 “register_operand” “=w”) (neg:<V_cmp_result> (FAC_COMPARISONS:<V_cmp_result> (abs:VALLF (match_operand:VALLF 1 “register_operand” “w”)) (abs:VALLF (match_operand:VALLF 2 “register_operand” “w”)) )))] “TARGET_SIMD” “fac<n_optab>\t%0, %<cmp_1>, %<cmp_2>” [(set_attr “type” “neon_fp_compare_”)] )
;; addp
;; addp on two VD_BHSI registers, modelled as UNSPEC_ADDP.
(define_insn “aarch64_addp” [(set (match_operand:VD_BHSI 0 “register_operand” “=w”) (unspec:VD_BHSI [(match_operand:VD_BHSI 1 “register_operand” “w”) (match_operand:VD_BHSI 2 “register_operand” “w”)] UNSPEC_ADDP))] “TARGET_SIMD” “addp\t%0, %1, %2” [(set_attr “type” “neon_reduc_add”)] )
;; addp taking a single V2DI source (printed as %1.2d) and producing a DI
;; result (printed with %d), modelled as UNSPEC_ADDP.
(define_insn “aarch64_addpdi” [(set (match_operand:DI 0 “register_operand” “=w”) (unspec:DI [(match_operand:V2DI 1 “register_operand” “w”)] UNSPEC_ADDP))] “TARGET_SIMD” “addp\t%d0, %1.2d” [(set_attr “type” “neon_reduc_add”)] )
;; sqrt
;; Vector floating-point square root over the VDQF modes, expressed with the
;; generic sqrt rtx code and emitted as fsqrt.
(define_insn “sqrt2” [(set (match_operand:VDQF 0 “register_operand” “=w”) (sqrt:VDQF (match_operand:VDQF 1 “register_operand” “w”)))] “TARGET_SIMD” “fsqrt\t%0., %1.” [(set_attr “type” “neon_fp_sqrt_”)] )
;; Patterns for vector struct loads and stores.
;; ld2 from an OImode memory operand (constraint Utv) into an OImode
;; register list, printed as {%S0 - %T0}.  The inner UNSPEC_VSTRUCTDUMMY
;; unspec carries only the VQ mode being iterated over.
(define_insn “aarch64_simd_ld2” [(set (match_operand:OI 0 “register_operand” “=w”) (unspec:OI [(match_operand:OI 1 “aarch64_simd_struct_operand” “Utv”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD2))] “TARGET_SIMD” “ld2\t{%S0. - %T0.}, %1” [(set_attr “type” “neon_load2_2reg”)] )
;; ld2r (UNSPEC_LD2_DUP): loads from a <V_TWO_ELEM> memory operand into an
;; OImode register list, iterated over VALLDIF via the dummy unspec.
(define_insn “aarch64_simd_ld2r” [(set (match_operand:OI 0 “register_operand” “=w”) (unspec:OI [(match_operand:<V_TWO_ELEM> 1 “aarch64_simd_struct_operand” “Utv”) (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] UNSPEC_LD2_DUP))] “TARGET_SIMD” “ld2r\t{%S0. - %T0.}, %1” [(set_attr “type” “neon_load2_all_lanes”)] )
;; Single-lane ld2 (UNSPEC_LD2_LANE).  Operand 2 is the incoming register
;; list and is tied to the output (constraint "0"); operand 3 is the lane
;; index printed inside the [ ] of the template.
(define_insn “aarch64_vec_load_lanesoi_lane” [(set (match_operand:OI 0 “register_operand” “=w”) (unspec:OI [(match_operand:<V_TWO_ELEM> 1 “aarch64_simd_struct_operand” “Utv”) (match_operand:OI 2 “register_operand” “0”) (match_operand:SI 3 “immediate_operand” “i”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] UNSPEC_LD2_LANE))] “TARGET_SIMD” “ld2\t{%S0. - %T0.}[%3], %1” [(set_attr “type” “neon_load2_one_lane”)] )
;; Expander for the two-register structure load.  On little-endian it emits
;; aarch64_simd_ld2 directly.  On big-endian it loads into a temporary
;; OImode register and then applies aarch64_rev_reglistoi with the mask
;; from aarch64_reverse_mask.
(define_expand “vec_load_lanesoi” [(set (match_operand:OI 0 “register_operand” “=w”) (unspec:OI [(match_operand:OI 1 “aarch64_simd_struct_operand” “Utv”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD2))] “TARGET_SIMD” { if (BYTES_BIG_ENDIAN) { rtx tmp = gen_reg_rtx (OImode); rtx mask = aarch64_reverse_mask (mode); emit_insn (gen_aarch64_simd_ld2 (tmp, operands[1])); emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask)); } else emit_insn (gen_aarch64_simd_ld2 (operands[0], operands[1])); DONE; })
;; st2 of an OImode register list (printed as {%S1 - %T1}) to an OImode
;; memory operand (constraint Utv), modelled as UNSPEC_ST2.
(define_insn “aarch64_simd_st2” [(set (match_operand:OI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:OI [(match_operand:OI 1 “register_operand” “w”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST2))] “TARGET_SIMD” “st2\t{%S1. - %T1.}, %0” [(set_attr “type” “neon_store2_2reg”)] )
;; Single-lane st2 (UNSPEC_ST2_LANE): stores lane operand 2 of the OImode
;; register list operand 1 to a <V_TWO_ELEM> memory operand.
;; The scheduling type is the two-register single-lane store, matching the
;; st2 mnemonic and the naming used by the ld2/st3/st4 lane patterns.
(define_insn “vec_store_lanesoi_lane”
  [(set (match_operand:<V_TWO_ELEM> 0 “aarch64_simd_struct_operand” “=Utv”)
        (unspec:<V_TWO_ELEM>
          [(match_operand:OI 1 “register_operand” “w”)
           (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
           (match_operand:SI 2 “immediate_operand” “i”)]
          UNSPEC_ST2_LANE))]
  “TARGET_SIMD”
  “st2\t{%S1. - %T1.}[%2], %0”
  [(set_attr “type” “neon_store2_one_lane”)]
)
;; Expander for the two-register structure store.  On little-endian it emits
;; aarch64_simd_st2 directly.  On big-endian it first applies
;; aarch64_rev_reglistoi (mask from aarch64_reverse_mask) into a temporary
;; OImode register and stores that.
(define_expand “vec_store_lanesoi” [(set (match_operand:OI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:OI [(match_operand:OI 1 “register_operand” “w”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST2))] “TARGET_SIMD” { if (BYTES_BIG_ENDIAN) { rtx tmp = gen_reg_rtx (OImode); rtx mask = aarch64_reverse_mask (mode); emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask)); emit_insn (gen_aarch64_simd_st2 (operands[0], tmp)); } else emit_insn (gen_aarch64_simd_st2 (operands[0], operands[1])); DONE; })
;; ld3 from a CImode memory operand (constraint Utv) into a CImode register
;; list, printed as {%S0 - %U0}; modelled as UNSPEC_LD3.
(define_insn “aarch64_simd_ld3” [(set (match_operand:CI 0 “register_operand” “=w”) (unspec:CI [(match_operand:CI 1 “aarch64_simd_struct_operand” “Utv”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD3))] “TARGET_SIMD” “ld3\t{%S0. - %U0.}, %1” [(set_attr “type” “neon_load3_3reg”)] )
;; ld3r (UNSPEC_LD3_DUP): loads from a <V_THREE_ELEM> memory operand into a
;; CImode register list, iterated over VALLDIF via the dummy unspec.
(define_insn “aarch64_simd_ld3r” [(set (match_operand:CI 0 “register_operand” “=w”) (unspec:CI [(match_operand:<V_THREE_ELEM> 1 “aarch64_simd_struct_operand” “Utv”) (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] UNSPEC_LD3_DUP))] “TARGET_SIMD” “ld3r\t{%S0. - %U0.}, %1” [(set_attr “type” “neon_load3_all_lanes”)] )
;; Single-lane ld3 (UNSPEC_LD3_LANE).  Operand 2 is the incoming register
;; list and is tied to the output (constraint "0"); operand 3 is the lane
;; index printed inside the [ ] of the template.
(define_insn “aarch64_vec_load_lanesci_lane” [(set (match_operand:CI 0 “register_operand” “=w”) (unspec:CI [(match_operand:<V_THREE_ELEM> 1 “aarch64_simd_struct_operand” “Utv”) (match_operand:CI 2 “register_operand” “0”) (match_operand:SI 3 “immediate_operand” “i”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD3_LANE))] “TARGET_SIMD” “ld3\t{%S0. - %U0.}[%3], %1” [(set_attr “type” “neon_load3_one_lane”)] )
;; Expander for the three-register structure load.  On little-endian it
;; emits aarch64_simd_ld3 directly.  On big-endian it loads into a temporary
;; CImode register and then applies aarch64_rev_reglistci with the mask
;; from aarch64_reverse_mask.
(define_expand “vec_load_lanesci” [(set (match_operand:CI 0 “register_operand” “=w”) (unspec:CI [(match_operand:CI 1 “aarch64_simd_struct_operand” “Utv”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD3))] “TARGET_SIMD” { if (BYTES_BIG_ENDIAN) { rtx tmp = gen_reg_rtx (CImode); rtx mask = aarch64_reverse_mask (mode); emit_insn (gen_aarch64_simd_ld3 (tmp, operands[1])); emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask)); } else emit_insn (gen_aarch64_simd_ld3 (operands[0], operands[1])); DONE; })
;; st3 of a CImode register list (printed as {%S1 - %U1}) to a CImode
;; memory operand (constraint Utv), modelled as UNSPEC_ST3.
(define_insn “aarch64_simd_st3” [(set (match_operand:CI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:CI [(match_operand:CI 1 “register_operand” “w”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST3))] “TARGET_SIMD” “st3\t{%S1. - %U1.}, %0” [(set_attr “type” “neon_store3_3reg”)] )
;; Single-lane st3 (UNSPEC_ST3_LANE): stores lane operand 2 of the CImode
;; register list operand 1 to a <V_THREE_ELEM> memory operand.
(define_insn “vec_store_lanesci_lane” [(set (match_operand:<V_THREE_ELEM> 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:<V_THREE_ELEM> [(match_operand:CI 1 “register_operand” “w”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) (match_operand:SI 2 “immediate_operand” “i”)] UNSPEC_ST3_LANE))] “TARGET_SIMD” “st3\t{%S1. - %U1.}[%2], %0” [(set_attr “type” “neon_store3_one_lane”)] )
;; Expander for the three-register structure store.  On little-endian it
;; emits aarch64_simd_st3 directly.  On big-endian it first applies
;; aarch64_rev_reglistci (mask from aarch64_reverse_mask) into a temporary
;; CImode register and stores that.
(define_expand “vec_store_lanesci” [(set (match_operand:CI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:CI [(match_operand:CI 1 “register_operand” “w”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST3))] “TARGET_SIMD” { if (BYTES_BIG_ENDIAN) { rtx tmp = gen_reg_rtx (CImode); rtx mask = aarch64_reverse_mask (mode); emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask)); emit_insn (gen_aarch64_simd_st3 (operands[0], tmp)); } else emit_insn (gen_aarch64_simd_st3 (operands[0], operands[1])); DONE; })
;; ld4 from an XImode memory operand (constraint Utv) into an XImode
;; register list, printed as {%S0 - %V0}; modelled as UNSPEC_LD4.
(define_insn “aarch64_simd_ld4” [(set (match_operand:XI 0 “register_operand” “=w”) (unspec:XI [(match_operand:XI 1 “aarch64_simd_struct_operand” “Utv”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD4))] “TARGET_SIMD” “ld4\t{%S0. - %V0.}, %1” [(set_attr “type” “neon_load4_4reg”)] )
;; ld4r (UNSPEC_LD4_DUP): loads from a <V_FOUR_ELEM> memory operand into an
;; XImode register list, iterated over VALLDIF via the dummy unspec.
(define_insn “aarch64_simd_ld4r” [(set (match_operand:XI 0 “register_operand” “=w”) (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 “aarch64_simd_struct_operand” “Utv”) (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] UNSPEC_LD4_DUP))] “TARGET_SIMD” “ld4r\t{%S0. - %V0.}, %1” [(set_attr “type” “neon_load4_all_lanes”)] )
;; Single-lane ld4 (UNSPEC_LD4_LANE).  Operand 2 is the incoming register
;; list and is tied to the output (constraint "0"); operand 3 is the lane
;; index printed inside the [ ] of the template.
(define_insn “aarch64_vec_load_lanesxi_lane” [(set (match_operand:XI 0 “register_operand” “=w”) (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 “aarch64_simd_struct_operand” “Utv”) (match_operand:XI 2 “register_operand” “0”) (match_operand:SI 3 “immediate_operand” “i”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD4_LANE))] “TARGET_SIMD” “ld4\t{%S0. - %V0.}[%3], %1” [(set_attr “type” “neon_load4_one_lane”)] )
;; Expander for the four-register structure load.  On little-endian it emits
;; aarch64_simd_ld4 directly.  On big-endian it loads into a temporary
;; XImode register and then applies aarch64_rev_reglistxi with the mask
;; from aarch64_reverse_mask.
(define_expand “vec_load_lanesxi” [(set (match_operand:XI 0 “register_operand” “=w”) (unspec:XI [(match_operand:XI 1 “aarch64_simd_struct_operand” “Utv”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD4))] “TARGET_SIMD” { if (BYTES_BIG_ENDIAN) { rtx tmp = gen_reg_rtx (XImode); rtx mask = aarch64_reverse_mask (mode); emit_insn (gen_aarch64_simd_ld4 (tmp, operands[1])); emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask)); } else emit_insn (gen_aarch64_simd_ld4 (operands[0], operands[1])); DONE; })
;; st4 of an XImode register list (printed as {%S1 - %V1}) to an XImode
;; memory operand (constraint Utv), modelled as UNSPEC_ST4.
(define_insn “aarch64_simd_st4” [(set (match_operand:XI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:XI [(match_operand:XI 1 “register_operand” “w”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST4))] “TARGET_SIMD” “st4\t{%S1. - %V1.}, %0” [(set_attr “type” “neon_store4_4reg”)] )
;; Single-lane st4 (UNSPEC_ST4_LANE): stores lane operand 2 of the XImode
;; register list operand 1 to a <V_FOUR_ELEM> memory operand.
(define_insn “vec_store_lanesxi_lane” [(set (match_operand:<V_FOUR_ELEM> 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:<V_FOUR_ELEM> [(match_operand:XI 1 “register_operand” “w”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) (match_operand:SI 2 “immediate_operand” “i”)] UNSPEC_ST4_LANE))] “TARGET_SIMD” “st4\t{%S1. - %V1.}[%2], %0” [(set_attr “type” “neon_store4_one_lane”)] )
;; Expander for the four-register structure store.  On little-endian it
;; emits aarch64_simd_st4 directly.  On big-endian it first applies
;; aarch64_rev_reglistxi (mask from aarch64_reverse_mask) into a temporary
;; XImode register and stores that.
(define_expand “vec_store_lanesxi” [(set (match_operand:XI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:XI [(match_operand:XI 1 “register_operand” “w”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST4))] “TARGET_SIMD” { if (BYTES_BIG_ENDIAN) { rtx tmp = gen_reg_rtx (XImode); rtx mask = aarch64_reverse_mask (mode); emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask)); emit_insn (gen_aarch64_simd_st4 (operands[0], tmp)); } else emit_insn (gen_aarch64_simd_st4 (operands[0], operands[1])); DONE; })
;; Applies the V16QI mask in operand 2 to every register of a VSTRUCT
;; register list (UNSPEC_REV_REGLIST).  The output is earlyclobber ("=&w").
;; After reload the insn is split into one aarch64_tbl1v16qi per register,
;; with the register count computed as GET_MODE_SIZE / UNITS_PER_VREG.
(define_insn_and_split “aarch64_rev_reglist” [(set (match_operand:VSTRUCT 0 “register_operand” “=&w”) (unspec:VSTRUCT [(match_operand:VSTRUCT 1 “register_operand” “w”) (match_operand:V16QI 2 “register_operand” “w”)] UNSPEC_REV_REGLIST))] “TARGET_SIMD” “#” “&& reload_completed” [(const_int 0)] { int i; int nregs = GET_MODE_SIZE (mode) / UNITS_PER_VREG; for (i = 0; i < nregs; i++) { rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i); rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i); emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2])); } DONE; } [(set_attr “type” “neon_tbl1_q”) (set_attr “length” “<insn_count>”)] )
;; Reload patterns for AdvSIMD register list operands.
;; Move expander for the VSTRUCT modes.  While pseudos can still be created,
;; a move whose destination is not a REG has its source forced into a
;; register; otherwise the operands are left untouched.
(define_expand “mov” [(set (match_operand:VSTRUCT 0 “nonimmediate_operand” "") (match_operand:VSTRUCT 1 “general_operand” ""))] “TARGET_SIMD” { if (can_create_pseudo_p ()) { if (GET_CODE (operands[0]) != REG) operands[1] = force_reg (mode, operands[1]); } })
;; Little-endian move of a VSTRUCT register list.  Three alternatives:
;;   0: register <- register  (emitted as "#", i.e. left to a splitter;
;;      its type is "multiple")
;;   1: memory   <- register  (st1 of the whole list)
;;   2: register <- memory    (ld1 of the whole list)
;; At least one operand must be a register.  The template needs exactly one
;; line per constraint alternative, so the reg-reg "#" entry is required.
(define_insn “*aarch64_mov”
  [(set (match_operand:VSTRUCT 0 “aarch64_simd_nonimmediate_operand” “=w,Utv,w”)
        (match_operand:VSTRUCT 1 “aarch64_simd_general_operand” " w,w,Utv"))]
  “TARGET_SIMD && !BYTES_BIG_ENDIAN && (register_operand (operands[0], mode) || register_operand (operands[1], mode))”
  "@
   #
   st1\t{%S1.16b - %1.16b}, %0
   ld1\t{%S0.16b - %0.16b}, %1"
  [(set_attr “type” “multiple,neon_storereg_q,neon_loadreg_q”)
   (set (attr “length”) (symbol_ref “aarch64_simd_attr_length_move (insn)”))]
)
;; ld1 of a single VALLDI register from a Utv memory operand, kept as
;; UNSPEC_LD1 rather than a plain move.
(define_insn “aarch64_be_ld1” [(set (match_operand:VALLDI 0 “register_operand” “=w”) (unspec:VALLDI [(match_operand:VALLDI 1 “aarch64_simd_struct_operand” “Utv”)] UNSPEC_LD1))] “TARGET_SIMD” “ld1\t{%0}, %1” [(set_attr “type” “neon_load1_1reg”)] )
;; st1 of a single VALLDI register to a Utv memory operand, kept as
;; UNSPEC_ST1 rather than a plain move.
(define_insn “aarch64_be_st1” [(set (match_operand:VALLDI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:VALLDI [(match_operand:VALLDI 1 “register_operand” “w”)] UNSPEC_ST1))] “TARGET_SIMD” “st1\t{%1}, %0” [(set_attr “type” “neon_store1_1reg”)] )
;; Big-endian OImode move.  Three alternatives:
;;   0: register <- register  (emitted as "#", i.e. left to a splitter;
;;      its type is "multiple")
;;   1: memory   <- register  (stp of the two Q registers)
;;   2: register <- memory    (ldp of the two Q registers)
;; At least one operand must be a register.  The template needs exactly one
;; line per constraint alternative, so the reg-reg "#" entry is required.
(define_insn “*aarch64_be_movoi”
  [(set (match_operand:OI 0 “nonimmediate_operand” “=w,m,w”)
        (match_operand:OI 1 “general_operand” " w,w,m"))]
  “TARGET_SIMD && BYTES_BIG_ENDIAN && (register_operand (operands[0], OImode) || register_operand (operands[1], OImode))”
  "@
   #
   stp\t%q1, %R1, %0
   ldp\t%q0, %R0, %1"
  [(set_attr “type” “multiple,neon_store2_2reg_q,neon_load2_2reg_q”)
   (set (attr “length”) (symbol_ref “aarch64_simd_attr_length_move (insn)”))]
)
;; Big-endian CImode move.  Every alternative is emitted as "#", so the insn
;; must be split before output; memory alternatives use the "o" constraint.
;; At least one operand must be a register.
(define_insn “*aarch64_be_movci” [(set (match_operand:CI 0 “nonimmediate_operand” “=w,o,w”) (match_operand:CI 1 “general_operand” " w,w,o"))] “TARGET_SIMD && BYTES_BIG_ENDIAN && (register_operand (operands[0], CImode) || register_operand (operands[1], CImode))” “#” [(set_attr “type” “multiple”) (set (attr “length”) (symbol_ref “aarch64_simd_attr_length_move (insn)”))] )
;; Big-endian XImode move.  Every alternative is emitted as "#", so the insn
;; must be split before output; memory alternatives use the "o" constraint.
;; At least one operand must be a register.
(define_insn “*aarch64_be_movxi” [(set (match_operand:XI 0 “nonimmediate_operand” “=w,o,w”) (match_operand:XI 1 “general_operand” " w,w,o"))] “TARGET_SIMD && BYTES_BIG_ENDIAN && (register_operand (operands[0], XImode) || register_operand (operands[1], XImode))” “#” [(set_attr “type” “multiple”) (set (attr “length”) (symbol_ref “aarch64_simd_attr_length_move (insn)”))] )
;; After reload, split an OImode register-to-register move into two TImode
;; moves via aarch64_simd_emit_reg_reg_move.
(define_split [(set (match_operand:OI 0 “register_operand”) (match_operand:OI 1 “register_operand”))] “TARGET_SIMD && reload_completed” [(const_int 0)] { aarch64_simd_emit_reg_reg_move (operands, TImode, 2); DONE; })
;; After reload, split a CImode move.
;;   * register <- register: three TImode moves via
;;     aarch64_simd_emit_reg_reg_move.
;;   * otherwise, on big-endian: an OImode move of the subreg at byte 0
;;     followed by a V16QI move of the TImode subreg at byte 32.
;;   * otherwise (little-endian, not reg-reg): FAIL, leaving the insn alone.
(define_split [(set (match_operand:CI 0 “nonimmediate_operand”) (match_operand:CI 1 “general_operand”))] “TARGET_SIMD && reload_completed” [(const_int 0)] { if (register_operand (operands[0], CImode) && register_operand (operands[1], CImode)) { aarch64_simd_emit_reg_reg_move (operands, TImode, 3); DONE; } else if (BYTES_BIG_ENDIAN) { emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0), simplify_gen_subreg (OImode, operands[1], CImode, 0)); emit_move_insn (gen_lowpart (V16QImode, simplify_gen_subreg (TImode, operands[0], CImode, 32)), gen_lowpart (V16QImode, simplify_gen_subreg (TImode, operands[1], CImode, 32))); DONE; } else FAIL; })
;; After reload, split an XImode move.
;;   * register <- register: four TImode moves via
;;     aarch64_simd_emit_reg_reg_move.
;;   * otherwise, on big-endian: two OImode moves, of the subregs at byte 0
;;     and byte 32.
;;   * otherwise (little-endian, not reg-reg): FAIL, leaving the insn alone.
(define_split [(set (match_operand:XI 0 “nonimmediate_operand”) (match_operand:XI 1 “general_operand”))] “TARGET_SIMD && reload_completed” [(const_int 0)] { if (register_operand (operands[0], XImode) && register_operand (operands[1], XImode)) { aarch64_simd_emit_reg_reg_move (operands, TImode, 4); DONE; } else if (BYTES_BIG_ENDIAN) { emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0), simplify_gen_subreg (OImode, operands[1], XImode, 0)); emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32), simplify_gen_subreg (OImode, operands[1], XImode, 32)); DONE; } else FAIL; })
;; Expander taking an address in a DI register (operand 1): wraps it in a
;; MEM of mode <V_TWO_ELEM> and emits aarch64_simd_ld2r into operand 0.
(define_expand “aarch64_ld2r” [(match_operand:OI 0 “register_operand” “=w”) (match_operand:DI 1 “register_operand” “w”) (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] “TARGET_SIMD” { machine_mode mode = <V_TWO_ELEM>mode; rtx mem = gen_rtx_MEM (mode, operands[1]);
emit_insn (gen_aarch64_simd_ld2r (operands[0], mem)); DONE; })
;; Expander taking an address in a DI register (operand 1): wraps it in a
;; MEM of mode <V_THREE_ELEM> and emits aarch64_simd_ld3r into operand 0.
(define_expand “aarch64_ld3r” [(match_operand:CI 0 “register_operand” “=w”) (match_operand:DI 1 “register_operand” “w”) (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] “TARGET_SIMD” { machine_mode mode = <V_THREE_ELEM>mode; rtx mem = gen_rtx_MEM (mode, operands[1]);
emit_insn (gen_aarch64_simd_ld3r (operands[0], mem)); DONE; })
;; Expander taking an address in a DI register (operand 1): wraps it in a
;; MEM of mode <V_FOUR_ELEM> and emits aarch64_simd_ld4r into operand 0.
(define_expand “aarch64_ld4r” [(match_operand:XI 0 “register_operand” “=w”) (match_operand:DI 1 “register_operand” “w”) (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] “TARGET_SIMD” { machine_mode mode = <V_FOUR_ELEM>mode; rtx mem = gen_rtx_MEM (mode, operands[1]);
emit_insn (gen_aarch64_simd_ld4r (operands[0],mem)); DONE; })
;; ld2 of two VD-mode values from a TImode memory operand.  The RTL
;; describes the OImode result as a subreg of nested vec_concats in which
;; each loaded VD value (UNSPEC_LD2) is paired with a zero vector.
(define_insn “aarch64_ld2_dreg” [(set (match_operand:OI 0 “register_operand” “=w”) (subreg:OI (vec_concat: (vec_concat: (unspec:VD [(match_operand:TI 1 “aarch64_simd_struct_operand” “Utv”)] UNSPEC_LD2) (vec_duplicate:VD (const_int 0))) (vec_concat: (unspec:VD [(match_dup 1)] UNSPEC_LD2) (vec_duplicate:VD (const_int 0)))) 0))] “TARGET_SIMD” “ld2\t{%S0. - %T0.}, %1” [(set_attr “type” “neon_load2_2reg”)] )
;; DX-mode variant of the two-register D load: same RTL shape as the VD
;; pattern but with scalar (const_int 0) padding, and emitted as ld1 with
;; .1d register names rather than ld2.
(define_insn “aarch64_ld2_dreg” [(set (match_operand:OI 0 “register_operand” “=w”) (subreg:OI (vec_concat: (vec_concat: (unspec:DX [(match_operand:TI 1 “aarch64_simd_struct_operand” “Utv”)] UNSPEC_LD2) (const_int 0)) (vec_concat: (unspec:DX [(match_dup 1)] UNSPEC_LD2) (const_int 0))) 0))] “TARGET_SIMD” “ld1\t{%S0.1d - %T0.1d}, %1” [(set_attr “type” “neon_load1_2reg”)] )
;; ld3 of three VD-mode values from an EImode memory operand.  The RTL
;; describes the CImode result as a subreg of nested vec_concats in which
;; each loaded VD value (UNSPEC_LD3) is paired with a zero vector.
(define_insn “aarch64_ld3_dreg” [(set (match_operand:CI 0 “register_operand” “=w”) (subreg:CI (vec_concat: (vec_concat: (vec_concat: (unspec:VD [(match_operand:EI 1 “aarch64_simd_struct_operand” “Utv”)] UNSPEC_LD3) (vec_duplicate:VD (const_int 0))) (vec_concat: (unspec:VD [(match_dup 1)] UNSPEC_LD3) (vec_duplicate:VD (const_int 0)))) (vec_concat: (unspec:VD [(match_dup 1)] UNSPEC_LD3) (vec_duplicate:VD (const_int 0)))) 0))] “TARGET_SIMD” “ld3\t{%S0. - %U0.}, %1” [(set_attr “type” “neon_load3_3reg”)] )
;; DX-mode variant of the three-register D load: same RTL shape as the VD
;; pattern but with scalar (const_int 0) padding, and emitted as ld1 with
;; .1d register names rather than ld3.
(define_insn “aarch64_ld3_dreg” [(set (match_operand:CI 0 “register_operand” “=w”) (subreg:CI (vec_concat: (vec_concat: (vec_concat: (unspec:DX [(match_operand:EI 1 “aarch64_simd_struct_operand” “Utv”)] UNSPEC_LD3) (const_int 0)) (vec_concat: (unspec:DX [(match_dup 1)] UNSPEC_LD3) (const_int 0))) (vec_concat: (unspec:DX [(match_dup 1)] UNSPEC_LD3) (const_int 0))) 0))] “TARGET_SIMD” “ld1\t{%S0.1d - %U0.1d}, %1” [(set_attr “type” “neon_load1_3reg”)] )
;; ld4 of four VD-mode values from an OImode memory operand.  The RTL
;; describes the XImode result as a subreg of nested vec_concats in which
;; each loaded VD value (UNSPEC_LD4) is paired with a zero vector.
(define_insn “aarch64_ld4_dreg” [(set (match_operand:XI 0 “register_operand” “=w”) (subreg:XI (vec_concat: (vec_concat: (vec_concat: (unspec:VD [(match_operand:OI 1 “aarch64_simd_struct_operand” “Utv”)] UNSPEC_LD4) (vec_duplicate:VD (const_int 0))) (vec_concat: (unspec:VD [(match_dup 1)] UNSPEC_LD4) (vec_duplicate:VD (const_int 0)))) (vec_concat: (vec_concat: (unspec:VD [(match_dup 1)] UNSPEC_LD4) (vec_duplicate:VD (const_int 0))) (vec_concat: (unspec:VD [(match_dup 1)] UNSPEC_LD4) (vec_duplicate:VD (const_int 0))))) 0))] “TARGET_SIMD” “ld4\t{%S0. - %V0.}, %1” [(set_attr “type” “neon_load4_4reg”)] )
;; DX-mode variant of the four-register D load: same RTL shape as the VD
;; pattern but with scalar (const_int 0) padding, and emitted as ld1 with
;; .1d register names rather than ld4.
(define_insn “aarch64_ld4_dreg” [(set (match_operand:XI 0 “register_operand” “=w”) (subreg:XI (vec_concat: (vec_concat: (vec_concat: (unspec:DX [(match_operand:OI 1 “aarch64_simd_struct_operand” “Utv”)] UNSPEC_LD4) (const_int 0)) (vec_concat: (unspec:DX [(match_dup 1)] UNSPEC_LD4) (const_int 0))) (vec_concat: (vec_concat: (unspec:DX [(match_dup 1)] UNSPEC_LD4) (const_int 0)) (vec_concat: (unspec:DX [(match_dup 1)] UNSPEC_LD4) (const_int 0)))) 0))] “TARGET_SIMD” “ld1\t{%S0.1d - %V0.1d}, %1” [(set_attr “type” “neon_load1_4reg”)] )
;; Structure-load expander for the VDC modes.  Takes the address in a DI
;; register (operand 1), wraps it in a MEM of the VSTRUCT_DREG mode and
;; emits the matching *_dreg load pattern into operand 0.
(define_expand “aarch64_ldVSTRUCT:nregsVDC:mode” [(match_operand:VSTRUCT 0 “register_operand” “=w”) (match_operand:DI 1 “register_operand” “r”) (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] “TARGET_SIMD” { machine_mode mode = VSTRUCT:VSTRUCT_DREGmode; rtx mem = gen_rtx_MEM (mode, operands[1]);
emit_insn (gen_aarch64_ldVSTRUCT:nregsVDC:mode_dreg (operands[0], mem)); DONE; })
;; Single-vector load expander for the VALL modes.  Wraps the DI address
;; (operand 1) in a MEM of the vector mode; on big-endian it emits the
;; aarch64_be_ld1 pattern, otherwise an ordinary emit_move_insn.
(define_expand “aarch64_ld1VALL:mode” [(match_operand:VALL 0 “register_operand”) (match_operand:DI 1 “register_operand”)] “TARGET_SIMD” { machine_mode mode = VALL:MODEmode; rtx mem = gen_rtx_MEM (mode, operands[1]);
if (BYTES_BIG_ENDIAN) emit_insn (gen_aarch64_be_ld1VALL:mode (operands[0], mem)); else emit_move_insn (operands[0], mem); DONE; })
;; Structure-load expander for the VQ modes.  Wraps the DI address
;; (operand 1) in a MEM of the VSTRUCT mode and forwards to the
;; corresponding vec_load_lanes expander.
(define_expand “aarch64_ldVSTRUCT:nregsVQ:mode” [(match_operand:VSTRUCT 0 “register_operand” “=w”) (match_operand:DI 1 “register_operand” “r”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] “TARGET_SIMD” { machine_mode mode = VSTRUCT:MODEmode; rtx mem = gen_rtx_MEM (mode, operands[1]);
emit_insn (gen_vec_load_lanesVSTRUCT:modeVQ:mode (operands[0], mem)); DONE; })
;; Lane-load expander (two registers).  Wraps the DI address (operand 1) in
;; a <V_TWO_ELEM> MEM, range-checks the lane index (operand 3) with
;; aarch64_simd_lane_bounds, then emits aarch64_vec_load_lanesoi_lane with
;; operand 2 as the incoming register list.
(define_expand “aarch64_ld2_lane” [(match_operand:OI 0 “register_operand” “=w”) (match_operand:DI 1 “register_operand” “w”) (match_operand:OI 2 “register_operand” “0”) (match_operand:SI 3 “immediate_operand” “i”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] “TARGET_SIMD” { machine_mode mode = <V_TWO_ELEM>mode; rtx mem = gen_rtx_MEM (mode, operands[1]);
aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode), NULL); emit_insn (gen_aarch64_vec_load_lanesoi_lane (operands[0], mem, operands[2], operands[3])); DONE; })
;; Lane-load expander (three registers).  Wraps the DI address (operand 1)
;; in a <V_THREE_ELEM> MEM, range-checks the lane index (operand 3) with
;; aarch64_simd_lane_bounds, then emits aarch64_vec_load_lanesci_lane with
;; operand 2 as the incoming register list.
(define_expand “aarch64_ld3_lane” [(match_operand:CI 0 “register_operand” “=w”) (match_operand:DI 1 “register_operand” “w”) (match_operand:CI 2 “register_operand” “0”) (match_operand:SI 3 “immediate_operand” “i”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] “TARGET_SIMD” { machine_mode mode = <V_THREE_ELEM>mode; rtx mem = gen_rtx_MEM (mode, operands[1]);
aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode), NULL); emit_insn (gen_aarch64_vec_load_lanesci_lane (operands[0], mem, operands[2], operands[3])); DONE; })
;; Lane-load expander (four registers).  Wraps the DI address (operand 1)
;; in a <V_FOUR_ELEM> MEM, range-checks the lane index (operand 3) with
;; aarch64_simd_lane_bounds, then emits aarch64_vec_load_lanesxi_lane with
;; operand 2 as the incoming register list.
(define_expand “aarch64_ld4_lane” [(match_operand:XI 0 “register_operand” “=w”) (match_operand:DI 1 “register_operand” “w”) (match_operand:XI 2 “register_operand” “0”) (match_operand:SI 3 “immediate_operand” “i”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] “TARGET_SIMD” { machine_mode mode = <V_FOUR_ELEM>mode; rtx mem = gen_rtx_MEM (mode, operands[1]);
aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (mode), NULL); emit_insn (gen_aarch64_vec_load_lanesxi_lane (operands[0], mem, operands[2], operands[3])); DONE; })
;; Expanders for builtins to extract vector registers from large ;; opaque integer modes.
;; D-register list.
;; Extracts one VDC-mode value from a VSTRUCT register list.  Operand 2
;; selects the part; the byte offset into the structure is part * 16.  The
;; value is first moved as a VDBL-mode subreg into a temporary, then its
;; low part is moved to operand 0.
(define_expand “aarch64_get_dregVSTRUCT:modeVDC:mode” [(match_operand:VDC 0 “register_operand” “=w”) (match_operand:VSTRUCT 1 “register_operand” “w”) (match_operand:SI 2 “immediate_operand” “i”)] “TARGET_SIMD” { int part = INTVAL (operands[2]); rtx temp = gen_reg_rtx (VDC:VDBLmode); int offset = part * 16;
emit_move_insn (temp, gen_rtx_SUBREG (VDC:VDBLmode, operands[1], offset)); emit_move_insn (operands[0], gen_lowpart (VDC:MODEmode, temp)); DONE; })
;; Q-register list.
;; Extracts one VQ-mode register from a VSTRUCT register list.  Operand 2
;; selects the part; the value is moved out as a subreg at byte offset
;; part * 16.
(define_expand “aarch64_get_qregVSTRUCT:modeVQ:mode” [(match_operand:VQ 0 “register_operand” “=w”) (match_operand:VSTRUCT 1 “register_operand” “w”) (match_operand:SI 2 “immediate_operand” “i”)] “TARGET_SIMD” { int part = INTVAL (operands[2]); int offset = part * 16;
emit_move_insn (operands[0], gen_rtx_SUBREG (VQ:MODEmode, operands[1], offset)); DONE; })
;; Permuted-store expanders for neon intrinsics.
;; Permute instructions
;; vec_perm support
;; Constant-selector permute over the VALL modes.  Delegates to
;; aarch64_expand_vec_perm_const; the expansion is DONE when that returns
;; nonzero and FAILs otherwise.
(define_expand “vec_perm_const” [(match_operand:VALL 0 “register_operand”) (match_operand:VALL 1 “register_operand”) (match_operand:VALL 2 “register_operand”) (match_operand:<V_cmp_result> 3)] “TARGET_SIMD” { if (aarch64_expand_vec_perm_const (operands[0], operands[1], operands[2], operands[3])) DONE; else FAIL; })
;; Variable-selector permute over the VB modes (selector in register
;; operand 3).  Delegates unconditionally to aarch64_expand_vec_perm.
(define_expand “vec_perm” [(match_operand:VB 0 “register_operand”) (match_operand:VB 1 “register_operand”) (match_operand:VB 2 “register_operand”) (match_operand:VB 3 “register_operand”)] “TARGET_SIMD” { aarch64_expand_vec_perm (operands[0], operands[1], operands[2], operands[3]); DONE; })
;; Single-table tbl (UNSPEC_TBL): operand 1 is the V16QI table (printed as
;; {%1.16b}), operand 2 the VB-mode index vector.
(define_insn “aarch64_tbl1” [(set (match_operand:VB 0 “register_operand” “=w”) (unspec:VB [(match_operand:V16QI 1 “register_operand” “w”) (match_operand:VB 2 “register_operand” “w”)] UNSPEC_TBL))] “TARGET_SIMD” “tbl\t%0., {%1.16b}, %2.” [(set_attr “type” “neon_tbl1”)] )
;; Two source registers.
;; Two-table tbl (UNSPEC_TBL): operand 1 is an OImode register pair printed
;; as {%S1.16b - %T1.16b}, operand 2 the V16QI index vector.
(define_insn “aarch64_tbl2v16qi” [(set (match_operand:V16QI 0 “register_operand” “=w”) (unspec:V16QI [(match_operand:OI 1 “register_operand” “w”) (match_operand:V16QI 2 “register_operand” “w”)] UNSPEC_TBL))] “TARGET_SIMD” “tbl\t%0.16b, {%S1.16b - %T1.16b}, %2.16b” [(set_attr “type” “neon_tbl2_q”)] )
;; Combines two V16QI registers into one OImode value (UNSPEC_CONCAT).
;; Emitted as "#" and split after reload by aarch64_split_combinev16qi.
(define_insn_and_split “aarch64_combinev16qi” [(set (match_operand:OI 0 “register_operand” “=w”) (unspec:OI [(match_operand:V16QI 1 “register_operand” “w”) (match_operand:V16QI 2 “register_operand” “w”)] UNSPEC_CONCAT))] “TARGET_SIMD” “#” “&& reload_completed” [(const_int 0)] { aarch64_split_combinev16qi (operands); DONE; } [(set_attr “type” “multiple”)] )
;; Two-input permute instructions generated from the PERMUTE unspec
;; iterator over the VALL modes; the mnemonic is built from the perm_insn
;; and perm_hilo attributes of that iterator.
(define_insn “aarch64_PERMUTE:perm_insnPERMUTE:perm_hilo” [(set (match_operand:VALL 0 “register_operand” “=w”) (unspec:VALL [(match_operand:VALL 1 “register_operand” “w”) (match_operand:VALL 2 “register_operand” “w”)] PERMUTE))] “TARGET_SIMD” “PERMUTE:perm_insnPERMUTE:perm_hilo\t%0., %1., %2.” [(set_attr “type” “neon_permute”)] )
;; ext over the VALL modes (UNSPEC_EXT).
;; Note immediate (third) operand is lane index not byte index: the output
;; code multiplies it by the element size (GET_MODE_SIZE of the inner mode)
;; before printing, because the instruction takes a byte offset.
;; The definition must start on its own line; if it shares a line with a
;; ";;" comment the whole pattern is treated as comment text.
(define_insn “aarch64_ext”
  [(set (match_operand:VALL 0 “register_operand” “=w”)
        (unspec:VALL
          [(match_operand:VALL 1 “register_operand” “w”)
           (match_operand:VALL 2 “register_operand” “w”)
           (match_operand:SI 3 “immediate_operand” “i”)]
          UNSPEC_EXT))]
  “TARGET_SIMD”
  {
    operands[3] = GEN_INT (INTVAL (operands[3])
                           * GET_MODE_SIZE (GET_MODE_INNER (mode)));
    return “ext\t%0., %1., %2., #%3”;
  }
  [(set_attr “type” “neon_ext”)]
)
;; Single-input rev instructions generated from the REVERSE unspec iterator
;; over the VALL modes; the mnemonic suffix comes from the rev_op attribute.
(define_insn “aarch64_revREVERSE:rev_op” [(set (match_operand:VALL 0 “register_operand” “=w”) (unspec:VALL [(match_operand:VALL 1 “register_operand” “w”)] REVERSE))] “TARGET_SIMD” “revREVERSE:rev_op\t%0., %1.” [(set_attr “type” “neon_rev”)] )
;; st2 of the VD-mode halves of an OImode register list to a TImode memory
;; operand (UNSPEC_ST2); the dummy unspec carries the VD mode.
(define_insn “aarch64_st2_dreg” [(set (match_operand:TI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:TI [(match_operand:OI 1 “register_operand” “w”) (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST2))] “TARGET_SIMD” “st2\t{%S1. - %T1.}, %0” [(set_attr “type” “neon_store2_2reg”)] )
;; DX-mode variant of the two-register D store: emitted as st1 with .1d
;; register names rather than st2.
(define_insn “aarch64_st2_dreg” [(set (match_operand:TI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:TI [(match_operand:OI 1 “register_operand” “w”) (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST2))] “TARGET_SIMD” “st1\t{%S1.1d - %T1.1d}, %0” [(set_attr “type” “neon_store1_2reg”)] )
;; st3 of the VD-mode halves of a CImode register list to an EImode memory
;; operand (UNSPEC_ST3); the dummy unspec carries the VD mode.
(define_insn “aarch64_st3_dreg” [(set (match_operand:EI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:EI [(match_operand:CI 1 “register_operand” “w”) (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST3))] “TARGET_SIMD” “st3\t{%S1. - %U1.}, %0” [(set_attr “type” “neon_store3_3reg”)] )
;; DX-mode variant of the three-register D store: emitted as st1 with .1d
;; register names rather than st3.
(define_insn “aarch64_st3_dreg” [(set (match_operand:EI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:EI [(match_operand:CI 1 “register_operand” “w”) (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST3))] “TARGET_SIMD” “st1\t{%S1.1d - %U1.1d}, %0” [(set_attr “type” “neon_store1_3reg”)] )
;; st4 of the VD-mode halves of an XImode register list to an OImode memory
;; operand (UNSPEC_ST4); the dummy unspec carries the VD mode.
(define_insn “aarch64_st4_dreg” [(set (match_operand:OI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:OI [(match_operand:XI 1 “register_operand” “w”) (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST4))] “TARGET_SIMD” “st4\t{%S1. - %V1.}, %0” [(set_attr “type” “neon_store4_4reg”)] )
;; DX-mode variant of the four-register D store: emitted as st1 with .1d
;; register names rather than st4.
(define_insn “aarch64_st4_dreg” [(set (match_operand:OI 0 “aarch64_simd_struct_operand” “=Utv”) (unspec:OI [(match_operand:XI 1 “register_operand” “w”) (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST4))] “TARGET_SIMD” “st1\t{%S1.1d - %V1.1d}, %0” [(set_attr “type” “neon_store1_4reg”)] )
;; Expander for structure stores, VDC variant.
;;   operand 0: DI-mode register holding the destination address.
;;   operand 1: VSTRUCT-mode register tuple to store.
;; Wraps the address in a MEM whose mode comes from the VSTRUCT_DREG
;; attribute, then emits the matching "_dreg" store insn.  The
;; UNSPEC_VSTRUCTDUMMY entry carries no data; it only ties the pattern to
;; the VDC iterator.
(define_expand “aarch64_stVSTRUCT:nregsVDC:mode” [(match_operand:DI 0 “register_operand” “r”) (match_operand:VSTRUCT 1 “register_operand” “w”) (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] “TARGET_SIMD” { machine_mode mode = VSTRUCT:VSTRUCT_DREGmode; rtx mem = gen_rtx_MEM (mode, operands[0]);
emit_insn (gen_aarch64_stVSTRUCT:nregsVDC:mode_dreg (mem, operands[1])); DONE; })
;; Expander for structure stores, VQ variant.
;;   operand 0: DI-mode register holding the destination address.
;;   operand 1: VSTRUCT-mode register tuple to store.
;; Wraps the address in a MEM of the VSTRUCT mode itself and emits the
;; vec_store_lanes pattern for that structure/vector mode pair.
(define_expand “aarch64_stVSTRUCT:nregsVQ:mode” [(match_operand:DI 0 “register_operand” “r”) (match_operand:VSTRUCT 1 “register_operand” “w”) (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] “TARGET_SIMD” { machine_mode mode = VSTRUCT:MODEmode; rtx mem = gen_rtx_MEM (mode, operands[0]);
emit_insn (gen_vec_store_lanesVSTRUCT:modeVQ:mode (mem, operands[1])); DONE; })
;; Expander for storing one lane from each register of a two-register
;; tuple.
;;   operand 0: DI-mode register holding the destination address.
;;   operand 1: OI-mode register tuple.
;;   operand 2: SI-mode immediate lane index; it is rewritten in place
;;              with its endian-adjusted value before the store insn
;;              is emitted.
(define_expand “aarch64_st2_laneVQ:mode”
  [(match_operand:DI 0 “register_operand” “r”)
   (match_operand:OI 1 “register_operand” “w”)
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
   (match_operand:SI 2 “immediate_operand”)]
  “TARGET_SIMD”
{
  /* MODE describes the memory being written, not the source vectors.  */
  machine_mode mode = <V_TWO_ELEM>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  /* The lane index counts elements of the VQ vector, so the endian
     remapping must use the vector mode.  The local MODE above has a
     different element count and would give a wrong lane.  */
  operands[2] = GEN_INT (ENDIAN_LANE_N (<VQ:MODE>mode, INTVAL (operands[2])));

  emit_insn (gen_vec_store_lanesoi_laneVQ:mode (mem, operands[1], operands[2]));
  DONE;
})
;; Expander for storing one lane from each register of a three-register
;; tuple.
;;   operand 0: DI-mode register holding the destination address.
;;   operand 1: CI-mode register tuple.
;;   operand 2: SI-mode immediate lane index; it is rewritten in place
;;              with its endian-adjusted value before the store insn
;;              is emitted.
(define_expand “aarch64_st3_laneVQ:mode”
  [(match_operand:DI 0 “register_operand” “r”)
   (match_operand:CI 1 “register_operand” “w”)
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
   (match_operand:SI 2 “immediate_operand”)]
  “TARGET_SIMD”
{
  /* MODE describes the memory being written, not the source vectors.  */
  machine_mode mode = <V_THREE_ELEM>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  /* The lane index counts elements of the VQ vector, so the endian
     remapping must use the vector mode.  The local MODE above has a
     different element count and would give a wrong lane.  */
  operands[2] = GEN_INT (ENDIAN_LANE_N (<VQ:MODE>mode, INTVAL (operands[2])));

  emit_insn (gen_vec_store_lanesci_laneVQ:mode (mem, operands[1], operands[2]));
  DONE;
})
;; Expander for storing one lane from each register of a four-register
;; tuple.
;;   operand 0: DI-mode register holding the destination address.
;;   operand 1: XI-mode register tuple.
;;   operand 2: SI-mode immediate lane index; it is rewritten in place
;;              with its endian-adjusted value before the store insn
;;              is emitted.
(define_expand “aarch64_st4_laneVQ:mode”
  [(match_operand:DI 0 “register_operand” “r”)
   (match_operand:XI 1 “register_operand” “w”)
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
   (match_operand:SI 2 “immediate_operand”)]
  “TARGET_SIMD”
{
  /* MODE describes the memory being written, not the source vectors.  */
  machine_mode mode = <V_FOUR_ELEM>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  /* The lane index counts elements of the VQ vector, so the endian
     remapping must use the vector mode.  The local MODE above has a
     different element count and would give a wrong lane.  */
  operands[2] = GEN_INT (ENDIAN_LANE_N (<VQ:MODE>mode, INTVAL (operands[2])));

  emit_insn (gen_vec_store_lanesxi_laneVQ:mode (mem, operands[1], operands[2]));
  DONE;
})
;; Expander for a single-vector store.
;;   operand 0: DI-mode register holding the destination address.
;;   operand 1: VALL-mode register to store.
;; Builds a MEM of the vector mode at the given address.  When
;; BYTES_BIG_ENDIAN is set the dedicated aarch64_be_st1 pattern is
;; emitted; otherwise an ordinary move to memory is used.
(define_expand “aarch64_st1VALL:mode” [(match_operand:DI 0 “register_operand”) (match_operand:VALL 1 “register_operand”)] “TARGET_SIMD” { machine_mode mode = VALL:MODEmode; rtx mem = gen_rtx_MEM (mode, operands[0]);
if (BYTES_BIG_ENDIAN) emit_insn (gen_aarch64_be_st1VALL:mode (mem, operands[1])); else emit_move_insn (mem, operands[1]); DONE; })
;; Expander for builtins to insert vector registers into large ;; opaque integer modes.
;; Q-register list. We don't need a D-reg inserter as we zero ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Insert one VQ-mode vector into a VSTRUCT-mode register tuple.
;;   operand 0: destination tuple.
;;   operand 1: source tuple (constraint "0", i.e. tied to operand 0).
;;   operand 2: VQ-mode vector to insert.
;;   operand 3: immediate index of the part to replace.
;; The whole source tuple is first copied into operand 0; operand 2 is
;; then moved into a VQ-mode SUBREG of operand 0 at byte offset
;; part * 16.
;; NOTE(review): 16 is presumably the byte size of one Q register, and
;; operand 3 is not range-checked in this expander -- confirm callers
;; validate it.
(define_expand “aarch64_set_qregVSTRUCT:modeVQ:mode” [(match_operand:VSTRUCT 0 “register_operand” “+w”) (match_operand:VSTRUCT 1 “register_operand” “0”) (match_operand:VQ 2 “register_operand” “w”) (match_operand:SI 3 “immediate_operand” “i”)] “TARGET_SIMD” { int part = INTVAL (operands[3]); int offset = part * 16;
emit_move_insn (operands[0], operands[1]); emit_move_insn (gen_rtx_SUBREG (VQ:MODEmode, operands[0], offset), operands[2]); DONE; })
;; Standard pattern name vec_init.
;; Operand 0 is the VALL-mode destination register; operand 1 has no
;; mode or predicate and is passed through untouched.  All the work is
;; delegated to aarch64_expand_vector_init.
(define_expand “vec_init” [(match_operand:VALL 0 “register_operand” "") (match_operand 1 "" "")] “TARGET_SIMD” { aarch64_expand_vector_init (operands[0], operands[1]); DONE; })
;; Load one element from memory and duplicate it across a vector.
;; Operand 1 is a memory operand (predicate aarch64_simd_struct_operand,
;; constraint Utv) wrapped in vec_duplicate; operand 0 is the VALL-mode
;; destination register.  Emits "ld1r".
(define_insn “*aarch64_simd_ld1r” [(set (match_operand:VALL 0 “register_operand” “=w”) (vec_duplicate:VALL (match_operand: 1 “aarch64_simd_struct_operand” “Utv”)))] “TARGET_SIMD” “ld1r\t{%0.}, %1” [(set_attr “type” “neon_load1_all_lanes”)] )
;; Vector "frecpe" on VDQF modes, modelled as UNSPEC_FRECPE of the single
;; register input (operand 1) producing the register output (operand 0).
(define_insn “aarch64_frecpe” [(set (match_operand:VDQF 0 “register_operand” “=w”) (unspec:VDQF [(match_operand:VDQF 1 “register_operand” “w”)] UNSPEC_FRECPE))] “TARGET_SIMD” “frecpe\t%0., %1.” [(set_attr “type” “neon_fp_recpe_”)] )
;; Scalar "frecp" family on GPF modes, one pattern per member of the
;; FRECP unspec iterator.  The mnemonic and the "type" attribute are both
;; built from the iterator's frecp_suffix attribute.  Operand 1 is the
;; register input, operand 0 the register output.
(define_insn “aarch64_frecpFRECP:frecp_suffix” [(set (match_operand:GPF 0 “register_operand” “=w”) (unspec:GPF [(match_operand:GPF 1 “register_operand” “w”)] FRECP))] “TARGET_SIMD” “frecpFRECP:frecp_suffix\t%0, %1” [(set_attr “type” “neon_fp_recpFRECP:frecp_suffix_GPF:VetypeGPF:q”)] )
;; "frecps" on VALLF modes, modelled as UNSPEC_FRECPS of two register
;; inputs (operands 1 and 2) producing the register output (operand 0).
(define_insn “aarch64_frecps” [(set (match_operand:VALLF 0 “register_operand” “=w”) (unspec:VALLF [(match_operand:VALLF 1 “register_operand” “w”) (match_operand:VALLF 2 “register_operand” “w”)] UNSPEC_FRECPS))] “TARGET_SIMD” “frecps\t%0, %1, %2” [(set_attr “type” “neon_fp_recps_”)] )
;; "urecpe" on VDQ_SI modes, modelled as UNSPEC_URECPE of the single
;; register input (operand 1) producing the register output (operand 0).
;; NOTE(review): the "type" attribute uses the neon_fp_recpe_ prefix even
;; though the operand modes are integer -- confirm this scheduling class
;; is intended.
(define_insn “aarch64_urecpe” [(set (match_operand:VDQ_SI 0 “register_operand” “=w”) (unspec:VDQ_SI [(match_operand:VDQ_SI 1 “register_operand” “w”)] UNSPEC_URECPE))] “TARGET_SIMD” “urecpe\t%0., %1.” [(set_attr “type” “neon_fp_recpe_”)])
;; Standard pattern name vec_extract.
;; Operand 0 is the destination (predicate
;; aarch64_simd_nonimmediate_operand), operand 1 the VALL-mode source
;; register and operand 2 the SI-mode immediate lane index.  All three
;; are forwarded unchanged to the aarch64_get_lane pattern.
(define_expand “vec_extract” [(match_operand: 0 “aarch64_simd_nonimmediate_operand” "") (match_operand:VALL 1 “register_operand” "") (match_operand:SI 2 “immediate_operand” "")] “TARGET_SIMD” { emit_insn (gen_aarch64_get_lane (operands[0], operands[1], operands[2])); DONE; })
;; aes
;; AES round instructions, one pattern per member of the CRYPTO_AES
;; unspec iterator.  Operand 1 is tied to the destination (constraint
;; "0"), so the template names only %0 and %2.  Requires both
;; TARGET_SIMD and TARGET_CRYPTO.
(define_insn “aarch64_crypto_aes<aes_op>v16qi” [(set (match_operand:V16QI 0 “register_operand” “=w”) (unspec:V16QI [(match_operand:V16QI 1 “register_operand” “0”) (match_operand:V16QI 2 “register_operand” “w”)] CRYPTO_AES))] “TARGET_SIMD && TARGET_CRYPTO” “aes<aes_op>\t%0.16b, %2.16b” [(set_attr “type” “crypto_aese”)] )
;; AES mix-columns style instructions, one pattern per member of the
;; CRYPTO_AESMC unspec iterator.  Plain one-input form: operand 1 is an
;; independent V16QI source register, operand 0 the destination.
;; Requires both TARGET_SIMD and TARGET_CRYPTO.
(define_insn “aarch64_crypto_aes<aesmc_op>v16qi” [(set (match_operand:V16QI 0 “register_operand” “=w”) (unspec:V16QI [(match_operand:V16QI 1 “register_operand” “w”)] CRYPTO_AESMC))] “TARGET_SIMD && TARGET_CRYPTO” “aes<aesmc_op>\t%0.16b, %1.16b” [(set_attr “type” “crypto_aesmc”)] )
;; sha1
;; "sha1h" on SI-mode values held in "w"-constraint registers; both
;; operands are printed with the %s modifier.  Requires both TARGET_SIMD
;; and TARGET_CRYPTO.
(define_insn “aarch64_crypto_sha1hsi” [(set (match_operand:SI 0 “register_operand” “=w”) (unspec:SI [(match_operand:SI 1 “register_operand” “w”)] UNSPEC_SHA1H))] “TARGET_SIMD && TARGET_CRYPTO” “sha1h\t%s0, %s1” [(set_attr “type” “crypto_sha1_fast”)] )
;; "sha1su1" on V4SI.  Operand 1 is tied to the destination (constraint
;; "0"), so the template names only %0 and %2.  Requires both
;; TARGET_SIMD and TARGET_CRYPTO.
(define_insn “aarch64_crypto_sha1su1v4si” [(set (match_operand:V4SI 0 “register_operand” “=w”) (unspec:V4SI [(match_operand:V4SI 1 “register_operand” “0”) (match_operand:V4SI 2 “register_operand” “w”)] UNSPEC_SHA1SU1))] “TARGET_SIMD && TARGET_CRYPTO” “sha1su1\t%0.4s, %2.4s” [(set_attr “type” “crypto_sha1_fast”)] )
;; SHA1 hash-update instructions, one pattern per member of the
;; CRYPTO_SHA1 unspec iterator.  Operand 1 (V4SI) is tied to the
;; destination, operand 2 is an SI-mode value printed with %s, and
;; operand 3 is a V4SI vector.  Requires both TARGET_SIMD and
;; TARGET_CRYPTO.
(define_insn “aarch64_crypto_sha1<sha1_op>v4si” [(set (match_operand:V4SI 0 “register_operand” “=w”) (unspec:V4SI [(match_operand:V4SI 1 “register_operand” “0”) (match_operand:SI 2 “register_operand” “w”) (match_operand:V4SI 3 “register_operand” “w”)] CRYPTO_SHA1))] “TARGET_SIMD && TARGET_CRYPTO” “sha1<sha1_op>\t%q0, %s2, %3.4s” [(set_attr “type” “crypto_sha1_slow”)] )
;; "sha1su0" on V4SI.  Operand 1 is tied to the destination (constraint
;; "0"); operands 2 and 3 are independent V4SI source registers.
;; Requires both TARGET_SIMD and TARGET_CRYPTO.
(define_insn “aarch64_crypto_sha1su0v4si” [(set (match_operand:V4SI 0 “register_operand” “=w”) (unspec:V4SI [(match_operand:V4SI 1 “register_operand” “0”) (match_operand:V4SI 2 “register_operand” “w”) (match_operand:V4SI 3 “register_operand” “w”)] UNSPEC_SHA1SU0))] “TARGET_SIMD && TARGET_CRYPTO” “sha1su0\t%0.4s, %2.4s, %3.4s” [(set_attr “type” “crypto_sha1_xor”)] )
;; sha256
;; SHA256 hash-update instructions, one pattern per member of the
;; CRYPTO_SHA256 unspec iterator.  Operand 1 is tied to the destination;
;; operands 0 and 2 are printed with the %q modifier and operand 3 with
;; the .4s arrangement.  Requires both TARGET_SIMD and TARGET_CRYPTO.
(define_insn “aarch64_crypto_sha256h<sha256_op>v4si” [(set (match_operand:V4SI 0 “register_operand” “=w”) (unspec:V4SI [(match_operand:V4SI 1 “register_operand” “0”) (match_operand:V4SI 2 “register_operand” “w”) (match_operand:V4SI 3 “register_operand” “w”)] CRYPTO_SHA256))] “TARGET_SIMD && TARGET_CRYPTO” “sha256h<sha256_op>\t%q0, %q2, %3.4s” [(set_attr “type” “crypto_sha256_slow”)] )
;; "sha256su0" on V4SI.  Operand 1 is tied to the destination (constraint
;; "0"), so the template names only %0 and %2.  Requires both
;; TARGET_SIMD and TARGET_CRYPTO.
(define_insn “aarch64_crypto_sha256su0v4si”
  [(set (match_operand:V4SI 0 “register_operand” “=w”)
        (unspec:V4SI [(match_operand:V4SI 1 “register_operand” “0”)
                      (match_operand:V4SI 2 “register_operand” “w”)]
         UNSPEC_SHA256SU0))]
  “TARGET_SIMD && TARGET_CRYPTO”
  “sha256su0\t%0.4s, %2.4s”
  [(set_attr “type” “crypto_sha256_fast”)]
)
;; "sha256su1" on V4SI.  Operand 1 is tied to the destination (constraint
;; "0"); operands 2 and 3 are independent V4SI source registers.
;; Requires both TARGET_SIMD and TARGET_CRYPTO.
(define_insn “aarch64_crypto_sha256su1v4si”
  [(set (match_operand:V4SI 0 “register_operand” “=w”)
        (unspec:V4SI [(match_operand:V4SI 1 “register_operand” “0”)
                      (match_operand:V4SI 2 “register_operand” “w”)
                      (match_operand:V4SI 3 “register_operand” “w”)]
         UNSPEC_SHA256SU1))]
  “TARGET_SIMD && TARGET_CRYPTO”
  “sha256su1\t%0.4s, %2.4s, %3.4s”
  [(set_attr “type” “crypto_sha256_slow”)]
)
;; pmull
;; "pmull" taking two DI-mode register inputs (printed as .1d) and
;; producing a TI-mode result (printed as .1q), modelled as UNSPEC_PMULL.
;; Requires both TARGET_SIMD and TARGET_CRYPTO.
(define_insn “aarch64_crypto_pmulldi” [(set (match_operand:TI 0 “register_operand” “=w”) (unspec:TI [(match_operand:DI 1 “register_operand” “w”) (match_operand:DI 2 “register_operand” “w”)] UNSPEC_PMULL))] “TARGET_SIMD && TARGET_CRYPTO” “pmull\t%0.1q, %1.1d, %2.1d” [(set_attr “type” “neon_mul_d_long”)] )
;; "pmull2" taking two V2DI-mode register inputs (printed as .2d) and
;; producing a TI-mode result (printed as .1q), modelled as
;; UNSPEC_PMULL2.  Requires both TARGET_SIMD and TARGET_CRYPTO.
(define_insn “aarch64_crypto_pmullv2di” [(set (match_operand:TI 0 “register_operand” “=w”) (unspec:TI [(match_operand:V2DI 1 “register_operand” “w”) (match_operand:V2DI 2 “register_operand” “w”)] UNSPEC_PMULL2))] “TARGET_SIMD && TARGET_CRYPTO” “pmull2\t%0.1q, %1.2d, %2.2d” [(set_attr “type” “neon_mul_d_long”)] )