;; Machine description for AArch64 SVE2. ;; Copyright (C) 2019-2021 Free Software Foundation, Inc. ;; Contributed by ARM Ltd. ;; ;; This file is part of GCC. ;; ;; GCC is free software; you can redistribute it and/or modify it ;; under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 3, or (at your option) ;; any later version. ;; ;; GCC is distributed in the hope that it will be useful, but ;; WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;; General Public License for more details. ;; ;; You should have received a copy of the GNU General Public License ;; along with GCC; see the file COPYING3. If not see ;; http://www.gnu.org/licenses/.

;; The file is organised into the following sections (search for the full ;; line): ;; ;; == Moves ;; ---- Non-temporal gather loads ;; ---- Non-temporal scatter stores ;; ;; == Uniform binary arithmetic ;; ---- [INT] Multiplication ;; ---- [INT] Scaled high-part multiplication ;; ---- [INT] General binary arithmetic that maps to unspecs ;; ---- [INT] Saturating binary arithmetic ;; ---- [INT] Saturating left shifts ;; ;; == Uniform ternary arithmetic ;; ---- [INT] General ternary arithmetic that maps to unspecs ;; ---- [INT] Multiply-and-accumulate operations ;; ---- [INT] Binary logic operations with rotation ;; ---- [INT] Ternary logic operations ;; ---- [INT] Shift-and-accumulate operations ;; ---- [INT] Shift-and-insert operations ;; ---- [INT] Sum of absolute differences ;; ;; == Extending arithmetic ;; ---- [INT] Wide binary arithmetic ;; ---- [INT] Long binary arithmetic ;; ---- [INT] Long left shifts ;; ---- [INT] Long binary arithmetic with accumulation ;; ---- [FP] Long multiplication with accumulation ;; ;; == Narrowing arithmetic ;; ---- [INT] Narrowing unary arithmetic ;; ---- [INT] Narrowing binary arithmetic ;; ---- [INT] Narrowing right shifts ;; ;; == Pairwise arithmetic ;; ---- [INT] Pairwise arithmetic ;; ---- [FP] Pairwise arithmetic ;; ---- [INT] Pairwise arithmetic with accumulation ;; ;; == Complex arithmetic ;; ---- [INT] Complex binary operations ;; ---- [INT] Complex ternary operations ;; ---- [INT] Complex dot product ;; ;; == Conversions ;; ---- [FP<-FP] Widening conversions ;; ---- [FP<-FP] Narrowing conversions ;; ;; == Other arithmetic ;; ---- [INT] Reciprocal approximation ;; ---- [INT<-FP] Base-2 logarithm ;; ---- [INT] Polynomial multiplication ;; ;; == Permutation ;; ---- [INT,FP] General permutes ;; ---- [INT] Optional bit-permute extensions ;; ;; == General ;; ---- Check for aliases between pointers ;; ---- Histogram processing ;; ---- String matching ;; ;; == Cryptographic extensions ;; ---- Optional AES extensions ;; ---- Optional SHA-3 extensions ;; ---- Optional SM4 extensions

;; ========================================================================= ;; == Moves ;; =========================================================================

;; ------------------------------------------------------------------------- ;; ---- Non-temporal gather loads ;; ------------------------------------------------------------------------- ;; Includes gather forms of: ;; - LDNT1B ;; - LDNT1D ;; - LDNT1H ;; - LDNT1W ;; -------------------------------------------------------------------------

;; Non-extending loads.
(define_insn "@aarch64_gather_ldnt<mode>"
  [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w, w")
        (unspec:SVE_FULL_SD
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
           (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")
           (mem:BLK (scratch))]
          UNSPEC_LDNT1_GATHER))]
  "TARGET_SVE2"
  "@
   ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
   ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]"
)

;; Extending loads.
(define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w")
        (unspec:SVE_FULL_SDI
          [(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand" "UplDnm, UplDnm")
           (ANY_EXTEND:SVE_FULL_SDI
             (unspec:SVE_PARTIAL_I
               [(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand" "Upl, Upl")
                (match_operand:DI 2 "aarch64_reg_or_zero" "Z, r")
                (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand" "w, w")
                (mem:BLK (scratch))]
               UNSPEC_LDNT1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2
   && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
   ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode);
  }
)

;; ------------------------------------------------------------------------- ;; ---- Non-temporal scatter stores ;; ------------------------------------------------------------------------- ;; Includes scatter forms of: ;; - STNT1B ;; - STNT1D ;; - STNT1H ;; - STNT1W ;; -------------------------------------------------------------------------

;; Non-truncating stores.
(define_insn "@aarch64_scatter_stnt<mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VPRED> 0 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w")
           (match_operand:SVE_FULL_SD 3 "register_operand" "w, w")]
          UNSPEC_STNT1_SCATTER))]
  "TARGET_SVE2"
  "@
   stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>]
   stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]"
)

;; Truncating stores.
(define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "aarch64_reg_or_zero" "Z, r")
           (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand" "w, w")
           (truncate:SVE_PARTIAL_I
             (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))]
          UNSPEC_STNT1_SCATTER))]
  "TARGET_SVE2
   && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "@
   stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>]
   stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]"
)

;; ========================================================================= ;; == Uniform binary arithmetic ;; =========================================================================

;; ------------------------------------------------------------------------- ;; ---- [INT] Multiplication ;; ------------------------------------------------------------------------- ;; Includes the lane forms of: ;; - MUL ;; -------------------------------------------------------------------------

(define_insn "@aarch64_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (mult:SVE_FULL_HSDI
          (unspec:SVE_FULL_HSDI
            [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
             (match_operand:SI 3 "const_int_operand")]
            UNSPEC_SVE_LANE_SELECT)
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "w")))]
  "TARGET_SVE2"
  "mul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Scaled high-part multiplication ;; ------------------------------------------------------------------------- ;; The patterns in this section are synthetic. ;; -------------------------------------------------------------------------

;; Unpredicated integer multiply-high-with-(round-and-)scale.
(define_expand "<su>mulh<r>s<mode>3"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
        (unspec:SVE_FULL_BHSI
          [(match_dup 3)
           (unspec:SVE_FULL_BHSI
             [(match_operand:SVE_FULL_BHSI 1 "register_operand")
              (match_operand:SVE_FULL_BHSI 2 "register_operand")]
             MULHRS)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);

    rtx prod_b = gen_reg_rtx (<VWIDE>mode);
    rtx prod_t = gen_reg_rtx (<VWIDE>mode);
    emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1],
                                                 operands[2]));
    emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1],
                                                 operands[2]));

    rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
    emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift));
    emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0],
                                                prod_t, shift));

    DONE;
  }
)
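
;; For example, with the iterators instantiated for a signed rounding
;; high-half multiply on bytes, the expansion above would emit a sequence
;; along these lines (a sketch only; register choices are illustrative):
;;
;;   smullb  z2.h, z0.b, z1.b
;;   smullt  z3.h, z0.b, z1.b
;;   rshrnb  z4.b, z2.h, #7
;;   rshrnt  z4.b, z3.h, #7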

;; ------------------------------------------------------------------------- ;; ---- [INT] General binary arithmetic that maps to unspecs ;; ------------------------------------------------------------------------- ;; Includes: ;; - SHADD ;; - SHSUB ;; - SHSUBR ;; - SQRSHL ;; - SQRSHLR ;; - SRHADD ;; - SRSHL ;; - SRSHLR ;; - SUQADD ;; - UHADD ;; - UHSUB ;; - UHSUBR ;; - UQRSHL ;; - UQRSHLR ;; - URHADD ;; - URSHL ;; - URSHLR ;; - USQADD ;; -------------------------------------------------------------------------

;; Integer average (floor).
(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_dup 3)
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand")]
             HADD)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)

;; Integer average (rounding).
(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_dup 3)
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand")]
             RHADD)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
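
;; These expanders match the vectorizer's halving-average idioms, which
;; in scalar terms (illustrative C, computed without overflow in a wider
;; type) are:
;;
;;   r = (a + b) >> 1;      /* avg_floor: HADD  */
;;   r = (a + b + 1) >> 1;  /* avg_ceil:  RHADD */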

;; The immediate form of SQADD acts as an immediate form of SUQADD
;; over its full range.  In contrast to the ss_plus pattern, we do
;; not need to treat byte immediates specially.  E.g.:
;;
;;      SQADD   Z0.B, Z0.B, #128
;;
;; is equivalent to:
;;
;;      MOV     Z1.B, #128
;;      SUQADD  Z0.B, P0/M, Z0.B, Z1.B
;;
;; even though it's not equivalent to:
;;
;;      MOV     Z1.B, #128
;;      SQADD   Z0.B, P0/M, Z0.B, Z1.B  // Saturating subtraction of 128
(define_insn "@aarch64_sve_suqadd<mode>_const"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
           (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
          UNSPEC_SUQADD))]
  "TARGET_SVE2"
  "@
   sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2
   movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2"
  [(set_attr "movprfx" "*,yes")]
)

;; General predicated binary arithmetic.  All operations handled here
;; are commutative or have a reversed form.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w")
              (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w")]
             SVE2_COND_INT_BINARY_REV)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,yes")]
)

;; Predicated binary arithmetic with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_dup 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "register_operand")]
                SVE2_COND_INT_BINARY)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Predicated binary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
                SVE2_COND_INT_BINARY)]
             UNSPEC_PRED_X)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated binary arithmetic, merging with the second input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
                SVE2_COND_INT_BINARY_REV)]
             UNSPEC_PRED_X)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated binary operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w")]
                SVE2_COND_INT_BINARY_REV)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!CONSTANT_P (operands[5]))
      operands[5] = CONSTM1_RTX (<VPRED>mode);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

;; Predicated binary operations with no reverse form, merging with zero.
;; At present we don't generate these patterns via a cond_* optab,
;; so there's no correctness requirement to handle merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
                SVE2_COND_INT_BINARY_NOREV)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Saturating binary arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - SQDMULH ;; - SQRDMULH ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "w")
           (match_operand:SVE_FULL_I 2 "register_operand" "w")]
          SVE2_INT_BINARY))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (unspec:SVE_FULL_HSDI
             [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SVE2_INT_BINARY_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Saturating left shifts ;; ------------------------------------------------------------------------- ;; Includes: ;; - SQSHL ;; - SQSHLR ;; - UQSHL ;; - UQSHLR ;; -------------------------------------------------------------------------

;; Predicated left shifts.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w")
              (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w")]
             SVE2_COND_INT_SHIFT)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,*,*,yes,yes")]
)

;; Predicated left shifts with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_dup 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "register_operand")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Predicated left shifts, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w")
                 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, D<lr>, w")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,*,yes,yes")]
)

;; Predicated left shifts, merging with the second input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "0, w")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated left shifts, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, &w, &w, ?&w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl, Upl")
           (unspec:SVE_FULL_I
             [(match_operand 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand" "0, 0, w, w, w, w, w, w, w")
                 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, D<lr>, w, D<lr>, w, D<lr>, w")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, Dz, 0, 0, w, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[4])
   && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
   movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   #
   #"
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!CONSTANT_P (operands[5]))
      operands[5] = CONSTM1_RTX (<VPRED>mode);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

;; ========================================================================= ;; == Uniform ternary arithmetic ;; =========================================================================

;; ------------------------------------------------------------------------- ;; ---- [INT] General ternary arithmetic that maps to unspecs ;; ------------------------------------------------------------------------- ;; Includes: ;; - ADCLB ;; - ADCLT ;; - EORBT ;; - EORTB ;; - SBCLB ;; - SBCLT ;; - SQRDMLAH ;; - SQRDMLSH ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
           (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
           (match_operand:SVE_FULL_I 1 "register_operand" "0, w")]
          SVE2_INT_TERNARY))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w")
           (unspec:SVE_FULL_HSDI
             [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
              (match_operand:SI 4 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)
           (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")]
          SVE2_INT_TERNARY_LANE))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Multiply-and-accumulate operations ;; ------------------------------------------------------------------------- ;; Includes the lane forms of: ;; - MLA ;; - MLS ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_add_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_HSDI
          (mult:SVE_FULL_HSDI
            (unspec:SVE_FULL_HSDI
              [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
               (match_operand:SI 4 "const_int_operand")]
              UNSPEC_SVE_LANE_SELECT)
            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_sve_sub_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (minus:SVE_FULL_HSDI
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
          (mult:SVE_FULL_HSDI
            (unspec:SVE_FULL_HSDI
              [(match_operand:SVE_FULL_HSDI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
               (match_operand:SI 4 "const_int_operand")]
              UNSPEC_SVE_LANE_SELECT)
            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w"))))]
  "TARGET_SVE2"
  "@
   mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
   movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Binary logic operations with rotation ;; ------------------------------------------------------------------------- ;; Includes: ;; - XAR ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve2_xar<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (rotatert:SVE_FULL_I
          (xor:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "%0, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
          (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
  "TARGET_SVE2"
  "@
   xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)
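
;; Per element, XAR computes a rotate right of the XOR of its inputs,
;; e.g. for 32-bit lanes (illustrative C):
;;
;;   uint32_t t = a ^ b;
;;   r = (t >> imm) | (t << (32 - imm));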

;; ------------------------------------------------------------------------- ;; ---- [INT] Ternary logic operations ;; ------------------------------------------------------------------------- ;; Includes: ;; - BCAX ;; - BSL ;; - BSL1N ;; - BSL2N ;; - EOR3 ;; - NBSL ;; -------------------------------------------------------------------------

;; Unpredicated exclusive OR of AND.
(define_expand "@aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand"))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 2 "register_operand"))
          (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_operand 4)
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 2 "register_operand" "w, w"))
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   bcax\t%0.d, %0.d, %2.d, %3.d
   movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated 3-way exclusive OR.
(define_insn "@aarch64_sve2_eor3<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
        (xor:SVE_FULL_I
          (xor:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w"))
          (match_operand:SVE_FULL_I 3 "register_operand" "w, w, 0, w")))]
  "TARGET_SVE2"
  "@
   eor3\t%0.d, %0.d, %2.d, %3.d
   eor3\t%0.d, %0.d, %1.d, %3.d
   eor3\t%0.d, %0.d, %1.d, %2.d
   movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d"
  [(set_attr "movprfx" "*,*,*,yes")]
)

;; Use NBSL for vector NOR.
(define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand 3)
           (and:SVE_FULL_I
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   nbsl\t%0.d, %0.d, %2.d, %0.d
   movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Use NBSL for vector NAND.
(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand 3)
           (ior:SVE_FULL_I
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 1 "register_operand" "%0, w"))
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   nbsl\t%0.d, %0.d, %2.d, %2.d
   movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (xor:SVE_FULL_I
              (match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand"))
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE2"
)
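
;; The identity above can be checked bit by bit: when a bit of op3 is 1,
;; the two XORs with bsl_dup cancel and the bsl_mov bit survives; when it
;; is 0, the AND zeroes the term and the bsl_dup bit survives.  That is,
;; ((mov ^ dup) & sel) ^ dup == (mov & sel) | (dup & ~sel).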

(define_insn "*aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (xor:SVE_FULL_I
              (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
              (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
            (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
          (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
   bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise inverted select.
;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
(define_expand "@aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_dup 4)
           (not:SVE_FULL_I
             (xor:SVE_FULL_I
               (and:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand")
                   (match_operand:SVE_FULL_I 2 "register_operand"))
                 (match_operand:SVE_FULL_I 3 "register_operand"))
               (match_dup 2)))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand 4)
           (not:SVE_FULL_I
             (xor:SVE_FULL_I
               (and:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
                   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
               (match_dup BSL_DUP)))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == (((~bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (not:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand")
                   (match_operand:SVE_FULL_I 2 "register_operand")))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_operand 4)
               (not:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
                   (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w")))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
          (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  "@
   bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
   movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted second operand.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
(define_expand "@aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (ior:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand")
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (unspec:SVE_FULL_I
            [(match_dup 4)
             (and:SVE_FULL_I
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 2 "register_operand"))
               (not:SVE_FULL_I
                 (match_dup 3)))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (ior:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
          (unspec:SVE_FULL_I
            [(match_operand 4)
             (and:SVE_FULL_I
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
               (not:SVE_FULL_I
                 (match_dup BSL_DUP)))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated bitwise select with inverted second operand, alternative form.
;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
(define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (ior:SVE_FULL_I
          (and:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand" "<bsl_1st>, w")
            (match_operand:SVE_FULL_I 2 "register_operand" "<bsl_2nd>, w"))
          (unspec:SVE_FULL_I
            [(match_operand 4)
             (and:SVE_FULL_I
               (not:SVE_FULL_I
                 (match_dup BSL_DUP))
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))]
            UNSPEC_PRED_X)))]
  "TARGET_SVE2"
  "@
   bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
   movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Shift-and-accumulate operations ;; ------------------------------------------------------------------------- ;; Includes: ;; - SRSRA ;; - SSRA ;; - URSRA ;; - USRA ;; -------------------------------------------------------------------------

;; Provide the natural unpredicated interface for SSRA and USRA.
(define_expand "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_dup 4)
             (SHIFTRT:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand")
               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SSRA and USRA as a predicated operation whose predicate
;; isn't needed.
(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_operand 4)
             (SHIFTRT:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
               (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
            UNSPEC_PRED_X)
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; SRSRA and URSRA.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_I
          (unspec:SVE_FULL_I
            [(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
             (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
            VRSHR_N)
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
   movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3"
  [(set_attr "movprfx" "*,yes")]
)
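
;; Both groups implement shift-right-and-accumulate; per element
;; (illustrative C):
;;
;;   acc += x >> shift;                          /* SSRA/USRA   */
;;   acc += (x + (1 << (shift - 1))) >> shift;   /* SRSRA/URSRA */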

;; ------------------------------------------------------------------------- ;; ---- [INT] Shift-and-insert operations ;; ------------------------------------------------------------------------- ;; Includes: ;; - SLI ;; - SRI ;; -------------------------------------------------------------------------

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "0")
           (match_operand:SVE_FULL_I 2 "register_operand" "w")
           (match_operand:SVE_FULL_I 3 "aarch64_simd_shift_imm")]
          SVE2_INT_SHIFT_INSERT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Sum of absolute differences ;; ------------------------------------------------------------------------- ;; Includes: ;; - SABA ;; - UABA ;; -------------------------------------------------------------------------

;; Provide the natural unpredicated interface for SABA and UABA.
(define_expand "@aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (plus:SVE_FULL_I
          (minus:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (USMAX:SVE_FULL_I
                 (match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "register_operand"))]
              UNSPEC_PRED_X)
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (<max_opp>:SVE_FULL_I
                 (match_dup 2)
                 (match_dup 3))]
              UNSPEC_PRED_X))
          (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
;; operation whose predicates aren't needed.
(define_insn "*aarch64_sve2_<su>aba<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_I
          (minus:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_operand 4)
               (USMAX:SVE_FULL_I
                 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
                 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
              UNSPEC_PRED_X)
            (unspec:SVE_FULL_I
              [(match_operand 5)
               (<max_opp>:SVE_FULL_I
                 (match_dup 2)
                 (match_dup 3))]
              UNSPEC_PRED_X))
          (match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
   movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)
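
;; Per element this is an absolute-difference-and-accumulate
;; (illustrative C):
;;
;;   acc += max (a, b) - min (a, b);   /* i.e. acc += |a - b| */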

;; ========================================================================= ;; == Extending arithmetic ;; =========================================================================

;; ------------------------------------------------------------------------- ;; ---- [INT] Wide binary arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - SADDWB ;; - SADDWT ;; - SSUBWB ;; - SSUBWT ;; - UADDWB ;; - UADDWT ;; - USUBWB ;; - USUBWT ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (match_operand:<VNARROW> 2 "register_operand" "w")]
          SVE2_INT_BINARY_WIDE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Long binary arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - SABDLB ;; - SABDLT ;; - SADDLB ;; - SADDLBT ;; - SADDLT ;; - SMULLB ;; - SMULLT ;; - SQDMULLB ;; - SQDMULLT ;; - SSUBLB ;; - SSUBLBT ;; - SSUBLT ;; - SSUBLTB ;; - UABDLB ;; - UABDLT ;; - UADDLB ;; - UADDLT ;; - UMULLB ;; - UMULLT ;; - USUBLB ;; - USUBLT ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (match_operand:<VNARROW> 2 "register_operand" "w")]
          SVE2_INT_BINARY_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)

(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (unspec:<VNARROW>
             [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SVE2_INT_BINARY_LONG_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Long left shifts ;; ------------------------------------------------------------------------- ;; Includes: ;; - SSHLLB ;; - SSHLLT ;; - USHLLB ;; - USHLLT ;; -------------------------------------------------------------------------

;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (match_operand:DI 2 "const_int_operand")]
          SVE2_INT_SHIFT_IMM_LONG))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Long binary arithmetic with accumulation ;; ------------------------------------------------------------------------- ;; Includes: ;; - SABALB ;; - SABALT ;; - SMLALB ;; - SMLALT ;; - SMLSLB ;; - SMLSLT ;; - SQDMLALB ;; - SQDMLALBT ;; - SQDMLALT ;; - SQDMLSLB ;; - SQDMLSLBT ;; - SQDMLSLT ;; - UABALB ;; - UABALT ;; - UMLALB ;; - UMLALT ;; - UMLSLB ;; - UMLSLT ;; -------------------------------------------------------------------------

;; Non-saturating MLA operations.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_HSDI
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_ADD_BINARY_LONG)
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLA operations with lane select.
(define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (plus:SVE_FULL_SDI
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_ADD_BINARY_LONG_LANE)
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations.
(define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (ss_plus:SVE_FULL_HSDI
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_QADD_BINARY_LONG)
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLA operations with lane select.
(define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (ss_plus:SVE_FULL_SDI
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_QADD_BINARY_LONG_LANE)
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")))]
  "TARGET_SVE2"
  "@
   <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLS operations.
(define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (minus:SVE_FULL_HSDI
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_SUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
   <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Non-saturating MLS operations with lane select.
(define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (minus:SVE_FULL_SDI
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_SUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
   <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations.
(define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (ss_minus:SVE_FULL_HSDI
          (match_operand:SVE_FULL_HSDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_HSDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (match_operand:<VNARROW> 3 "register_operand" "w, w")]
            SVE2_INT_QSUB_BINARY_LONG)))]
  "TARGET_SVE2"
  "@
   <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
   movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Saturating MLS operations with lane select.
(define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (ss_minus:SVE_FULL_SDI
          (match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
          (unspec:SVE_FULL_SDI
            [(match_operand:<VNARROW> 2 "register_operand" "w, w")
             (unspec:<VNARROW>
               [(match_operand:<VNARROW> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
                (match_operand:SI 4 "const_int_operand")]
               UNSPEC_SVE_LANE_SELECT)]
            SVE2_INT_QSUB_BINARY_LONG_LANE)))]
  "TARGET_SVE2"
  "@
   <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
   movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]"
  [(set_attr "movprfx" "*,yes")]
)

;; ------------------------------------------------------------------------- ;; ---- [FP] Long multiplication with accumulation ;; ------------------------------------------------------------------------- ;; Includes: ;; - FMLALB ;; - FMLALT ;; - FMLSLB ;; - FMLSLT ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VNARROW> 1 "register_operand" "w, w")
           (match_operand:<VNARROW> 2 "register_operand" "w, w")
           (match_operand:VNx4SF_ONLY 3 "register_operand" "0, w")]
          SVE2_FP_TERNARY_LONG))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VNARROW> 1 "register_operand" "w, w")
           (unspec:<VNARROW>
             [(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)
           (match_operand:VNx4SF_ONLY 4 "register_operand" "0, w")]
          SVE2_FP_TERNARY_LONG_LANE))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
   movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
  [(set_attr "movprfx" "*,yes")]
)

;; ========================================================================= ;; == Narrowing arithmetic ;; =========================================================================

;; ------------------------------------------------------------------------- ;; ---- [INT] Narrowing unary arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - SQXTNB ;; - SQXTNT ;; - SQXTUNB ;; - SQXTUNT ;; - UQXTNB ;; - UQXTNT ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
          SVE2_INT_UNARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
)

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:<VNARROW> 1 "register_operand" "0")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
          SVE2_INT_UNARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
)
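
;; The B (bottom) forms write the even-numbered elements of the narrower
;; result and zero the odd-numbered ones, so they need no input; the
;; T (top) forms write the odd-numbered elements and leave the even ones
;; untouched, which is why operand 1 is tied to the destination.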

;; ------------------------------------------------------------------------- ;; ---- [INT] Narrowing binary arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - ADDHNB ;; - ADDHNT ;; - RADDHNB ;; - RADDHNT ;; - RSUBHNB ;; - RSUBHNT ;; - SUBHNB ;; - SUBHNT ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
          SVE2_INT_BINARY_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
)

;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:<VNARROW> 1 "register_operand" "0")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
           (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
          SVE2_INT_BINARY_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Narrowing right shifts ;; ------------------------------------------------------------------------- ;; Includes: ;; - RSHRNB ;; - RSHRNT ;; - SHRNB ;; - SHRNT ;; - SQRSHRNB ;; - SQRSHRNT ;; - SQRSHRUNB ;; - SQRSHRUNT ;; - SQSHRNB ;; - SQSHRNT ;; - SQSHRUNB ;; - SQSHRUNT ;; - UQRSHRNB ;; - UQRSHRNT ;; - UQSHRNB ;; - UQSHRNT ;; -------------------------------------------------------------------------

;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (match_operand:DI 2 "const_int_operand")]
          SVE2_INT_SHIFT_IMM_NARROWB))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
)

;; The immediate range is enforced before generating the instruction.
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:<VNARROW> 1 "register_operand" "0")
           (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
           (match_operand:DI 3 "const_int_operand")]
          SVE2_INT_SHIFT_IMM_NARROWT))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
)

;; ========================================================================= ;; == Pairwise arithmetic ;; =========================================================================

;; ------------------------------------------------------------------------- ;; ---- [INT] Pairwise arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - ADDP ;; - SMAXP ;; - SMINP ;; - UMAXP ;; - UMINP ;; -------------------------------------------------------------------------

(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
           (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
          SVE2_INT_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; ------------------------------------------------------------------------- ;; ---- [FP] Pairwise arithmetic ;; ------------------------------------------------------------------------- ;; Includes: ;; - FADDP ;; - FMAXP ;; - FMAXNMP ;; - FMINP ;; - FMINNMP ;; -------------------------------------------------------------------------

(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
           (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
          SVE2_FP_BINARY_PAIR))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; ------------------------------------------------------------------------- ;; ---- [INT] Pairwise arithmetic with accumulation ;; ------------------------------------------------------------------------- ;; Includes: ;; - SADALP ;; - UADALP ;; -------------------------------------------------------------------------
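
;; SADALP and UADALP widen each pair of adjacent source elements, add
;; them, and accumulate into the wider destination; per destination
;; element (illustrative C):
;;
;;   acc[i] += (wide) src[2 * i] + (wide) src[2 * i + 1];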

;; Predicated pairwise absolute difference and accumulate with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_HSDI
             [(match_dup 1)
              (match_operand:SVE_FULL_HSDI 2 "register_operand")
              (match_operand:<VNARROW> 3 "register_operand")]
             SVE2_INT_BINARY_PAIR_LONG)
           (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    /* Only target code is aware of these operations, so we don't need
       to handle the fully-general case.  */
    gcc_assert (rtx_equal_p (operands[2], operands[4])
                || CONSTANT_P (operands[4]));
  }
)

;; Predicated pairwise absolute difference and accumulate, merging with
;; the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_HSDI
             [(match_operand 4)
              (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
              (match_operand:<VNARROW> 3 "register_operand" "w, w")]
             SVE2_INT_BINARY_PAIR_LONG)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated pairwise absolute difference and accumulate, merging with zero.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:SVE_FULL_HSDI
             [(match_operand 5)
              (match_operand:SVE_FULL_HSDI 2 "register_operand" "0, w")
              (match_operand:<VNARROW> 3 "register_operand" "w, w")]
             SVE2_INT_BINARY_PAIR_LONG)
           (match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>"
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)

;; ========================================================================= ;; == Complex arithmetic ;; =========================================================================

;; ------------------------------------------------------------------------- ;; ---- [INT] Complex binary operations ;; ------------------------------------------------------------------------- ;; Includes: ;; - CADD ;; - SQCADD ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
           (match_operand:SVE_FULL_I 2 "register_operand" "w, w")]
          SVE2_INT_CADD))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated optab pattern for the auto-vectorizer.
(define_expand "cadd<rot><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand")
           (match_operand:SVE_FULL_I 2 "register_operand")]
          SVE2_INT_CADD_OP))]
  "TARGET_SVE2"
)
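
;; CADD treats each pair of elements as a complex number and adds a
;; rotated copy of the second operand: rotation #90 adds i*b, giving
;; (a.re - b.im, a.im + b.re), while #270 subtracts it, giving
;; (a.re + b.im, a.im - b.re).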

;; ------------------------------------------------------------------------- ;; ---- [INT] Complex ternary operations ;; ------------------------------------------------------------------------- ;; Includes: ;; - CMLA ;; - SQRDCMLA ;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
           (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
           (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
          SVE2_INT_CMLA))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_HSI
          [(match_operand:SVE_FULL_HSI 1 "register_operand" "0, w")
           (match_operand:SVE_FULL_HSI 2 "register_operand" "w, w")
           (unspec:SVE_FULL_HSI
             [(match_operand:SVE_FULL_HSI 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
              (match_operand:SI 4 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SVE2_INT_CMLA))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; Unpredicated optab pattern for the auto-vectorizer.
;; The complex mla/mls operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cml<fcmac1><conj_op><mode>4"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (plus:SVE_FULL_I
          (match_operand:SVE_FULL_I 1 "register_operand")
          (unspec:SVE_FULL_I
            [(match_operand:SVE_FULL_I 2 "register_operand")
             (match_operand:SVE_FULL_I 3 "register_operand")]
            SVE2_INT_CMLA_OP)))]
  "TARGET_SVE2"
  {
    rtx tmp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, operands[1],
                                                     operands[3], operands[2]));
    emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
                                                     operands[3], operands[2]));
    DONE;
  }
)
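
;; For example, the non-conjugate complex multiply-add becomes two CMLA
;; instructions with rotations #0 and #90 (a sketch; register choices
;; are illustrative):
;;
;;   cmla  z0.s, z1.s, z2.s, #0
;;   cmla  z0.s, z1.s, z2.s, #90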

;; Unpredicated optab pattern for the auto-vectorizer.
;; The complex mul operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder.  Because of this, expand early.
(define_expand "cmul<conj_op><mode>3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand")
           (match_operand:SVE_FULL_I 2 "register_operand")]
          SVE2_INT_CMUL_OP))]
  "TARGET_SVE2"
  {
    rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
    rtx tmp = gen_reg_rtx (<MODE>mode);
    emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, accum,
                                                     operands[2], operands[1]));
    emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
                                                     operands[2], operands[1]));
    DONE;
  }
)
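
;; A plain complex multiply is therefore the same two-CMLA sequence
;; applied to a freshly zeroed accumulator (illustrative):
;;
;;   mov   z0.s, #0
;;   cmla  z0.s, z1.s, z2.s, #0
;;   cmla  z0.s, z1.s, z2.s, #90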

;; -------------------------------------------------------------------------
;; ---- [INT] Complex dot product
;; -------------------------------------------------------------------------
;; Includes:
;; - CDOT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_SDI
          [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
           (match_operand:<VSI2QI> 2 "register_operand" "w, w")
           (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
          SVE2_INT_CDOT))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

(define_insn "@aarch64_<optab>_lane_<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_SDI
          [(match_operand:SVE_FULL_SDI 1 "register_operand" "0, w")
           (match_operand:<VSI2QI> 2 "register_operand" "w, w")
           (unspec:<VSI2QI>
             [(match_operand:<VSI2QI> 3 "register_operand" "<sve_lane_con>, <sve_lane_con>")
              (match_operand:SI 4 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SVE2_INT_CDOT))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
   movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>"
  [(set_attr "movprfx" "*,yes")]
)

;; =========================================================================
;; == Conversions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Widening conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTLT
;; -------------------------------------------------------------------------

;; Predicated convert long top.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:<VNARROW> 2 "register_operand" "0")]
          SVE2_COND_FP_UNARY_LONG))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Ventype>"
)

;; Predicated convert long top with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
        (unspec:SVE_FULL_SDF
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_SDF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:<VNARROW> 2 "register_operand")]
             SVE2_COND_FP_UNARY_LONG)
           (match_operand:SVE_FULL_SDF 3 "register_operand")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
)

;; These instructions do not take MOVPRFX.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unspec:SVE_FULL_SDF
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:<VNARROW> 2 "register_operand" "w")]
             SVE2_COND_FP_UNARY_LONG)
           (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_<sve_fp_op><mode>_strict"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDF
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (unspec:SVE_FULL_SDF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:<VNARROW> 2 "register_operand" "w")]
             SVE2_COND_FP_UNARY_LONG)
           (match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
)

;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Narrowing conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTNT
;; - FCVTX
;; - FCVTXNT
;; -------------------------------------------------------------------------

;; Predicated FCVTNT.  This doesn't give a natural aarch64_pred_/cond_
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSF
          [(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
           (const_int SVE_STRICT_GP)
           (match_operand:SVE_FULL_HSF 1 "register_operand" "0")
           (match_operand:<VWIDE> 3 "register_operand" "w")]
          UNSPEC_COND_FCVTNT))]
  "TARGET_SVE2"
  "fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
)
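;; For illustration only, the .S <- .D form of FCVTNT behaves roughly as:
;;
;;   for each 64-bit container i:
;;     if the governing predicate selects container i:
;;       %0.s[2*i + 1] = fcvt (%3.d[i])   // narrowed "top" (odd) element
;;       %0.s[2*i]     = %1.s[2*i]        // even element passed through
;;     else:
;;       both halves of container i are taken from %1
;;
;; which is why operand 1 must always supply the even elements and the
;; pattern ties it to the destination.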

;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
;; it supports MOVPRFX).
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:<VWIDE> 2 "register_operand" "0, w")]
          SVE2_COND_FP_UNARY_NARROWB))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FCVTX with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VWIDE_PRED> 1 "register_operand")
           (unspec:VNx4SF_ONLY
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:<VWIDE> 2 "register_operand")]
             SVE2_COND_FP_UNARY_NARROWB)
           (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
)

(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any_relaxed"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:VNx4SF_ONLY
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
             SVE2_COND_FP_UNARY_NARROWB)
           (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

(define_insn "*cond_<sve_fp_op><mode>_any_strict"
  [(set (match_operand:VNx4SF_ONLY 0 "register_operand" "=&w, &w, &w")
        (unspec:VNx4SF_ONLY
          [(match_operand:<VWIDE_PRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:VNx4SF_ONLY
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:<VWIDE> 2 "register_operand" "w, w, w")]
             SVE2_COND_FP_UNARY_NARROWB)
           (match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; Predicated FCVTXNT.  This doesn't give a natural aarch64_pred_/cond_
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_cvtxnt<mode>"
  [(set (match_operand:<VNARROW> 0 "register_operand" "=w")
        (unspec:<VNARROW>
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (const_int SVE_STRICT_GP)
           (match_operand:<VNARROW> 1 "register_operand" "0")
           (match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
          UNSPEC_COND_FCVTXNT))]
  "TARGET_SVE2"
  "fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
)

;; =========================================================================
;; == Other arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Reciprocal approximation
;; -------------------------------------------------------------------------
;; Includes:
;; - URECPE
;; - URSQRTE
;; -------------------------------------------------------------------------

;; Predicated integer unary operations.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
        (unspec:VNx4SI_ONLY
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (unspec:VNx4SI_ONLY
             [(match_operand:VNx4SI_ONLY 2 "register_operand" "0, w")]
             SVE2_U32_UNARY)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated integer unary operations with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
        (unspec:VNx4SI_ONLY
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:VNx4SI_ONLY
             [(match_dup 4)
              (unspec:VNx4SI_ONLY
                [(match_operand:VNx4SI_ONLY 2 "register_operand")]
                SVE2_U32_UNARY)]
             UNSPEC_PRED_X)
           (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*cond_<sve_int_op><mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w, ?&w")
        (unspec:VNx4SI_ONLY
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:VNx4SI_ONLY
             [(match_operand 4)
              (unspec:VNx4SI_ONLY
                [(match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
                SVE2_U32_UNARY)]
             UNSPEC_PRED_X)
           (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  "@
   <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Base-2 logarithm
;; -------------------------------------------------------------------------
;; Includes:
;; - FLOGB
;; -------------------------------------------------------------------------

;; Predicated FLOGB.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w, ?&w")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
           (match_operand:SI 3 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
          SVE2_COND_INT_UNARY_FP))]
  "TARGET_SVE2"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes")]
)

;; Predicated FLOGB with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:<V_INT_EQUIV>
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")]
             SVE2_COND_INT_UNARY_FP)
           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
)

(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:<V_INT_EQUIV>
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
             SVE2_COND_INT_UNARY_FP)
           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
  [(set_attr "movprfx" "*,yes,yes")]
)

(define_insn "*cond_<sve_fp_op><mode>_strict"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=&w, ?&w, ?&w")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
           (unspec:<V_INT_EQUIV>
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
             SVE2_COND_INT_UNARY_FP)
           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
          UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  "@
   <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
   movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
  [(set_attr "movprfx" "*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Polynomial multiplication
;; -------------------------------------------------------------------------
;; Includes:
;; - PMUL
;; - PMULLB
;; - PMULLT
;; -------------------------------------------------------------------------

;; Uniform PMUL.
(define_insn "@aarch64_sve2_pmul<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
        (unspec:VNx16QI_ONLY
          [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
           (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
          UNSPEC_PMUL))]
  "TARGET_SVE2"
  "pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Extending PMUL, with the results modeled as wider vectors.
;; This representation is only possible for .H and .D, not .Q.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (match_operand:<VNARROW> 2 "register_operand" "w")]
          SVE2_PMULL))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)
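;; For illustration only, the .H form behaves roughly as:
;;
;;   pmullb z0.h, z1.b, z2.b   // z0.h[i] = clmul (z1.b[2*i],     z2.b[2*i])
;;   pmullt z0.h, z1.b, z2.b   // z0.h[i] = clmul (z1.b[2*i + 1], z2.b[2*i + 1])
;;
;; i.e. PMULLB multiplies the even-numbered (bottom) elements and PMULLT
;; the odd-numbered (top) elements, using carry-less multiplication.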

;; Extending PMUL, with the results modeled as pairs of values.
;; This representation works for .H, .D and .Q, with .Q requiring
;; the AES extension.  (This is enforced by the mode iterator.)
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
        (unspec:SVE2_PMULL_PAIR_I
          [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
           (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
          SVE2_PMULL_PAIR))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
)

;; =========================================================================
;; == Permutation
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] General permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TBL (vector pair form)
;; - TBX
;; -------------------------------------------------------------------------

;; TBL on a pair of data vectors.
(define_insn "@aarch64_sve2_tbl2<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VDOUBLE> 1 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
          UNSPEC_TBL2))]
  "TARGET_SVE2"
  "tbl\t%0.<Vetype>, %1, %2.<Vetype>"
)

;; TBX.  These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_tbx<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:SVE_FULL 1 "register_operand" "0")
           (match_operand:SVE_FULL 2 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
          UNSPEC_TBX))]
  "TARGET_SVE2"
  "tbx\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)
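;; For illustration only: TBX leaves an element of the destination
;; unchanged when the corresponding index in operand 3 is out of range
;; for the table, which is why operand 1 is tied to the destination and
;; the instruction cannot use MOVPRFX.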

;; -------------------------------------------------------------------------
;; ---- [INT] Optional bit-permute extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - BDEP
;; - BEXT
;; - BGRP
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "w")
           (match_operand:SVE_FULL_I 2 "register_operand" "w")]
          SVE2_INT_BITPERM))]
  "TARGET_SVE2_BITPERM"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
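;; For illustration only, per element of the pattern above (data in
;; operand 1, mask in operand 2): BEXT gathers the bits of the data that
;; are selected by set mask bits into the low end of the result; BDEP is
;; the inverse, scattering the low-order data bits to the positions of
;; set mask bits; and BGRP packs the mask-selected bits at the low end
;; with the remaining bits above them.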

;; =========================================================================
;; == General
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Check for aliases between pointers
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic: WHILERW and WHILEWR are
;; defined in aarch64-sve.md instead.
;; -------------------------------------------------------------------------

;; Use WHILERW and WHILEWR to accelerate alias checks.  This is only
;; possible if the accesses we're checking are exactly the same size
;; as an SVE vector.
(define_expand "check_<raw_war>_ptrs<mode>"
  [(match_operand:GPI 0 "register_operand")
   (unspec:VNx16BI
     [(match_operand:GPI 1 "register_operand")
      (match_operand:GPI 2 "register_operand")
      (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
      (match_operand:GPI 4 "const_int_operand")]
     SVE2_WHILE_PTR)]
  "TARGET_SVE2"
{
  /* Use the widest predicate mode we can.  */
  unsigned int align = INTVAL (operands[4]);
  if (align > 8)
    align = 8;
  machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();

  /* Emit a WHILERW or WHILEWR, setting the condition codes based on
     the result.  */
  emit_insn (gen_while_ptest
             (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
              gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
              CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));

  /* Set operand 0 to true if the last bit of the predicate result is set,
     i.e. if all elements are free of dependencies.  */
  rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
  rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
  DONE;
})
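;; For illustration only: this expander gives the vectorizer a
;; one-instruction runtime alias check for loops such as:
;;
;;   void
;;   f (char *a, char *b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] = b[i] + 1;
;;   }
;;
;; Instead of comparing pointer ranges by hand, a single WHILERW/WHILEWR
;; sets the condition codes so that operand 0 becomes true when a whole
;; vector's worth of elements is free of dependencies.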

;; -------------------------------------------------------------------------
;; ---- Histogram processing
;; -------------------------------------------------------------------------
;; Includes:
;; - HISTCNT
;; - HISTSEG
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve2_histcnt<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
           (match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
          UNSPEC_HISTCNT))]
  "TARGET_SVE2"
  "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

(define_insn "@aarch64_sve2_histseg<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
        (unspec:VNx16QI_ONLY
          [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
           (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
          UNSPEC_HISTSEG))]
  "TARGET_SVE2"
  "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- String matching
;; -------------------------------------------------------------------------
;; Includes:
;; - MATCH
;; - NMATCH
;; -------------------------------------------------------------------------

;; Predicated string matching.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand:SVE_FULL_BHI 3 "register_operand" "w")
              (match_operand:SVE_FULL_BHI 4 "register_operand" "w")]
             SVE2_MATCH)]
          UNSPEC_PRED_Z))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)

;; Predicated string matching in which both the flag and predicate results
;; are interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (unspec:<VPRED>
                [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
                 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
                SVE2_MATCH)]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_dup 6)
           (match_dup 7)
           (unspec:<VPRED>
             [(match_dup 2)
              (match_dup 3)]
             SVE2_MATCH)]
          UNSPEC_PRED_Z))]
  "TARGET_SVE2
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; Predicated string matching in which only the flags result is interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (unspec:<VPRED>
                [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
                 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
                SVE2_MATCH)]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0 "=Upa"))]
  "TARGET_SVE2
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; =========================================================================
;; == Cryptographic extensions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Optional AES extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - AESD
;; - AESE
;; - AESIMC
;; - AESMC
;; -------------------------------------------------------------------------

;; AESD and AESE.
(define_insn "aarch64_sve2_aes<aes_op>"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(xor:VNx16QI
             (match_operand:VNx16QI 1 "register_operand" "%0")
             (match_operand:VNx16QI 2 "register_operand" "w"))]
          CRYPTO_AES))]
  "TARGET_SVE2_AES"
  "aes<aes_op>\t%0.b, %0.b, %2.b"
  [(set_attr "type" "crypto_aese")]
)

;; AESMC and AESIMC.  These instructions do not take MOVPRFX.
(define_insn "aarch64_sve2_aes<aesmc_op>"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(match_operand:VNx16QI 1 "register_operand" "0")]
          CRYPTO_AESMC))]
  "TARGET_SVE2_AES"
  "aes<aesmc_op>\t%0.b, %0.b"
  [(set_attr "type" "crypto_aesmc")]
)

;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want
;; to keep the two together and enforce the register dependency without
;; scheduling or register allocation messing up the order or introducing
;; moves in between.  Mash the two together during combine.

(define_insn "*aarch64_sve2_aese_fused"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(unspec:VNx16QI
             [(xor:VNx16QI
                (match_operand:VNx16QI 1 "register_operand" "%0")
                (match_operand:VNx16QI 2 "register_operand" "w"))]
             UNSPEC_AESE)]
          UNSPEC_AESMC))]
  "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

(define_insn "*aarch64_sve2_aesd_fused"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(unspec:VNx16QI
             [(xor:VNx16QI
                (match_operand:VNx16QI 1 "register_operand" "%0")
                (match_operand:VNx16QI 2 "register_operand" "w"))]
             UNSPEC_AESD)]
          UNSPEC_AESIMC))]
  "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; -------------------------------------------------------------------------
;; ---- Optional SHA-3 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - RAX1
;; -------------------------------------------------------------------------

(define_insn "aarch64_sve2_rax1"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
        (xor:VNx2DI
          (rotate:VNx2DI
            (match_operand:VNx2DI 2 "register_operand" "w")
            (const_int 1))
          (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE2_SHA3"
  "rax1\t%0.d, %1.d, %2.d"
  [(set_attr "type" "crypto_sha3")]
)

;; -------------------------------------------------------------------------
;; ---- Optional SM4 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - SM4E
;; - SM4EKEY
;; -------------------------------------------------------------------------

;; These instructions do not take MOVPRFX.
(define_insn "aarch64_sve2_sm4e"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w")
        (unspec:VNx4SI
          [(match_operand:VNx4SI 1 "register_operand" "0")
           (match_operand:VNx4SI 2 "register_operand" "w")]
          UNSPEC_SM4E))]
  "TARGET_SVE2_SM4"
  "sm4e\t%0.s, %0.s, %2.s"
  [(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sve2_sm4ekey"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w")
        (unspec:VNx4SI
          [(match_operand:VNx4SI 1 "register_operand" "w")
           (match_operand:VNx4SI 2 "register_operand" "w")]
          UNSPEC_SM4EKEY))]
  "TARGET_SVE2_SM4"
  "sm4ekey\t%0.s, %1.s, %2.s"
  [(set_attr "type" "crypto_sm4")]
)