;; Machine description for optimization of RVV auto-vectorization. ;; Copyright (C) 2023 Free Software Foundation, Inc. ;; Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.

;; This file is part of GCC.

;; GCC is free software; you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by ;; the Free Software Foundation; either version 3, or (at your option) ;; any later version.

;; GCC is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License ;; along with GCC; see the file COPYING3. If not see ;; http://www.gnu.org/licenses/.

;; We don't have vwmul.wv instruction like vwadd.wv in RVV.
;; This pattern is an intermediate RTL IR as a pseudo vwmul.wv to enhance
;; optimization of instructions combine.
;;
;; Operand 4 is the narrow (<V_DOUBLE_TRUNC>) input that gets extended,
;; operand 3 is the already-wide input, operand 2 is the merge value and
;; operand 1 is the mask.  The insn is never output directly ("#"): it is
;; always split into an explicit VLMAX vf2 extension of operand 4 into a
;; fresh pseudo, followed by an ordinary predicated MULT in the wide mode.
;; NOTE(review): operands 6-8 are presumably the tail policy, mask policy
;; and AVL type as in the other predicated patterns -- confirm in vector.md.
(define_insn_and_split "@pred_single_widen_mul<any_extend:su><mode>"
  [(set (match_operand:VWEXTI 0 "register_operand"                  "=&vr,&vr")
        (if_then_else:VWEXTI
          (unspec:<VM>
            [(match_operand:<VM> 1 "vector_mask_operand"           "vmWc1,vmWc1")
             (match_operand 5 "vector_length_operand"              "   rK,   rK")
             (match_operand 6 "const_int_operand"                  "    i,    i")
             (match_operand 7 "const_int_operand"                  "    i,    i")
             (match_operand 8 "const_int_operand"                  "    i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (mult:VWEXTI
            (any_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "   vr,   vr"))
            (match_operand:VWEXTI 3 "register_operand"             "   vr,   vr"))
          (match_operand:VWEXTI 2 "vector_merge_operand"           "   vu,    0")))]
  "TARGET_VECTOR && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    /* Step 1: widen the narrow input (operand 4) into a new pseudo.  */
    insn_code icode = code_for_pred_vf2 (<CODE>, <MODE>mode);
    rtx tmp = gen_reg_rtx (<MODE>mode);
    rtx ops[] = {tmp, operands[4]};
    riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ops);

    /* Step 2: multiply in the wide mode, keeping the original mask, merge
       value, vector length and constant predication operands.  */
    emit_insn (gen_pred (MULT, <MODE>mode, operands[0], operands[1], operands[2],
                         operands[3], tmp, operands[5], operands[6],
                         operands[7], operands[8]));
    DONE;
  }
  [(set_attr "type" "viwmul")
   (set_attr "mode" "<MODE>")])

;; This pattern is to enhance the instruction combine optimizations for
;; complicated signed and unsigned widening multiplication operations.
;;
;; Operand 3 is the sign-extended input and operand 4 the zero-extended
;; input; the template prints them in that order (%3, then %4) as the
;; sources of vwmulsu.vv, followed by operand 1 (the mask) printed with
;; the "p" modifier.  Operand 2 is the merge value.
(define_insn "*pred_widen_mulsu<mode>"
  [(set (match_operand:VWEXTI 0 "register_operand"                  "=&vr,&vr")
        (if_then_else:VWEXTI
          (unspec:<VM>
            [(match_operand:<VM> 1 "vector_mask_operand"           "vmWc1,vmWc1")
             (match_operand 5 "vector_length_operand"              "   rK,   rK")
             (match_operand 6 "const_int_operand"                  "    i,    i")
             (match_operand 7 "const_int_operand"                  "    i,    i")
             (match_operand 8 "const_int_operand"                  "    i,    i")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (mult:VWEXTI
            (zero_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand" "   vr,   vr"))
            (sign_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand" "   vr,   vr")))
          (match_operand:VWEXTI 2 "vector_merge_operand"           "   vu,    0")))]
  "TARGET_VECTOR"
  "vwmulsu.vv\t%0,%3,%4%p1"
  [(set_attr "type" "viwmul")
   (set_attr "mode" "<V_DOUBLE_TRUNC>")])

;; -----------------------------------------------------------------------------
;; ---- Integer Compare Instructions Simplification
;; -----------------------------------------------------------------------------
;; Simplify OP(V, V) Instructions to VMCLR.m Includes:
;; - 1. VMSNE
;; - 2. VMSLT
;; - 3. VMSLTU
;; - 4. VMSGT
;; - 5. VMSGTU
;; -----------------------------------------------------------------------------
;; Simplify OP(V, V) Instructions to VMSET.m Includes:
;; - 1. VMSEQ
;; - 2. VMSLE
;; - 3. VMSLEU
;; - 4. VMSGE
;; - 5. VMSGEU
;; -----------------------------------------------------------------------------

;; Split a predicated mask-mode (VB) set whose mask is all-trues, whose
;; merge operand is undefined and whose source (operand 3) satisfies
;; vector_move_operand into a plain predicated mask move.  The move is
;; emitted with a CONST1_RTX mask and an RVV_VUNDEF merge value.
;; NOTE(review): operand 6 is matched but not forwarded; only operands 4
;; and 5 are passed on to gen_pred_mov -- confirm operand 5 is the value
;; gen_pred_mov expects in its last position.
(define_split
  [(set (match_operand:VB      0 "register_operand")
        (if_then_else:VB
          (unspec:VB
            [(match_operand:VB 1 "vector_all_trues_mask_operand")
             (match_operand    4 "vector_length_operand")
             (match_operand    5 "const_int_operand")
             (match_operand    6 "const_int_operand")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (match_operand:VB    3 "vector_move_operand")
          (match_operand:VB    2 "vector_undef_operand")))]
  "TARGET_VECTOR"
  [(const_int 0)]
  {
    emit_insn (gen_pred_mov (<MODE>mode, operands[0], CONST1_RTX (<MODE>mode),
                             RVV_VUNDEF (<MODE>mode), operands[3],
                             operands[4], operands[5]));
    DONE;
  }
)

;; -------------------------------------------------------------------------
;; ---- [BOOL] Binary logical operations (inverted second input)
;; -------------------------------------------------------------------------
;; Includes:
;; - vmandnot.mm
;; - vmornot.mm
;; -------------------------------------------------------------------------

;; Match a mask-mode bitmanip_bitwise operation in which operand 2 is
;; inverted before being combined with operand 1.  The insn is never
;; output directly ("#"); it is always split into the predicated "not"
;; pattern for the same code, emitted as a VLMAX binary operation on the
;; unchanged operand array.
(define_insn_and_split "*<optab>not<mode>"
  [(set (match_operand:VB 0 "register_operand"           "=vr")
        (bitmanip_bitwise:VB
          (not:VB (match_operand:VB 2 "register_operand" " vr"))
          (match_operand:VB 1 "register_operand"         " vr")))]
  "TARGET_VECTOR && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    insn_code icode = code_for_pred_not (<CODE>, <MODE>mode);
    riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
    DONE;
  }
  [(set_attr "type" "vmalu")
   (set_attr "mode" "<MODE>")])

;; -------------------------------------------------------------------------
;; ---- [BOOL] Binary logical operations (inverted result)
;; -------------------------------------------------------------------------
;; Includes:
;; - vmnand.mm
;; - vmnor.mm
;; - vmxnor.mm
;; -------------------------------------------------------------------------

;; Match a mask-mode any_bitwise operation on operands 1 and 2 whose
;; result is inverted.  The insn is never output directly ("#"); it is
;; always split into the predicated "n" (inverted-result) pattern for the
;; same code, emitted as a VLMAX binary operation on the unchanged operand
;; array.
(define_insn_and_split "*n<optab><mode>"
  [(set (match_operand:VB 0 "register_operand"     "=vr")
        (not:VB
          (any_bitwise:VB
            (match_operand:VB 1 "register_operand" " vr")
            (match_operand:VB 2 "register_operand" " vr"))))]
  "TARGET_VECTOR && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    insn_code icode = code_for_pred_n (<CODE>, <MODE>mode);
    riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
    DONE;
  }
  [(set_attr "type" "vmalu")
   (set_attr "mode" "<MODE>")])

;; =========================================================================
;; == Widening Ternary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] VWMACC
;; -------------------------------------------------------------------------
;; Includes:
;; - vwmacc.vv
;; - vwmaccu.vv
;; -------------------------------------------------------------------------

;; Combine ext + ext + fma ===> widen fma.
;; In most circumstances, LoopVectorizer will generate the following IR:
;;   vect__8.64_40 = (vector([4,4]) int) vect__7.63_41;
;;   vect__11.68_35 = (vector([4,4]) int) vect__10.67_36;
;;   vect__13.70_33 = .FMA (vect__11.68_35, vect__8.64_40, vect__4.60_45);
;;
;; Operands 2 and 3 are the narrow inputs (both extended by the same
;; any_extend code) and operand 1 is the wide addend.  The insn is never
;; output directly ("#"); it is always split into the predicated
;; widen_mul_plus pattern, emitted as a VLMAX ternary operation on the
;; unchanged operand array.
(define_insn_and_split "*<optab>_fma<mode>"
  [(set (match_operand:VWEXTI 0 "register_operand")
        (plus:VWEXTI
          (mult:VWEXTI
            (any_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
            (any_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")))
          (match_operand:VWEXTI 1 "register_operand")))]
  "TARGET_VECTOR && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    insn_code icode = code_for_pred_widen_mul_plus (<CODE>, <MODE>mode);
    riscv_vector::emit_vlmax_ternary_insn (icode,
                                           riscv_vector::RVV_WIDEN_TERNOP,
                                           operands);
    DONE;
  }
  [(set_attr "type" "viwmuladd")
   (set_attr "mode" "<V_DOUBLE_TRUNC>")])

;; This helps to match ext + fma.
;;
;; Only one multiplicand (operand 2) is extended; operand 3 is already
;; wide and operand 1 is the wide addend.  The insn is never output
;; directly ("#"); it is always split back into an explicit VLMAX vf2
;; extension of operand 2 into a fresh pseudo, followed by a wide-mode
;; fma expanded through fma_optab.
(define_insn_and_split "*single_<optab>mult_plus<mode>"
  [(set (match_operand:VWEXTI 0 "register_operand")
        (plus:VWEXTI
          (mult:VWEXTI
            (any_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
            (match_operand:VWEXTI 3 "register_operand"))
          (match_operand:VWEXTI 1 "register_operand")))]
  "TARGET_VECTOR && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    /* Step 1: widen the narrow multiplicand (operand 2).  */
    insn_code icode = code_for_pred_vf2 (<CODE>, <MODE>mode);
    rtx tmp = gen_reg_rtx (<MODE>mode);
    rtx ext_ops[] = {tmp, operands[2]};
    riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_UNOP, ext_ops);

    /* Step 2: tmp * operands[3] + operands[1], with operands[0] offered
       as the target; the returned rtx is then copied into operands[0].  */
    rtx dst = expand_ternary_op (<MODE>mode, fma_optab, tmp, operands[3],
                                 operands[1], operands[0], 0);
    emit_move_insn (operands[0], dst);
    DONE;
  }
  [(set_attr "type" "viwmuladd")
   (set_attr "mode" "<V_DOUBLE_TRUNC>")])

;; Combine ext + ext + mult + plus ===> widen fma.
;; We have some special cases generated by LoopVectorizer:
;;   vect__8.18_46 = (vector([8,8]) signed short) vect__7.17_47;
;;   vect__11.22_41 = (vector([8,8]) signed short) vect__10.21_42;
;;   vect__12.23_40 = vect__11.22_41 * vect__8.18_46;
;;   vect__14.25_38 = vect__13.24_39 + vect__5.14_51;
;; This situation doesn't generate FMA IR.
;;
;; The outer predicated PLUS and the inner predicated widening MULT must
;; share the same mask (operand 1), vector length (operand 6), constant
;; predication operands (7-9) and undefined merge value (operand 2); this
;; is enforced by the match_dup entries.  Operands 4 and 5 are the narrow
;; multiplicands and operand 3 is the wide addend.  The insn is never
;; output directly ("#"); it is always split into one predicated
;; widen_mul_plus insn.
(define_insn_and_split "*double_<optab>mult_plus<mode>"
  [(set (match_operand:VWEXTI 0 "register_operand")
        (if_then_else:VWEXTI
          (unspec:<VM>
            [(match_operand:<VM> 1 "vector_mask_operand")
             (match_operand 6 "vector_length_operand")
             (match_operand 7 "const_int_operand")
             (match_operand 8 "const_int_operand")
             (match_operand 9 "const_int_operand")
             (reg:SI VL_REGNUM)
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
          (plus:VWEXTI
            (if_then_else:VWEXTI
              (unspec:<VM>
                [(match_dup 1)
                 (match_dup 6)
                 (match_dup 7)
                 (match_dup 8)
                 (match_dup 9)
                 (reg:SI VL_REGNUM)
                 (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
              (mult:VWEXTI
                (any_extend:VWEXTI
                  (match_operand:<V_DOUBLE_TRUNC> 4 "register_operand"))
                (any_extend:VWEXTI
                  (match_operand:<V_DOUBLE_TRUNC> 5 "register_operand")))
              (match_operand:VWEXTI 2 "vector_undef_operand"))
            (match_operand:VWEXTI 3 "register_operand"))
          (match_dup 2)))]
  "TARGET_VECTOR && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    /* Operand 2 (the undefined merge value) is intentionally not passed;
       operand 3 (the addend) takes the slot after the mask.  */
    emit_insn (gen_pred_widen_mul_plus (<CODE>, <MODE>mode, operands[0],
                                        operands[1], operands[3], operands[4],
                                        operands[5], operands[6], operands[7],
                                        operands[8], operands[9]));
    DONE;
  }
  [(set_attr "type" "viwmuladd")
   (set_attr "mode" "<V_DOUBLE_TRUNC>")])

;; Combine sign_extend + zero_extend + fma ===> widen fma (su).
;;
;; Operand 2 is the sign-extended narrow input, operand 3 the
;; zero-extended narrow input and operand 1 the wide addend.  The insn is
;; never output directly ("#"); it is always split into the predicated
;; widen_mul_plussu pattern, emitted as a VLMAX ternary operation on the
;; unchanged operand array.
(define_insn_and_split "*sign_zero_extend_fma"
  [(set (match_operand:VWEXTI 0 "register_operand")
        (plus:VWEXTI
          (mult:VWEXTI
            (sign_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
            (zero_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")))
          (match_operand:VWEXTI 1 "register_operand")))]
  "TARGET_VECTOR && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    insn_code icode = code_for_pred_widen_mul_plussu (<MODE>mode);
    riscv_vector::emit_vlmax_ternary_insn (icode,
                                           riscv_vector::RVV_WIDEN_TERNOP,
                                           operands);
    DONE;
  }
  [(set_attr "type" "viwmuladd")
   (set_attr "mode" "<V_DOUBLE_TRUNC>")])

;; This helps to match zero_extend + sign_extend + fma.
;;
;; Same as the sign/zero variant but with the extensions in the opposite
;; order: here operand 2 is zero-extended and operand 3 is sign-extended.
;; The split therefore swaps operands 2 and 3 before emitting the
;; predicated widen_mul_plussu pattern, so the sign-extended input comes
;; first in the operand array.
(define_insn_and_split "*zero_sign_extend_fma"
  [(set (match_operand:VWEXTI 0 "register_operand")
        (plus:VWEXTI
          (mult:VWEXTI
            (zero_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 2 "register_operand"))
            (sign_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 3 "register_operand")))
          (match_operand:VWEXTI 1 "register_operand")))]
  "TARGET_VECTOR && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    /* Reorder to (dest, addend, signed input, unsigned input).  */
    rtx ops[] = {operands[0], operands[1], operands[3], operands[2]};
    insn_code icode = code_for_pred_widen_mul_plussu (<MODE>mode);
    riscv_vector::emit_vlmax_ternary_insn (icode,
                                           riscv_vector::RVV_WIDEN_TERNOP,
                                           ops);
    DONE;
  }
  [(set_attr "type" "viwmuladd")
   (set_attr "mode" "<V_DOUBLE_TRUNC>")])

;; -------------------------------------------------------------------------
;; ---- [INT] Binary narrow shifts.
;; -------------------------------------------------------------------------
;; Includes:
;; - vnsrl.wv/vnsrl.wx/vnsrl.wi
;; - vnsra.wv/vnsra.wx/vnsra.wi
;; -------------------------------------------------------------------------

;; Match truncate (wide >> extend (narrow shift amount)), where the shift
;; amount (operand 2) is a narrow vector or a vector_shift_operand
;; constant ("vk" alternative).  The insn is never output directly ("#");
;; it is always split into the predicated narrow-shift pattern for the
;; same right-shift code, emitted as a VLMAX binary operation on the
;; unchanged operand array.
(define_insn_and_split "*v<any_shiftrt:optab><any_extend:optab>trunc<mode>"
  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand"       "=vr,vr")
        (truncate:<V_DOUBLE_TRUNC>
          (any_shiftrt:VWEXTI
            (match_operand:VWEXTI 1 "register_operand"                 " vr,vr")
            (any_extend:VWEXTI
              (match_operand:<V_DOUBLE_TRUNC> 2 "vector_shift_operand" " vr,vk")))))]
  "TARGET_VECTOR && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    insn_code icode = code_for_pred_narrow (<any_shiftrt:CODE>, <MODE>mode);
    riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
    DONE;
  }
  [(set_attr "type" "vnshift")
   (set_attr "mode" "<V_DOUBLE_TRUNC>")])

;; Match truncate (wide >> scalar shift amount).  Operand 2 is a scalar
;; csr_operand; the split first takes its Pmode lowpart and then emits the
;; predicated scalar narrow-shift pattern for the same right-shift code as
;; a VLMAX binary operation.  The insn is never output directly ("#").
;; NOTE(review): the mode of operand 2 is taken to be the element mode
;; <VEL> -- confirm against the mode attributes in vector-iterators.md.
(define_insn_and_split "*<any_shiftrt:optab>trunc<mode>"
  [(set (match_operand:<V_DOUBLE_TRUNC> 0 "register_operand" "=vr")
        (truncate:<V_DOUBLE_TRUNC>
          (any_shiftrt:VWEXTI
            (match_operand:VWEXTI 1 "register_operand"           " vr")
            (match_operand:<VEL> 2 "csr_operand"                 " rK"))))]
  "TARGET_VECTOR && can_create_pseudo_p ()"
  "#"
  "&& 1"
  [(const_int 0)]
  {
    /* The emitted pattern is given the shift amount as a Pmode value.  */
    operands[2] = gen_lowpart (Pmode, operands[2]);
    insn_code icode
      = code_for_pred_narrow_scalar (<any_shiftrt:CODE>, <MODE>mode);
    riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
    DONE;
  }
  [(set_attr "type" "vnshift")
   (set_attr "mode" "<V_DOUBLE_TRUNC>")])

;; -------------------------------------------------------------------------
;; ---- Sign-extension for vmv.x.s.
;; -------------------------------------------------------------------------

;; Sign-extend to DImode the element selected at index 0 of vector
;; operand 1.  The whole extract-and-extend is emitted as one vmv.x.s.
;; Only enabled when Pmode is DImode.
;; NOTE(review): the unspec and vec_select modes are taken to be the
;; element mode <VEL> -- confirm against vector-iterators.md.
(define_insn "*pred_extract_first_sextdi<mode>"
  [(set (match_operand:DI 0 "register_operand"            "=r")
        (sign_extend:DI
          (unspec:<VEL>
            [(vec_select:<VEL>
               (match_operand:VI_QHS 1 "register_operand" "vr")
               (parallel [(const_int 0)]))
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
  "TARGET_VECTOR && Pmode == DImode"
  "vmv.x.s\t%0,%1"
  [(set_attr "type" "vimovvx")
   (set_attr "mode" "<MODE>")])

;; Sign-extend to SImode the element selected at index 0 of vector
;; operand 1 (VI_QH modes only).  The whole extract-and-extend is emitted
;; as one vmv.x.s.  Only enabled when Pmode is SImode.
;; NOTE(review): the unspec and vec_select modes are taken to be the
;; element mode <VEL> -- confirm against vector-iterators.md.
(define_insn "*pred_extract_first_sextsi<mode>"
  [(set (match_operand:SI 0 "register_operand"           "=r")
        (sign_extend:SI
          (unspec:<VEL>
            [(vec_select:<VEL>
               (match_operand:VI_QH 1 "register_operand" "vr")
               (parallel [(const_int 0)]))
             (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)))]
  "TARGET_VECTOR && Pmode == SImode"
  "vmv.x.s\t%0,%1"
  [(set_attr "type" "vimovvx")
   (set_attr "mode" "<MODE>")])