| /* VSETVL pass for RISC-V 'V' Extension for GNU compiler. |
| Copyright (C) 2022-2023 Free Software Foundation, Inc. |
| Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or(at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* This pass is to Set VL/VTYPE global status for RVV instructions |
| that depend on VL and VTYPE registers by Lazy code motion (LCM). |
| |
| Strategy: |
| |
| - Backward demanded info fusion within block. |
| |
| - Lazy code motion (LCM) based demanded info backward propagation. |
| |
| - RTL_SSA framework for def-use, PHI analysis. |
| |
| - Lazy code motion (LCM) for global VL/VTYPE optimization. |
| |
| Assumption: |
| |
| - Each avl operand is either an immediate (must be in range 0 ~ 31) or reg. |
| |
| This pass consists of 5 phases: |
| |
| - Phase 1 - compute VL/VTYPE demanded information within each block |
| by backward data-flow analysis. |
| |
| - Phase 2 - Emit vsetvl instructions within each basic block according to |
| demand, compute and save ANTLOC && AVLOC of each block. |
| |
| - Phase 3 - Backward && forward demanded info propagation and fusion across |
| blocks. |
| |
| - Phase 4 - Lazy code motion including: compute local properties, |
| pre_edge_lcm and vsetvl insertion && delete edges for LCM results. |
| |
| - Phase 5 - Cleanup AVL operand of RVV instruction since it will not be |
| used any more and VL operand of VSETVL instruction if it is not used by |
| any non-debug instructions. |
| |
| - Phase 6 - Propagate AVL between vsetvl instructions. |
| |
| Implementation: |
| |
| - The subroutine of optimize == 0 is simple_vsetvl. |
| This function simplily vsetvl insertion for each RVV |
| instruction. No optimization. |
| |
| - The subroutine of optimize > 0 is lazy_vsetvl. |
| This function optimize vsetvl insertion process by |
| lazy code motion (LCM) layering on RTL_SSA. */ |
| |
| #define IN_TARGET_CODE 1 |
| #define INCLUDE_ALGORITHM |
| #define INCLUDE_FUNCTIONAL |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "backend.h" |
| #include "rtl.h" |
| #include "target.h" |
| #include "tree-pass.h" |
| #include "df.h" |
| #include "rtl-ssa.h" |
| #include "cfgcleanup.h" |
| #include "insn-config.h" |
| #include "insn-attr.h" |
| #include "insn-opinit.h" |
| #include "tm-constrs.h" |
| #include "cfgrtl.h" |
| #include "cfganal.h" |
| #include "lcm.h" |
| #include "predict.h" |
| #include "profile-count.h" |
| #include "riscv-vsetvl.h" |
| |
| using namespace rtl_ssa; |
| using namespace riscv_vector; |
| |
| DEBUG_FUNCTION void |
| debug (const vector_insn_info *info) |
| { |
| info->dump (stderr); |
| } |
| |
| DEBUG_FUNCTION void |
| debug (const vector_infos_manager *info) |
| { |
| info->dump (stderr); |
| } |
| |
| static bool |
| vlmax_avl_p (rtx x) |
| { |
| return x && rtx_equal_p (x, RVV_VLMAX); |
| } |
| |
| static bool |
| vlmax_avl_insn_p (rtx_insn *rinsn) |
| { |
| return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi |
| || INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi); |
| } |
| |
| /* Return true if the block is a loop itself: |
| local_dem |
| __________ |
| ____|____ | |
| | | | |
| |________| | |
| |_________| |
| reaching_out |
| */ |
| static bool |
| loop_basic_block_p (const basic_block cfg_bb) |
| { |
| if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb))) |
| { |
| edge e; |
| edge_iterator ei; |
| FOR_EACH_EDGE (e, ei, cfg_bb->succs) |
| if (e->dest->index == cfg_bb->index) |
| return true; |
| } |
| return false; |
| } |
| |
| /* Return true if it is an RVV instruction depends on VTYPE global |
| status register. */ |
| static bool |
| has_vtype_op (rtx_insn *rinsn) |
| { |
| return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn); |
| } |
| |
| /* Return true if it is an RVV instruction depends on VL global |
| status register. */ |
| static bool |
| has_vl_op (rtx_insn *rinsn) |
| { |
| return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn); |
| } |
| |
| /* Is this a SEW value that can be encoded into the VTYPE format. */ |
| static bool |
| valid_sew_p (size_t sew) |
| { |
| return exact_log2 (sew) && sew >= 8 && sew <= 64; |
| } |
| |
| /* Return true if it is a vsetvl instruction. */ |
| static bool |
| vector_config_insn_p (rtx_insn *rinsn) |
| { |
| return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL; |
| } |
| |
| /* Return true if it is vsetvldi or vsetvlsi. */ |
| static bool |
| vsetvl_insn_p (rtx_insn *rinsn) |
| { |
| if (!vector_config_insn_p (rinsn)) |
| return false; |
| return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi |
| || INSN_CODE (rinsn) == CODE_FOR_vsetvlsi); |
| } |
| |
| /* Return true if it is vsetvl zero, rs1. */ |
| static bool |
| vsetvl_discard_result_insn_p (rtx_insn *rinsn) |
| { |
| if (!vector_config_insn_p (rinsn)) |
| return false; |
| return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi |
| || INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi); |
| } |
| |
| static bool |
| real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb) |
| { |
| return insn != nullptr && insn->is_real () && insn->bb () == bb; |
| } |
| |
| static bool |
| before_p (const insn_info *insn1, const insn_info *insn2) |
| { |
| return insn1->compare_with (insn2) < 0; |
| } |
| |
| static insn_info * |
| find_reg_killed_by (const bb_info *bb, rtx x) |
| { |
| if (!x || vlmax_avl_p (x) || !REG_P (x)) |
| return nullptr; |
| for (insn_info *insn : bb->reverse_real_nondebug_insns ()) |
| if (find_access (insn->defs (), REGNO (x))) |
| return insn; |
| return nullptr; |
| } |
| |
| /* Helper function to get VL operand. */ |
| static rtx |
| get_vl (rtx_insn *rinsn) |
| { |
| if (has_vl_op (rinsn)) |
| { |
| extract_insn_cached (rinsn); |
| return recog_data.operand[get_attr_vl_op_idx (rinsn)]; |
| } |
| return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0)); |
| } |
| |
| static bool |
| has_vsetvl_killed_avl_p (const bb_info *bb, const vector_insn_info &info) |
| { |
| if (info.dirty_with_killed_avl_p ()) |
| { |
| rtx avl = info.get_avl (); |
| if (vlmax_avl_p (avl)) |
| return find_reg_killed_by (bb, get_vl (info.get_insn ()->rtl ())) |
| != nullptr; |
| for (const insn_info *insn : bb->reverse_real_nondebug_insns ()) |
| { |
| def_info *def = find_access (insn->defs (), REGNO (avl)); |
| if (def) |
| { |
| set_info *set = safe_dyn_cast<set_info *> (def); |
| if (!set) |
| return false; |
| |
| rtx new_avl = gen_rtx_REG (GET_MODE (avl), REGNO (avl)); |
| gcc_assert (new_avl != avl); |
| if (!info.compatible_avl_p (avl_info (new_avl, set))) |
| return false; |
| |
| return true; |
| } |
| } |
| } |
| return false; |
| } |
| |
| /* An "anticipatable occurrence" is one that is the first occurrence in the |
| basic block, the operands are not modified in the basic block prior |
| to the occurrence and the output is not used between the start of |
| the block and the occurrence. |
| |
| For VSETVL instruction, we have these following formats: |
| 1. vsetvl zero, rs1. |
| 2. vsetvl zero, imm. |
| 3. vsetvl rd, rs1. |
| |
| So base on these circumstances, a DEM is considered as a local anticipatable |
| occurrence should satisfy these following conditions: |
| |
| 1). rs1 (avl) are not modified in the basic block prior to the VSETVL. |
| 2). rd (vl) are not modified in the basic block prior to the VSETVL. |
| 3). rd (vl) is not used between the start of the block and the occurrence. |
| |
| Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE |
| is modified prior to the occurrence. This case is already considered as |
| a non-local anticipatable occurrence. |
| */ |
| static bool |
| anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem) |
| { |
| insn_info *insn = dem.get_insn (); |
| /* The only possible operand we care of VSETVL is AVL. */ |
| if (dem.has_avl_reg ()) |
| { |
| /* rs1 (avl) are not modified in the basic block prior to the VSETVL. */ |
| if (!vlmax_avl_p (dem.get_avl ())) |
| { |
| set_info *set |
| = find_access (insn->uses (), REGNO (dem.get_avl ()))->def (); |
| /* If it's undefined, it's not anticipatable conservatively. */ |
| if (!set) |
| return false; |
| if (real_insn_and_same_bb_p (set->insn (), bb) |
| && before_p (set->insn (), insn)) |
| return false; |
| } |
| } |
| |
| /* rd (vl) is not used between the start of the block and the occurrence. */ |
| if (vsetvl_insn_p (insn->rtl ())) |
| { |
| rtx dest = get_vl (insn->rtl ()); |
| for (insn_info *i = insn->prev_nondebug_insn (); |
| real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ()) |
| { |
| /* rd (vl) is not used between the start of the block and the |
| * occurrence. */ |
| if (find_access (i->uses (), REGNO (dest))) |
| return false; |
| /* rd (vl) are not modified in the basic block prior to the VSETVL. */ |
| if (find_access (i->defs (), REGNO (dest))) |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| /* An "available occurrence" is one that is the last occurrence in the |
| basic block and the operands are not modified by following statements in |
| the basic block [including this insn]. |
| |
| For VSETVL instruction, we have these following formats: |
| 1. vsetvl zero, rs1. |
| 2. vsetvl zero, imm. |
| 3. vsetvl rd, rs1. |
| |
| So base on these circumstances, a DEM is considered as a local available |
| occurrence should satisfy these following conditions: |
| |
| 1). rs1 (avl) are not modified by following statements in |
| the basic block. |
| 2). rd (vl) are not modified by following statements in |
| the basic block. |
| |
| Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE |
| is modified prior to the occurrence. This case is already considered as |
| a non-local available occurrence. |
| */ |
| static bool |
| available_occurrence_p (const bb_info *bb, const vector_insn_info dem) |
| { |
| insn_info *insn = dem.get_insn (); |
| /* The only possible operand we care of VSETVL is AVL. */ |
| if (dem.has_avl_reg ()) |
| { |
| if (!vlmax_avl_p (dem.get_avl ())) |
| { |
| rtx dest = NULL_RTX; |
| if (vsetvl_insn_p (insn->rtl ())) |
| dest = get_vl (insn->rtl ()); |
| for (const insn_info *i = insn; real_insn_and_same_bb_p (i, bb); |
| i = i->next_nondebug_insn ()) |
| { |
| /* rs1 (avl) are not modified by following statements in |
| the basic block. */ |
| if (find_access (i->defs (), REGNO (dem.get_avl ()))) |
| return false; |
| /* rd (vl) are not modified by following statements in |
| the basic block. */ |
| if (dest && find_access (i->defs (), REGNO (dest))) |
| return false; |
| } |
| } |
| } |
| return true; |
| } |
| |
| /* Return true if the block is worthwhile backward propagation. */ |
| static bool |
| backward_propagate_worthwhile_p (const basic_block cfg_bb, |
| const vector_block_info block_info) |
| { |
| if (loop_basic_block_p (cfg_bb)) |
| { |
| if (block_info.reaching_out.valid_or_dirty_p ()) |
| { |
| if (block_info.local_dem.compatible_p (block_info.reaching_out)) |
| { |
| /* Case 1 (Can backward propagate): |
| .... |
| bb0: |
| ... |
| for (int i = 0; i < n; i++) |
| { |
| vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); |
| __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); |
| } |
| The local_dem is compatible with reaching_out. Such case is |
| worthwhile backward propagation. */ |
| return true; |
| } |
| else |
| { |
| /* Case 2 (Don't backward propagate): |
| .... |
| bb0: |
| ... |
| for (int i = 0; i < n; i++) |
| { |
| vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); |
| __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); |
| vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8); |
| __riscv_vse16_v_i16mf2 (out + i + 6, v, 8); |
| } |
| The local_dem is incompatible with reaching_out. |
| It makes no sense to backward propagate the local_dem since we |
| can't avoid VSETVL inside the loop. */ |
| return false; |
| } |
| } |
| else |
| { |
| gcc_assert (block_info.reaching_out.unknown_p ()); |
| /* Case 3 (Don't backward propagate): |
| .... |
| bb0: |
| ... |
| for (int i = 0; i < n; i++) |
| { |
| vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7); |
| __riscv_vse16_v_i16mf4 (out + i + 5, v, 7); |
| fn3 (); |
| } |
| The local_dem is VALID, but the reaching_out is UNKNOWN. |
| It makes no sense to backward propagate the local_dem since we |
| can't avoid VSETVL inside the loop. */ |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| static bool |
| insn_should_be_added_p (const insn_info *insn, unsigned int types) |
| { |
| if (insn->is_real () && (types & REAL_SET)) |
| return true; |
| if (insn->is_phi () && (types & PHI_SET)) |
| return true; |
| if (insn->is_bb_head () && (types & BB_HEAD_SET)) |
| return true; |
| if (insn->is_bb_end () && (types & BB_END_SET)) |
| return true; |
| return false; |
| } |
| |
| /* Recursively find all define instructions. The kind of instruction is |
| specified by the DEF_TYPE. */ |
| static hash_set<set_info *> |
| get_all_sets (phi_info *phi, unsigned int types) |
| { |
| hash_set<set_info *> insns; |
| auto_vec<phi_info *> work_list; |
| hash_set<phi_info *> visited_list; |
| if (!phi) |
| return hash_set<set_info *> (); |
| work_list.safe_push (phi); |
| |
| while (!work_list.is_empty ()) |
| { |
| phi_info *phi = work_list.pop (); |
| visited_list.add (phi); |
| for (use_info *use : phi->inputs ()) |
| { |
| def_info *def = use->def (); |
| set_info *set = safe_dyn_cast<set_info *> (def); |
| if (!set) |
| return hash_set<set_info *> (); |
| |
| gcc_assert (!set->insn ()->is_debug_insn ()); |
| |
| if (insn_should_be_added_p (set->insn (), types)) |
| insns.add (set); |
| if (set->insn ()->is_phi ()) |
| { |
| phi_info *new_phi = as_a<phi_info *> (set); |
| if (!visited_list.contains (new_phi)) |
| work_list.safe_push (new_phi); |
| } |
| } |
| } |
| return insns; |
| } |
| |
| static hash_set<set_info *> |
| get_all_sets (set_info *set, bool /* get_real_inst */ real_p, |
| bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p) |
| { |
| if (real_p && phi_p && param_p) |
| return get_all_sets (safe_dyn_cast<phi_info *> (set), |
| REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET); |
| |
| else if (real_p && param_p) |
| return get_all_sets (safe_dyn_cast<phi_info *> (set), |
| REAL_SET | BB_HEAD_SET | BB_END_SET); |
| |
| else if (real_p) |
| return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET); |
| return hash_set<set_info *> (); |
| } |
| |
| /* Helper function to get AVL operand. */ |
| static rtx |
| get_avl (rtx_insn *rinsn) |
| { |
| if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn)) |
| return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0); |
| |
| if (!has_vl_op (rinsn)) |
| return NULL_RTX; |
| if (get_attr_avl_type (rinsn) == VLMAX) |
| return RVV_VLMAX; |
| extract_insn_cached (rinsn); |
| return recog_data.operand[get_attr_vl_op_idx (rinsn)]; |
| } |
| |
| static set_info * |
| get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb) |
| { |
| for (set_info *set : sets) |
| if (set->bb ()->cfg_bb () == cfg_bb) |
| return set; |
| return nullptr; |
| } |
| |
| /* Recursively find all predecessor blocks for cfg_bb. */ |
| static hash_set<basic_block> |
| get_all_predecessors (basic_block cfg_bb) |
| { |
| hash_set<basic_block> blocks; |
| auto_vec<basic_block> work_list; |
| hash_set<basic_block> visited_list; |
| work_list.safe_push (cfg_bb); |
| |
| while (!work_list.is_empty ()) |
| { |
| basic_block new_cfg_bb = work_list.pop (); |
| visited_list.add (new_cfg_bb); |
| edge e; |
| edge_iterator ei; |
| FOR_EACH_EDGE (e, ei, new_cfg_bb->preds) |
| { |
| if (!visited_list.contains (e->src)) |
| work_list.safe_push (e->src); |
| blocks.add (e->src); |
| } |
| } |
| return blocks; |
| } |
| |
| /* Return true if there is an INSN in insns staying in the block BB. */ |
| static bool |
| any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb) |
| { |
| for (const set_info *set : sets) |
| if (set->bb ()->index () == bb->index ()) |
| return true; |
| return false; |
| } |
| |
| /* Helper function to get SEW operand. We always have SEW value for |
| all RVV instructions that have VTYPE OP. */ |
| static uint8_t |
| get_sew (rtx_insn *rinsn) |
| { |
| return get_attr_sew (rinsn); |
| } |
| |
| /* Helper function to get VLMUL operand. We always have VLMUL value for |
| all RVV instructions that have VTYPE OP. */ |
| static enum vlmul_type |
| get_vlmul (rtx_insn *rinsn) |
| { |
| return (enum vlmul_type) get_attr_vlmul (rinsn); |
| } |
| |
| /* Get default tail policy. */ |
| static bool |
| get_default_ta () |
| { |
| /* For the instruction that doesn't require TA, we still need a default value |
| to emit vsetvl. We pick up the default value according to prefer policy. */ |
| return (bool) (get_prefer_tail_policy () & 0x1 |
| || (get_prefer_tail_policy () >> 1 & 0x1)); |
| } |
| |
| /* Get default mask policy. */ |
| static bool |
| get_default_ma () |
| { |
| /* For the instruction that doesn't require MA, we still need a default value |
| to emit vsetvl. We pick up the default value according to prefer policy. */ |
| return (bool) (get_prefer_mask_policy () & 0x1 |
| || (get_prefer_mask_policy () >> 1 & 0x1)); |
| } |
| |
| /* Helper function to get TA operand. */ |
| static bool |
| tail_agnostic_p (rtx_insn *rinsn) |
| { |
| /* If it doesn't have TA, we return agnostic by default. */ |
| extract_insn_cached (rinsn); |
| int ta = get_attr_ta (rinsn); |
| return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta); |
| } |
| |
| /* Helper function to get MA operand. */ |
| static bool |
| mask_agnostic_p (rtx_insn *rinsn) |
| { |
| /* If it doesn't have MA, we return agnostic by default. */ |
| extract_insn_cached (rinsn); |
| int ma = get_attr_ma (rinsn); |
| return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma); |
| } |
| |
| /* Return true if FN has a vector instruction that use VL/VTYPE. */ |
| static bool |
| has_vector_insn (function *fn) |
| { |
| basic_block cfg_bb; |
| rtx_insn *rinsn; |
| FOR_ALL_BB_FN (cfg_bb, fn) |
| FOR_BB_INSNS (cfg_bb, rinsn) |
| if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn)) |
| return true; |
| return false; |
| } |
| |
| /* Emit vsetvl instruction. */ |
| static rtx |
| gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl) |
| { |
| rtx avl = info.get_avl (); |
| rtx sew = gen_int_mode (info.get_sew (), Pmode); |
| rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode); |
| rtx ta = gen_int_mode (info.get_ta (), Pmode); |
| rtx ma = gen_int_mode (info.get_ma (), Pmode); |
| |
| if (insn_type == VSETVL_NORMAL) |
| { |
| gcc_assert (vl != NULL_RTX); |
| return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma); |
| } |
| else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY) |
| return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma); |
| else |
| return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma); |
| } |
| |
| static rtx |
| gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info) |
| { |
| rtx new_pat; |
| if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ())) |
| { |
| rtx dest = get_vl (rinsn); |
| new_pat = gen_vsetvl_pat (VSETVL_NORMAL, info, dest); |
| } |
| else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only) |
| new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX); |
| else |
| new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX); |
| return new_pat; |
| } |
| |
| static void |
| emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type, |
| const vl_vtype_info &info, rtx vl, rtx_insn *rinsn) |
| { |
| rtx pat = gen_vsetvl_pat (insn_type, info, vl); |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n"); |
| print_rtl_single (dump_file, pat); |
| } |
| |
| if (emit_type == EMIT_DIRECT) |
| emit_insn (pat); |
| else if (emit_type == EMIT_BEFORE) |
| emit_insn_before (pat, rinsn); |
| else |
| emit_insn_after (pat, rinsn); |
| } |
| |
| static void |
| eliminate_insn (rtx_insn *rinsn) |
| { |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn)); |
| print_rtl_single (dump_file, rinsn); |
| } |
| if (in_sequence_p ()) |
| remove_insn (rinsn); |
| else |
| delete_insn (rinsn); |
| } |
| |
| static void |
| insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn, |
| const vector_insn_info &info, const vector_insn_info &prev_info) |
| { |
| /* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same |
| VLMAX. */ |
| if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p () |
| && info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info)) |
| { |
| emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX, |
| rinsn); |
| return; |
| } |
| |
| if (info.has_avl_imm ()) |
| { |
| emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, |
| rinsn); |
| return; |
| } |
| |
| if (info.has_avl_no_reg ()) |
| { |
| /* We can only use x0, x0 if there's no chance of the vtype change causing |
| the previous vl to become invalid. */ |
| if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p () |
| && info.same_vlmax_p (prev_info)) |
| { |
| emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX, |
| rinsn); |
| return; |
| } |
| /* Otherwise use an AVL of 0 to avoid depending on previous vl. */ |
| vl_vtype_info new_info = info; |
| new_info.set_avl_info (avl_info (const0_rtx, nullptr)); |
| emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX, |
| rinsn); |
| return; |
| } |
| |
| /* Use X0 as the DestReg unless AVLReg is X0. We also need to change the |
| opcode if the AVLReg is X0 as they have different register classes for |
| the AVL operand. */ |
| if (vlmax_avl_p (info.get_avl ())) |
| { |
| gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn)); |
| rtx vl_op = get_vl (rinsn); |
| gcc_assert (!vlmax_avl_p (vl_op)); |
| emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn); |
| return; |
| } |
| |
| emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "Update VL/VTYPE info, previous info="); |
| prev_info.dump (dump_file); |
| } |
| } |
| |
| /* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them |
| to INSN. If such notes are added to an insn which references a |
| CODE_LABEL, the LABEL_NUSES count is incremented. We have to add |
| that note, because the following loop optimization pass requires |
| them. */ |
| |
| /* ??? If there was a jump optimization pass after gcse and before loop, |
| then we would not need to do this here, because jump would add the |
| necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */ |
| |
| static void |
| add_label_notes (rtx x, rtx_insn *rinsn) |
| { |
| enum rtx_code code = GET_CODE (x); |
| int i, j; |
| const char *fmt; |
| |
| if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x)) |
| { |
| /* This code used to ignore labels that referred to dispatch tables to |
| avoid flow generating (slightly) worse code. |
| |
| We no longer ignore such label references (see LABEL_REF handling in |
| mark_jump_label for additional information). */ |
| |
| /* There's no reason for current users to emit jump-insns with |
| such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET |
| notes. */ |
| gcc_assert (!JUMP_P (rinsn)); |
| add_reg_note (rinsn, REG_LABEL_OPERAND, label_ref_label (x)); |
| |
| if (LABEL_P (label_ref_label (x))) |
| LABEL_NUSES (label_ref_label (x))++; |
| |
| return; |
| } |
| |
| for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--) |
| { |
| if (fmt[i] == 'e') |
| add_label_notes (XEXP (x, i), rinsn); |
| else if (fmt[i] == 'E') |
| for (j = XVECLEN (x, i) - 1; j >= 0; j--) |
| add_label_notes (XVECEXP (x, i, j), rinsn); |
| } |
| } |
| |
| /* Add EXPR to the end of basic block BB. |
| |
| This is used by both the PRE and code hoisting. */ |
| |
| static void |
| insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb) |
| { |
| rtx_insn *end_rinsn = BB_END (cfg_bb); |
| rtx_insn *new_insn; |
| rtx_insn *pat, *pat_end; |
| |
| pat = rinsn; |
| gcc_assert (pat && INSN_P (pat)); |
| |
| pat_end = pat; |
| while (NEXT_INSN (pat_end) != NULL_RTX) |
| pat_end = NEXT_INSN (pat_end); |
| |
| /* If the last end_rinsn is a jump, insert EXPR in front. Similarly we need |
| to take care of trapping instructions in presence of non-call exceptions. |
| */ |
| |
| if (JUMP_P (end_rinsn) |
| || (NONJUMP_INSN_P (end_rinsn) |
| && (!single_succ_p (cfg_bb) |
| || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))) |
| { |
| /* FIXME: What if something in jump uses value set in new end_rinsn? */ |
| new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb); |
| } |
| |
| /* Likewise if the last end_rinsn is a call, as will happen in the presence |
| of exception handling. */ |
| else if (CALL_P (end_rinsn) |
| && (!single_succ_p (cfg_bb) |
| || single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)) |
| { |
| /* Keeping in mind targets with small register classes and parameters |
| in registers, we search backward and place the instructions before |
| the first parameter is loaded. Do this for everyone for consistency |
| and a presumption that we'll get better code elsewhere as well. */ |
| |
| /* Since different machines initialize their parameter registers |
| in different orders, assume nothing. Collect the set of all |
| parameter registers. */ |
| end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb)); |
| |
| /* If we found all the parameter loads, then we want to insert |
| before the first parameter load. |
| |
| If we did not find all the parameter loads, then we might have |
| stopped on the head of the block, which could be a CODE_LABEL. |
| If we inserted before the CODE_LABEL, then we would be putting |
| the end_rinsn in the wrong basic block. In that case, put the |
| end_rinsn after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK. |
| */ |
| while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn)) |
| end_rinsn = NEXT_INSN (end_rinsn); |
| |
| new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb); |
| } |
| else |
| new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb); |
| |
| while (1) |
| { |
| if (INSN_P (pat)) |
| add_label_notes (PATTERN (pat), new_insn); |
| if (pat == pat_end) |
| break; |
| pat = NEXT_INSN (pat); |
| } |
| } |
| |
| /* Get VL/VTYPE information for INSN. */ |
| static vl_vtype_info |
| get_vl_vtype_info (const insn_info *insn) |
| { |
| set_info *set = nullptr; |
| rtx avl = ::get_avl (insn->rtl ()); |
| if (avl && REG_P (avl) && !vlmax_avl_p (avl)) |
| set = find_access (insn->uses (), REGNO (avl))->def (); |
| |
| uint8_t sew = get_sew (insn->rtl ()); |
| enum vlmul_type vlmul = get_vlmul (insn->rtl ()); |
| uint8_t ratio = get_attr_ratio (insn->rtl ()); |
| /* when get_attr_ratio is invalid, this kind of instructions |
| doesn't care about ratio. However, we still need this value |
| in demand info backward analysis. */ |
| if (ratio == INVALID_ATTRIBUTE) |
| ratio = calculate_ratio (sew, vlmul); |
| bool ta = tail_agnostic_p (insn->rtl ()); |
| bool ma = mask_agnostic_p (insn->rtl ()); |
| |
| /* If merge operand is undef value, we prefer agnostic. */ |
| int merge_op_idx = get_attr_merge_op_idx (insn->rtl ()); |
| if (merge_op_idx != INVALID_ATTRIBUTE |
| && satisfies_constraint_vu (recog_data.operand[merge_op_idx])) |
| { |
| ta = true; |
| ma = true; |
| } |
| |
| vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma); |
| return info; |
| } |
| |
| static void |
| change_insn (rtx_insn *rinsn, rtx new_pat) |
| { |
| /* We don't apply change on RTL_SSA here since it's possible a |
| new INSN we add in the PASS before which doesn't have RTL_SSA |
| info yet.*/ |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nChange PATTERN of insn %d from:\n", |
| INSN_UID (rinsn)); |
| print_rtl_single (dump_file, PATTERN (rinsn)); |
| } |
| |
| validate_change (rinsn, &PATTERN (rinsn), new_pat, false); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nto:\n"); |
| print_rtl_single (dump_file, PATTERN (rinsn)); |
| } |
| } |
| |
| static bool |
| change_insn (function_info *ssa, insn_change change, insn_info *insn, |
| rtx new_pat) |
| { |
| rtx_insn *rinsn = insn->rtl (); |
| auto attempt = ssa->new_change_attempt (); |
| if (!restrict_movement (change)) |
| return false; |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nChange PATTERN of insn %d from:\n", |
| INSN_UID (rinsn)); |
| print_rtl_single (dump_file, PATTERN (rinsn)); |
| } |
| |
| insn_change_watermark watermark; |
| validate_change (rinsn, &PATTERN (rinsn), new_pat, true); |
| |
| /* These routines report failures themselves. */ |
| if (!recog (attempt, change) || !change_is_worthwhile (change, false)) |
| return false; |
| confirm_change_group (); |
| ssa->change_insn (change); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nto:\n"); |
| print_rtl_single (dump_file, PATTERN (rinsn)); |
| } |
| return true; |
| } |
| |
| static void |
| change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info) |
| { |
| rtx_insn *rinsn; |
| if (vector_config_insn_p (insn->rtl ())) |
| { |
| rinsn = insn->rtl (); |
| gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet"); |
| } |
| else |
| { |
| gcc_assert (has_vtype_op (insn->rtl ())); |
| rinsn = PREV_INSN (insn->rtl ()); |
| gcc_assert (vector_config_insn_p (rinsn)); |
| } |
| rtx new_pat = gen_vsetvl_pat (rinsn, info); |
| change_insn (rinsn, new_pat); |
| } |
| |
| static bool |
| source_equal_p (insn_info *insn1, insn_info *insn2) |
| { |
| if (!insn1 || !insn2) |
| return false; |
| rtx_insn *rinsn1 = insn1->rtl (); |
| rtx_insn *rinsn2 = insn2->rtl (); |
| if (!rinsn1 || !rinsn2) |
| return false; |
| rtx note1 = find_reg_equal_equiv_note (rinsn1); |
| rtx note2 = find_reg_equal_equiv_note (rinsn2); |
| rtx single_set1 = single_set (rinsn1); |
| rtx single_set2 = single_set (rinsn2); |
| |
| if (note1 && note2 && rtx_equal_p (note1, note2)) |
| return true; |
| |
| /* Since vsetvl instruction is not single SET. |
| We handle this case specially here. */ |
| if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ())) |
| { |
| /* For example: |
| vsetvl1 a6,a5,e32m1 |
| RVV 1 (use a6 as AVL) |
| vsetvl2 a5,a5,e8mf4 |
| RVV 2 (use a5 as AVL) |
| We consider AVL of RVV 1 and RVV 2 are same so that we can |
| gain more optimization opportunities. |
| |
| Note: insn1_info.compatible_avl_p (insn2_info) |
| will make sure there is no instruction between vsetvl1 and vsetvl2 |
| modify a5 since their def will be different if there is instruction |
| modify a5 and compatible_avl_p will return false. */ |
| vector_insn_info insn1_info, insn2_info; |
| insn1_info.parse_insn (insn1); |
| insn2_info.parse_insn (insn2); |
| if (insn1_info.same_vlmax_p (insn2_info) |
| && insn1_info.compatible_avl_p (insn2_info)) |
| return true; |
| } |
| |
| /* We only handle AVL is set by instructions with no side effects. */ |
| if (!single_set1 || !single_set2) |
| return false; |
| if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2))) |
| return false; |
| gcc_assert (insn1->uses ().size () == insn2->uses ().size ()); |
| for (size_t i = 0; i < insn1->uses ().size (); i++) |
| if (insn1->uses ()[i] != insn2->uses ()[i]) |
| return false; |
| return true; |
| } |
| |
| /* Helper function to get single same real RTL source. |
| return NULL if it is not a single real RTL source. */ |
| static insn_info * |
| extract_single_source (set_info *set) |
| { |
| if (!set) |
| return nullptr; |
| if (set->insn ()->is_real ()) |
| return set->insn (); |
| if (!set->insn ()->is_phi ()) |
| return nullptr; |
| hash_set<set_info *> sets = get_all_sets (set, true, false, true); |
| |
| insn_info *first_insn = (*sets.begin ())->insn (); |
| if (first_insn->is_artificial ()) |
| return nullptr; |
| for (const set_info *set : sets) |
| { |
| /* If there is a head or end insn, we conservative return |
| NULL so that VSETVL PASS will insert vsetvl directly. */ |
| if (set->insn ()->is_artificial ()) |
| return nullptr; |
| if (!source_equal_p (set->insn (), first_insn)) |
| return nullptr; |
| } |
| |
| return first_insn; |
| } |
| |
| avl_info::avl_info (const avl_info &other) |
| { |
| m_value = other.get_value (); |
| m_source = other.get_source (); |
| } |
| |
| avl_info::avl_info (rtx value_in, set_info *source_in) |
| : m_value (value_in), m_source (source_in) |
| {} |
| |
| bool |
| avl_info::single_source_equal_p (const avl_info &other) const |
| { |
| set_info *set1 = m_source; |
| set_info *set2 = other.get_source (); |
| insn_info *insn1 = extract_single_source (set1); |
| insn_info *insn2 = extract_single_source (set2); |
| if (!insn1 || !insn2) |
| return false; |
| return source_equal_p (insn1, insn2); |
| } |
| |
| bool |
| avl_info::multiple_source_equal_p (const avl_info &other) const |
| { |
| /* TODO: We don't do too much optimization here since it's |
| too complicated in case of analyzing the PHI node. |
| |
| For example: |
| void f (void * restrict in, void * restrict out, int n, int m, int cond) |
| { |
| size_t vl; |
| switch (cond) |
| { |
| case 1: |
| vl = 100; |
| break; |
| case 2: |
| vl = *(size_t*)(in + 100); |
| break; |
| case 3: |
| { |
| size_t new_vl = *(size_t*)(in + 500); |
| size_t new_vl2 = *(size_t*)(in + 600); |
| vl = new_vl + new_vl2 + 777; |
| break; |
| } |
| default: |
| vl = 4000; |
| break; |
| } |
| for (size_t i = 0; i < n; i++) |
| { |
| vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl); |
| __riscv_vse8_v_i8mf8 (out + i, v, vl); |
| |
| vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl); |
| __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl); |
| } |
| |
| size_t vl2; |
| switch (cond) |
| { |
| case 1: |
| vl2 = 100; |
| break; |
| case 2: |
| vl2 = *(size_t*)(in + 100); |
| break; |
| case 3: |
| { |
| size_t new_vl = *(size_t*)(in + 500); |
| size_t new_vl2 = *(size_t*)(in + 600); |
| vl2 = new_vl + new_vl2 + 777; |
| break; |
| } |
| default: |
| vl2 = 4000; |
| break; |
| } |
| for (size_t i = 0; i < m; i++) |
| { |
| vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl2); |
| __riscv_vse8_v_i8mf8 (out + i + 300, v, vl2); |
| vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 200, vl2); |
| __riscv_vse8_v_i8mf8 (out + i + 200, v2, vl2); |
| } |
| } |
| Such case may not be necessary to optimize since the codes of defining |
| vl and vl2 are redundant. */ |
| return m_source == other.get_source (); |
| } |
| |
| avl_info & |
| avl_info::operator= (const avl_info &other) |
| { |
| m_value = other.get_value (); |
| m_source = other.get_source (); |
| return *this; |
| } |
| |
| bool |
| avl_info::operator== (const avl_info &other) const |
| { |
| if (!m_value) |
| return !other.get_value (); |
| if (!other.get_value ()) |
| return false; |
| |
| if (GET_CODE (m_value) != GET_CODE (other.get_value ())) |
| return false; |
| |
| /* Handle CONST_INT AVL. */ |
| if (CONST_INT_P (m_value)) |
| return INTVAL (m_value) == INTVAL (other.get_value ()); |
| |
| /* Handle VLMAX AVL. */ |
| if (vlmax_avl_p (m_value)) |
| return vlmax_avl_p (other.get_value ()); |
| |
| /* If any source is undef value, we think they are not equal. */ |
| if (!m_source || !other.get_source ()) |
| return false; |
| |
| /* If both sources are single source (defined by a single real RTL) |
| and their definitions are same. */ |
| if (single_source_equal_p (other)) |
| return true; |
| |
| return multiple_source_equal_p (other); |
| } |
| |
| bool |
| avl_info::operator!= (const avl_info &other) const |
| { |
| return !(*this == other); |
| } |
| |
| /* Initialize VL/VTYPE information. */ |
| vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in, |
| enum vlmul_type vlmul_in, uint8_t ratio_in, |
| bool ta_in, bool ma_in) |
| : m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in), |
| m_ta (ta_in), m_ma (ma_in) |
| { |
| gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW"); |
| } |
| |
| bool |
| vl_vtype_info::operator== (const vl_vtype_info &other) const |
| { |
| return same_avl_p (other) && m_sew == other.get_sew () |
| && m_vlmul == other.get_vlmul () && m_ta == other.get_ta () |
| && m_ma == other.get_ma () && m_ratio == other.get_ratio (); |
| } |
| |
| bool |
| vl_vtype_info::operator!= (const vl_vtype_info &other) const |
| { |
| return !(*this == other); |
| } |
| |
| bool |
| vl_vtype_info::has_non_zero_avl () const |
| { |
| if (has_avl_imm ()) |
| return INTVAL (get_avl ()) > 0; |
| if (has_avl_reg ()) |
| return vlmax_avl_p (get_avl ()); |
| return false; |
| } |
| |
| bool |
| vl_vtype_info::same_avl_p (const vl_vtype_info &other) const |
| { |
| /* We need to compare both RTL and SET. If both AVL are CONST_INT. |
| For example, const_int 3 and const_int 4, we need to compare |
| RTL. If both AVL are REG and their REGNO are same, we need to |
| compare SET. */ |
| return get_avl () == other.get_avl () |
| && get_avl_source () == other.get_avl_source (); |
| } |
| |
| bool |
| vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const |
| { |
| return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul () |
| && get_ta () == other.get_ta () && get_ma () == other.get_ma (); |
| } |
| |
| bool |
| vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const |
| { |
| return get_ratio () == other.get_ratio (); |
| } |
| |
| /* Compare the compatibility between Dem1 and Dem2. |
| If Dem1 > Dem2, Dem1 has bigger compatibility then Dem2 |
| meaning Dem1 is easier be compatible with others than Dem2 |
| or Dem2 is stricter than Dem1. |
| For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */ |
| bool |
| vector_insn_info::operator> (const vector_insn_info &other) const |
| { |
| if (other.compatible_p (static_cast<const vl_vtype_info &> (*this)) |
| && !this->compatible_p (static_cast<const vl_vtype_info &> (other))) |
| return true; |
| return false; |
| } |
| |
| bool |
| vector_insn_info::operator>= (const vector_insn_info &other) const |
| { |
| if (*this > other) |
| return true; |
| |
| if (*this == other) |
| return true; |
| |
| if (!compatible_p (other)) |
| return false; |
| |
| if (!demand_p (DEMAND_AVL) && other.demand_p (DEMAND_AVL)) |
| return false; |
| |
| if (same_vlmax_p (other)) |
| { |
| if (demand_p (DEMAND_RATIO) && !other.demand_p (DEMAND_RATIO) |
| && (get_sew () != other.get_sew () |
| || get_vlmul () != other.get_vlmul ())) |
| return false; |
| |
| if (get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ()) |
| { |
| if (demand_p (DEMAND_RATIO) && !other.demand_p (DEMAND_RATIO)) |
| return false; |
| } |
| } |
| |
| if (!demand_p (DEMAND_TAIL_POLICY) && other.demand_p (DEMAND_TAIL_POLICY)) |
| return false; |
| |
| if (!demand_p (DEMAND_MASK_POLICY) && other.demand_p (DEMAND_MASK_POLICY)) |
| return false; |
| |
| return true; |
| } |
| |
| bool |
| vector_insn_info::operator== (const vector_insn_info &other) const |
| { |
| gcc_assert (!uninit_p () && !other.uninit_p () |
| && "Uninitialization should not happen"); |
| |
| /* Empty is only equal to another Empty. */ |
| if (empty_p ()) |
| return other.empty_p (); |
| if (other.empty_p ()) |
| return empty_p (); |
| |
| /* Unknown is only equal to another Unknown. */ |
| if (unknown_p ()) |
| return other.unknown_p (); |
| if (other.unknown_p ()) |
| return unknown_p (); |
| |
| for (size_t i = 0; i < NUM_DEMAND; i++) |
| if (m_demands[i] != other.demand_p ((enum demand_type) i)) |
| return false; |
| |
| if (vector_config_insn_p (m_insn->rtl ()) |
| || vector_config_insn_p (other.get_insn ()->rtl ())) |
| if (m_insn != other.get_insn ()) |
| return false; |
| |
| if (!same_avl_p (other)) |
| return false; |
| |
| /* If the full VTYPE is valid, check that it is the same. */ |
| return same_vtype_p (other); |
| } |
| |
| void |
| vector_insn_info::parse_insn (rtx_insn *rinsn) |
| { |
| *this = vector_insn_info (); |
| if (!NONDEBUG_INSN_P (rinsn)) |
| return; |
| if (!has_vtype_op (rinsn)) |
| return; |
| m_state = VALID; |
| extract_insn_cached (rinsn); |
| const rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)]; |
| m_avl = avl_info (avl, nullptr); |
| m_sew = ::get_sew (rinsn); |
| m_vlmul = ::get_vlmul (rinsn); |
| m_ta = tail_agnostic_p (rinsn); |
| m_ma = mask_agnostic_p (rinsn); |
| } |
| |
| void |
| vector_insn_info::parse_insn (insn_info *insn) |
| { |
| *this = vector_insn_info (); |
| |
| /* Return if it is debug insn for the consistency with optimize == 0. */ |
| if (insn->is_debug_insn ()) |
| return; |
| |
| /* We set it as unknown since we don't what will happen in CALL or ASM. */ |
| if (insn->is_call () || insn->is_asm ()) |
| { |
| set_unknown (); |
| return; |
| } |
| |
| /* If this is something that updates VL/VTYPE that we don't know about, set |
| the state to unknown. */ |
| if (!vector_config_insn_p (insn->rtl ()) |
| && (find_access (insn->defs (), VL_REGNUM) |
| || find_access (insn->defs (), VTYPE_REGNUM))) |
| { |
| set_unknown (); |
| return; |
| } |
| |
| if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ())) |
| return; |
| |
| /* Warning: This function has to work on both the lowered (i.e. post |
| emit_local_forward_vsetvls) and pre-lowering forms. The main implication |
| of this is that it can't use the value of a SEW, VL, or Policy operand as |
| they might be stale after lowering. */ |
| vl_vtype_info::operator= (get_vl_vtype_info (insn)); |
| m_insn = insn; |
| m_state = VALID; |
| if (vector_config_insn_p (insn->rtl ())) |
| { |
| m_demands[DEMAND_AVL] = true; |
| m_demands[DEMAND_RATIO] = true; |
| return; |
| } |
| |
| if (has_vl_op (insn->rtl ())) |
| m_demands[DEMAND_AVL] = true; |
| |
| if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE) |
| m_demands[DEMAND_RATIO] = true; |
| else |
| { |
| /* TODO: By default, if it doesn't demand RATIO, we set it |
| demand SEW && LMUL both. Some instructions may demand SEW |
| only and ignore LMUL, will fix it later. */ |
| m_demands[DEMAND_SEW] = true; |
| m_demands[DEMAND_LMUL] = true; |
| } |
| |
| if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE) |
| m_demands[DEMAND_TAIL_POLICY] = true; |
| if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE) |
| m_demands[DEMAND_MASK_POLICY] = true; |
| |
| if (vector_config_insn_p (insn->rtl ())) |
| return; |
| |
| if (!has_avl_reg () || !m_avl.get_source () |
| || !m_avl.get_source ()->insn ()->is_phi ()) |
| return; |
| |
| insn_info *def_insn = extract_single_source (m_avl.get_source ()); |
| if (def_insn) |
| { |
| vector_insn_info new_info; |
| new_info.parse_insn (def_insn); |
| if (!same_vlmax_p (new_info)) |
| return; |
| /* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl. */ |
| if (vlmax_avl_p (new_info.get_avl ())) |
| set_avl_info (new_info.get_avl_info ()); |
| } |
| } |
| |
| void |
| vector_insn_info::demand_vl_vtype () |
| { |
| m_state = VALID; |
| m_demands[DEMAND_AVL] = true; |
| m_demands[DEMAND_SEW] = true; |
| m_demands[DEMAND_LMUL] = true; |
| m_demands[DEMAND_TAIL_POLICY] = true; |
| m_demands[DEMAND_MASK_POLICY] = true; |
| } |
| |
| bool |
| vector_insn_info::compatible_p (const vector_insn_info &other) const |
| { |
| gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p () |
| && "Can't compare invalid demanded infos"); |
| |
| /* Check SEW. */ |
| if (demand_p (DEMAND_SEW) && other.demand_p (DEMAND_SEW) |
| && get_sew () != other.get_sew ()) |
| return false; |
| |
| /* Check LMUL. */ |
| if (demand_p (DEMAND_LMUL) && other.demand_p (DEMAND_LMUL) |
| && get_vlmul () != other.get_vlmul ()) |
| return false; |
| |
| /* Check RATIO. */ |
| if (demand_p (DEMAND_RATIO) && other.demand_p (DEMAND_RATIO) |
| && get_ratio () != other.get_ratio ()) |
| return false; |
| if (demand_p (DEMAND_RATIO) && (other.get_sew () || other.get_vlmul ()) |
| && get_ratio () != other.get_ratio ()) |
| return false; |
| if (other.demand_p (DEMAND_RATIO) && (get_sew () || get_vlmul ()) |
| && get_ratio () != other.get_ratio ()) |
| return false; |
| |
| if (demand_p (DEMAND_TAIL_POLICY) && other.demand_p (DEMAND_TAIL_POLICY) |
| && get_ta () != other.get_ta ()) |
| return false; |
| if (demand_p (DEMAND_MASK_POLICY) && other.demand_p (DEMAND_MASK_POLICY) |
| && get_ma () != other.get_ma ()) |
| return false; |
| |
| if (demand_p (DEMAND_AVL) && other.demand_p (DEMAND_AVL)) |
| return compatible_avl_p (other); |
| |
| return true; |
| } |
| |
| bool |
| vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const |
| { |
| gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); |
| gcc_assert (!unknown_p () && "Can't compare AVL in unknown state"); |
| if (!demand_p (DEMAND_AVL)) |
| return true; |
| return get_avl_info () == other.get_avl_info (); |
| } |
| |
| bool |
| vector_insn_info::compatible_avl_p (const avl_info &other) const |
| { |
| gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); |
| gcc_assert (!unknown_p () && "Can't compare AVL in unknown state"); |
| gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state"); |
| return get_avl_info () == other; |
| } |
| |
| bool |
| vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const |
| { |
| gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info"); |
| gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state"); |
| if (demand_p (DEMAND_SEW) && m_sew != other.get_sew ()) |
| return false; |
| if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ()) |
| return false; |
| if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ()) |
| return false; |
| if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ()) |
| return false; |
| if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ()) |
| return false; |
| return true; |
| } |
| |
| /* Determine whether the vector instructions requirements represented by |
| Require are compatible with the previous vsetvli instruction represented |
| by this. INSN is the instruction whose requirements we're considering. */ |
| bool |
| vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const |
| { |
| gcc_assert (!uninit_p () && "Can't handle uninitialized info"); |
| if (empty_p ()) |
| return false; |
| |
| /* Nothing is compatible with Unknown. */ |
| if (unknown_p ()) |
| return false; |
| |
| /* If the instruction doesn't need an AVLReg and the SEW matches, consider |
| it compatible. */ |
| if (!demand_p (DEMAND_AVL)) |
| if (m_sew == curr_info.get_sew ()) |
| return true; |
| |
| return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info); |
| } |
| |
| bool |
| vector_insn_info::available_p (const vector_insn_info &other) const |
| { |
| if (*this >= other) |
| return true; |
| return false; |
| } |
| |
| vector_insn_info |
| vector_insn_info::merge (const vector_insn_info &merge_info, |
| enum merge_type type = LOCAL_MERGE) const |
| { |
| if (!vsetvl_insn_p (get_insn ()->rtl ())) |
| gcc_assert (this->compatible_p (merge_info) |
| && "Can't merge incompatible demanded infos"); |
| |
| vector_insn_info new_info; |
| new_info.demand_vl_vtype (); |
| |
| if (type == LOCAL_MERGE) |
| { |
| /* For local backward data flow, we always update INSN && AVL as the |
| latest INSN and AVL so that we can keep track status of each INSN.*/ |
| new_info.set_insn (merge_info.get_insn ()); |
| if (merge_info.demand_p (DEMAND_AVL)) |
| new_info.set_avl_info (merge_info.get_avl_info ()); |
| else if (demand_p (DEMAND_AVL)) |
| new_info.set_avl_info (get_avl_info ()); |
| } |
| else |
| { |
| /* For global data flow, we should keep original INSN and AVL if they |
| valid since we should keep the life information of each block. |
| |
| For example: |
| bb 0 -> bb 1. |
| We should keep INSN && AVL of bb 1 since we will eventually emit |
| vsetvl instruction according to INSN and AVL of bb 1. */ |
| new_info.set_insn (get_insn ()); |
| if (demand_p (DEMAND_AVL)) |
| new_info.set_avl_info (get_avl_info ()); |
| else if (merge_info.demand_p (DEMAND_AVL)) |
| new_info.set_avl_info (merge_info.get_avl_info ()); |
| } |
| |
| if (!demand_p (DEMAND_AVL) && !merge_info.demand_p (DEMAND_AVL)) |
| new_info.undemand (DEMAND_AVL); |
| if (!demand_p (DEMAND_SEW) && !merge_info.demand_p (DEMAND_SEW)) |
| new_info.undemand (DEMAND_SEW); |
| if (!demand_p (DEMAND_LMUL) && !merge_info.demand_p (DEMAND_LMUL)) |
| new_info.undemand (DEMAND_LMUL); |
| |
| if (!demand_p (DEMAND_TAIL_POLICY) |
| && !merge_info.demand_p (DEMAND_TAIL_POLICY)) |
| new_info.undemand (DEMAND_TAIL_POLICY); |
| if (!demand_p (DEMAND_MASK_POLICY) |
| && !merge_info.demand_p (DEMAND_MASK_POLICY)) |
| new_info.undemand (DEMAND_MASK_POLICY); |
| |
| if (merge_info.demand_p (DEMAND_SEW)) |
| new_info.set_sew (merge_info.get_sew ()); |
| else if (demand_p (DEMAND_SEW)) |
| new_info.set_sew (get_sew ()); |
| |
| if (merge_info.demand_p (DEMAND_LMUL)) |
| new_info.set_vlmul (merge_info.get_vlmul ()); |
| else if (demand_p (DEMAND_LMUL)) |
| new_info.set_vlmul (get_vlmul ()); |
| |
| if (!new_info.demand_p (DEMAND_SEW) && !new_info.demand_p (DEMAND_LMUL)) |
| { |
| if (demand_p (DEMAND_RATIO) || merge_info.demand_p (DEMAND_RATIO)) |
| new_info.demand (DEMAND_RATIO); |
| /* Even though we don't demand_p SEW && VLMUL in this case, we still |
| * need them. */ |
| if (merge_info.demand_p (DEMAND_RATIO)) |
| { |
| new_info.set_sew (merge_info.get_sew ()); |
| new_info.set_vlmul (merge_info.get_vlmul ()); |
| new_info.set_ratio (merge_info.get_ratio ()); |
| } |
| else if (demand_p (DEMAND_RATIO)) |
| { |
| new_info.set_sew (get_sew ()); |
| new_info.set_vlmul (get_vlmul ()); |
| new_info.set_ratio (get_ratio ()); |
| } |
| } |
| else |
| { |
| /* when get_attr_ratio is invalid, this kind of instructions |
| doesn't care about ratio. However, we still need this value |
| in demand_p info backward analysis. */ |
| new_info.set_ratio ( |
| calculate_ratio (new_info.get_sew (), new_info.get_vlmul ())); |
| } |
| |
| if (merge_info.demand_p (DEMAND_TAIL_POLICY)) |
| new_info.set_ta (merge_info.get_ta ()); |
| else if (demand_p (DEMAND_TAIL_POLICY)) |
| new_info.set_ta (get_ta ()); |
| else |
| new_info.set_ta (get_default_ta ()); |
| |
| if (merge_info.demand_p (DEMAND_MASK_POLICY)) |
| new_info.set_ma (merge_info.get_ma ()); |
| else if (demand_p (DEMAND_MASK_POLICY)) |
| new_info.set_ma (get_ma ()); |
| else |
| new_info.set_ma (get_default_ma ()); |
| |
| return new_info; |
| } |
| |
| void |
| vector_insn_info::dump (FILE *file) const |
| { |
| fprintf (file, "["); |
| if (uninit_p ()) |
| fprintf (file, "UNINITIALIZED,"); |
| else if (valid_p ()) |
| fprintf (file, "VALID,"); |
| else if (unknown_p ()) |
| fprintf (file, "UNKNOWN,"); |
| else if (empty_p ()) |
| fprintf (file, "EMPTY,"); |
| else if (hard_empty_p ()) |
| fprintf (file, "HARD_EMPTY,"); |
| else if (dirty_with_killed_avl_p ()) |
| fprintf (file, "DIRTY_WITH_KILLED_AVL,"); |
| else |
| fprintf (file, "DIRTY,"); |
| |
| fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL)); |
| fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW)); |
| fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL)); |
| fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO)); |
| fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY)); |
| fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY)); |
| |
| fprintf (file, "AVL="); |
| print_rtl_single (file, get_avl ()); |
| fprintf (file, "SEW=%d,", get_sew ()); |
| fprintf (file, "VLMUL=%d,", get_vlmul ()); |
| fprintf (file, "RATIO=%d,", get_ratio ()); |
| fprintf (file, "TAIL_POLICY=%d,", get_ta ()); |
| fprintf (file, "MASK_POLICY=%d", get_ma ()); |
| fprintf (file, "]\n"); |
| |
| if (valid_p ()) |
| { |
| if (get_insn ()) |
| { |
| fprintf (file, "The real INSN="); |
| print_rtl_single (file, get_insn ()->rtl ()); |
| } |
| } |
| } |
| |
| vector_infos_manager::vector_infos_manager () |
| { |
| vector_edge_list = nullptr; |
| vector_kill = nullptr; |
| vector_del = nullptr; |
| vector_insert = nullptr; |
| vector_antic = nullptr; |
| vector_transp = nullptr; |
| vector_comp = nullptr; |
| vector_avin = nullptr; |
| vector_avout = nullptr; |
| vector_insn_infos.safe_grow (get_max_uid ()); |
| vector_block_infos.safe_grow (last_basic_block_for_fn (cfun)); |
| if (!optimize) |
| { |
| basic_block cfg_bb; |
| rtx_insn *rinsn; |
| FOR_ALL_BB_FN (cfg_bb, cfun) |
| { |
| vector_block_infos[cfg_bb->index].local_dem = vector_insn_info (); |
| vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info (); |
| FOR_BB_INSNS (cfg_bb, rinsn) |
| vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn); |
| } |
| } |
| else |
| { |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| { |
| vector_block_infos[bb->index ()].local_dem = vector_insn_info (); |
| vector_block_infos[bb->index ()].reaching_out = vector_insn_info (); |
| for (insn_info *insn : bb->real_insns ()) |
| vector_insn_infos[insn->uid ()].parse_insn (insn); |
| vector_block_infos[bb->index ()].probability = profile_probability (); |
| } |
| } |
| } |
| |
| void |
| vector_infos_manager::create_expr (vector_insn_info &info) |
| { |
| for (size_t i = 0; i < vector_exprs.length (); i++) |
| if (*vector_exprs[i] == info) |
| return; |
| vector_exprs.safe_push (&info); |
| } |
| |
| size_t |
| vector_infos_manager::get_expr_id (const vector_insn_info &info) const |
| { |
| for (size_t i = 0; i < vector_exprs.length (); i++) |
| if (*vector_exprs[i] == info) |
| return i; |
| gcc_unreachable (); |
| } |
| |
| auto_vec<size_t> |
| vector_infos_manager::get_all_available_exprs ( |
| const vector_insn_info &info) const |
| { |
| auto_vec<size_t> available_list; |
| for (size_t i = 0; i < vector_exprs.length (); i++) |
| if (info.available_p (*vector_exprs[i])) |
| available_list.safe_push (i); |
| return available_list; |
| } |
| |
| bool |
| vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const |
| { |
| if (bitmap_empty_p (bitdata)) |
| return false; |
| |
| int ratio = -1; |
| unsigned int bb_index; |
| sbitmap_iterator sbi; |
| |
| EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi) |
| { |
| if (ratio == -1) |
| ratio = vector_exprs[bb_index]->get_ratio (); |
| else if (vector_exprs[bb_index]->get_ratio () != ratio) |
| return false; |
| } |
| return true; |
| } |
| |
| bool |
| vector_infos_manager::all_same_avl_p (const basic_block cfg_bb, |
| sbitmap bitdata) const |
| { |
| if (bitmap_empty_p (bitdata)) |
| return false; |
| |
| const auto &block_info = vector_block_infos[cfg_bb->index]; |
| if (!block_info.local_dem.demand_p (DEMAND_AVL)) |
| return true; |
| |
| avl_info avl = block_info.local_dem.get_avl_info (); |
| unsigned int bb_index; |
| sbitmap_iterator sbi; |
| |
| EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi) |
| { |
| if (vector_exprs[bb_index]->get_avl_info () != avl) |
| return false; |
| } |
| return true; |
| } |
| |
| size_t |
| vector_infos_manager::expr_set_num (sbitmap bitdata) const |
| { |
| size_t count = 0; |
| for (size_t i = 0; i < vector_exprs.length (); i++) |
| if (bitmap_bit_p (bitdata, i)) |
| count++; |
| return count; |
| } |
| |
| void |
| vector_infos_manager::release (void) |
| { |
| if (!vector_insn_infos.is_empty ()) |
| vector_insn_infos.release (); |
| if (!vector_block_infos.is_empty ()) |
| vector_block_infos.release (); |
| if (!vector_exprs.is_empty ()) |
| vector_exprs.release (); |
| |
| if (optimize > 0) |
| free_bitmap_vectors (); |
| } |
| |
| void |
| vector_infos_manager::create_bitmap_vectors (void) |
| { |
| /* Create the bitmap vectors. */ |
| vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
| vector_exprs.length ()); |
| vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
| vector_exprs.length ()); |
| vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
| vector_exprs.length ()); |
| vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
| vector_exprs.length ()); |
| vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
| vector_exprs.length ()); |
| vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun), |
| vector_exprs.length ()); |
| |
| bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun)); |
| bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun)); |
| bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun)); |
| } |
| |
| void |
| vector_infos_manager::free_bitmap_vectors (void) |
| { |
| /* Finished. Free up all the things we've allocated. */ |
| free_edge_list (vector_edge_list); |
| if (vector_del) |
| sbitmap_vector_free (vector_del); |
| if (vector_insert) |
| sbitmap_vector_free (vector_insert); |
| if (vector_kill) |
| sbitmap_vector_free (vector_kill); |
| if (vector_antic) |
| sbitmap_vector_free (vector_antic); |
| if (vector_transp) |
| sbitmap_vector_free (vector_transp); |
| if (vector_comp) |
| sbitmap_vector_free (vector_comp); |
| if (vector_avin) |
| sbitmap_vector_free (vector_avin); |
| if (vector_avout) |
| sbitmap_vector_free (vector_avout); |
| |
| vector_edge_list = nullptr; |
| vector_kill = nullptr; |
| vector_del = nullptr; |
| vector_insert = nullptr; |
| vector_antic = nullptr; |
| vector_transp = nullptr; |
| vector_comp = nullptr; |
| vector_avin = nullptr; |
| vector_avout = nullptr; |
| } |
| |
| void |
| vector_infos_manager::dump (FILE *file) const |
| { |
| basic_block cfg_bb; |
| rtx_insn *rinsn; |
| |
| fprintf (file, "\n"); |
| FOR_ALL_BB_FN (cfg_bb, cfun) |
| { |
| fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index); |
| fprintf (file, "<HEADER>="); |
| vector_block_infos[cfg_bb->index].local_dem.dump (file); |
| FOR_BB_INSNS (cfg_bb, rinsn) |
| { |
| if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn)) |
| continue; |
| fprintf (file, "<insn %d>=", INSN_UID (rinsn)); |
| const auto &info = vector_insn_infos[INSN_UID (rinsn)]; |
| info.dump (file); |
| } |
| fprintf (file, "<FOOTER>="); |
| vector_block_infos[cfg_bb->index].reaching_out.dump (file); |
| fprintf (file, "<Probability>="); |
| vector_block_infos[cfg_bb->index].probability.dump (file); |
| fprintf (file, "\n\n"); |
| } |
| |
| fprintf (file, "\n"); |
| FOR_ALL_BB_FN (cfg_bb, cfun) |
| { |
| fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index); |
| |
| fprintf (file, "<ANTLOC>="); |
| if (vector_antic == nullptr) |
| fprintf (file, "(nil)\n"); |
| else |
| dump_bitmap_file (file, vector_antic[cfg_bb->index]); |
| |
| fprintf (file, "<AVLOC>="); |
| if (vector_comp == nullptr) |
| fprintf (file, "(nil)\n"); |
| else |
| dump_bitmap_file (file, vector_comp[cfg_bb->index]); |
| |
| fprintf (file, "<TRANSP>="); |
| if (vector_transp == nullptr) |
| fprintf (file, "(nil)\n"); |
| else |
| dump_bitmap_file (file, vector_transp[cfg_bb->index]); |
| |
| fprintf (file, "<KILL>="); |
| if (vector_kill == nullptr) |
| fprintf (file, "(nil)\n"); |
| else |
| dump_bitmap_file (file, vector_kill[cfg_bb->index]); |
| } |
| |
| fprintf (file, "\n"); |
| FOR_ALL_BB_FN (cfg_bb, cfun) |
| { |
| fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n", |
| cfg_bb->index); |
| |
| fprintf (file, "<AVIN>="); |
| if (vector_avin == nullptr) |
| fprintf (file, "(nil)\n"); |
| else |
| dump_bitmap_file (file, vector_avin[cfg_bb->index]); |
| |
| fprintf (file, "<AVOUT>="); |
| if (vector_avout == nullptr) |
| fprintf (file, "(nil)\n"); |
| else |
| dump_bitmap_file (file, vector_avout[cfg_bb->index]); |
| |
| fprintf (file, "<DELETE>="); |
| if (vector_del == nullptr) |
| fprintf (file, "(nil)\n"); |
| else |
| dump_bitmap_file (file, vector_del[cfg_bb->index]); |
| } |
| |
| fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n"); |
| for (size_t i = 0; i < vector_exprs.length (); i++) |
| { |
| for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++) |
| { |
| edge eg = INDEX_EDGE (vector_edge_list, ed); |
| if (bitmap_bit_p (vector_insert[ed], i)) |
| fprintf (dump_file, |
| "INSERT edge %d from bb %d to bb %d for VSETVL " |
| "expr[%ld]\n", |
| ed, eg->src->index, eg->dest->index, i); |
| } |
| } |
| } |
| |
| const pass_data pass_data_vsetvl = { |
| RTL_PASS, /* type */ |
| "vsetvl", /* name */ |
| OPTGROUP_NONE, /* optinfo_flags */ |
| TV_NONE, /* tv_id */ |
| 0, /* properties_required */ |
| 0, /* properties_provided */ |
| 0, /* properties_destroyed */ |
| 0, /* todo_flags_start */ |
| 0, /* todo_flags_finish */ |
| }; |
| |
| class pass_vsetvl : public rtl_opt_pass |
| { |
| private: |
| class vector_infos_manager *m_vector_manager; |
| |
| void simple_vsetvl (void) const; |
| void lazy_vsetvl (void); |
| |
| /* Phase 1. */ |
| void compute_local_backward_infos (const bb_info *); |
| |
| /* Phase 2. */ |
| bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const; |
| void transfer_before (vector_insn_info &, insn_info *) const; |
| void transfer_after (vector_insn_info &, insn_info *) const; |
| void emit_local_forward_vsetvls (const bb_info *); |
| |
| /* Phase 3. */ |
| enum fusion_type get_backward_fusion_type (const bb_info *, |
| const vector_insn_info &); |
| bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const; |
| bool backward_demand_fusion (void); |
| bool forward_demand_fusion (void); |
| bool cleanup_illegal_dirty_blocks (void); |
| void demand_fusion (void); |
| |
| /* Phase 4. */ |
| void prune_expressions (void); |
| void compute_local_properties (void); |
| bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const; |
| void refine_vsetvls (void) const; |
| void cleanup_vsetvls (void); |
| bool commit_vsetvls (void); |
| void pre_vsetvl (void); |
| |
| /* Phase 5. */ |
| void cleanup_insns (void) const; |
| |
| /* Phase 6. */ |
| void propagate_avl (void) const; |
| |
| void init (void); |
| void done (void); |
| void compute_probabilities (void); |
| |
| public: |
| pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {} |
| |
| /* opt_pass methods: */ |
| virtual bool gate (function *) final override { return TARGET_VECTOR; } |
| virtual unsigned int execute (function *) final override; |
| }; // class pass_vsetvl |
| |
| /* Simple m_vsetvl_insert vsetvl for optimize == 0. */ |
| void |
| pass_vsetvl::simple_vsetvl (void) const |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| "\nEntering Simple VSETVL PASS and Handling %d basic blocks for " |
| "function:%s\n", |
| n_basic_blocks_for_fn (cfun), function_name (cfun)); |
| |
| basic_block cfg_bb; |
| rtx_insn *rinsn; |
| FOR_ALL_BB_FN (cfg_bb, cfun) |
| { |
| FOR_BB_INSNS (cfg_bb, rinsn) |
| { |
| if (!NONDEBUG_INSN_P (rinsn)) |
| continue; |
| if (has_vtype_op (rinsn)) |
| { |
| const auto info |
| = m_vector_manager->vector_insn_infos[INSN_UID (rinsn)]; |
| emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info, |
| NULL_RTX, rinsn); |
| } |
| } |
| } |
| } |
| |
| /* Compute demanded information by backward data-flow analysis. */ |
| void |
| pass_vsetvl::compute_local_backward_infos (const bb_info *bb) |
| { |
| vector_insn_info change; |
| change.set_empty (); |
| |
| auto &block_info = m_vector_manager->vector_block_infos[bb->index ()]; |
| block_info.reaching_out = change; |
| |
| for (insn_info *insn : bb->reverse_real_nondebug_insns ()) |
| { |
| auto &info = m_vector_manager->vector_insn_infos[insn->uid ()]; |
| |
| if (info.uninit_p ()) |
| /* If it is uninitialized, propagate it directly. */ |
| info = change; |
| else if (info.unknown_p ()) |
| change = info; |
| else |
| { |
| gcc_assert (info.valid_p () && "Unexpected Invalid demanded info"); |
| if (change.valid_p () && change.compatible_p (info)) |
| info = change.merge (info); |
| change = info; |
| } |
| } |
| |
| block_info.local_dem = change; |
| if (block_info.local_dem.empty_p ()) |
| block_info.reaching_out = block_info.local_dem; |
| } |
| |
| /* Return true if a dem_info is required to transition from curr_info to |
| require before INSN. */ |
| bool |
| pass_vsetvl::need_vsetvl (const vector_insn_info &require, |
| const vector_insn_info &curr_info) const |
| { |
| if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ()) |
| return true; |
| |
| if (require.compatible_p (curr_info)) |
| return false; |
| |
| return true; |
| } |
| |
| /* Given an incoming state reaching INSN, modifies that state so that it is |
| minimally compatible with INSN. The resulting state is guaranteed to be |
| semantically legal for INSN, but may not be the state requested by INSN. */ |
| void |
| pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const |
| { |
| if (!has_vtype_op (insn->rtl ())) |
| return; |
| |
| const vector_insn_info require |
| = m_vector_manager->vector_insn_infos[insn->uid ()]; |
| if (info.valid_p () && !need_vsetvl (require, info)) |
| return; |
| info = require; |
| } |
| |
| /* Given a state with which we evaluated insn (see transfer_before above for why |
| this might be different that the state insn requested), modify the state to |
| reflect the changes insn might make. */ |
| void |
| pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const |
| { |
| if (vector_config_insn_p (insn->rtl ())) |
| { |
| info = m_vector_manager->vector_insn_infos[insn->uid ()]; |
| return; |
| } |
| |
| /* TODO: Support fault first load info update VL in the future. */ |
| |
| /* If this is something that updates VL/VTYPE that we don't know about, set |
| the state to unknown. */ |
| if (insn->is_call () || insn->is_asm () |
| || find_access (insn->defs (), VL_REGNUM) |
| || find_access (insn->defs (), VTYPE_REGNUM)) |
| info = vector_insn_info::get_unknown (); |
| } |
| |
| /* Emit vsetvl within each block by forward data-flow analysis. */ |
| void |
| pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb) |
| { |
| auto &block_info = m_vector_manager->vector_block_infos[bb->index ()]; |
| if (block_info.local_dem.empty_p ()) |
| return; |
| |
| vector_insn_info curr_info; |
| for (insn_info *insn : bb->real_nondebug_insns ()) |
| { |
| const vector_insn_info prev_info = curr_info; |
| transfer_before (curr_info, insn); |
| |
| if (has_vtype_op (insn->rtl ())) |
| { |
| if (static_cast<const vl_vtype_info &> (prev_info) |
| != static_cast<const vl_vtype_info &> (curr_info)) |
| { |
| const auto require |
| = m_vector_manager->vector_insn_infos[insn->uid ()]; |
| if (!require.compatible_p ( |
| static_cast<const vl_vtype_info &> (prev_info))) |
| insert_vsetvl (EMIT_BEFORE, insn->rtl (), require, prev_info); |
| } |
| } |
| |
| transfer_after (curr_info, insn); |
| } |
| |
| block_info.reaching_out = curr_info; |
| } |
| |
| enum fusion_type |
| pass_vsetvl::get_backward_fusion_type (const bb_info *bb, |
| const vector_insn_info &prop) |
| { |
| insn_info *insn = prop.get_insn (); |
| |
| /* TODO: We don't backward propagate the explict VSETVL here |
| since we will change vsetvl and vsetvlmax intrinsics into |
| no side effects which can be optimized into optimal location |
| by GCC internal passes. We only need to support these backward |
| propagation if vsetvl intrinsics have side effects. */ |
| if (vsetvl_insn_p (insn->rtl ())) |
| return INVALID_FUSION; |
| |
| gcc_assert (has_vtype_op (insn->rtl ())); |
| rtx reg = NULL_RTX; |
| |
| /* Case 1: Don't need VL. Just let it backward propagate. */ |
| if (!has_vl_op (insn->rtl ())) |
| return VALID_AVL_FUSION; |
| else |
| { |
| /* Case 2: CONST_INT AVL, we don't need to check def. */ |
| if (prop.has_avl_imm ()) |
| return VALID_AVL_FUSION; |
| else |
| { |
| /* Case 3: REG AVL, we need to check the distance of def to make |
| sure we won't backward propagate over the def. */ |
| gcc_assert (prop.has_avl_reg ()); |
| if (vlmax_avl_p (prop.get_avl ())) |
| /* Check VL operand for vsetvl vl,zero. */ |
| reg = get_vl (insn->rtl ()); |
| else |
| /* Check AVL operand for vsetvl zero,avl. */ |
| reg = get_avl (insn->rtl ()); |
| } |
| } |
| |
| gcc_assert (reg); |
| def_info *def = find_access (insn->uses (), REGNO (reg))->def (); |
| if (!def->insn ()->is_phi () && def->insn ()->bb () == insn->bb ()) |
| return INVALID_FUSION; |
| hash_set<set_info *> sets |
| = get_all_sets (prop.get_avl_source (), true, true, true); |
| if (any_set_in_bb_p (sets, insn->bb ())) |
| return INVALID_FUSION; |
| |
| if (vlmax_avl_p (prop.get_avl ())) |
| { |
| if (find_reg_killed_by (bb, reg)) |
| return INVALID_FUSION; |
| else |
| return VALID_AVL_FUSION; |
| } |
| |
| /* By default, we always enable backward fusion so that we can |
| gain more optimizations. */ |
| if (!find_reg_killed_by (bb, reg)) |
| return VALID_AVL_FUSION; |
| return KILLED_AVL_FUSION; |
| } |
| |
| /* We almost enable all cases in get_backward_fusion_type, this function |
| disable the backward fusion by changing dirty blocks into hard empty |
| blocks in forward dataflow. We can have more accurate optimization by |
| this method. */ |
| bool |
| pass_vsetvl::hard_empty_block_p (const bb_info *bb, |
| const vector_insn_info &info) const |
| { |
| if (!info.dirty_p () || !info.has_avl_reg ()) |
| return false; |
| |
| basic_block cfg_bb = bb->cfg_bb (); |
| sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; |
| rtx avl = vlmax_avl_p (info.get_avl ()) ? get_vl (info.get_insn ()->rtl ()) |
| : get_avl (info.get_insn ()->rtl ()); |
| insn_info *insn = info.get_insn (); |
| set_info *set = find_access (insn->uses (), REGNO (avl))->def (); |
| hash_set<set_info *> sets = get_all_sets (set, true, false, false); |
| hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb); |
| |
| if (find_reg_killed_by (bb, avl)) |
| { |
| /* Condition 1: |
| Dirty block with killed AVL means that the empty block (no RVV |
| instructions) are polluted as Dirty blocks with the value of current |
| AVL is killed. For example: |
| bb 0: |
| ... |
| bb 1: |
| def a5 |
| bb 2: |
| RVV (use a5) |
| In backward dataflow, we will polluted BB0 and BB1 as Dirt with AVL |
| killed. since a5 is killed in BB1. |
| In this case, let's take a look at this example: |
| |
| bb 3: bb 4: |
| def3 a5 def4 a5 |
| bb 5: bb 6: |
| def1 a5 def2 a5 |
| \ / |
| \ / |
| \ / |
| \ / |
| bb 7: |
| RVV (use a5) |
| In thi case, we can polluted BB5 and BB6 as dirty if get-def |
| of a5 from RVV instruction in BB7 is the def1 in BB5 and |
| def2 BB6 so we can return false early here for HARD_EMPTY_BLOCK_P. |
| However, we are not sure whether BB3 and BB4 can be |
| polluted as Dirty with AVL killed so we can't return false |
| for HARD_EMPTY_BLOCK_P here since it's too early which will |
| potentially produce issues. */ |
| gcc_assert (info.dirty_with_killed_avl_p ()); |
| if (info.get_avl_source () |
| && get_same_bb_set (sets, bb->cfg_bb ()) == info.get_avl_source ()) |
| return false; |
| } |
| |
| /* Condition 2: |
| Suppress the VL/VTYPE info backward propagation too early: |
| ________ |
| | BB0 | |
| |________| |
| | |
| ____|____ |
| | BB1 | |
| |________| |
| In this case, suppose BB 1 has multiple predecessors, BB 0 is one |
| of them. BB1 has VL/VTYPE info (may be VALID or DIRTY) to backward |
| propagate. |
| The AVIN (available in) which is calculated by LCM is empty only |
| in these 2 circumstances: |
| 1. all predecessors of BB1 are empty (not VALID |
| and can not be polluted in backward fusion flow) |
| 2. VL/VTYPE info of BB1 predecessors are conflict. |
| |
| We keep it as dirty in 2nd circumstance and set it as HARD_EMPTY |
| (can not be polluted as DIRTY any more) in 1st circumstance. |
| We don't backward propagate in 1st circumstance since there is |
| no VALID RVV instruction and no polluted blocks (dirty blocks) |
| by backward propagation from other following blocks. |
| It's meaningless to keep it as Dirty anymore. |
| |
| However, since we keep it as dirty in 2nd since there are VALID or |
| Dirty blocks in predecessors, we can still gain the benefits and |
| optimization opportunities. For example, in this case: |
| for (size_t i = 0; i < n; i++) |
| { |
| if (i != cond) { |
| vint8mf8_t v = *(vint8mf8_t*)(in + i + 100); |
| *(vint8mf8_t*)(out + i + 100) = v; |
| } else { |
| vbool1_t v = *(vbool1_t*)(in + i + 400); |
| *(vbool1_t*)(out + i + 400) = v; |
| } |
| } |
| VL/VTYPE in if-else are conflict which will produce empty AVIN LCM result |
| but we can still keep dirty blocks if *(i != cond)* is very unlikely then |
| we can preset vsetvl (VL/VTYPE) info from else (static propability model). |
| |
| We don't want to backward propagate VL/VTYPE information too early |
| which is not the optimal and may potentially produce issues. */ |
| if (bitmap_empty_p (avin)) |
| { |
| bool hard_empty_p = true; |
| for (const basic_block pred_cfg_bb : pred_cfg_bbs) |
| { |
| if (pred_cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
| continue; |
| sbitmap avout = m_vector_manager->vector_avout[pred_cfg_bb->index]; |
| if (!bitmap_empty_p (avout)) |
| { |
| hard_empty_p = false; |
| break; |
| } |
| } |
| if (hard_empty_p) |
| return true; |
| } |
| |
| edge e; |
| edge_iterator ei; |
| bool has_avl_killed_insn_p = false; |
| FOR_EACH_EDGE (e, ei, cfg_bb->succs) |
| { |
| const auto block_info |
| = m_vector_manager->vector_block_infos[e->dest->index]; |
| if (block_info.local_dem.dirty_with_killed_avl_p ()) |
| { |
| has_avl_killed_insn_p = true; |
| break; |
| } |
| } |
| if (!has_avl_killed_insn_p) |
| return false; |
| |
| bool any_set_in_bbs_p = false; |
| for (const basic_block pred_cfg_bb : pred_cfg_bbs) |
| { |
| insn_info *def_insn = extract_single_source (set); |
| if (def_insn) |
| { |
| /* Condition 3: |
| |
| Case 1: Case 2: |
| bb 0: bb 0: |
| def a5 101 ... |
| bb 1: bb 1: |
| ... ... |
| bb 2: bb 2: |
| RVV 1 (use a5 with TAIL ANY) ... |
| bb 3: bb 3: |
| def a5 101 def a5 101 |
| bb 4: bb 4: |
| ... ... |
| bb 5: bb 5: |
| RVV 2 (use a5 with TU) RVV 1 (use a5) |
| |
| Case 1: We can pollute BB3,BB2,BB1,BB0 are all Dirt blocks |
| with killed AVL so that we can merge TU demand info from RVV 2 |
| into RVV 1 and elide the vsevl instruction in BB5. |
| |
| TODO: We only optimize for single source def since multiple source |
| def is quite complicated. |
| |
| Case 2: We only can pollute bb 3 as dirty and it has been accepted |
| in Condition 2 and we can't pollute BB3,BB2,BB1,BB0 like case 1. */ |
| insn_info *last_killed_insn |
| = find_reg_killed_by (crtl->ssa->bb (pred_cfg_bb), avl); |
| if (!last_killed_insn || pred_cfg_bb == def_insn->bb ()->cfg_bb ()) |
| continue; |
| if (source_equal_p (last_killed_insn, def_insn)) |
| { |
| any_set_in_bbs_p = true; |
| break; |
| } |
| } |
| else |
| { |
| /* Condition 4: |
| |
| bb 0: bb 1: bb 3: |
| def1 a5 def2 a5 ... |
| \ / / |
| \ / / |
| \ / / |
| \ / / |
| bb 4: / |
| | / |
| | / |
| bb 5: / |
| | / |
| | / |
| bb 6: / |
| | / |
| | / |
| bb 8: |
| RVV 1 (use a5) |
| If we get-def (REAL) of a5 from RVV 1 instruction, we will get |
| def1 from BB0 and def2 from BB1. So we will pollute BB6,BB5,BB4, |
| BB0,BB1 with DIRTY and set BB3 as HARD_EMPTY so that we won't |
| propagate AVL to BB3. */ |
| if (any_set_in_bb_p (sets, crtl->ssa->bb (pred_cfg_bb))) |
| { |
| any_set_in_bbs_p = true; |
| break; |
| } |
| } |
| } |
| if (!any_set_in_bbs_p) |
| return true; |
| return false; |
| } |
| |
| /* Compute global backward demanded info. */ |
| bool |
| pass_vsetvl::backward_demand_fusion (void) |
| { |
| /* We compute global infos by backward propagation. |
| We want to have better performance in these following cases: |
| |
| 1. for (size_t i = 0; i < n; i++) { |
| if (i != cond) { |
| vint8mf8_t v = *(vint8mf8_t*)(in + i + 100); |
| *(vint8mf8_t*)(out + i + 100) = v; |
| } else { |
| vbool1_t v = *(vbool1_t*)(in + i + 400); |
| *(vbool1_t*)(out + i + 400) = v; |
| } |
| } |
| |
| Since we don't have any RVV instruction in the BEFORE blocks, |
| LCM fails to optimize such case. We want to backward propagate |
| them into empty blocks so that we could have better performance |
| in LCM. |
| |
| 2. bb 0: |
| vsetvl e8,mf8 (demand RATIO) |
| bb 1: |
| vsetvl e32,mf2 (demand SEW and LMUL) |
| We backward propagate the first VSETVL into e32,mf2 so that we |
| could be able to eliminate the second VSETVL in LCM. */ |
| |
| bool changed_p = false; |
| for (const bb_info *bb : crtl->ssa->reverse_bbs ()) |
| { |
| basic_block cfg_bb = bb->cfg_bb (); |
| const auto &curr_block_info |
| = m_vector_manager->vector_block_infos[cfg_bb->index]; |
| const auto &prop = curr_block_info.local_dem; |
| |
| /* If there is nothing to propagate, just skip it. */ |
| if (!prop.valid_or_dirty_p ()) |
| continue; |
| |
| if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info)) |
| continue; |
| |
| edge e; |
| edge_iterator ei; |
| /* Backward propagate to each predecessor. */ |
| FOR_EACH_EDGE (e, ei, cfg_bb->preds) |
| { |
| auto &block_info |
| = m_vector_manager->vector_block_infos[e->src->index]; |
| |
| /* We don't propagate through critical edges. */ |
| if (e->flags & EDGE_COMPLEX) |
| continue; |
| if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index) |
| continue; |
| |
| if (block_info.reaching_out.unknown_p ()) |
| continue; |
| else if (block_info.reaching_out.hard_empty_p ()) |
| continue; |
| else if (block_info.reaching_out.empty_p ()) |
| { |
| enum fusion_type type |
| = get_backward_fusion_type (crtl->ssa->bb (e->src), prop); |
| if (type == INVALID_FUSION) |
| continue; |
| |
| block_info.reaching_out = prop; |
| block_info.reaching_out.set_dirty (type); |
| |
| if (prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ())) |
| { |
| hash_set<set_info *> sets |
| = get_all_sets (prop.get_avl_source (), true, true, true); |
| set_info *set = get_same_bb_set (sets, e->src); |
| if (set) |
| block_info.reaching_out.set_avl_info ( |
| avl_info (prop.get_avl (), set)); |
| } |
| |
| block_info.local_dem = block_info.reaching_out; |
| block_info.probability = curr_block_info.probability; |
| changed_p = true; |
| } |
| else if (block_info.reaching_out.dirty_p ()) |
| { |
| /* DIRTY -> DIRTY or VALID -> DIRTY. */ |
| vector_insn_info new_info; |
| |
| if (block_info.reaching_out.compatible_p (prop)) |
| { |
| if (block_info.reaching_out >= prop) |
| continue; |
| new_info = block_info.reaching_out.merge (prop, GLOBAL_MERGE); |
| new_info.set_dirty ( |
| block_info.reaching_out.dirty_with_killed_avl_p ()); |
| block_info.probability += curr_block_info.probability; |
| } |
| else |
| { |
| if (curr_block_info.probability > block_info.probability) |
| { |
| enum fusion_type type |
| = get_backward_fusion_type (crtl->ssa->bb (e->src), |
| prop); |
| if (type == INVALID_FUSION) |
| continue; |
| new_info = prop; |
| new_info.set_dirty (type); |
| block_info.probability = curr_block_info.probability; |
| } |
| else |
| continue; |
| } |
| |
| block_info.local_dem = new_info; |
| block_info.reaching_out = new_info; |
| changed_p = true; |
| } |
| else |
| { |
| /* We not only change the info during backward propagation, |
| but also change the VSETVL instruction. */ |
| gcc_assert (block_info.reaching_out.valid_p ()); |
| hash_set<set_info *> sets |
| = get_all_sets (prop.get_avl_source (), true, false, false); |
| set_info *set = get_same_bb_set (sets, e->src); |
| if (vsetvl_insn_p (block_info.reaching_out.get_insn ()->rtl ()) |
| && prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ())) |
| { |
| if (!block_info.reaching_out.same_vlmax_p (prop)) |
| continue; |
| if (block_info.reaching_out.same_vtype_p (prop)) |
| continue; |
| if (!set) |
| continue; |
| if (set->insn () != block_info.reaching_out.get_insn ()) |
| continue; |
| } |
| else |
| { |
| if (!block_info.reaching_out.compatible_p (prop)) |
| continue; |
| if (block_info.reaching_out >= prop) |
| continue; |
| } |
| |
| vector_insn_info be_merged = block_info.reaching_out; |
| if (block_info.local_dem == block_info.reaching_out) |
| be_merged = block_info.local_dem; |
| vector_insn_info new_info = be_merged.merge (prop, GLOBAL_MERGE); |
| |
| if (curr_block_info.probability > block_info.probability) |
| block_info.probability = curr_block_info.probability; |
| |
| change_vsetvl_insn (new_info.get_insn (), new_info); |
| if (block_info.local_dem == block_info.reaching_out) |
| block_info.local_dem = new_info; |
| block_info.reaching_out = new_info; |
| changed_p = true; |
| } |
| } |
| } |
| return changed_p; |
| } |
| |
| /* Compute global forward demanded info. */ |
| bool |
| pass_vsetvl::forward_demand_fusion (void) |
| { |
| /* Enhance the global information propagation especially |
| backward propagation miss the propagation. |
| Consider such case: |
| |
| bb0 |
| (TU) |
| / \ |
| bb1 bb2 |
| (TU) (ANY) |
| existing edge -----> \ / (TU) <----- LCM create this edge. |
| bb3 |
| (TU) |
| |
| Base on the situation, LCM fails to eliminate the VSETVL instruction and |
| insert an edge from bb2 to bb3 since we can't backward propagate bb3 into |
| bb2. To avoid this confusing LCM result and non-optimal codegen, we should |
| forward propagate information from bb0 to bb2 which is friendly to LCM. */ |
| bool changed_p = false; |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| { |
| basic_block cfg_bb = bb->cfg_bb (); |
| const auto &prop |
| = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; |
| |
| /* If there is nothing to propagate, just skip it. */ |
| if (!prop.valid_or_dirty_p ()) |
| continue; |
| |
| if (cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun)) |
| continue; |
| |
| edge e; |
| edge_iterator ei; |
| /* Forward propagate to each successor. */ |
| FOR_EACH_EDGE (e, ei, cfg_bb->succs) |
| { |
| auto &local_dem |
| = m_vector_manager->vector_block_infos[e->dest->index].local_dem; |
| auto &reaching_out |
| = m_vector_manager->vector_block_infos[e->dest->index].reaching_out; |
| |
| /* It's quite obvious, we don't need to propagate itself. */ |
| if (e->dest->index == cfg_bb->index) |
| continue; |
| /* We don't propagate through critical edges. */ |
| if (e->flags & EDGE_COMPLEX) |
| continue; |
| if (e->dest->index == EXIT_BLOCK_PTR_FOR_FN (cfun)->index) |
| continue; |
| |
| /* If there is nothing to propagate, just skip it. */ |
| if (!local_dem.valid_or_dirty_p ()) |
| continue; |
| if (local_dem >= prop) |
| continue; |
| if (!local_dem.compatible_p (prop)) |
| continue; |
| |
| vector_insn_info new_info = local_dem.merge (prop, GLOBAL_MERGE); |
| new_info.set_insn (local_dem.get_insn ()); |
| if (local_dem.dirty_p ()) |
| { |
| gcc_assert (local_dem == reaching_out); |
| new_info.set_dirty (local_dem.dirty_with_killed_avl_p ()); |
| local_dem = new_info; |
| reaching_out = local_dem; |
| } |
| else |
| { |
| if (reaching_out == local_dem) |
| reaching_out = new_info; |
| local_dem = new_info; |
| change_vsetvl_insn (local_dem.get_insn (), new_info); |
| } |
| auto &prob |
| = m_vector_manager->vector_block_infos[e->dest->index].probability; |
| auto &curr_prob |
| = m_vector_manager->vector_block_infos[cfg_bb->index].probability; |
| prob = curr_prob * e->probability; |
| changed_p = true; |
| } |
| } |
| return changed_p; |
| } |
| |
| void |
| pass_vsetvl::demand_fusion (void) |
| { |
| bool changed_p = true; |
| while (changed_p) |
| { |
| changed_p = false; |
| /* To optimize the case like this: |
| void f2 (int8_t * restrict in, int8_t * restrict out, int n, int cond) |
| { |
| size_t vl = 101; |
| |
| for (size_t i = 0; i < n; i++) |
| { |
| vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl); |
| __riscv_vse8_v_i8mf8 (out + i + 300, v, vl); |
| } |
| |
| for (size_t i = 0; i < n; i++) |
| { |
| vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl); |
| __riscv_vse8_v_i8mf8 (out + i, v, vl); |
| |
| vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl); |
| __riscv_vse8_v_i8mf8 (out + i + 100, v2, vl); |
| } |
| } |
| |
| bb 0: li a5, 101 (killed avl) |
| ... |
| bb 1: vsetvli zero, a5, ta |
| ... |
| bb 2: li a5, 101 (killed avl) |
| ... |
| bb 3: vsetvli zero, a3, tu |
| |
| We want to fuse VSEVLI instructions on bb 1 and bb 3. However, there is |
| an AVL kill instruction in bb 2 that we can't backward fuse bb 3 or |
| forward bb 1 arbitrarily. We need available information of each block to |
| help for such cases. */ |
| changed_p |= backward_demand_fusion (); |
| changed_p |= forward_demand_fusion (); |
| } |
| |
| changed_p = true; |
| while (changed_p) |
| { |
| changed_p = false; |
| prune_expressions (); |
| m_vector_manager->create_bitmap_vectors (); |
| compute_local_properties (); |
| compute_available (m_vector_manager->vector_comp, |
| m_vector_manager->vector_kill, |
| m_vector_manager->vector_avout, |
| m_vector_manager->vector_avin); |
| changed_p |= cleanup_illegal_dirty_blocks (); |
| m_vector_manager->free_bitmap_vectors (); |
| if (!m_vector_manager->vector_exprs.is_empty ()) |
| m_vector_manager->vector_exprs.release (); |
| } |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "\n\nDirty blocks list: "); |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| if (m_vector_manager->vector_block_infos[bb->index ()] |
| .reaching_out.dirty_p ()) |
| fprintf (dump_file, "%d ", bb->index ()); |
| fprintf (dump_file, "\n\n"); |
| } |
| } |
| |
| /* Cleanup illegal dirty blocks. */ |
| bool |
| pass_vsetvl::cleanup_illegal_dirty_blocks (void) |
| { |
| bool changed_p = false; |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| { |
| basic_block cfg_bb = bb->cfg_bb (); |
| const auto &prop |
| = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; |
| |
| /* If there is nothing to cleanup, just skip it. */ |
| if (!prop.valid_or_dirty_p ()) |
| continue; |
| |
| if (hard_empty_block_p (bb, prop)) |
| { |
| m_vector_manager->vector_block_infos[cfg_bb->index].local_dem |
| = vector_insn_info::get_hard_empty (); |
| m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out |
| = vector_insn_info::get_hard_empty (); |
| changed_p = true; |
| continue; |
| } |
| } |
| return changed_p; |
| } |
| |
| /* Assemble the candidates expressions for LCM. */ |
| void |
| pass_vsetvl::prune_expressions (void) |
| { |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| { |
| if (m_vector_manager->vector_block_infos[bb->index ()] |
| .local_dem.valid_or_dirty_p ()) |
| m_vector_manager->create_expr ( |
| m_vector_manager->vector_block_infos[bb->index ()].local_dem); |
| if (m_vector_manager->vector_block_infos[bb->index ()] |
| .reaching_out.valid_or_dirty_p ()) |
| m_vector_manager->create_expr ( |
| m_vector_manager->vector_block_infos[bb->index ()].reaching_out); |
| } |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nThe total VSETVL expression num = %d\n", |
| m_vector_manager->vector_exprs.length ()); |
| fprintf (dump_file, "Expression List:\n"); |
| for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) |
| { |
| fprintf (dump_file, "Expr[%ld]:\n", i); |
| m_vector_manager->vector_exprs[i]->dump (dump_file); |
| fprintf (dump_file, "\n"); |
| } |
| } |
| } |
| |
| /* Compute the local properties of each recorded expression. |
| |
| Local properties are those that are defined by the block, irrespective of |
| other blocks. |
| |
| An expression is transparent in a block if its operands are not modified |
| in the block. |
| |
| An expression is computed (locally available) in a block if it is computed |
| at least once and expression would contain the same value if the |
| computation was moved to the end of the block. |
| |
| An expression is locally anticipatable in a block if it is computed at |
| least once and expression would contain the same value if the computation |
| was moved to the beginning of the block. */ |
| void |
| pass_vsetvl::compute_local_properties (void) |
| { |
| /* - If T is locally available at the end of a block, then T' must be |
| available at the end of the same block. Since some optimization has |
| occurred earlier, T' might not be locally available, however, it must |
| have been previously computed on all paths. As a formula, T at AVLOC(B) |
| implies that T' at AVOUT(B). |
| An "available occurrence" is one that is the last occurrence in the |
| basic block and the operands are not modified by following statements in |
| the basic block [including this insn]. |
| |
| - If T is locally anticipated at the beginning of a block, then either |
| T', is locally anticipated or it is already available from previous |
| blocks. As a formula, this means that T at ANTLOC(B) implies that T' at |
| ANTLOC(B) at AVIN(B). |
| An "anticipatable occurrence" is one that is the first occurrence in the |
| basic block, the operands are not modified in the basic block prior |
| to the occurrence and the output is not used between the start of |
| the block and the occurrence. */ |
| |
| basic_block cfg_bb; |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| { |
| unsigned int curr_bb_idx = bb->index (); |
| const auto local_dem |
| = m_vector_manager->vector_block_infos[curr_bb_idx].local_dem; |
| const auto reaching_out |
| = m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out; |
| |
| /* Compute transparent. */ |
| for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) |
| { |
| const vector_insn_info *expr = m_vector_manager->vector_exprs[i]; |
| if (local_dem.real_dirty_p () || local_dem.valid_p () |
| || local_dem.unknown_p () |
| || has_vsetvl_killed_avl_p (bb, local_dem)) |
| bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i); |
| /* FIXME: Here we set the block as non-transparent (killed) if there |
| is an instruction killed the value of AVL according to the |
| definition of Local transparent. This is true for such following |
| case: |
| |
| bb 0 (Loop label): |
| vsetvl zero, a5, e8, mf8 |
| bb 1: |
| def a5 |
| bb 2: |
| branch bb 0 (Loop label). |
| |
| In this case, we known there is a loop bb 0->bb 1->bb 2. According |
| to LCM definition, it is correct when we set vsetvl zero, a5, e8, |
| mf8 as non-transparent (killed) so that LCM will not hoist outside |
| the bb 0. |
| |
| However, such conservative configuration will forbid optimization |
| on some unlucky case. For example: |
| |
| bb 0: |
| li a5, 101 |
| bb 1: |
| vsetvl zero, a5, e8, mf8 |
| bb 2: |
| li a5, 101 |
| bb 3: |
| vsetvl zero, a5, e8, mf8. |
| So we also relax def a5 as transparent to gain more optimizations |
| as long as the all real def insn of avl do not come from this |
| block. This configuration may be still missing some optimization |
| opportunities. */ |
| if (find_reg_killed_by (bb, expr->get_avl ())) |
| { |
| hash_set<set_info *> sets |
| = get_all_sets (expr->get_avl_source (), true, false, false); |
| if (any_set_in_bb_p (sets, bb)) |
| bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], |
| i); |
| } |
| } |
| |
| /* Compute anticipatable occurrences. */ |
| if (local_dem.valid_p () || local_dem.real_dirty_p () |
| || (has_vsetvl_killed_avl_p (bb, local_dem) |
| && vlmax_avl_p (local_dem.get_avl ()))) |
| if (anticipatable_occurrence_p (bb, local_dem)) |
| bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx], |
| m_vector_manager->get_expr_id (local_dem)); |
| |
| /* Compute available occurrences. */ |
| if (reaching_out.valid_or_dirty_p ()) |
| { |
| auto_vec<size_t> available_list |
| = m_vector_manager->get_all_available_exprs (reaching_out); |
| for (size_t i = 0; i < available_list.length (); i++) |
| { |
| const vector_insn_info *expr |
| = m_vector_manager->vector_exprs[available_list[i]]; |
| if (reaching_out.real_dirty_p () |
| || has_vsetvl_killed_avl_p (bb, reaching_out) |
| || available_occurrence_p (bb, *expr)) |
| bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx], |
| available_list[i]); |
| } |
| } |
| } |
| |
| /* Compute kill for each basic block using: |
| |
| ~(TRANSP | COMP) |
| */ |
| |
| FOR_EACH_BB_FN (cfg_bb, cfun) |
| { |
| bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index], |
| m_vector_manager->vector_transp[cfg_bb->index], |
| m_vector_manager->vector_comp[cfg_bb->index]); |
| bitmap_not (m_vector_manager->vector_kill[cfg_bb->index], |
| m_vector_manager->vector_kill[cfg_bb->index]); |
| } |
| |
| FOR_EACH_BB_FN (cfg_bb, cfun) |
| { |
| edge e; |
| edge_iterator ei; |
| |
| /* If the current block is the destination of an abnormal edge, we |
| kill all trapping (for PRE) and memory (for hoist) expressions |
| because we won't be able to properly place the instruction on |
| the edge. So make them neither anticipatable nor transparent. |
| This is fairly conservative. |
| |
| ??? For hoisting it may be necessary to check for set-and-jump |
| instructions here, not just for abnormal edges. The general problem |
| is that when an expression cannot not be placed right at the end of |
| a basic block we should account for any side-effects of a subsequent |
| jump instructions that could clobber the expression. It would |
| be best to implement this check along the lines of |
| should_hoist_expr_to_dom where the target block is already known |
| and, hence, there's no need to conservatively prune expressions on |
| "intermediate" set-and-jump instructions. */ |
| FOR_EACH_EDGE (e, ei, cfg_bb->preds) |
| if (e->flags & EDGE_COMPLEX) |
| { |
| bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]); |
| bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]); |
| } |
| } |
| } |
| |
| /* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */ |
| bool |
| pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb, |
| const vector_insn_info &info) const |
| { |
| if (!m_vector_manager->all_same_ratio_p ( |
| m_vector_manager->vector_avin[cfg_bb->index])) |
| return false; |
| |
| if (!m_vector_manager->all_same_avl_p ( |
| cfg_bb, m_vector_manager->vector_avin[cfg_bb->index])) |
| return false; |
| |
| size_t expr_id |
| = bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]); |
| if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info)) |
| return false; |
| if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info)) |
| return false; |
| |
| edge e; |
| edge_iterator ei; |
| bool all_valid_p = true; |
| FOR_EACH_EDGE (e, ei, cfg_bb->preds) |
| { |
| if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index])) |
| { |
| all_valid_p = false; |
| break; |
| } |
| } |
| |
| if (!all_valid_p) |
| return false; |
| return true; |
| } |
| |
| /* Optimize athe case like this: |
| |
| bb 0: |
| vsetvl 0 a5,zero,e8,mf8 |
| insn 0 (demand SEW + LMUL) |
| bb 1: |
| vsetvl 1 a5,zero,e16,mf4 |
| insn 1 (demand SEW + LMUL) |
| |
| In this case, we should be able to refine |
| vsetvl 1 into vsetvl zero, zero according AVIN. */ |
| void |
| pass_vsetvl::refine_vsetvls (void) const |
| { |
| basic_block cfg_bb; |
| FOR_EACH_BB_FN (cfg_bb, cfun) |
| { |
| auto info = m_vector_manager->vector_block_infos[cfg_bb->index].local_dem; |
| insn_info *insn = info.get_insn (); |
| if (!info.valid_p ()) |
| continue; |
| |
| rtx_insn *rinsn = insn->rtl (); |
| if (!can_refine_vsetvl_p (cfg_bb, info)) |
| continue; |
| |
| if (!vector_config_insn_p (rinsn)) |
| rinsn = PREV_INSN (rinsn); |
| rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX); |
| change_insn (rinsn, new_pat); |
| } |
| } |
| |
| void |
| pass_vsetvl::cleanup_vsetvls () |
| { |
| basic_block cfg_bb; |
| FOR_EACH_BB_FN (cfg_bb, cfun) |
| { |
| auto &info |
| = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; |
| gcc_assert (m_vector_manager->expr_set_num ( |
| m_vector_manager->vector_del[cfg_bb->index]) |
| <= 1); |
| for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) |
| { |
| if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i)) |
| { |
| if (info.dirty_p ()) |
| info.set_unknown (); |
| else |
| { |
| const auto dem |
| = m_vector_manager->vector_block_infos[cfg_bb->index] |
| .local_dem; |
| gcc_assert (dem == *m_vector_manager->vector_exprs[i]); |
| insn_info *insn = dem.get_insn (); |
| gcc_assert (insn && insn->rtl ()); |
| rtx_insn *rinsn; |
| if (vector_config_insn_p (insn->rtl ())) |
| rinsn = insn->rtl (); |
| else |
| { |
| gcc_assert (has_vtype_op (insn->rtl ())); |
| rinsn = PREV_INSN (insn->rtl ()); |
| gcc_assert ( |
| vector_config_insn_p (PREV_INSN (insn->rtl ()))); |
| } |
| eliminate_insn (rinsn); |
| } |
| } |
| } |
| } |
| } |
| |
| bool |
| pass_vsetvl::commit_vsetvls (void) |
| { |
| bool need_commit = false; |
| |
| for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++) |
| { |
| for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++) |
| { |
| edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed); |
| if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i)) |
| { |
| const vector_insn_info *require |
| = m_vector_manager->vector_exprs[i]; |
| gcc_assert (require->valid_or_dirty_p ()); |
| rtl_profile_for_edge (eg); |
| start_sequence (); |
| |
| insn_info *insn = require->get_insn (); |
| vector_insn_info prev_info = vector_insn_info (); |
| sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index]; |
| if (m_vector_manager->all_same_ratio_p (bitdata) |
| && m_vector_manager->all_same_avl_p (eg->dest, bitdata)) |
| { |
| size_t first = bitmap_first_set_bit (bitdata); |
| prev_info = *m_vector_manager->vector_exprs[first]; |
| } |
| |
| insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info); |
| rtx_insn *rinsn = get_insns (); |
| end_sequence (); |
| default_rtl_profile (); |
| |
| /* We should not get an abnormal edge here. */ |
| gcc_assert (!(eg->flags & EDGE_ABNORMAL)); |
| need_commit = true; |
| insert_insn_on_edge (rinsn, eg); |
| } |
| } |
| } |
| |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| { |
| basic_block cfg_bb = bb->cfg_bb (); |
| const auto reaching_out |
| = m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out; |
| if (!reaching_out.dirty_p ()) |
| continue; |
| |
| if (reaching_out.dirty_with_killed_avl_p ()) |
| { |
| if (!has_vsetvl_killed_avl_p (bb, reaching_out)) |
| continue; |
| |
| unsigned int bb_index; |
| sbitmap_iterator sbi; |
| sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index]; |
| bool available_p = false; |
| EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi) |
| { |
| if (*m_vector_manager->vector_exprs[bb_index] >= reaching_out) |
| { |
| available_p = true; |
| break; |
| } |
| } |
| if (available_p) |
| continue; |
| } |
| |
| rtx new_pat; |
| if (can_refine_vsetvl_p (cfg_bb, reaching_out)) |
| new_pat |
| = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX); |
| else if (vlmax_avl_p (reaching_out.get_avl ())) |
| new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out, |
| get_vl (reaching_out.get_insn ()->rtl ())); |
| else |
| new_pat |
| = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX); |
| |
| start_sequence (); |
| emit_insn (new_pat); |
| rtx_insn *rinsn = get_insns (); |
| end_sequence (); |
| insert_insn_end_basic_block (rinsn, cfg_bb); |
| if (dump_file) |
| { |
| fprintf (dump_file, |
| "\nInsert vsetvl insn %d at the end of <bb %d>:\n", |
| INSN_UID (rinsn), cfg_bb->index); |
| print_rtl_single (dump_file, rinsn); |
| } |
| } |
| |
| return need_commit; |
| } |
| |
| void |
| pass_vsetvl::pre_vsetvl (void) |
| { |
| /* Compute entity list. */ |
| prune_expressions (); |
| |
| m_vector_manager->create_bitmap_vectors (); |
| compute_local_properties (); |
| m_vector_manager->vector_edge_list = pre_edge_lcm_avs ( |
| m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp, |
| m_vector_manager->vector_comp, m_vector_manager->vector_antic, |
| m_vector_manager->vector_kill, m_vector_manager->vector_avin, |
| m_vector_manager->vector_avout, &m_vector_manager->vector_insert, |
| &m_vector_manager->vector_del); |
| |
| /* We should dump the information before CFG is changed. Otherwise it will |
| produce ICE (internal compiler error). */ |
| if (dump_file) |
| m_vector_manager->dump (dump_file); |
| |
| refine_vsetvls (); |
| cleanup_vsetvls (); |
| bool need_commit = commit_vsetvls (); |
| if (need_commit) |
| commit_edge_insertions (); |
| } |
| |
| void |
| pass_vsetvl::cleanup_insns (void) const |
| { |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| { |
| for (insn_info *insn : bb->real_nondebug_insns ()) |
| { |
| rtx_insn *rinsn = insn->rtl (); |
| |
| if (vlmax_avl_insn_p (rinsn)) |
| { |
| eliminate_insn (rinsn); |
| continue; |
| } |
| |
| /* Erase the AVL operand from the instruction. */ |
| if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn))) |
| continue; |
| rtx avl = get_vl (rinsn); |
| if (count_occurrences (PATTERN (rinsn), avl, true) == 1) |
| { |
| /* Get the list of uses for the new instruction. */ |
| auto attempt = crtl->ssa->new_change_attempt (); |
| insn_change change (insn); |
| /* Remove the use of the substituted value. */ |
| access_array_builder uses_builder (attempt); |
| uses_builder.reserve (insn->num_uses () - 1); |
| for (use_info *use : insn->uses ()) |
| if (use != find_access (insn->uses (), REGNO (avl))) |
| uses_builder.quick_push (use); |
| use_array new_uses = use_array (uses_builder.finish ()); |
| change.new_uses = new_uses; |
| change.move_range = insn->ebb ()->insn_range (); |
| rtx pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx); |
| gcc_assert (change_insn (crtl->ssa, change, insn, pat)); |
| } |
| } |
| } |
| } |
| |
| void |
| pass_vsetvl::propagate_avl (void) const |
| { |
| /* Rebuild the RTL_SSA according to the new CFG generated by LCM. */ |
| /* Finalization of RTL_SSA. */ |
| free_dominance_info (CDI_DOMINATORS); |
| if (crtl->ssa->perform_pending_updates ()) |
| cleanup_cfg (0); |
| delete crtl->ssa; |
| crtl->ssa = nullptr; |
| /* Initialization of RTL_SSA. */ |
| calculate_dominance_info (CDI_DOMINATORS); |
| df_analyze (); |
| crtl->ssa = new function_info (cfun); |
| |
| hash_set<rtx_insn *> to_delete; |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| { |
| for (insn_info *insn : bb->real_nondebug_insns ()) |
| { |
| if (vsetvl_discard_result_insn_p (insn->rtl ())) |
| { |
| rtx avl = get_avl (insn->rtl ()); |
| if (!REG_P (avl)) |
| continue; |
| |
| set_info *set = find_access (insn->uses (), REGNO (avl))->def (); |
| insn_info *def_insn = extract_single_source (set); |
| if (!def_insn) |
| continue; |
| |
| /* Handle this case: |
| vsetvli a6,zero,e32,m1,ta,mu |
| li a5,4096 |
| add a7,a0,a5 |
| addi a7,a7,-96 |
| vsetvli t1,zero,e8,mf8,ta,ma |
| vle8.v v24,0(a7) |
| add a5,a3,a5 |
| addi a5,a5,-96 |
| vse8.v v24,0(a5) |
| vsetvli zero,a6,e32,m1,tu,ma |
| */ |
| if (vsetvl_insn_p (def_insn->rtl ())) |
| { |
| vl_vtype_info def_info = get_vl_vtype_info (def_insn); |
| vl_vtype_info info = get_vl_vtype_info (insn); |
| rtx avl = get_avl (def_insn->rtl ()); |
| rtx vl = get_vl (def_insn->rtl ()); |
| if (def_info.get_ratio () == info.get_ratio ()) |
| { |
| if (vlmax_avl_p (def_info.get_avl ())) |
| { |
| info.set_avl_info ( |
| avl_info (def_info.get_avl (), nullptr)); |
| rtx new_pat |
| = gen_vsetvl_pat (VSETVL_NORMAL, info, vl); |
| validate_change (insn->rtl (), |
| &PATTERN (insn->rtl ()), new_pat, |
| false); |
| continue; |
| } |
| if (def_info.has_avl_imm () || rtx_equal_p (avl, vl)) |
| { |
| info.set_avl_info (avl_info (avl, nullptr)); |
| emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_AFTER, |
| info, NULL_RTX, insn->rtl ()); |
| if (set->single_nondebug_insn_use ()) |
| { |
| to_delete.add (insn->rtl ()); |
| to_delete.add (def_insn->rtl ()); |
| } |
| continue; |
| } |
| } |
| } |
| } |
| |
| /* Change vsetvl rd, rs1 --> vsevl zero, rs1, |
| if rd is not used by any nondebug instructions. |
| Even though this PASS runs after RA and it doesn't help for |
| reduce register pressure, it can help instructions scheduling |
| since we remove the dependencies. */ |
| if (vsetvl_insn_p (insn->rtl ())) |
| { |
| rtx vl = get_vl (insn->rtl ()); |
| rtx avl = get_avl (insn->rtl ()); |
| if (vlmax_avl_p (avl)) |
| continue; |
| def_info *def = find_access (insn->defs (), REGNO (vl)); |
| set_info *set = safe_dyn_cast<set_info *> (def); |
| gcc_assert (set); |
| const vl_vtype_info info = get_vl_vtype_info (insn); |
| rtx new_pat |
| = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX); |
| if (!set->has_nondebug_insn_uses ()) |
| { |
| validate_change (insn->rtl (), &PATTERN (insn->rtl ()), |
| new_pat, false); |
| continue; |
| } |
| } |
| } |
| } |
| |
| for (rtx_insn *rinsn : to_delete) |
| eliminate_insn (rinsn); |
| } |
| |
| void |
| pass_vsetvl::init (void) |
| { |
| if (optimize > 0) |
| { |
| /* Initialization of RTL_SSA. */ |
| calculate_dominance_info (CDI_DOMINATORS); |
| df_analyze (); |
| crtl->ssa = new function_info (cfun); |
| } |
| |
| m_vector_manager = new vector_infos_manager (); |
| compute_probabilities (); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nPrologue: Initialize vector infos\n"); |
| m_vector_manager->dump (dump_file); |
| } |
| } |
| |
| void |
| pass_vsetvl::done (void) |
| { |
| if (optimize > 0) |
| { |
| /* Finalization of RTL_SSA. */ |
| free_dominance_info (CDI_DOMINATORS); |
| if (crtl->ssa->perform_pending_updates ()) |
| cleanup_cfg (0); |
| delete crtl->ssa; |
| crtl->ssa = nullptr; |
| } |
| m_vector_manager->release (); |
| delete m_vector_manager; |
| m_vector_manager = nullptr; |
| } |
| |
| /* Compute probability for each block. */ |
| void |
| pass_vsetvl::compute_probabilities (void) |
| { |
| /* Don't compute it in -O0 since we don't need it. */ |
| if (!optimize) |
| return; |
| edge e; |
| edge_iterator ei; |
| |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| { |
| basic_block cfg_bb = bb->cfg_bb (); |
| auto &curr_prob |
| = m_vector_manager->vector_block_infos[cfg_bb->index].probability; |
| if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb) |
| curr_prob = profile_probability::always (); |
| gcc_assert (curr_prob.initialized_p ()); |
| FOR_EACH_EDGE (e, ei, cfg_bb->succs) |
| { |
| auto &new_prob |
| = m_vector_manager->vector_block_infos[e->dest->index].probability; |
| if (!new_prob.initialized_p ()) |
| new_prob = curr_prob * e->probability; |
| else if (new_prob == profile_probability::always ()) |
| continue; |
| else |
| new_prob += curr_prob * e->probability; |
| } |
| } |
| auto &exit_block |
| = m_vector_manager->vector_block_infos[EXIT_BLOCK_PTR_FOR_FN (cfun)->index]; |
| exit_block.probability = profile_probability::always (); |
| } |
| |
| /* Lazy vsetvl insertion for optimize > 0. */ |
| void |
| pass_vsetvl::lazy_vsetvl (void) |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| "\nEntering Lazy VSETVL PASS and Handling %d basic blocks for " |
| "function:%s\n", |
| n_basic_blocks_for_fn (cfun), function_name (cfun)); |
| |
| /* Phase 1 - Compute the local dems within each block. |
| The data-flow analysis within each block is backward analysis. */ |
| if (dump_file) |
| fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n"); |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| compute_local_backward_infos (bb); |
| if (dump_file) |
| m_vector_manager->dump (dump_file); |
| |
| /* Phase 2 - Emit vsetvl instructions within each basic block according to |
| demand, compute and save ANTLOC && AVLOC of each block. */ |
| if (dump_file) |
| fprintf (dump_file, |
| "\nPhase 2: Emit vsetvl instruction within each block\n"); |
| for (const bb_info *bb : crtl->ssa->bbs ()) |
| emit_local_forward_vsetvls (bb); |
| if (dump_file) |
| m_vector_manager->dump (dump_file); |
| |
| /* Phase 3 - Propagate demanded info across blocks. */ |
| if (dump_file) |
| fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n"); |
| demand_fusion (); |
| if (dump_file) |
| m_vector_manager->dump (dump_file); |
| |
| /* Phase 4 - Lazy code motion. */ |
| if (dump_file) |
| fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n"); |
| pre_vsetvl (); |
| |
| /* Phase 5 - Cleanup AVL && VL operand of RVV instruction. */ |
| if (dump_file) |
| fprintf (dump_file, "\nPhase 5: Cleanup AVL and VL operands\n"); |
| cleanup_insns (); |
| |
| /* Phase 6 - Rebuild RTL_SSA to propagate AVL between vsetvls. */ |
| if (dump_file) |
| fprintf (dump_file, |
| "\nPhase 6: Rebuild RTL_SSA to propagate AVL between vsetvls\n"); |
| propagate_avl (); |
| } |
| |
| /* Main entry point for this pass. */ |
| unsigned int |
| pass_vsetvl::execute (function *) |
| { |
| if (n_basic_blocks_for_fn (cfun) <= 0) |
| return 0; |
| |
| /* The RVV instruction may change after split which is not a stable |
| instruction. We need to split it here to avoid potential issue |
| since the VSETVL PASS is insert before split PASS. */ |
| split_all_insns (); |
| |
| /* Early return for there is no vector instructions. */ |
| if (!has_vector_insn (cfun)) |
| return 0; |
| |
| init (); |
| |
| if (!optimize) |
| simple_vsetvl (); |
| else |
| lazy_vsetvl (); |
| |
| done (); |
| return 0; |
| } |
| |
| rtl_opt_pass * |
| make_pass_vsetvl (gcc::context *ctxt) |
| { |
| return new pass_vsetvl (ctxt); |
| } |