/* VSETVL pass for RISC-V 'V' Extension for GNU compiler.
Copyright (C) 2022-2023 Free Software Foundation, Inc.
Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* This pass sets the VL/VTYPE global status for RVV instructions
that depend on the VL and VTYPE registers, using lazy code motion (LCM).
Strategy:
- Backward demanded info fusion within block.
- Lazy code motion (LCM) based demanded info backward propagation.
- RTL_SSA framework for def-use, PHI analysis.
- Lazy code motion (LCM) for global VL/VTYPE optimization.
Assumption:
- Each AVL operand is either an immediate (must be in the range 0 ~ 31) or a reg.
This pass consists of 6 phases:
- Phase 1 - compute VL/VTYPE demanded information within each block
by backward data-flow analysis.
- Phase 2 - Emit vsetvl instructions within each basic block according to
demand, compute and save ANTLOC && AVLOC of each block.
- Phase 3 - Backward && forward demanded info propagation and fusion across
blocks.
- Phase 4 - Lazy code motion: compute local properties, run
pre_edge_lcm, then insert vsetvls and delete edges according to the
LCM results.
- Phase 5 - Clean up the AVL operand of each RVV instruction since it
will not be used any more, and the VL operand of each VSETVL
instruction if it is not used by any non-debug instruction.
- Phase 6 - Propagate AVL between vsetvl instructions.
Implementation:
- The subroutine for optimize == 0 is simple_vsetvl.
This function simply inserts a vsetvl for each RVV
instruction. No optimization is performed.
- The subroutine for optimize > 0 is lazy_vsetvl.
This function optimizes the vsetvl insertion process by
lazy code motion (LCM) layered on the RTL_SSA framework. */
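/* As an illustrative sketch (assumed assembly, not actual pass output):
for a loop whose body demands only a single VL/VTYPE configuration,
lazy_vsetvl keeps one vsetvl hoisted outside the loop rather than one
vsetvl per RVV instruction inside it.
Naive placement:
loop:
vsetvli zero,a2,e32,m1,ta,ma
vle32.v v1,(a0)
vsetvli zero,a2,e32,m1,ta,ma
vadd.vv v1,v1,v1
bnez t0,loop
After this pass:
vsetvli zero,a2,e32,m1,ta,ma
loop:
vle32.v v1,(a0)
vadd.vv v1,v1,v1
bnez t0,loop */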
#define IN_TARGET_CODE 1
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "backend.h"
#include "rtl.h"
#include "target.h"
#include "tree-pass.h"
#include "df.h"
#include "rtl-ssa.h"
#include "cfgcleanup.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "insn-opinit.h"
#include "tm-constrs.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "predict.h"
#include "profile-count.h"
#include "riscv-vsetvl.h"
using namespace rtl_ssa;
using namespace riscv_vector;
DEBUG_FUNCTION void
debug (const vector_insn_info *info)
{
info->dump (stderr);
}
DEBUG_FUNCTION void
debug (const vector_infos_manager *info)
{
info->dump (stderr);
}
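/* Return true if X is the VLMAX AVL placeholder, i.e. equal to RVV_VLMAX. */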
static bool
vlmax_avl_p (rtx x)
{
return x && rtx_equal_p (x, RVV_VLMAX);
}
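/* Return true if RINSN is a vlmax_avlsi or vlmax_avldi instruction. */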
static bool
vlmax_avl_insn_p (rtx_insn *rinsn)
{
return (INSN_CODE (rinsn) == CODE_FOR_vlmax_avlsi
|| INSN_CODE (rinsn) == CODE_FOR_vlmax_avldi);
}
/* Return true if the block is a loop itself:
local_dem
__________
____|____ |
| | |
|________| |
|_________|
reaching_out
*/
static bool
loop_basic_block_p (const basic_block cfg_bb)
{
if (JUMP_P (BB_END (cfg_bb)) && any_condjump_p (BB_END (cfg_bb)))
{
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, cfg_bb->succs)
if (e->dest->index == cfg_bb->index)
return true;
}
return false;
}
/* Return true if it is an RVV instruction that depends on the VTYPE global
status register. */
static bool
has_vtype_op (rtx_insn *rinsn)
{
return recog_memoized (rinsn) >= 0 && get_attr_has_vtype_op (rinsn);
}
/* Return true if it is an RVV instruction that depends on the VL global
status register. */
static bool
has_vl_op (rtx_insn *rinsn)
{
return recog_memoized (rinsn) >= 0 && get_attr_has_vl_op (rinsn);
}
/* Return true if SEW is a valid value that can be encoded into the VTYPE
format. */
static bool
valid_sew_p (size_t sew)
{
/* exact_log2 returns -1 for a value that is not a power of two, and -1
is nonzero, so compare with -1 explicitly. */
return exact_log2 (sew) != -1 && sew >= 8 && sew <= 64;
}
/* Return true if it is a vsetvl instruction. */
static bool
vector_config_insn_p (rtx_insn *rinsn)
{
return recog_memoized (rinsn) >= 0 && get_attr_type (rinsn) == TYPE_VSETVL;
}
/* Return true if it is vsetvldi or vsetvlsi. */
static bool
vsetvl_insn_p (rtx_insn *rinsn)
{
if (!vector_config_insn_p (rinsn))
return false;
return (INSN_CODE (rinsn) == CODE_FOR_vsetvldi
|| INSN_CODE (rinsn) == CODE_FOR_vsetvlsi);
}
/* Return true if it is vsetvl zero, rs1. */
static bool
vsetvl_discard_result_insn_p (rtx_insn *rinsn)
{
if (!vector_config_insn_p (rinsn))
return false;
return (INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultdi
|| INSN_CODE (rinsn) == CODE_FOR_vsetvl_discard_resultsi);
}
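/* Return true if INSN is a real insn that lives in block BB. */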
static bool
real_insn_and_same_bb_p (const insn_info *insn, const bb_info *bb)
{
return insn != nullptr && insn->is_real () && insn->bb () == bb;
}
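/* Return true if INSN1 occurs before INSN2 in the program order. */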
static bool
before_p (const insn_info *insn1, const insn_info *insn2)
{
return insn1->compare_with (insn2) < 0;
}
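/* Return the latest insn in BB that defines the register X (i.e. the insn
that kills X as an AVL), or NULL if there is none or X is not a plain
register. */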
static insn_info *
find_reg_killed_by (const bb_info *bb, rtx x)
{
if (!x || vlmax_avl_p (x) || !REG_P (x))
return nullptr;
for (insn_info *insn : bb->reverse_real_nondebug_insns ())
if (find_access (insn->defs (), REGNO (x)))
return insn;
return nullptr;
}
/* Helper function to get VL operand. */
static rtx
get_vl (rtx_insn *rinsn)
{
if (has_vl_op (rinsn))
{
extract_insn_cached (rinsn);
return recog_data.operand[get_attr_vl_op_idx (rinsn)];
}
return SET_DEST (XVECEXP (PATTERN (rinsn), 0, 0));
}
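/* Return true if INFO is dirty with a killed AVL whose value is still
recoverable in BB: for a VLMAX AVL, check whether the VL result of
INFO's insn is redefined in BB; otherwise check whether the latest
definition of the AVL register in BB is a set that remains compatible
with INFO's AVL. */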
static bool
has_vsetvl_killed_avl_p (const bb_info *bb, const vector_insn_info &info)
{
if (info.dirty_with_killed_avl_p ())
{
rtx avl = info.get_avl ();
if (vlmax_avl_p (avl))
return find_reg_killed_by (bb, get_vl (info.get_insn ()->rtl ()))
!= nullptr;
for (const insn_info *insn : bb->reverse_real_nondebug_insns ())
{
def_info *def = find_access (insn->defs (), REGNO (avl));
if (def)
{
set_info *set = safe_dyn_cast<set_info *> (def);
if (!set)
return false;
rtx new_avl = gen_rtx_REG (GET_MODE (avl), REGNO (avl));
gcc_assert (new_avl != avl);
if (!info.compatible_avl_p (avl_info (new_avl, set)))
return false;
return true;
}
}
}
return false;
}
/* An "anticipatable occurrence" is one that is the first occurrence in the
basic block, the operands are not modified in the basic block prior
to the occurrence and the output is not used between the start of
the block and the occurrence.
For VSETVL instruction, we have these following formats:
1. vsetvl zero, rs1.
2. vsetvl zero, imm.
3. vsetvl rd, rs1.
Based on these circumstances, a DEM is considered a local anticipatable
occurrence if it satisfies the following conditions:
1). rs1 (avl) is not modified in the basic block prior to the VSETVL.
2). rd (vl) is not modified in the basic block prior to the VSETVL.
3). rd (vl) is not used between the start of the block and the occurrence.
Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
is modified prior to the occurrence. This case is already considered as
a non-local anticipatable occurrence.
*/
static bool
anticipatable_occurrence_p (const bb_info *bb, const vector_insn_info dem)
{
insn_info *insn = dem.get_insn ();
/* The only operand of a VSETVL that we care about is the AVL. */
if (dem.has_avl_reg ())
{
/* rs1 (avl) is not modified in the basic block prior to the VSETVL. */
if (!vlmax_avl_p (dem.get_avl ()))
{
set_info *set
= find_access (insn->uses (), REGNO (dem.get_avl ()))->def ();
/* If it's undefined, conservatively treat it as not anticipatable. */
if (!set)
return false;
if (real_insn_and_same_bb_p (set->insn (), bb)
&& before_p (set->insn (), insn))
return false;
}
}
/* rd (vl) is not used between the start of the block and the occurrence. */
if (vsetvl_insn_p (insn->rtl ()))
{
rtx dest = get_vl (insn->rtl ());
for (insn_info *i = insn->prev_nondebug_insn ();
real_insn_and_same_bb_p (i, bb); i = i->prev_nondebug_insn ())
{
/* rd (vl) is not used between the start of the block and the
* occurrence. */
if (find_access (i->uses (), REGNO (dest)))
return false;
/* rd (vl) is not modified in the basic block prior to the VSETVL. */
if (find_access (i->defs (), REGNO (dest)))
return false;
}
}
return true;
}
/* An "available occurrence" is one that is the last occurrence in the
basic block and the operands are not modified by following statements in
the basic block [including this insn].
For VSETVL instruction, we have these following formats:
1. vsetvl zero, rs1.
2. vsetvl zero, imm.
3. vsetvl rd, rs1.
Based on these circumstances, a DEM is considered a local available
occurrence if it satisfies the following conditions:
1). rs1 (avl) is not modified by following statements in
the basic block.
2). rd (vl) is not modified by following statements in
the basic block.
Note: We don't need to check VL/VTYPE here since DEM is UNKNOWN if VL/VTYPE
is modified prior to the occurrence. This case is already considered as
a non-local available occurrence.
*/
static bool
available_occurrence_p (const bb_info *bb, const vector_insn_info dem)
{
insn_info *insn = dem.get_insn ();
/* The only operand of a VSETVL that we care about is the AVL. */
if (dem.has_avl_reg ())
{
if (!vlmax_avl_p (dem.get_avl ()))
{
rtx dest = NULL_RTX;
if (vsetvl_insn_p (insn->rtl ()))
dest = get_vl (insn->rtl ());
for (const insn_info *i = insn; real_insn_and_same_bb_p (i, bb);
i = i->next_nondebug_insn ())
{
/* rs1 (avl) is not modified by following statements in
the basic block. */
if (find_access (i->defs (), REGNO (dem.get_avl ())))
return false;
/* rd (vl) is not modified by following statements in
the basic block. */
if (dest && find_access (i->defs (), REGNO (dest)))
return false;
}
}
}
return true;
}
/* Return true if it is worthwhile to backward propagate demand info into
this block. */
static bool
backward_propagate_worthwhile_p (const basic_block cfg_bb,
const vector_block_info block_info)
{
if (loop_basic_block_p (cfg_bb))
{
if (block_info.reaching_out.valid_or_dirty_p ())
{
if (block_info.local_dem.compatible_p (block_info.reaching_out))
{
/* Case 1 (Can backward propagate):
....
bb0:
...
for (int i = 0; i < n; i++)
{
vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
__riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
}
The local_dem is compatible with reaching_out. Such a case
is worthwhile for backward propagation. */
return true;
}
else
{
/* Case 2 (Don't backward propagate):
....
bb0:
...
for (int i = 0; i < n; i++)
{
vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
__riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
vint16mf2_t v2 = __riscv_vle16_v_i16mf2 (in + i + 6, 8);
__riscv_vse16_v_i16mf2 (out + i + 6, v2, 8);
}
The local_dem is incompatible with reaching_out.
It makes no sense to backward propagate the local_dem since we
can't avoid VSETVL inside the loop. */
return false;
}
}
else
{
gcc_assert (block_info.reaching_out.unknown_p ());
/* Case 3 (Don't backward propagate):
....
bb0:
...
for (int i = 0; i < n; i++)
{
vint16mf4_t v = __riscv_vle16_v_i16mf4 (in + i + 5, 7);
__riscv_vse16_v_i16mf4 (out + i + 5, v, 7);
fn3 ();
}
The local_dem is VALID, but the reaching_out is UNKNOWN.
It makes no sense to backward propagate the local_dem since we
can't avoid VSETVL inside the loop. */
return false;
}
}
return true;
}
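/* Return true if INSN is of one of the kinds selected by the TYPES bitmask
(REAL_SET, PHI_SET, BB_HEAD_SET or BB_END_SET). */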
static bool
insn_should_be_added_p (const insn_info *insn, unsigned int types)
{
if (insn->is_real () && (types & REAL_SET))
return true;
if (insn->is_phi () && (types & PHI_SET))
return true;
if (insn->is_bb_head () && (types & BB_HEAD_SET))
return true;
if (insn->is_bb_end () && (types & BB_END_SET))
return true;
return false;
}
/* Recursively find all definition instructions. The kinds of definitions
collected are specified by the TYPES bitmask. */
static hash_set<set_info *>
get_all_sets (phi_info *phi, unsigned int types)
{
hash_set<set_info *> insns;
auto_vec<phi_info *> work_list;
hash_set<phi_info *> visited_list;
if (!phi)
return hash_set<set_info *> ();
work_list.safe_push (phi);
while (!work_list.is_empty ())
{
phi_info *phi = work_list.pop ();
visited_list.add (phi);
for (use_info *use : phi->inputs ())
{
def_info *def = use->def ();
set_info *set = safe_dyn_cast<set_info *> (def);
if (!set)
return hash_set<set_info *> ();
gcc_assert (!set->insn ()->is_debug_insn ());
if (insn_should_be_added_p (set->insn (), types))
insns.add (set);
if (set->insn ()->is_phi ())
{
phi_info *new_phi = as_a<phi_info *> (set);
if (!visited_list.contains (new_phi))
work_list.safe_push (new_phi);
}
}
}
return insns;
}
static hash_set<set_info *>
get_all_sets (set_info *set, bool /* get_real_inst */ real_p,
bool /*get_phi*/ phi_p, bool /* get_function_parameter*/ param_p)
{
if (real_p && phi_p && param_p)
return get_all_sets (safe_dyn_cast<phi_info *> (set),
REAL_SET | PHI_SET | BB_HEAD_SET | BB_END_SET);
else if (real_p && param_p)
return get_all_sets (safe_dyn_cast<phi_info *> (set),
REAL_SET | BB_HEAD_SET | BB_END_SET);
else if (real_p)
return get_all_sets (safe_dyn_cast<phi_info *> (set), REAL_SET);
return hash_set<set_info *> ();
}
/* Helper function to get AVL operand. */
static rtx
get_avl (rtx_insn *rinsn)
{
if (vsetvl_insn_p (rinsn) || vsetvl_discard_result_insn_p (rinsn))
return XVECEXP (SET_SRC (XVECEXP (PATTERN (rinsn), 0, 0)), 0, 0);
if (!has_vl_op (rinsn))
return NULL_RTX;
if (get_attr_avl_type (rinsn) == VLMAX)
return RVV_VLMAX;
extract_insn_cached (rinsn);
return recog_data.operand[get_attr_vl_op_idx (rinsn)];
}
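/* Return the set in SETS whose defining block is CFG_BB, or NULL if there
is none. */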
static set_info *
get_same_bb_set (hash_set<set_info *> &sets, const basic_block cfg_bb)
{
for (set_info *set : sets)
if (set->bb ()->cfg_bb () == cfg_bb)
return set;
return nullptr;
}
/* Recursively find all predecessor blocks for cfg_bb. */
static hash_set<basic_block>
get_all_predecessors (basic_block cfg_bb)
{
hash_set<basic_block> blocks;
auto_vec<basic_block> work_list;
hash_set<basic_block> visited_list;
work_list.safe_push (cfg_bb);
while (!work_list.is_empty ())
{
basic_block new_cfg_bb = work_list.pop ();
visited_list.add (new_cfg_bb);
edge e;
edge_iterator ei;
FOR_EACH_EDGE (e, ei, new_cfg_bb->preds)
{
if (!visited_list.contains (e->src))
work_list.safe_push (e->src);
blocks.add (e->src);
}
}
return blocks;
}
/* Return true if any set in SETS is defined in block BB. */
static bool
any_set_in_bb_p (hash_set<set_info *> sets, const bb_info *bb)
{
for (const set_info *set : sets)
if (set->bb ()->index () == bb->index ())
return true;
return false;
}
/* Helper function to get the SEW operand. We always have a SEW value for
all RVV instructions that have a VTYPE OP. */
static uint8_t
get_sew (rtx_insn *rinsn)
{
return get_attr_sew (rinsn);
}
/* Helper function to get the VLMUL operand. We always have a VLMUL value
for all RVV instructions that have a VTYPE OP. */
static enum vlmul_type
get_vlmul (rtx_insn *rinsn)
{
return (enum vlmul_type) get_attr_vlmul (rinsn);
}
/* Get default tail policy. */
static bool
get_default_ta ()
{
/* For an instruction that doesn't require TA, we still need a default value
to emit a vsetvl. We pick the default value according to the preferred
policy. */
return (bool) (get_prefer_tail_policy () & 0x1
|| (get_prefer_tail_policy () >> 1 & 0x1));
}
/* Get default mask policy. */
static bool
get_default_ma ()
{
/* For an instruction that doesn't require MA, we still need a default value
to emit a vsetvl. We pick the default value according to the preferred
policy. */
return (bool) (get_prefer_mask_policy () & 0x1
|| (get_prefer_mask_policy () >> 1 & 0x1));
}
/* Helper function to get TA operand. */
static bool
tail_agnostic_p (rtx_insn *rinsn)
{
/* If it doesn't have TA, we return agnostic by default. */
extract_insn_cached (rinsn);
int ta = get_attr_ta (rinsn);
return ta == INVALID_ATTRIBUTE ? get_default_ta () : IS_AGNOSTIC (ta);
}
/* Helper function to get MA operand. */
static bool
mask_agnostic_p (rtx_insn *rinsn)
{
/* If it doesn't have MA, we return agnostic by default. */
extract_insn_cached (rinsn);
int ma = get_attr_ma (rinsn);
return ma == INVALID_ATTRIBUTE ? get_default_ma () : IS_AGNOSTIC (ma);
}
/* Return true if FN has a vector instruction that uses VL/VTYPE. */
static bool
has_vector_insn (function *fn)
{
basic_block cfg_bb;
rtx_insn *rinsn;
FOR_ALL_BB_FN (cfg_bb, fn)
FOR_BB_INSNS (cfg_bb, rinsn)
if (NONDEBUG_INSN_P (rinsn) && has_vtype_op (rinsn))
return true;
return false;
}
/* Generate a vsetvl instruction pattern. */
static rtx
gen_vsetvl_pat (enum vsetvl_type insn_type, const vl_vtype_info &info, rtx vl)
{
rtx avl = info.get_avl ();
rtx sew = gen_int_mode (info.get_sew (), Pmode);
rtx vlmul = gen_int_mode (info.get_vlmul (), Pmode);
rtx ta = gen_int_mode (info.get_ta (), Pmode);
rtx ma = gen_int_mode (info.get_ma (), Pmode);
if (insn_type == VSETVL_NORMAL)
{
gcc_assert (vl != NULL_RTX);
return gen_vsetvl (Pmode, vl, avl, sew, vlmul, ta, ma);
}
else if (insn_type == VSETVL_VTYPE_CHANGE_ONLY)
return gen_vsetvl_vtype_change_only (sew, vlmul, ta, ma);
else
return gen_vsetvl_discard_result (Pmode, avl, sew, vlmul, ta, ma);
}
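/* Generate a new vsetvl pattern for RINSN according to INFO, preserving
the form (normal, vtype-change-only or discard-result) of the original
instruction. */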
static rtx
gen_vsetvl_pat (rtx_insn *rinsn, const vector_insn_info &info)
{
rtx new_pat;
if (vsetvl_insn_p (rinsn) || vlmax_avl_p (info.get_avl ()))
{
rtx dest = get_vl (rinsn);
new_pat = gen_vsetvl_pat (VSETVL_NORMAL, info, dest);
}
else if (INSN_CODE (rinsn) == CODE_FOR_vsetvl_vtype_change_only)
new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
else
new_pat = gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX);
return new_pat;
}
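/* Emit a vsetvl instruction of kind INSN_TYPE for INFO, with VL as the
result operand when one is needed. EMIT_TYPE selects whether the insn
is emitted directly, before RINSN, or after RINSN. */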
static void
emit_vsetvl_insn (enum vsetvl_type insn_type, enum emit_type emit_type,
const vl_vtype_info &info, rtx vl, rtx_insn *rinsn)
{
rtx pat = gen_vsetvl_pat (insn_type, info, vl);
if (dump_file)
{
fprintf (dump_file, "\nInsert vsetvl insn PATTERN:\n");
print_rtl_single (dump_file, pat);
}
if (emit_type == EMIT_DIRECT)
emit_insn (pat);
else if (emit_type == EMIT_BEFORE)
emit_insn_before (pat, rinsn);
else
emit_insn_after (pat, rinsn);
}
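/* Remove RINSN, using remove_insn inside a sequence and delete_insn
otherwise. */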
static void
eliminate_insn (rtx_insn *rinsn)
{
if (dump_file)
{
fprintf (dump_file, "\nEliminate insn %d:\n", INSN_UID (rinsn));
print_rtl_single (dump_file, rinsn);
}
if (in_sequence_p ())
remove_insn (rinsn);
else
delete_insn (rinsn);
}
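/* Insert a vsetvl for INFO relative to RINSN, using PREV_INFO to pick the
cheapest form (e.g. the vtype-change-only "x0, x0" form when the AVL and
VLMAX are unchanged). */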
static void
insert_vsetvl (enum emit_type emit_type, rtx_insn *rinsn,
const vector_insn_info &info, const vector_insn_info &prev_info)
{
/* Use X0, X0 form if the AVL is the same and the SEW+LMUL gives the same
VLMAX. */
if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
&& info.compatible_avl_p (prev_info) && info.same_vlmax_p (prev_info))
{
emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
rinsn);
return;
}
if (info.has_avl_imm ())
{
emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX,
rinsn);
return;
}
if (info.has_avl_no_reg ())
{
/* We can only use x0, x0 if there's no chance of the vtype change causing
the previous vl to become invalid. */
if (prev_info.valid_or_dirty_p () && !prev_info.unknown_p ()
&& info.same_vlmax_p (prev_info))
{
emit_vsetvl_insn (VSETVL_VTYPE_CHANGE_ONLY, emit_type, info, NULL_RTX,
rinsn);
return;
}
/* Otherwise use an AVL of 0 to avoid depending on previous vl. */
vl_vtype_info new_info = info;
new_info.set_avl_info (avl_info (const0_rtx, nullptr));
emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, new_info, NULL_RTX,
rinsn);
return;
}
/* Use X0 as the DestReg unless AVLReg is X0. We also need to change the
opcode if the AVLReg is X0 as they have different register classes for
the AVL operand. */
if (vlmax_avl_p (info.get_avl ()))
{
gcc_assert (has_vtype_op (rinsn) || vsetvl_insn_p (rinsn));
rtx vl_op = get_vl (rinsn);
gcc_assert (!vlmax_avl_p (vl_op));
emit_vsetvl_insn (VSETVL_NORMAL, emit_type, info, vl_op, rinsn);
return;
}
emit_vsetvl_insn (VSETVL_DISCARD_RESULT, emit_type, info, NULL_RTX, rinsn);
if (dump_file)
{
fprintf (dump_file, "Update VL/VTYPE info, previous info=");
prev_info.dump (dump_file);
}
}
/* If X contains any LABEL_REF's, add REG_LABEL_OPERAND notes for them
to INSN. If such notes are added to an insn which references a
CODE_LABEL, the LABEL_NUSES count is incremented. We have to add
that note, because the following loop optimization pass requires
them. */
/* ??? If there was a jump optimization pass after gcse and before loop,
then we would not need to do this here, because jump would add the
necessary REG_LABEL_OPERAND and REG_LABEL_TARGET notes. */
static void
add_label_notes (rtx x, rtx_insn *rinsn)
{
enum rtx_code code = GET_CODE (x);
int i, j;
const char *fmt;
if (code == LABEL_REF && !LABEL_REF_NONLOCAL_P (x))
{
/* This code used to ignore labels that referred to dispatch tables to
avoid flow generating (slightly) worse code.
We no longer ignore such label references (see LABEL_REF handling in
mark_jump_label for additional information). */
/* There's no reason for current users to emit jump-insns with
such a LABEL_REF, so we don't have to handle REG_LABEL_TARGET
notes. */
gcc_assert (!JUMP_P (rinsn));
add_reg_note (rinsn, REG_LABEL_OPERAND, label_ref_label (x));
if (LABEL_P (label_ref_label (x)))
LABEL_NUSES (label_ref_label (x))++;
return;
}
for (i = GET_RTX_LENGTH (code) - 1, fmt = GET_RTX_FORMAT (code); i >= 0; i--)
{
if (fmt[i] == 'e')
add_label_notes (XEXP (x, i), rinsn);
else if (fmt[i] == 'E')
for (j = XVECLEN (x, i) - 1; j >= 0; j--)
add_label_notes (XVECEXP (x, i, j), rinsn);
}
}
/* Add EXPR to the end of basic block BB.
This is used by both the PRE and code hoisting. */
static void
insert_insn_end_basic_block (rtx_insn *rinsn, basic_block cfg_bb)
{
rtx_insn *end_rinsn = BB_END (cfg_bb);
rtx_insn *new_insn;
rtx_insn *pat, *pat_end;
pat = rinsn;
gcc_assert (pat && INSN_P (pat));
pat_end = pat;
while (NEXT_INSN (pat_end) != NULL_RTX)
pat_end = NEXT_INSN (pat_end);
/* If the last end_rinsn is a jump, insert EXPR in front. Similarly we need
to take care of trapping instructions in the presence of non-call
exceptions. */
if (JUMP_P (end_rinsn)
|| (NONJUMP_INSN_P (end_rinsn)
&& (!single_succ_p (cfg_bb)
|| single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL)))
{
/* FIXME: What if something in jump uses value set in new end_rinsn? */
new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
}
/* Likewise if the last end_rinsn is a call, as will happen in the presence
of exception handling. */
else if (CALL_P (end_rinsn)
&& (!single_succ_p (cfg_bb)
|| single_succ_edge (cfg_bb)->flags & EDGE_ABNORMAL))
{
/* Keeping in mind targets with small register classes and parameters
in registers, we search backward and place the instructions before
the first parameter is loaded. Do this for everyone for consistency
and a presumption that we'll get better code elsewhere as well. */
/* Since different machines initialize their parameter registers
in different orders, assume nothing. Collect the set of all
parameter registers. */
end_rinsn = find_first_parameter_load (end_rinsn, BB_HEAD (cfg_bb));
/* If we found all the parameter loads, then we want to insert
before the first parameter load.
If we did not find all the parameter loads, then we might have
stopped on the head of the block, which could be a CODE_LABEL.
If we inserted before the CODE_LABEL, then we would be putting
the end_rinsn in the wrong basic block. In that case, put the
end_rinsn after the CODE_LABEL. Also, respect NOTE_INSN_BASIC_BLOCK.
*/
while (LABEL_P (end_rinsn) || NOTE_INSN_BASIC_BLOCK_P (end_rinsn))
end_rinsn = NEXT_INSN (end_rinsn);
new_insn = emit_insn_before_noloc (pat, end_rinsn, cfg_bb);
}
else
new_insn = emit_insn_after_noloc (pat, end_rinsn, cfg_bb);
while (1)
{
if (INSN_P (pat))
add_label_notes (PATTERN (pat), new_insn);
if (pat == pat_end)
break;
pat = NEXT_INSN (pat);
}
}
/* Get VL/VTYPE information for INSN. */
static vl_vtype_info
get_vl_vtype_info (const insn_info *insn)
{
set_info *set = nullptr;
rtx avl = ::get_avl (insn->rtl ());
if (avl && REG_P (avl) && !vlmax_avl_p (avl))
set = find_access (insn->uses (), REGNO (avl))->def ();
uint8_t sew = get_sew (insn->rtl ());
enum vlmul_type vlmul = get_vlmul (insn->rtl ());
uint8_t ratio = get_attr_ratio (insn->rtl ());
/* When get_attr_ratio is invalid, this kind of instruction
doesn't care about the ratio. However, we still need this value
in the demand info backward analysis. */
if (ratio == INVALID_ATTRIBUTE)
ratio = calculate_ratio (sew, vlmul);
bool ta = tail_agnostic_p (insn->rtl ());
bool ma = mask_agnostic_p (insn->rtl ());
/* If the merge operand is an undef value, we prefer agnostic. */
int merge_op_idx = get_attr_merge_op_idx (insn->rtl ());
if (merge_op_idx != INVALID_ATTRIBUTE
&& satisfies_constraint_vu (recog_data.operand[merge_op_idx]))
{
ta = true;
ma = true;
}
vl_vtype_info info (avl_info (avl, set), sew, vlmul, ratio, ta, ma);
return info;
}
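/* Change the pattern of RINSN to NEW_PAT directly on the RTL. */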
static void
change_insn (rtx_insn *rinsn, rtx new_pat)
{
/* We don't apply the change via RTL_SSA here since the insn may be
one we newly added earlier in this pass, which doesn't have RTL_SSA
info yet. */
if (dump_file)
{
fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
INSN_UID (rinsn));
print_rtl_single (dump_file, PATTERN (rinsn));
}
validate_change (rinsn, &PATTERN (rinsn), new_pat, false);
if (dump_file)
{
fprintf (dump_file, "\nto:\n");
print_rtl_single (dump_file, PATTERN (rinsn));
}
}
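/* Change the pattern of INSN to NEW_PAT through the RTL_SSA framework,
verifying that the new pattern is recognized and worthwhile. Return
true on success. */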
static bool
change_insn (function_info *ssa, insn_change change, insn_info *insn,
rtx new_pat)
{
rtx_insn *rinsn = insn->rtl ();
auto attempt = ssa->new_change_attempt ();
if (!restrict_movement (change))
return false;
if (dump_file)
{
fprintf (dump_file, "\nChange PATTERN of insn %d from:\n",
INSN_UID (rinsn));
print_rtl_single (dump_file, PATTERN (rinsn));
}
insn_change_watermark watermark;
validate_change (rinsn, &PATTERN (rinsn), new_pat, true);
/* These routines report failures themselves. */
if (!recog (attempt, change) || !change_is_worthwhile (change, false))
return false;
confirm_change_group ();
ssa->change_insn (change);
if (dump_file)
{
fprintf (dump_file, "\nto:\n");
print_rtl_single (dump_file, PATTERN (rinsn));
}
return true;
}
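/* Rewrite the vsetvl associated with INSN (either INSN itself or the
vsetvl emitted just before it) according to INFO. */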
static void
change_vsetvl_insn (const insn_info *insn, const vector_insn_info &info)
{
rtx_insn *rinsn;
if (vector_config_insn_p (insn->rtl ()))
{
rinsn = insn->rtl ();
gcc_assert (vsetvl_insn_p (rinsn) && "Can't handle X0, rs1 vsetvli yet");
}
else
{
gcc_assert (has_vtype_op (insn->rtl ()));
rinsn = PREV_INSN (insn->rtl ());
gcc_assert (vector_config_insn_p (rinsn));
}
rtx new_pat = gen_vsetvl_pat (rinsn, info);
change_insn (rinsn, new_pat);
}
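/* Return true if INSN1 and INSN2 are known to compute the same value:
they have equal REG_EQUAL/REG_EQUIV notes, they are equivalent vsetvl
instructions, or they are single SETs with identical sources and uses. */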
static bool
source_equal_p (insn_info *insn1, insn_info *insn2)
{
if (!insn1 || !insn2)
return false;
rtx_insn *rinsn1 = insn1->rtl ();
rtx_insn *rinsn2 = insn2->rtl ();
if (!rinsn1 || !rinsn2)
return false;
rtx note1 = find_reg_equal_equiv_note (rinsn1);
rtx note2 = find_reg_equal_equiv_note (rinsn2);
rtx single_set1 = single_set (rinsn1);
rtx single_set2 = single_set (rinsn2);
if (note1 && note2 && rtx_equal_p (note1, note2))
return true;
/* Since a vsetvl instruction is not a single SET,
we handle this case specially here. */
if (vsetvl_insn_p (insn1->rtl ()) && vsetvl_insn_p (insn2->rtl ()))
{
/* For example:
vsetvl1 a6,a5,e32m1
RVV 1 (use a6 as AVL)
vsetvl2 a5,a5,e8mf4
RVV 2 (use a5 as AVL)
We consider the AVLs of RVV 1 and RVV 2 to be the same so that we
can gain more optimization opportunities.
Note: insn1_info.compatible_avl_p (insn2_info)
makes sure there is no instruction between vsetvl1 and vsetvl2
that modifies a5, since their defs would be different if such an
instruction existed and compatible_avl_p would return false. */
vector_insn_info insn1_info, insn2_info;
insn1_info.parse_insn (insn1);
insn2_info.parse_insn (insn2);
if (insn1_info.same_vlmax_p (insn2_info)
&& insn1_info.compatible_avl_p (insn2_info))
return true;
}
/* We only handle the case where the AVL is set by instructions with no
side effects. */
if (!single_set1 || !single_set2)
return false;
if (!rtx_equal_p (SET_SRC (single_set1), SET_SRC (single_set2)))
return false;
gcc_assert (insn1->uses ().size () == insn2->uses ().size ());
for (size_t i = 0; i < insn1->uses ().size (); i++)
if (insn1->uses ()[i] != insn2->uses ()[i])
return false;
return true;
}
/* Helper function to get the single common real RTL source.
Return NULL if there is not a single real RTL source. */
static insn_info *
extract_single_source (set_info *set)
{
if (!set)
return nullptr;
if (set->insn ()->is_real ())
return set->insn ();
if (!set->insn ()->is_phi ())
return nullptr;
hash_set<set_info *> sets = get_all_sets (set, true, false, true);
insn_info *first_insn = (*sets.begin ())->insn ();
if (first_insn->is_artificial ())
return nullptr;
for (const set_info *set : sets)
{
/* If there is a head or end insn, we conservatively return
NULL so that the VSETVL pass will insert a vsetvl directly. */
if (set->insn ()->is_artificial ())
return nullptr;
if (!source_equal_p (set->insn (), first_insn))
return nullptr;
}
return first_insn;
}
avl_info::avl_info (const avl_info &other)
{
m_value = other.get_value ();
m_source = other.get_source ();
}
avl_info::avl_info (rtx value_in, set_info *source_in)
: m_value (value_in), m_source (source_in)
{}
bool
avl_info::single_source_equal_p (const avl_info &other) const
{
set_info *set1 = m_source;
set_info *set2 = other.get_source ();
insn_info *insn1 = extract_single_source (set1);
insn_info *insn2 = extract_single_source (set2);
if (!insn1 || !insn2)
return false;
return source_equal_p (insn1, insn2);
}
bool
avl_info::multiple_source_equal_p (const avl_info &other) const
{
/* TODO: We don't do much optimization here since analyzing
the PHI node is too complicated.
For example:
void f (void * restrict in, void * restrict out, int n, int m, int cond)
{
size_t vl;
switch (cond)
{
case 1:
vl = 100;
break;
case 2:
vl = *(size_t*)(in + 100);
break;
case 3:
{
size_t new_vl = *(size_t*)(in + 500);
size_t new_vl2 = *(size_t*)(in + 600);
vl = new_vl + new_vl2 + 777;
break;
}
default:
vl = 4000;
break;
}
for (size_t i = 0; i < n; i++)
{
vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
__riscv_vse8_v_i8mf8 (out + i, v, vl);
vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
__riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
}
size_t vl2;
switch (cond)
{
case 1:
vl2 = 100;
break;
case 2:
vl2 = *(size_t*)(in + 100);
break;
case 3:
{
size_t new_vl = *(size_t*)(in + 500);
size_t new_vl2 = *(size_t*)(in + 600);
vl2 = new_vl + new_vl2 + 777;
break;
}
default:
vl2 = 4000;
break;
}
for (size_t i = 0; i < m; i++)
{
vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl2);
__riscv_vse8_v_i8mf8 (out + i + 300, v, vl2);
vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 200, vl2);
__riscv_vse8_v_i8mf8 (out + i + 200, v2, vl2);
}
}
Such a case may not be worth optimizing since the code defining
vl and vl2 is redundant. */
return m_source == other.get_source ();
}
avl_info &
avl_info::operator= (const avl_info &other)
{
m_value = other.get_value ();
m_source = other.get_source ();
return *this;
}
bool
avl_info::operator== (const avl_info &other) const
{
if (!m_value)
return !other.get_value ();
if (!other.get_value ())
return false;
if (GET_CODE (m_value) != GET_CODE (other.get_value ()))
return false;
/* Handle CONST_INT AVL. */
if (CONST_INT_P (m_value))
return INTVAL (m_value) == INTVAL (other.get_value ());
/* Handle VLMAX AVL. */
if (vlmax_avl_p (m_value))
return vlmax_avl_p (other.get_value ());
/* If either source is an undef value, we consider them not equal. */
if (!m_source || !other.get_source ())
return false;
/* If both sources are single sources (each defined by a single real RTL
insn) and their definitions are the same, they are equal. */
if (single_source_equal_p (other))
return true;
return multiple_source_equal_p (other);
}
bool
avl_info::operator!= (const avl_info &other) const
{
return !(*this == other);
}
/* Initialize VL/VTYPE information. */
vl_vtype_info::vl_vtype_info (avl_info avl_in, uint8_t sew_in,
enum vlmul_type vlmul_in, uint8_t ratio_in,
bool ta_in, bool ma_in)
: m_avl (avl_in), m_sew (sew_in), m_vlmul (vlmul_in), m_ratio (ratio_in),
m_ta (ta_in), m_ma (ma_in)
{
gcc_assert (valid_sew_p (m_sew) && "Unexpected SEW");
}
bool
vl_vtype_info::operator== (const vl_vtype_info &other) const
{
return same_avl_p (other) && m_sew == other.get_sew ()
&& m_vlmul == other.get_vlmul () && m_ta == other.get_ta ()
&& m_ma == other.get_ma () && m_ratio == other.get_ratio ();
}
bool
vl_vtype_info::operator!= (const vl_vtype_info &other) const
{
return !(*this == other);
}
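/* Return true if the AVL is known to be non-zero: either a positive
immediate or the VLMAX placeholder. */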
bool
vl_vtype_info::has_non_zero_avl () const
{
if (has_avl_imm ())
return INTVAL (get_avl ()) > 0;
if (has_avl_reg ())
return vlmax_avl_p (get_avl ());
return false;
}
bool
vl_vtype_info::same_avl_p (const vl_vtype_info &other) const
{
/* We need to compare both the RTL and the SET: if both AVLs are
CONST_INT (e.g. const_int 3 and const_int 4), we compare the RTL;
if both AVLs are REGs with the same REGNO, we compare the SET. */
return get_avl () == other.get_avl ()
&& get_avl_source () == other.get_avl_source ();
}
bool
vl_vtype_info::same_vtype_p (const vl_vtype_info &other) const
{
return get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ()
&& get_ta () == other.get_ta () && get_ma () == other.get_ma ();
}
bool
vl_vtype_info::same_vlmax_p (const vl_vtype_info &other) const
{
return get_ratio () == other.get_ratio ();
}
/* Compare the compatibility of Dem1 and Dem2.
If Dem1 > Dem2, Dem1 has greater compatibility than Dem2,
meaning Dem1 is more easily compatible with others than Dem2,
i.e. Dem2 is stricter than Dem1.
For example, Dem1 (demand SEW + LMUL) > Dem2 (demand RATIO). */
bool
vector_insn_info::operator> (const vector_insn_info &other) const
{
if (other.compatible_p (static_cast<const vl_vtype_info &> (*this))
&& !this->compatible_p (static_cast<const vl_vtype_info &> (other)))
return true;
return false;
}
bool
vector_insn_info::operator>= (const vector_insn_info &other) const
{
if (*this > other)
return true;
if (*this == other)
return true;
if (!compatible_p (other))
return false;
if (!demand_p (DEMAND_AVL) && other.demand_p (DEMAND_AVL))
return false;
if (same_vlmax_p (other))
{
if (demand_p (DEMAND_RATIO) && !other.demand_p (DEMAND_RATIO)
&& (get_sew () != other.get_sew ()
|| get_vlmul () != other.get_vlmul ()))
return false;
if (get_sew () == other.get_sew () && get_vlmul () == other.get_vlmul ())
{
if (demand_p (DEMAND_RATIO) && !other.demand_p (DEMAND_RATIO))
return false;
}
}
if (!demand_p (DEMAND_TAIL_POLICY) && other.demand_p (DEMAND_TAIL_POLICY))
return false;
if (!demand_p (DEMAND_MASK_POLICY) && other.demand_p (DEMAND_MASK_POLICY))
return false;
return true;
}
bool
vector_insn_info::operator== (const vector_insn_info &other) const
{
gcc_assert (!uninit_p () && !other.uninit_p ()
&& "Uninitialization should not happen");
/* Empty is only equal to another Empty. */
if (empty_p ())
return other.empty_p ();
if (other.empty_p ())
return empty_p ();
/* Unknown is only equal to another Unknown. */
if (unknown_p ())
return other.unknown_p ();
if (other.unknown_p ())
return unknown_p ();
for (size_t i = 0; i < NUM_DEMAND; i++)
if (m_demands[i] != other.demand_p ((enum demand_type) i))
return false;
if (vector_config_insn_p (m_insn->rtl ())
|| vector_config_insn_p (other.get_insn ()->rtl ()))
if (m_insn != other.get_insn ())
return false;
if (!same_avl_p (other))
return false;
/* If the full VTYPE is valid, check that it is the same. */
return same_vtype_p (other);
}
void
vector_insn_info::parse_insn (rtx_insn *rinsn)
{
*this = vector_insn_info ();
if (!NONDEBUG_INSN_P (rinsn))
return;
if (!has_vtype_op (rinsn))
return;
m_state = VALID;
extract_insn_cached (rinsn);
const rtx avl = recog_data.operand[get_attr_vl_op_idx (rinsn)];
m_avl = avl_info (avl, nullptr);
m_sew = ::get_sew (rinsn);
m_vlmul = ::get_vlmul (rinsn);
m_ta = tail_agnostic_p (rinsn);
m_ma = mask_agnostic_p (rinsn);
}
void
vector_insn_info::parse_insn (insn_info *insn)
{
*this = vector_insn_info ();
/* Return if it is a debug insn, for consistency with optimize == 0. */
if (insn->is_debug_insn ())
return;
/* We set it as unknown since we don't know what will happen in a CALL or
ASM. */
if (insn->is_call () || insn->is_asm ())
{
set_unknown ();
return;
}
/* If this is something that updates VL/VTYPE that we don't know about, set
the state to unknown. */
if (!vector_config_insn_p (insn->rtl ())
&& (find_access (insn->defs (), VL_REGNUM)
|| find_access (insn->defs (), VTYPE_REGNUM)))
{
set_unknown ();
return;
}
if (!vector_config_insn_p (insn->rtl ()) && !has_vtype_op (insn->rtl ()))
return;
/* Warning: This function has to work on both the lowered (i.e. post
emit_local_forward_vsetvls) and pre-lowering forms. The main implication
of this is that it can't use the value of a SEW, VL, or Policy operand as
they might be stale after lowering. */
vl_vtype_info::operator= (get_vl_vtype_info (insn));
m_insn = insn;
m_state = VALID;
if (vector_config_insn_p (insn->rtl ()))
{
m_demands[DEMAND_AVL] = true;
m_demands[DEMAND_RATIO] = true;
return;
}
if (has_vl_op (insn->rtl ()))
m_demands[DEMAND_AVL] = true;
if (get_attr_ratio (insn->rtl ()) != INVALID_ATTRIBUTE)
m_demands[DEMAND_RATIO] = true;
else
{
/* TODO: By default, if it doesn't demand RATIO, we make it demand
both SEW && LMUL. Some instructions may demand SEW only and
ignore LMUL; we will fix that later. */
m_demands[DEMAND_SEW] = true;
m_demands[DEMAND_LMUL] = true;
}
if (get_attr_ta (insn->rtl ()) != INVALID_ATTRIBUTE)
m_demands[DEMAND_TAIL_POLICY] = true;
if (get_attr_ma (insn->rtl ()) != INVALID_ATTRIBUTE)
m_demands[DEMAND_MASK_POLICY] = true;
if (vector_config_insn_p (insn->rtl ()))
return;
if (!has_avl_reg () || !m_avl.get_source ()
|| !m_avl.get_source ()->insn ()->is_phi ())
return;
insn_info *def_insn = extract_single_source (m_avl.get_source ());
if (def_insn)
{
vector_insn_info new_info;
new_info.parse_insn (def_insn);
if (!same_vlmax_p (new_info))
return;
/* TODO: Currently, we don't forward AVL for non-VLMAX vsetvl. */
if (vlmax_avl_p (new_info.get_avl ()))
set_avl_info (new_info.get_avl_info ());
}
}
void
vector_insn_info::demand_vl_vtype ()
{
m_state = VALID;
m_demands[DEMAND_AVL] = true;
m_demands[DEMAND_SEW] = true;
m_demands[DEMAND_LMUL] = true;
m_demands[DEMAND_TAIL_POLICY] = true;
m_demands[DEMAND_MASK_POLICY] = true;
}
bool
vector_insn_info::compatible_p (const vector_insn_info &other) const
{
gcc_assert (valid_or_dirty_p () && other.valid_or_dirty_p ()
&& "Can't compare invalid demanded infos");
/* Check SEW. */
if (demand_p (DEMAND_SEW) && other.demand_p (DEMAND_SEW)
&& get_sew () != other.get_sew ())
return false;
/* Check LMUL. */
if (demand_p (DEMAND_LMUL) && other.demand_p (DEMAND_LMUL)
&& get_vlmul () != other.get_vlmul ())
return false;
/* Check RATIO. */
if (demand_p (DEMAND_RATIO) && other.demand_p (DEMAND_RATIO)
&& get_ratio () != other.get_ratio ())
return false;
if (demand_p (DEMAND_RATIO) && (other.get_sew () || other.get_vlmul ())
&& get_ratio () != other.get_ratio ())
return false;
if (other.demand_p (DEMAND_RATIO) && (get_sew () || get_vlmul ())
&& get_ratio () != other.get_ratio ())
return false;
if (demand_p (DEMAND_TAIL_POLICY) && other.demand_p (DEMAND_TAIL_POLICY)
&& get_ta () != other.get_ta ())
return false;
if (demand_p (DEMAND_MASK_POLICY) && other.demand_p (DEMAND_MASK_POLICY)
&& get_ma () != other.get_ma ())
return false;
if (demand_p (DEMAND_AVL) && other.demand_p (DEMAND_AVL))
return compatible_avl_p (other);
return true;
}
bool
vector_insn_info::compatible_avl_p (const vl_vtype_info &other) const
{
gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
if (!demand_p (DEMAND_AVL))
return true;
return get_avl_info () == other.get_avl_info ();
}
bool
vector_insn_info::compatible_avl_p (const avl_info &other) const
{
gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
gcc_assert (!unknown_p () && "Can't compare AVL in unknown state");
gcc_assert (demand_p (DEMAND_AVL) && "Can't compare AVL undemand state");
return get_avl_info () == other;
}
bool
vector_insn_info::compatible_vtype_p (const vl_vtype_info &other) const
{
gcc_assert (valid_or_dirty_p () && "Can't compare invalid vl_vtype_info");
gcc_assert (!unknown_p () && "Can't compare VTYPE in unknown state");
if (demand_p (DEMAND_SEW) && m_sew != other.get_sew ())
return false;
if (demand_p (DEMAND_LMUL) && m_vlmul != other.get_vlmul ())
return false;
if (demand_p (DEMAND_RATIO) && m_ratio != other.get_ratio ())
return false;
if (demand_p (DEMAND_TAIL_POLICY) && m_ta != other.get_ta ())
return false;
if (demand_p (DEMAND_MASK_POLICY) && m_ma != other.get_ma ())
return false;
return true;
}
/* Determine whether the vector instructions requirements represented by
Require are compatible with the previous vsetvli instruction represented
by this. INSN is the instruction whose requirements we're considering. */
bool
vector_insn_info::compatible_p (const vl_vtype_info &curr_info) const
{
gcc_assert (!uninit_p () && "Can't handle uninitialized info");
if (empty_p ())
return false;
/* Nothing is compatible with Unknown. */
if (unknown_p ())
return false;
/* If the instruction doesn't need an AVLReg and the SEW matches, consider
it compatible. */
if (!demand_p (DEMAND_AVL))
if (m_sew == curr_info.get_sew ())
return true;
return compatible_avl_p (curr_info) && compatible_vtype_p (curr_info);
}
bool
vector_insn_info::available_p (const vector_insn_info &other) const
{
if (*this >= other)
return true;
return false;
}
vector_insn_info
vector_insn_info::merge (const vector_insn_info &merge_info,
enum merge_type type = LOCAL_MERGE) const
{
if (!vsetvl_insn_p (get_insn ()->rtl ()))
gcc_assert (this->compatible_p (merge_info)
&& "Can't merge incompatible demanded infos");
vector_insn_info new_info;
new_info.demand_vl_vtype ();
if (type == LOCAL_MERGE)
{
/* For local backward data flow, we always update INSN && AVL to the
latest INSN and AVL so that we can keep track of the status of each
INSN. */
new_info.set_insn (merge_info.get_insn ());
if (merge_info.demand_p (DEMAND_AVL))
new_info.set_avl_info (merge_info.get_avl_info ());
else if (demand_p (DEMAND_AVL))
new_info.set_avl_info (get_avl_info ());
}
else
{
/* For global data flow, we should keep the original INSN and AVL if
they are valid, since we must preserve the life information of each
block. For example:
bb 0 -> bb 1.
We should keep the INSN && AVL of bb 1 since we will eventually emit a
vsetvl instruction according to the INSN and AVL of bb 1. */
new_info.set_insn (get_insn ());
if (demand_p (DEMAND_AVL))
new_info.set_avl_info (get_avl_info ());
else if (merge_info.demand_p (DEMAND_AVL))
new_info.set_avl_info (merge_info.get_avl_info ());
}
if (!demand_p (DEMAND_AVL) && !merge_info.demand_p (DEMAND_AVL))
new_info.undemand (DEMAND_AVL);
if (!demand_p (DEMAND_SEW) && !merge_info.demand_p (DEMAND_SEW))
new_info.undemand (DEMAND_SEW);
if (!demand_p (DEMAND_LMUL) && !merge_info.demand_p (DEMAND_LMUL))
new_info.undemand (DEMAND_LMUL);
if (!demand_p (DEMAND_TAIL_POLICY)
&& !merge_info.demand_p (DEMAND_TAIL_POLICY))
new_info.undemand (DEMAND_TAIL_POLICY);
if (!demand_p (DEMAND_MASK_POLICY)
&& !merge_info.demand_p (DEMAND_MASK_POLICY))
new_info.undemand (DEMAND_MASK_POLICY);
if (merge_info.demand_p (DEMAND_SEW))
new_info.set_sew (merge_info.get_sew ());
else if (demand_p (DEMAND_SEW))
new_info.set_sew (get_sew ());
if (merge_info.demand_p (DEMAND_LMUL))
new_info.set_vlmul (merge_info.get_vlmul ());
else if (demand_p (DEMAND_LMUL))
new_info.set_vlmul (get_vlmul ());
if (!new_info.demand_p (DEMAND_SEW) && !new_info.demand_p (DEMAND_LMUL))
{
if (demand_p (DEMAND_RATIO) || merge_info.demand_p (DEMAND_RATIO))
new_info.demand (DEMAND_RATIO);
/* Even though we don't demand SEW && VLMUL in this case, we still
* need their values. */
if (merge_info.demand_p (DEMAND_RATIO))
{
new_info.set_sew (merge_info.get_sew ());
new_info.set_vlmul (merge_info.get_vlmul ());
new_info.set_ratio (merge_info.get_ratio ());
}
else if (demand_p (DEMAND_RATIO))
{
new_info.set_sew (get_sew ());
new_info.set_vlmul (get_vlmul ());
new_info.set_ratio (get_ratio ());
}
}
else
{
/* When get_attr_ratio is invalid, this kind of instruction
doesn't care about the ratio. However, we still need this value
in the demand info backward analysis. */
new_info.set_ratio (
calculate_ratio (new_info.get_sew (), new_info.get_vlmul ()));
}
if (merge_info.demand_p (DEMAND_TAIL_POLICY))
new_info.set_ta (merge_info.get_ta ());
else if (demand_p (DEMAND_TAIL_POLICY))
new_info.set_ta (get_ta ());
else
new_info.set_ta (get_default_ta ());
if (merge_info.demand_p (DEMAND_MASK_POLICY))
new_info.set_ma (merge_info.get_ma ());
else if (demand_p (DEMAND_MASK_POLICY))
new_info.set_ma (get_ma ());
else
new_info.set_ma (get_default_ma ());
return new_info;
}
void
vector_insn_info::dump (FILE *file) const
{
fprintf (file, "[");
if (uninit_p ())
fprintf (file, "UNINITIALIZED,");
else if (valid_p ())
fprintf (file, "VALID,");
else if (unknown_p ())
fprintf (file, "UNKNOWN,");
else if (empty_p ())
fprintf (file, "EMPTY,");
else if (hard_empty_p ())
fprintf (file, "HARD_EMPTY,");
else if (dirty_with_killed_avl_p ())
fprintf (file, "DIRTY_WITH_KILLED_AVL,");
else
fprintf (file, "DIRTY,");
fprintf (file, "Demand field={%d(VL),", demand_p (DEMAND_AVL));
fprintf (file, "%d(SEW),", demand_p (DEMAND_SEW));
fprintf (file, "%d(LMUL),", demand_p (DEMAND_LMUL));
fprintf (file, "%d(RATIO),", demand_p (DEMAND_RATIO));
fprintf (file, "%d(TAIL_POLICY),", demand_p (DEMAND_TAIL_POLICY));
fprintf (file, "%d(MASK_POLICY)}\n", demand_p (DEMAND_MASK_POLICY));
fprintf (file, "AVL=");
print_rtl_single (file, get_avl ());
fprintf (file, "SEW=%d,", get_sew ());
fprintf (file, "VLMUL=%d,", get_vlmul ());
fprintf (file, "RATIO=%d,", get_ratio ());
fprintf (file, "TAIL_POLICY=%d,", get_ta ());
fprintf (file, "MASK_POLICY=%d", get_ma ());
fprintf (file, "]\n");
if (valid_p ())
{
if (get_insn ())
{
fprintf (file, "The real INSN=");
print_rtl_single (file, get_insn ()->rtl ());
}
}
}
vector_infos_manager::vector_infos_manager ()
{
vector_edge_list = nullptr;
vector_kill = nullptr;
vector_del = nullptr;
vector_insert = nullptr;
vector_antic = nullptr;
vector_transp = nullptr;
vector_comp = nullptr;
vector_avin = nullptr;
vector_avout = nullptr;
vector_insn_infos.safe_grow (get_max_uid ());
vector_block_infos.safe_grow (last_basic_block_for_fn (cfun));
if (!optimize)
{
basic_block cfg_bb;
rtx_insn *rinsn;
FOR_ALL_BB_FN (cfg_bb, cfun)
{
vector_block_infos[cfg_bb->index].local_dem = vector_insn_info ();
vector_block_infos[cfg_bb->index].reaching_out = vector_insn_info ();
FOR_BB_INSNS (cfg_bb, rinsn)
vector_insn_infos[INSN_UID (rinsn)].parse_insn (rinsn);
}
}
else
{
for (const bb_info *bb : crtl->ssa->bbs ())
{
vector_block_infos[bb->index ()].local_dem = vector_insn_info ();
vector_block_infos[bb->index ()].reaching_out = vector_insn_info ();
for (insn_info *insn : bb->real_insns ())
vector_insn_infos[insn->uid ()].parse_insn (insn);
vector_block_infos[bb->index ()].probability = profile_probability ();
}
}
}
void
vector_infos_manager::create_expr (vector_insn_info &info)
{
for (size_t i = 0; i < vector_exprs.length (); i++)
if (*vector_exprs[i] == info)
return;
vector_exprs.safe_push (&info);
}
size_t
vector_infos_manager::get_expr_id (const vector_insn_info &info) const
{
for (size_t i = 0; i < vector_exprs.length (); i++)
if (*vector_exprs[i] == info)
return i;
gcc_unreachable ();
}
auto_vec<size_t>
vector_infos_manager::get_all_available_exprs (
const vector_insn_info &info) const
{
auto_vec<size_t> available_list;
for (size_t i = 0; i < vector_exprs.length (); i++)
if (info.available_p (*vector_exprs[i]))
available_list.safe_push (i);
return available_list;
}
bool
vector_infos_manager::all_same_ratio_p (sbitmap bitdata) const
{
if (bitmap_empty_p (bitdata))
return false;
int ratio = -1;
unsigned int bb_index;
sbitmap_iterator sbi;
EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
{
if (ratio == -1)
ratio = vector_exprs[bb_index]->get_ratio ();
else if (vector_exprs[bb_index]->get_ratio () != ratio)
return false;
}
return true;
}
bool
vector_infos_manager::all_same_avl_p (const basic_block cfg_bb,
sbitmap bitdata) const
{
if (bitmap_empty_p (bitdata))
return false;
const auto &block_info = vector_block_infos[cfg_bb->index];
if (!block_info.local_dem.demand_p (DEMAND_AVL))
return true;
avl_info avl = block_info.local_dem.get_avl_info ();
unsigned int bb_index;
sbitmap_iterator sbi;
EXECUTE_IF_SET_IN_BITMAP (bitdata, 0, bb_index, sbi)
{
if (vector_exprs[bb_index]->get_avl_info () != avl)
return false;
}
return true;
}
size_t
vector_infos_manager::expr_set_num (sbitmap bitdata) const
{
size_t count = 0;
for (size_t i = 0; i < vector_exprs.length (); i++)
if (bitmap_bit_p (bitdata, i))
count++;
return count;
}
void
vector_infos_manager::release (void)
{
if (!vector_insn_infos.is_empty ())
vector_insn_infos.release ();
if (!vector_block_infos.is_empty ())
vector_block_infos.release ();
if (!vector_exprs.is_empty ())
vector_exprs.release ();
if (optimize > 0)
free_bitmap_vectors ();
}
void
vector_infos_manager::create_bitmap_vectors (void)
{
/* Create the bitmap vectors. */
vector_antic = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
vector_exprs.length ());
vector_transp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
vector_exprs.length ());
vector_comp = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
vector_exprs.length ());
vector_avin = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
vector_exprs.length ());
vector_avout = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
vector_exprs.length ());
vector_kill = sbitmap_vector_alloc (last_basic_block_for_fn (cfun),
vector_exprs.length ());
bitmap_vector_ones (vector_transp, last_basic_block_for_fn (cfun));
bitmap_vector_clear (vector_antic, last_basic_block_for_fn (cfun));
bitmap_vector_clear (vector_comp, last_basic_block_for_fn (cfun));
}
void
vector_infos_manager::free_bitmap_vectors (void)
{
/* Finished. Free up all the things we've allocated. */
free_edge_list (vector_edge_list);
if (vector_del)
sbitmap_vector_free (vector_del);
if (vector_insert)
sbitmap_vector_free (vector_insert);
if (vector_kill)
sbitmap_vector_free (vector_kill);
if (vector_antic)
sbitmap_vector_free (vector_antic);
if (vector_transp)
sbitmap_vector_free (vector_transp);
if (vector_comp)
sbitmap_vector_free (vector_comp);
if (vector_avin)
sbitmap_vector_free (vector_avin);
if (vector_avout)
sbitmap_vector_free (vector_avout);
vector_edge_list = nullptr;
vector_kill = nullptr;
vector_del = nullptr;
vector_insert = nullptr;
vector_antic = nullptr;
vector_transp = nullptr;
vector_comp = nullptr;
vector_avin = nullptr;
vector_avout = nullptr;
}
void
vector_infos_manager::dump (FILE *file) const
{
basic_block cfg_bb;
rtx_insn *rinsn;
fprintf (file, "\n");
FOR_ALL_BB_FN (cfg_bb, cfun)
{
fprintf (file, "Local vector info of <bb %d>:\n", cfg_bb->index);
fprintf (file, "<HEADER>=");
vector_block_infos[cfg_bb->index].local_dem.dump (file);
FOR_BB_INSNS (cfg_bb, rinsn)
{
if (!NONDEBUG_INSN_P (rinsn) || !has_vtype_op (rinsn))
continue;
fprintf (file, "<insn %d>=", INSN_UID (rinsn));
const auto &info = vector_insn_infos[INSN_UID (rinsn)];
info.dump (file);
}
fprintf (file, "<FOOTER>=");
vector_block_infos[cfg_bb->index].reaching_out.dump (file);
fprintf (file, "<Probability>=");
vector_block_infos[cfg_bb->index].probability.dump (file);
fprintf (file, "\n\n");
}
fprintf (file, "\n");
FOR_ALL_BB_FN (cfg_bb, cfun)
{
fprintf (file, "Local properties of <bb %d>:\n", cfg_bb->index);
fprintf (file, "<ANTLOC>=");
if (vector_antic == nullptr)
fprintf (file, "(nil)\n");
else
dump_bitmap_file (file, vector_antic[cfg_bb->index]);
fprintf (file, "<AVLOC>=");
if (vector_comp == nullptr)
fprintf (file, "(nil)\n");
else
dump_bitmap_file (file, vector_comp[cfg_bb->index]);
fprintf (file, "<TRANSP>=");
if (vector_transp == nullptr)
fprintf (file, "(nil)\n");
else
dump_bitmap_file (file, vector_transp[cfg_bb->index]);
fprintf (file, "<KILL>=");
if (vector_kill == nullptr)
fprintf (file, "(nil)\n");
else
dump_bitmap_file (file, vector_kill[cfg_bb->index]);
}
fprintf (file, "\n");
FOR_ALL_BB_FN (cfg_bb, cfun)
{
fprintf (file, "Global LCM (Lazy code motion) result of <bb %d>:\n",
cfg_bb->index);
fprintf (file, "<AVIN>=");
if (vector_avin == nullptr)
fprintf (file, "(nil)\n");
else
dump_bitmap_file (file, vector_avin[cfg_bb->index]);
fprintf (file, "<AVOUT>=");
if (vector_avout == nullptr)
fprintf (file, "(nil)\n");
else
dump_bitmap_file (file, vector_avout[cfg_bb->index]);
fprintf (file, "<DELETE>=");
if (vector_del == nullptr)
fprintf (file, "(nil)\n");
else
dump_bitmap_file (file, vector_del[cfg_bb->index]);
}
fprintf (file, "\nGlobal LCM (Lazy code motion) INSERT info:\n");
for (size_t i = 0; i < vector_exprs.length (); i++)
{
for (int ed = 0; ed < NUM_EDGES (vector_edge_list); ed++)
{
edge eg = INDEX_EDGE (vector_edge_list, ed);
if (bitmap_bit_p (vector_insert[ed], i))
fprintf (file,
"INSERT edge %d from bb %d to bb %d for VSETVL "
"expr[%ld]\n",
ed, eg->src->index, eg->dest->index, i);
}
}
}
const pass_data pass_data_vsetvl = {
RTL_PASS, /* type */
"vsetvl", /* name */
OPTGROUP_NONE, /* optinfo_flags */
TV_NONE, /* tv_id */
0, /* properties_required */
0, /* properties_provided */
0, /* properties_destroyed */
0, /* todo_flags_start */
0, /* todo_flags_finish */
};
class pass_vsetvl : public rtl_opt_pass
{
private:
class vector_infos_manager *m_vector_manager;
void simple_vsetvl (void) const;
void lazy_vsetvl (void);
/* Phase 1. */
void compute_local_backward_infos (const bb_info *);
/* Phase 2. */
bool need_vsetvl (const vector_insn_info &, const vector_insn_info &) const;
void transfer_before (vector_insn_info &, insn_info *) const;
void transfer_after (vector_insn_info &, insn_info *) const;
void emit_local_forward_vsetvls (const bb_info *);
/* Phase 3. */
enum fusion_type get_backward_fusion_type (const bb_info *,
const vector_insn_info &);
bool hard_empty_block_p (const bb_info *, const vector_insn_info &) const;
bool backward_demand_fusion (void);
bool forward_demand_fusion (void);
bool cleanup_illegal_dirty_blocks (void);
void demand_fusion (void);
/* Phase 4. */
void prune_expressions (void);
void compute_local_properties (void);
bool can_refine_vsetvl_p (const basic_block, const vector_insn_info &) const;
void refine_vsetvls (void) const;
void cleanup_vsetvls (void);
bool commit_vsetvls (void);
void pre_vsetvl (void);
/* Phase 5. */
void cleanup_insns (void) const;
/* Phase 6. */
void propagate_avl (void) const;
void init (void);
void done (void);
void compute_probabilities (void);
public:
pass_vsetvl (gcc::context *ctxt) : rtl_opt_pass (pass_data_vsetvl, ctxt) {}
/* opt_pass methods: */
virtual bool gate (function *) final override { return TARGET_VECTOR; }
virtual unsigned int execute (function *) final override;
}; // class pass_vsetvl
/* Simple vsetvl insertion for optimize == 0. */
void
pass_vsetvl::simple_vsetvl (void) const
{
if (dump_file)
fprintf (dump_file,
"\nEntering Simple VSETVL PASS and Handling %d basic blocks for "
"function:%s\n",
n_basic_blocks_for_fn (cfun), function_name (cfun));
basic_block cfg_bb;
rtx_insn *rinsn;
FOR_ALL_BB_FN (cfg_bb, cfun)
{
FOR_BB_INSNS (cfg_bb, rinsn)
{
if (!NONDEBUG_INSN_P (rinsn))
continue;
if (has_vtype_op (rinsn))
{
const auto info
= m_vector_manager->vector_insn_infos[INSN_UID (rinsn)];
emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_BEFORE, info,
NULL_RTX, rinsn);
}
}
}
}
/* Compute demanded information by backward data-flow analysis. */
void
pass_vsetvl::compute_local_backward_infos (const bb_info *bb)
{
vector_insn_info change;
change.set_empty ();
auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
block_info.reaching_out = change;
for (insn_info *insn : bb->reverse_real_nondebug_insns ())
{
auto &info = m_vector_manager->vector_insn_infos[insn->uid ()];
if (info.uninit_p ())
/* If it is uninitialized, propagate it directly. */
info = change;
else if (info.unknown_p ())
change = info;
else
{
gcc_assert (info.valid_p () && "Unexpected Invalid demanded info");
if (change.valid_p () && change.compatible_p (info))
info = change.merge (info);
change = info;
}
}
block_info.local_dem = change;
if (block_info.local_dem.empty_p ())
block_info.reaching_out = block_info.local_dem;
}
/* Return true if a vsetvl instruction is required to transition from
CURR_INFO to REQUIRE before INSN. */
bool
pass_vsetvl::need_vsetvl (const vector_insn_info &require,
const vector_insn_info &curr_info) const
{
if (!curr_info.valid_p () || curr_info.unknown_p () || curr_info.uninit_p ())
return true;
if (require.compatible_p (curr_info))
return false;
return true;
}
/* Given an incoming state reaching INSN, modifies that state so that it is
minimally compatible with INSN. The resulting state is guaranteed to be
semantically legal for INSN, but may not be the state requested by INSN. */
void
pass_vsetvl::transfer_before (vector_insn_info &info, insn_info *insn) const
{
if (!has_vtype_op (insn->rtl ()))
return;
const vector_insn_info require
= m_vector_manager->vector_insn_infos[insn->uid ()];
if (info.valid_p () && !need_vsetvl (require, info))
return;
info = require;
}
/* Given a state with which we evaluated INSN (see transfer_before above for
why this might be different from the state INSN requested), modify the
state to reflect the changes INSN might make.  */
void
pass_vsetvl::transfer_after (vector_insn_info &info, insn_info *insn) const
{
if (vector_config_insn_p (insn->rtl ()))
{
info = m_vector_manager->vector_insn_infos[insn->uid ()];
return;
}
/* TODO: Support fault-only-first loads that update VL in the future.  */
/* If this is something that updates VL/VTYPE that we don't know about, set
the state to unknown. */
if (insn->is_call () || insn->is_asm ()
|| find_access (insn->defs (), VL_REGNUM)
|| find_access (insn->defs (), VTYPE_REGNUM))
info = vector_insn_info::get_unknown ();
}
/* Emit vsetvl within each block by forward data-flow analysis. */
void
pass_vsetvl::emit_local_forward_vsetvls (const bb_info *bb)
{
auto &block_info = m_vector_manager->vector_block_infos[bb->index ()];
if (block_info.local_dem.empty_p ())
return;
vector_insn_info curr_info;
for (insn_info *insn : bb->real_nondebug_insns ())
{
const vector_insn_info prev_info = curr_info;
transfer_before (curr_info, insn);
if (has_vtype_op (insn->rtl ()))
{
if (static_cast<const vl_vtype_info &> (prev_info)
!= static_cast<const vl_vtype_info &> (curr_info))
{
const auto require
= m_vector_manager->vector_insn_infos[insn->uid ()];
if (!require.compatible_p (
static_cast<const vl_vtype_info &> (prev_info)))
insert_vsetvl (EMIT_BEFORE, insn->rtl (), require, prev_info);
}
}
transfer_after (curr_info, insn);
}
block_info.reaching_out = curr_info;
}
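/* Return the fusion type for backward propagating PROP into block BB:
INVALID_FUSION if the propagation is unsafe (e.g. it would cross the
definition of a register AVL), VALID_AVL_FUSION if the demanded info
can be propagated as-is, or KILLED_AVL_FUSION if the AVL register is
killed inside BB but we still allow the (dirty) fusion.  */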
enum fusion_type
pass_vsetvl::get_backward_fusion_type (const bb_info *bb,
const vector_insn_info &prop)
{
insn_info *insn = prop.get_insn ();
/* TODO: We don't backward propagate the explicit VSETVL here
since we will change the vsetvl and vsetvlmax intrinsics into
operations without side effects, which GCC's generic passes can
then move to their optimal locations.  We would only need to
support this backward propagation if the vsetvl intrinsics had
side effects.  */
if (vsetvl_insn_p (insn->rtl ()))
return INVALID_FUSION;
gcc_assert (has_vtype_op (insn->rtl ()));
rtx reg = NULL_RTX;
/* Case 1: Don't need VL. Just let it backward propagate. */
if (!has_vl_op (insn->rtl ()))
return VALID_AVL_FUSION;
else
{
/* Case 2: CONST_INT AVL, we don't need to check def. */
if (prop.has_avl_imm ())
return VALID_AVL_FUSION;
else
{
/* Case 3: REG AVL, we need to check the distance of def to make
sure we won't backward propagate over the def. */
gcc_assert (prop.has_avl_reg ());
if (vlmax_avl_p (prop.get_avl ()))
/* Check VL operand for vsetvl vl,zero. */
reg = get_vl (insn->rtl ());
else
/* Check AVL operand for vsetvl zero,avl. */
reg = get_avl (insn->rtl ());
}
}
gcc_assert (reg);
def_info *def = find_access (insn->uses (), REGNO (reg))->def ();
if (!def->insn ()->is_phi () && def->insn ()->bb () == insn->bb ())
return INVALID_FUSION;
hash_set<set_info *> sets
= get_all_sets (prop.get_avl_source (), true, true, true);
if (any_set_in_bb_p (sets, insn->bb ()))
return INVALID_FUSION;
if (vlmax_avl_p (prop.get_avl ()))
{
if (find_reg_killed_by (bb, reg))
return INVALID_FUSION;
else
return VALID_AVL_FUSION;
}
/* By default, we always enable backward fusion so that we can
gain more optimizations. */
if (!find_reg_killed_by (bb, reg))
return VALID_AVL_FUSION;
return KILLED_AVL_FUSION;
}
/* get_backward_fusion_type enables almost all cases; this function
disables the backward fusion by changing dirty blocks into hard empty
blocks in the forward dataflow.  This gives us more accurate
optimization.  */
bool
pass_vsetvl::hard_empty_block_p (const bb_info *bb,
const vector_insn_info &info) const
{
if (!info.dirty_p () || !info.has_avl_reg ())
return false;
basic_block cfg_bb = bb->cfg_bb ();
sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
rtx avl = vlmax_avl_p (info.get_avl ()) ? get_vl (info.get_insn ()->rtl ())
: get_avl (info.get_insn ()->rtl ());
insn_info *insn = info.get_insn ();
set_info *set = find_access (insn->uses (), REGNO (avl))->def ();
hash_set<set_info *> sets = get_all_sets (set, true, false, false);
hash_set<basic_block> pred_cfg_bbs = get_all_predecessors (cfg_bb);
if (find_reg_killed_by (bb, avl))
{
/* Condition 1:
A dirty block with a killed AVL means that empty blocks (no RVV
instructions) were polluted as dirty blocks whose current AVL
value is killed.  For example:
bb 0:
...
bb 1:
def a5
bb 2:
RVV (use a5)
In backward dataflow, we pollute BB0 and BB1 as dirty with AVL
killed, since a5 is killed in BB1.
In this case, let's take a look at this example:
bb 3: bb 4:
def3 a5 def4 a5
bb 5: bb 6:
def1 a5 def2 a5
\ /
\ /
\ /
\ /
bb 7:
RVV (use a5)
In this case, we can pollute BB5 and BB6 as dirty if the get-def
of a5 from the RVV instruction in BB7 yields def1 in BB5 and
def2 in BB6, so we can return false early here for
HARD_EMPTY_BLOCK_P.
However, we are not sure whether BB3 and BB4 can be polluted as
dirty with AVL killed, so we can't return false for
HARD_EMPTY_BLOCK_P here: that would be too early and could
potentially produce issues.  */
gcc_assert (info.dirty_with_killed_avl_p ());
if (info.get_avl_source ()
&& get_same_bb_set (sets, bb->cfg_bb ()) == info.get_avl_source ())
return false;
}
/* Condition 2:
Suppress the VL/VTYPE info backward propagation too early:
________
| BB0 |
|________|
|
____|____
| BB1 |
|________|
In this case, suppose BB 1 has multiple predecessors, BB 0 is one
of them. BB1 has VL/VTYPE info (may be VALID or DIRTY) to backward
propagate.
The AVIN (available in) computed by LCM is empty only in these
2 circumstances:
1. all predecessors of BB1 are empty (not VALID
and can not be polluted in the backward fusion flow)
2. the VL/VTYPE infos of BB1's predecessors conflict.
We keep the block as dirty in the 2nd circumstance and set it as
HARD_EMPTY (can not be polluted as DIRTY any more) in the 1st.
We don't backward propagate in the 1st circumstance since there
is no VALID RVV instruction and no blocks polluted (made dirty)
by backward propagation from other following blocks, so keeping
it as dirty would be meaningless.
However, we keep it as dirty in the 2nd circumstance since there
are VALID or dirty blocks in the predecessors, and we can still
gain benefits and optimization opportunities.  For example, in this case:
for (size_t i = 0; i < n; i++)
{
if (i != cond) {
vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
*(vint8mf8_t*)(out + i + 100) = v;
} else {
vbool1_t v = *(vbool1_t*)(in + i + 400);
*(vbool1_t*)(out + i + 400) = v;
}
}
The VL/VTYPE infos of the if and else branches conflict, which
produces an empty AVIN in the LCM result, but we can still keep
the dirty blocks: if *(i != cond)* is very unlikely, we can
preset the vsetvl (VL/VTYPE) info from the else branch (static
probability model).
We don't want to backward propagate VL/VTYPE information too early,
which would not be optimal and may potentially produce issues.  */
if (bitmap_empty_p (avin))
{
bool hard_empty_p = true;
for (const basic_block pred_cfg_bb : pred_cfg_bbs)
{
if (pred_cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
continue;
sbitmap avout = m_vector_manager->vector_avout[pred_cfg_bb->index];
if (!bitmap_empty_p (avout))
{
hard_empty_p = false;
break;
}
}
if (hard_empty_p)
return true;
}
edge e;
edge_iterator ei;
bool has_avl_killed_insn_p = false;
FOR_EACH_EDGE (e, ei, cfg_bb->succs)
{
const auto block_info
= m_vector_manager->vector_block_infos[e->dest->index];
if (block_info.local_dem.dirty_with_killed_avl_p ())
{
has_avl_killed_insn_p = true;
break;
}
}
if (!has_avl_killed_insn_p)
return false;
bool any_set_in_bbs_p = false;
for (const basic_block pred_cfg_bb : pred_cfg_bbs)
{
insn_info *def_insn = extract_single_source (set);
if (def_insn)
{
/* Condition 3:
Case 1: Case 2:
bb 0: bb 0:
def a5 101 ...
bb 1: bb 1:
... ...
bb 2: bb 2:
RVV 1 (use a5 with TAIL ANY) ...
bb 3: bb 3:
def a5 101 def a5 101
bb 4: bb 4:
... ...
bb 5: bb 5:
RVV 2 (use a5 with TU) RVV 1 (use a5)
Case 1: We can pollute BB3, BB2, BB1 and BB0 all as dirty blocks
with killed AVL so that we can merge the TU demand info from RVV 2
into RVV 1 and elide the vsetvl instruction in BB5.
TODO: We only optimize for a single-source def since a
multiple-source def is quite complicated.
Case 2: We can only pollute BB3 as dirty; it has already been
accepted in Condition 2, and we can't pollute BB3, BB2, BB1 and
BB0 as in Case 1.  */
insn_info *last_killed_insn
= find_reg_killed_by (crtl->ssa->bb (pred_cfg_bb), avl);
if (!last_killed_insn || pred_cfg_bb == def_insn->bb ()->cfg_bb ())
continue;
if (source_equal_p (last_killed_insn, def_insn))
{
any_set_in_bbs_p = true;
break;
}
}
else
{
/* Condition 4:
bb 0: bb 1: bb 3:
def1 a5 def2 a5 ...
\ / /
\ / /
\ / /
\ / /
bb 4: /
| /
| /
bb 5: /
| /
| /
bb 6: /
| /
| /
bb 8:
RVV 1 (use a5)
If we take the (REAL) get-def of a5 from the RVV 1 instruction,
we get def1 from BB0 and def2 from BB1.  So we pollute BB6, BB5,
BB4, BB0 and BB1 as DIRTY and set BB3 as HARD_EMPTY so that we
won't propagate AVL to BB3.  */
if (any_set_in_bb_p (sets, crtl->ssa->bb (pred_cfg_bb)))
{
any_set_in_bbs_p = true;
break;
}
}
}
if (!any_set_in_bbs_p)
return true;
return false;
}
/* Compute global backward demanded info. */
bool
pass_vsetvl::backward_demand_fusion (void)
{
/* We compute global infos by backward propagation.
We want to have better performance in the following cases:
1. for (size_t i = 0; i < n; i++) {
if (i != cond) {
vint8mf8_t v = *(vint8mf8_t*)(in + i + 100);
*(vint8mf8_t*)(out + i + 100) = v;
} else {
vbool1_t v = *(vbool1_t*)(in + i + 400);
*(vbool1_t*)(out + i + 400) = v;
}
}
Since we don't have any RVV instruction in the preceding blocks,
LCM fails to optimize such a case.  We want to backward propagate
the demanded info into those empty blocks so that we get better
LCM results.
2. bb 0:
vsetvl e8,mf8 (demand RATIO)
bb 1:
vsetvl e32,mf2 (demand SEW and LMUL)
We backward propagate the demand into the first VSETVL so that it
becomes e32,mf2, allowing LCM to eliminate the second VSETVL.  */
bool changed_p = false;
for (const bb_info *bb : crtl->ssa->reverse_bbs ())
{
basic_block cfg_bb = bb->cfg_bb ();
const auto &curr_block_info
= m_vector_manager->vector_block_infos[cfg_bb->index];
const auto &prop = curr_block_info.local_dem;
/* If there is nothing to propagate, just skip it. */
if (!prop.valid_or_dirty_p ())
continue;
if (!backward_propagate_worthwhile_p (cfg_bb, curr_block_info))
continue;
edge e;
edge_iterator ei;
/* Backward propagate to each predecessor. */
FOR_EACH_EDGE (e, ei, cfg_bb->preds)
{
auto &block_info
= m_vector_manager->vector_block_infos[e->src->index];
/* We don't propagate through complex edges (e.g. abnormal/EH edges).  */
if (e->flags & EDGE_COMPLEX)
continue;
if (e->src->index == ENTRY_BLOCK_PTR_FOR_FN (cfun)->index)
continue;
if (block_info.reaching_out.unknown_p ())
continue;
else if (block_info.reaching_out.hard_empty_p ())
continue;
else if (block_info.reaching_out.empty_p ())
{
enum fusion_type type
= get_backward_fusion_type (crtl->ssa->bb (e->src), prop);
if (type == INVALID_FUSION)
continue;
block_info.reaching_out = prop;
block_info.reaching_out.set_dirty (type);
if (prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
{
hash_set<set_info *> sets
= get_all_sets (prop.get_avl_source (), true, true, true);
set_info *set = get_same_bb_set (sets, e->src);
if (set)
block_info.reaching_out.set_avl_info (
avl_info (prop.get_avl (), set));
}
block_info.local_dem = block_info.reaching_out;
block_info.probability = curr_block_info.probability;
changed_p = true;
}
else if (block_info.reaching_out.dirty_p ())
{
/* DIRTY -> DIRTY or VALID -> DIRTY. */
vector_insn_info new_info;
if (block_info.reaching_out.compatible_p (prop))
{
if (block_info.reaching_out >= prop)
continue;
new_info = block_info.reaching_out.merge (prop, GLOBAL_MERGE);
new_info.set_dirty (
block_info.reaching_out.dirty_with_killed_avl_p ());
block_info.probability += curr_block_info.probability;
}
else
{
if (curr_block_info.probability > block_info.probability)
{
enum fusion_type type
= get_backward_fusion_type (crtl->ssa->bb (e->src),
prop);
if (type == INVALID_FUSION)
continue;
new_info = prop;
new_info.set_dirty (type);
block_info.probability = curr_block_info.probability;
}
else
continue;
}
block_info.local_dem = new_info;
block_info.reaching_out = new_info;
changed_p = true;
}
else
{
/* We not only change the info during backward propagation,
but also change the VSETVL instruction. */
gcc_assert (block_info.reaching_out.valid_p ());
hash_set<set_info *> sets
= get_all_sets (prop.get_avl_source (), true, false, false);
set_info *set = get_same_bb_set (sets, e->src);
if (vsetvl_insn_p (block_info.reaching_out.get_insn ()->rtl ())
&& prop.has_avl_reg () && !vlmax_avl_p (prop.get_avl ()))
{
if (!block_info.reaching_out.same_vlmax_p (prop))
continue;
if (block_info.reaching_out.same_vtype_p (prop))
continue;
if (!set)
continue;
if (set->insn () != block_info.reaching_out.get_insn ())
continue;
}
else
{
if (!block_info.reaching_out.compatible_p (prop))
continue;
if (block_info.reaching_out >= prop)
continue;
}
vector_insn_info be_merged = block_info.reaching_out;
if (block_info.local_dem == block_info.reaching_out)
be_merged = block_info.local_dem;
vector_insn_info new_info = be_merged.merge (prop, GLOBAL_MERGE);
if (curr_block_info.probability > block_info.probability)
block_info.probability = curr_block_info.probability;
change_vsetvl_insn (new_info.get_insn (), new_info);
if (block_info.local_dem == block_info.reaching_out)
block_info.local_dem = new_info;
block_info.reaching_out = new_info;
changed_p = true;
}
}
}
return changed_p;
}
/* Compute global forward demanded info. */
bool
pass_vsetvl::forward_demand_fusion (void)
{
/* Enhance the global information propagation for cases that the
backward propagation misses.
Consider such a case:
bb0
(TU)
/ \
bb1 bb2
(TU) (ANY)
existing edge -----> \ / (TU) <----- LCM creates this edge.
bb3
(TU)
Based on this situation, LCM fails to eliminate the VSETVL instruction and
inserts an edge from bb2 to bb3, since we can't backward propagate bb3 into
bb2.  To avoid this confusing LCM result and non-optimal codegen, we should
forward propagate the information from bb0 to bb2, which is friendly to LCM.  */
bool changed_p = false;
for (const bb_info *bb : crtl->ssa->bbs ())
{
basic_block cfg_bb = bb->cfg_bb ();
const auto &prop
= m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
/* If there is nothing to propagate, just skip it. */
if (!prop.valid_or_dirty_p ())
continue;
if (cfg_bb == ENTRY_BLOCK_PTR_FOR_FN (cfun))
continue;
edge e;
edge_iterator ei;
/* Forward propagate to each successor. */
FOR_EACH_EDGE (e, ei, cfg_bb->succs)
{
auto &local_dem
= m_vector_manager->vector_block_infos[e->dest->index].local_dem;
auto &reaching_out
= m_vector_manager->vector_block_infos[e->dest->index].reaching_out;
/* Obviously, we don't need to propagate the block into itself.  */
if (e->dest->index == cfg_bb->index)
continue;
/* We don't propagate through complex edges (e.g. abnormal/EH edges).  */
if (e->flags & EDGE_COMPLEX)
continue;
if (e->dest->index == EXIT_BLOCK_PTR_FOR_FN (cfun)->index)
continue;
/* If there is nothing to propagate, just skip it. */
if (!local_dem.valid_or_dirty_p ())
continue;
if (local_dem >= prop)
continue;
if (!local_dem.compatible_p (prop))
continue;
vector_insn_info new_info = local_dem.merge (prop, GLOBAL_MERGE);
new_info.set_insn (local_dem.get_insn ());
if (local_dem.dirty_p ())
{
gcc_assert (local_dem == reaching_out);
new_info.set_dirty (local_dem.dirty_with_killed_avl_p ());
local_dem = new_info;
reaching_out = local_dem;
}
else
{
if (reaching_out == local_dem)
reaching_out = new_info;
local_dem = new_info;
change_vsetvl_insn (local_dem.get_insn (), new_info);
}
auto &prob
= m_vector_manager->vector_block_infos[e->dest->index].probability;
auto &curr_prob
= m_vector_manager->vector_block_infos[cfg_bb->index].probability;
prob = curr_prob * e->probability;
changed_p = true;
}
}
return changed_p;
}
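/* Phase 3: Backward and forward demanded info propagation and fusion
across blocks.  Iterate both directions until a fixed point is
reached, then iterate an availability analysis that demotes illegal
dirty blocks into hard empty blocks.  */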
void
pass_vsetvl::demand_fusion (void)
{
bool changed_p = true;
while (changed_p)
{
changed_p = false;
/* To optimize the case like this:
void f2 (int8_t * restrict in, int8_t * restrict out, int n, int cond)
{
size_t vl = 101;
for (size_t i = 0; i < n; i++)
{
vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i + 300, vl);
__riscv_vse8_v_i8mf8 (out + i + 300, v, vl);
}
for (size_t i = 0; i < n; i++)
{
vint8mf8_t v = __riscv_vle8_v_i8mf8 (in + i, vl);
__riscv_vse8_v_i8mf8 (out + i, v, vl);
vint8mf8_t v2 = __riscv_vle8_v_i8mf8_tu (v, in + i + 100, vl);
__riscv_vse8_v_i8mf8 (out + i + 100, v2, vl);
}
}
bb 0: li a5, 101 (killed avl)
...
bb 1: vsetvli zero, a5, ta
...
bb 2: li a5, 101 (killed avl)
...
bb 3: vsetvli zero, a3, tu
We want to fuse the VSETVLI instructions in bb 1 and bb 3.  However,
there is an AVL-killing instruction in bb 2, so we can't backward
fuse bb 3 or forward fuse bb 1 arbitrarily.  We need the availability
information of each block to handle such cases.  */
changed_p |= backward_demand_fusion ();
changed_p |= forward_demand_fusion ();
}
changed_p = true;
while (changed_p)
{
changed_p = false;
prune_expressions ();
m_vector_manager->create_bitmap_vectors ();
compute_local_properties ();
compute_available (m_vector_manager->vector_comp,
m_vector_manager->vector_kill,
m_vector_manager->vector_avout,
m_vector_manager->vector_avin);
changed_p |= cleanup_illegal_dirty_blocks ();
m_vector_manager->free_bitmap_vectors ();
if (!m_vector_manager->vector_exprs.is_empty ())
m_vector_manager->vector_exprs.release ();
}
if (dump_file)
{
fprintf (dump_file, "\n\nDirty blocks list: ");
for (const bb_info *bb : crtl->ssa->bbs ())
if (m_vector_manager->vector_block_infos[bb->index ()]
.reaching_out.dirty_p ())
fprintf (dump_file, "%d ", bb->index ());
fprintf (dump_file, "\n\n");
}
}
/* Cleanup illegal dirty blocks. */
bool
pass_vsetvl::cleanup_illegal_dirty_blocks (void)
{
bool changed_p = false;
for (const bb_info *bb : crtl->ssa->bbs ())
{
basic_block cfg_bb = bb->cfg_bb ();
const auto &prop
= m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
/* If there is nothing to cleanup, just skip it. */
if (!prop.valid_or_dirty_p ())
continue;
if (hard_empty_block_p (bb, prop))
{
m_vector_manager->vector_block_infos[cfg_bb->index].local_dem
= vector_insn_info::get_hard_empty ();
m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out
= vector_insn_info::get_hard_empty ();
changed_p = true;
continue;
}
}
return changed_p;
}
/* Assemble the candidate expressions for LCM.  */
void
pass_vsetvl::prune_expressions (void)
{
for (const bb_info *bb : crtl->ssa->bbs ())
{
if (m_vector_manager->vector_block_infos[bb->index ()]
.local_dem.valid_or_dirty_p ())
m_vector_manager->create_expr (
m_vector_manager->vector_block_infos[bb->index ()].local_dem);
if (m_vector_manager->vector_block_infos[bb->index ()]
.reaching_out.valid_or_dirty_p ())
m_vector_manager->create_expr (
m_vector_manager->vector_block_infos[bb->index ()].reaching_out);
}
if (dump_file)
{
fprintf (dump_file, "\nThe total VSETVL expression num = %d\n",
m_vector_manager->vector_exprs.length ());
fprintf (dump_file, "Expression List:\n");
for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
{
fprintf (dump_file, "Expr[%ld]:\n", i);
m_vector_manager->vector_exprs[i]->dump (dump_file);
fprintf (dump_file, "\n");
}
}
}
/* Compute the local properties of each recorded expression.
Local properties are those that are defined by the block, irrespective of
other blocks.
An expression is transparent in a block if its operands are not modified
in the block.
An expression is computed (locally available) in a block if it is computed
at least once and the expression would contain the same value if the
computation was moved to the end of the block.
An expression is locally anticipatable in a block if it is computed at
least once and the expression would contain the same value if the
computation was moved to the beginning of the block.  */
void
pass_vsetvl::compute_local_properties (void)
{
/* - If T is locally available at the end of a block, then T' must be
available at the end of the same block.  Since some optimization has
occurred earlier, T' might not be locally available; however, it must
have been previously computed on all paths.  As a formula, T in AVLOC(B)
implies that T' is in AVOUT(B).
An "available occurrence" is one that is the last occurrence in the
basic block and the operands are not modified by following statements in
the basic block [including this insn].
- If T is locally anticipatable at the beginning of a block, then either
T' is locally anticipatable or it is already available from previous
blocks.  As a formula, T in ANTLOC(B) implies that T' is in ANTLOC(B)
or in AVIN(B).
An "anticipatable occurrence" is one that is the first occurrence in the
basic block, the operands are not modified in the basic block prior
to the occurrence and the output is not used between the start of
the block and the occurrence. */
basic_block cfg_bb;
for (const bb_info *bb : crtl->ssa->bbs ())
{
unsigned int curr_bb_idx = bb->index ();
const auto local_dem
= m_vector_manager->vector_block_infos[curr_bb_idx].local_dem;
const auto reaching_out
= m_vector_manager->vector_block_infos[curr_bb_idx].reaching_out;
/* Compute transparent. */
for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
{
const vector_insn_info *expr = m_vector_manager->vector_exprs[i];
if (local_dem.real_dirty_p () || local_dem.valid_p ()
|| local_dem.unknown_p ()
|| has_vsetvl_killed_avl_p (bb, local_dem))
bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx], i);
/* FIXME: Here we set the block as non-transparent (killed) if there
is an instruction that kills the value of the AVL, following the
definition of local transparency.  This is correct for the following
case:
bb 0 (Loop label):
vsetvl zero, a5, e8, mf8
bb 1:
def a5
bb 2:
branch bb 0 (Loop label).
In this case, we know there is a loop bb 0->bb 1->bb 2.  According
to the LCM definition, it is correct to set vsetvl zero, a5, e8,
mf8 as non-transparent (killed) so that LCM will not hoist it
outside bb 0.
However, such a conservative configuration forbids optimization
in some unlucky cases.  For example:
bb 0:
li a5, 101
bb 1:
vsetvl zero, a5, e8, mf8
bb 2:
li a5, 101
bb 3:
vsetvl zero, a5, e8, mf8.
So we also relax 'def a5' to be transparent to gain more
optimizations, as long as none of the real def insns of the AVL
come from this block.  This configuration may still miss some
optimization opportunities.  */
if (find_reg_killed_by (bb, expr->get_avl ()))
{
hash_set<set_info *> sets
= get_all_sets (expr->get_avl_source (), true, false, false);
if (any_set_in_bb_p (sets, bb))
bitmap_clear_bit (m_vector_manager->vector_transp[curr_bb_idx],
i);
}
}
/* Compute anticipatable occurrences. */
if (local_dem.valid_p () || local_dem.real_dirty_p ()
|| (has_vsetvl_killed_avl_p (bb, local_dem)
&& vlmax_avl_p (local_dem.get_avl ())))
if (anticipatable_occurrence_p (bb, local_dem))
bitmap_set_bit (m_vector_manager->vector_antic[curr_bb_idx],
m_vector_manager->get_expr_id (local_dem));
/* Compute available occurrences. */
if (reaching_out.valid_or_dirty_p ())
{
auto_vec<size_t> available_list
= m_vector_manager->get_all_available_exprs (reaching_out);
for (size_t i = 0; i < available_list.length (); i++)
{
const vector_insn_info *expr
= m_vector_manager->vector_exprs[available_list[i]];
if (reaching_out.real_dirty_p ()
|| has_vsetvl_killed_avl_p (bb, reaching_out)
|| available_occurrence_p (bb, *expr))
bitmap_set_bit (m_vector_manager->vector_comp[curr_bb_idx],
available_list[i]);
}
}
}
/* Compute kill for each basic block using:
~(TRANSP | COMP)
*/
FOR_EACH_BB_FN (cfg_bb, cfun)
{
bitmap_ior (m_vector_manager->vector_kill[cfg_bb->index],
m_vector_manager->vector_transp[cfg_bb->index],
m_vector_manager->vector_comp[cfg_bb->index]);
bitmap_not (m_vector_manager->vector_kill[cfg_bb->index],
m_vector_manager->vector_kill[cfg_bb->index]);
}
FOR_EACH_BB_FN (cfg_bb, cfun)
{
edge e;
edge_iterator ei;
/* If the current block is the destination of an abnormal edge, we
kill all trapping (for PRE) and memory (for hoist) expressions
because we won't be able to properly place the instruction on
the edge. So make them neither anticipatable nor transparent.
This is fairly conservative.
??? For hoisting it may be necessary to check for set-and-jump
instructions here, not just for abnormal edges. The general problem
is that when an expression cannot be placed right at the end of
a basic block we should account for any side effects of a subsequent
jump instruction that could clobber the expression.  It would
be best to implement this check along the lines of
should_hoist_expr_to_dom where the target block is already known
and, hence, there's no need to conservatively prune expressions on
"intermediate" set-and-jump instructions. */
FOR_EACH_EDGE (e, ei, cfg_bb->preds)
if (e->flags & EDGE_COMPLEX)
{
bitmap_clear (m_vector_manager->vector_antic[cfg_bb->index]);
bitmap_clear (m_vector_manager->vector_transp[cfg_bb->index]);
}
}
}
/* Return true if VSETVL in the block can be refined as vsetvl zero,zero. */
bool
pass_vsetvl::can_refine_vsetvl_p (const basic_block cfg_bb,
const vector_insn_info &info) const
{
if (!m_vector_manager->all_same_ratio_p (
m_vector_manager->vector_avin[cfg_bb->index]))
return false;
if (!m_vector_manager->all_same_avl_p (
cfg_bb, m_vector_manager->vector_avin[cfg_bb->index]))
return false;
size_t expr_id
= bitmap_first_set_bit (m_vector_manager->vector_avin[cfg_bb->index]);
if (!m_vector_manager->vector_exprs[expr_id]->same_vlmax_p (info))
return false;
if (!m_vector_manager->vector_exprs[expr_id]->compatible_avl_p (info))
return false;
edge e;
edge_iterator ei;
bool all_valid_p = true;
FOR_EACH_EDGE (e, ei, cfg_bb->preds)
{
if (bitmap_empty_p (m_vector_manager->vector_avout[e->src->index]))
{
all_valid_p = false;
break;
}
}
if (!all_valid_p)
return false;
return true;
}
/* Optimize the case like this:
bb 0:
vsetvl 0 a5,zero,e8,mf8
insn 0 (demand SEW + LMUL)
bb 1:
vsetvl 1 a5,zero,e16,mf4
insn 1 (demand SEW + LMUL)
In this case, we should be able to refine
vsetvl 1 into vsetvl zero, zero according to the AVIN.  */
void
pass_vsetvl::refine_vsetvls (void) const
{
basic_block cfg_bb;
FOR_EACH_BB_FN (cfg_bb, cfun)
{
auto info = m_vector_manager->vector_block_infos[cfg_bb->index].local_dem;
insn_info *insn = info.get_insn ();
if (!info.valid_p ())
continue;
rtx_insn *rinsn = insn->rtl ();
if (!can_refine_vsetvl_p (cfg_bb, info))
continue;
if (!vector_config_insn_p (rinsn))
rinsn = PREV_INSN (rinsn);
rtx new_pat = gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, info, NULL_RTX);
change_insn (rinsn, new_pat);
}
}
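/* Remove the vsetvls that the LCM DELETE set marks as redundant.  If
the reaching-out info of the block is dirty (no vsetvl was actually
emitted for it), demote it to unknown instead of deleting an insn.  */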
void
pass_vsetvl::cleanup_vsetvls ()
{
basic_block cfg_bb;
FOR_EACH_BB_FN (cfg_bb, cfun)
{
auto &info
= m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
gcc_assert (m_vector_manager->expr_set_num (
m_vector_manager->vector_del[cfg_bb->index])
<= 1);
for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
{
if (bitmap_bit_p (m_vector_manager->vector_del[cfg_bb->index], i))
{
if (info.dirty_p ())
info.set_unknown ();
else
{
const auto dem
= m_vector_manager->vector_block_infos[cfg_bb->index]
.local_dem;
gcc_assert (dem == *m_vector_manager->vector_exprs[i]);
insn_info *insn = dem.get_insn ();
gcc_assert (insn && insn->rtl ());
rtx_insn *rinsn;
if (vector_config_insn_p (insn->rtl ()))
rinsn = insn->rtl ();
else
{
gcc_assert (has_vtype_op (insn->rtl ()));
rinsn = PREV_INSN (insn->rtl ());
gcc_assert (
vector_config_insn_p (PREV_INSN (insn->rtl ())));
}
eliminate_insn (rinsn);
}
}
}
}
}
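/* Materialize the LCM INSERT sets: emit the required vsetvl on each
edge selected by LCM, and emit a vsetvl at the end of each block
whose dirty reaching-out info survives.  Return true if any edge
insertion still needs commit_edge_insertions.  */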
bool
pass_vsetvl::commit_vsetvls (void)
{
bool need_commit = false;
for (int ed = 0; ed < NUM_EDGES (m_vector_manager->vector_edge_list); ed++)
{
for (size_t i = 0; i < m_vector_manager->vector_exprs.length (); i++)
{
edge eg = INDEX_EDGE (m_vector_manager->vector_edge_list, ed);
if (bitmap_bit_p (m_vector_manager->vector_insert[ed], i))
{
const vector_insn_info *require
= m_vector_manager->vector_exprs[i];
gcc_assert (require->valid_or_dirty_p ());
rtl_profile_for_edge (eg);
start_sequence ();
insn_info *insn = require->get_insn ();
vector_insn_info prev_info = vector_insn_info ();
sbitmap bitdata = m_vector_manager->vector_avout[eg->src->index];
if (m_vector_manager->all_same_ratio_p (bitdata)
&& m_vector_manager->all_same_avl_p (eg->dest, bitdata))
{
size_t first = bitmap_first_set_bit (bitdata);
prev_info = *m_vector_manager->vector_exprs[first];
}
insert_vsetvl (EMIT_DIRECT, insn->rtl (), *require, prev_info);
rtx_insn *rinsn = get_insns ();
end_sequence ();
default_rtl_profile ();
/* We should not get an abnormal edge here. */
gcc_assert (!(eg->flags & EDGE_ABNORMAL));
need_commit = true;
insert_insn_on_edge (rinsn, eg);
}
}
}
for (const bb_info *bb : crtl->ssa->bbs ())
{
basic_block cfg_bb = bb->cfg_bb ();
const auto reaching_out
= m_vector_manager->vector_block_infos[cfg_bb->index].reaching_out;
if (!reaching_out.dirty_p ())
continue;
if (reaching_out.dirty_with_killed_avl_p ())
{
if (!has_vsetvl_killed_avl_p (bb, reaching_out))
continue;
unsigned int bb_index;
sbitmap_iterator sbi;
sbitmap avin = m_vector_manager->vector_avin[cfg_bb->index];
bool available_p = false;
EXECUTE_IF_SET_IN_BITMAP (avin, 0, bb_index, sbi)
{
if (*m_vector_manager->vector_exprs[bb_index] >= reaching_out)
{
available_p = true;
break;
}
}
if (available_p)
continue;
}
rtx new_pat;
if (can_refine_vsetvl_p (cfg_bb, reaching_out))
new_pat
= gen_vsetvl_pat (VSETVL_VTYPE_CHANGE_ONLY, reaching_out, NULL_RTX);
else if (vlmax_avl_p (reaching_out.get_avl ()))
new_pat = gen_vsetvl_pat (VSETVL_NORMAL, reaching_out,
get_vl (reaching_out.get_insn ()->rtl ()));
else
new_pat
= gen_vsetvl_pat (VSETVL_DISCARD_RESULT, reaching_out, NULL_RTX);
start_sequence ();
emit_insn (new_pat);
rtx_insn *rinsn = get_insns ();
end_sequence ();
insert_insn_end_basic_block (rinsn, cfg_bb);
if (dump_file)
{
fprintf (dump_file,
"\nInsert vsetvl insn %d at the end of <bb %d>:\n",
INSN_UID (rinsn), cfg_bb->index);
print_rtl_single (dump_file, rinsn);
}
}
return need_commit;
}
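/* Phase 4: Eliminate partially redundant vsetvls by lazy code motion
(pre_edge_lcm_avs), then refine, cleanup and commit the vsetvls
according to the LCM result.  */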
void
pass_vsetvl::pre_vsetvl (void)
{
/* Compute entity list. */
prune_expressions ();
m_vector_manager->create_bitmap_vectors ();
compute_local_properties ();
m_vector_manager->vector_edge_list = pre_edge_lcm_avs (
m_vector_manager->vector_exprs.length (), m_vector_manager->vector_transp,
m_vector_manager->vector_comp, m_vector_manager->vector_antic,
m_vector_manager->vector_kill, m_vector_manager->vector_avin,
m_vector_manager->vector_avout, &m_vector_manager->vector_insert,
&m_vector_manager->vector_del);
/* We should dump the information before the CFG is changed.  Otherwise
it will produce an ICE (internal compiler error).  */
if (dump_file)
m_vector_manager->dump (dump_file);
refine_vsetvls ();
cleanup_vsetvls ();
bool need_commit = commit_vsetvls ();
if (need_commit)
commit_edge_insertions ();
}
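/* Phase 5: Remove the instructions that compute the VLMAX AVL
placeholder, and erase the now-unused AVL operand (replacing it with 0)
from RVV instructions, since after LCM the VL/VTYPE status is fully
expressed by the emitted vsetvls.  */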
void
pass_vsetvl::cleanup_insns (void) const
{
for (const bb_info *bb : crtl->ssa->bbs ())
{
for (insn_info *insn : bb->real_nondebug_insns ())
{
rtx_insn *rinsn = insn->rtl ();
if (vlmax_avl_insn_p (rinsn))
{
eliminate_insn (rinsn);
continue;
}
/* Erase the AVL operand from the instruction. */
if (!has_vl_op (rinsn) || !REG_P (get_vl (rinsn)))
continue;
rtx avl = get_vl (rinsn);
if (count_occurrences (PATTERN (rinsn), avl, true) == 1)
{
/* Get the list of uses for the new instruction. */
auto attempt = crtl->ssa->new_change_attempt ();
insn_change change (insn);
/* Remove the use of the substituted value. */
access_array_builder uses_builder (attempt);
uses_builder.reserve (insn->num_uses () - 1);
for (use_info *use : insn->uses ())
if (use != find_access (insn->uses (), REGNO (avl)))
uses_builder.quick_push (use);
use_array new_uses = use_array (uses_builder.finish ());
change.new_uses = new_uses;
change.move_range = insn->ebb ()->insn_range ();
rtx pat = simplify_replace_rtx (PATTERN (rinsn), avl, const0_rtx);
gcc_assert (change_insn (crtl->ssa, change, insn, pat));
}
}
}
}
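/* Phase 6: After LCM changed the CFG, rebuild RTL_SSA and propagate
AVL between vsetvl instructions: fuse a defining vsetvl into its
vsetvl<discard result> user when their VLMAX ratios match, and turn
"vsetvl rd,rs1" into "vsetvl zero,rs1" when rd has no nondebug
uses.  */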
void
pass_vsetvl::propagate_avl (void) const
{
/* Rebuild the RTL_SSA according to the new CFG generated by LCM. */
/* Finalization of RTL_SSA. */
free_dominance_info (CDI_DOMINATORS);
if (crtl->ssa->perform_pending_updates ())
cleanup_cfg (0);
delete crtl->ssa;
crtl->ssa = nullptr;
/* Initialization of RTL_SSA. */
calculate_dominance_info (CDI_DOMINATORS);
df_analyze ();
crtl->ssa = new function_info (cfun);
hash_set<rtx_insn *> to_delete;
for (const bb_info *bb : crtl->ssa->bbs ())
{
for (insn_info *insn : bb->real_nondebug_insns ())
{
if (vsetvl_discard_result_insn_p (insn->rtl ()))
{
rtx avl = get_avl (insn->rtl ());
if (!REG_P (avl))
continue;
set_info *set = find_access (insn->uses (), REGNO (avl))->def ();
insn_info *def_insn = extract_single_source (set);
if (!def_insn)
continue;
/* Handle this case:
vsetvli a6,zero,e32,m1,ta,mu
li a5,4096
add a7,a0,a5
addi a7,a7,-96
vsetvli t1,zero,e8,mf8,ta,ma
vle8.v v24,0(a7)
add a5,a3,a5
addi a5,a5,-96
vse8.v v24,0(a5)
vsetvli zero,a6,e32,m1,tu,ma
*/
if (vsetvl_insn_p (def_insn->rtl ()))
{
vl_vtype_info def_info = get_vl_vtype_info (def_insn);
vl_vtype_info info = get_vl_vtype_info (insn);
rtx avl = get_avl (def_insn->rtl ());
rtx vl = get_vl (def_insn->rtl ());
if (def_info.get_ratio () == info.get_ratio ())
{
if (vlmax_avl_p (def_info.get_avl ()))
{
info.set_avl_info (
avl_info (def_info.get_avl (), nullptr));
rtx new_pat
= gen_vsetvl_pat (VSETVL_NORMAL, info, vl);
validate_change (insn->rtl (),
&PATTERN (insn->rtl ()), new_pat,
false);
continue;
}
if (def_info.has_avl_imm () || rtx_equal_p (avl, vl))
{
info.set_avl_info (avl_info (avl, nullptr));
emit_vsetvl_insn (VSETVL_DISCARD_RESULT, EMIT_AFTER,
info, NULL_RTX, insn->rtl ());
if (set->single_nondebug_insn_use ())
{
to_delete.add (insn->rtl ());
to_delete.add (def_insn->rtl ());
}
continue;
}
}
}
}
/* Change vsetvl rd, rs1 --> vsetvl zero, rs1,
if rd is not used by any nondebug instructions.
Even though this pass runs after RA and thus doesn't help to
reduce register pressure, it can help instruction scheduling
since we remove the dependencies.  */
if (vsetvl_insn_p (insn->rtl ()))
{
rtx vl = get_vl (insn->rtl ());
rtx avl = get_avl (insn->rtl ());
if (vlmax_avl_p (avl))
continue;
def_info *def = find_access (insn->defs (), REGNO (vl));
set_info *set = safe_dyn_cast<set_info *> (def);
gcc_assert (set);
const vl_vtype_info info = get_vl_vtype_info (insn);
rtx new_pat
= gen_vsetvl_pat (VSETVL_DISCARD_RESULT, info, NULL_RTX);
if (!set->has_nondebug_insn_uses ())
{
validate_change (insn->rtl (), &PATTERN (insn->rtl ()),
new_pat, false);
continue;
}
}
}
}
for (rtx_insn *rinsn : to_delete)
eliminate_insn (rinsn);
}
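/* Initialize the pass: build RTL_SSA (when optimizing), create the
vector infos manager and precompute the block probabilities.  */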
void
pass_vsetvl::init (void)
{
if (optimize > 0)
{
/* Initialization of RTL_SSA. */
calculate_dominance_info (CDI_DOMINATORS);
df_analyze ();
crtl->ssa = new function_info (cfun);
}
m_vector_manager = new vector_infos_manager ();
compute_probabilities ();
if (dump_file)
{
fprintf (dump_file, "\nPrologue: Initialize vector infos\n");
m_vector_manager->dump (dump_file);
}
}
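/* Finalize the pass: tear down RTL_SSA (when optimizing) and release
the vector infos manager.  */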
void
pass_vsetvl::done (void)
{
if (optimize > 0)
{
/* Finalization of RTL_SSA. */
free_dominance_info (CDI_DOMINATORS);
if (crtl->ssa->perform_pending_updates ())
cleanup_cfg (0);
delete crtl->ssa;
crtl->ssa = nullptr;
}
m_vector_manager->release ();
delete m_vector_manager;
m_vector_manager = nullptr;
}
/* Compute probability for each block. */
void
pass_vsetvl::compute_probabilities (void)
{
/* Don't compute it in -O0 since we don't need it. */
if (!optimize)
return;
edge e;
edge_iterator ei;
for (const bb_info *bb : crtl->ssa->bbs ())
{
basic_block cfg_bb = bb->cfg_bb ();
auto &curr_prob
= m_vector_manager->vector_block_infos[cfg_bb->index].probability;
if (ENTRY_BLOCK_PTR_FOR_FN (cfun) == cfg_bb)
curr_prob = profile_probability::always ();
gcc_assert (curr_prob.initialized_p ());
FOR_EACH_EDGE (e, ei, cfg_bb->succs)
{
auto &new_prob
= m_vector_manager->vector_block_infos[e->dest->index].probability;
if (!new_prob.initialized_p ())
new_prob = curr_prob * e->probability;
else if (new_prob == profile_probability::always ())
continue;
else
new_prob += curr_prob * e->probability;
}
}
auto &exit_block
= m_vector_manager->vector_block_infos[EXIT_BLOCK_PTR_FOR_FN (cfun)->index];
exit_block.probability = profile_probability::always ();
}
/* Lazy vsetvl insertion for optimize > 0. */
void
pass_vsetvl::lazy_vsetvl (void)
{
if (dump_file)
fprintf (dump_file,
"\nEntering Lazy VSETVL PASS and Handling %d basic blocks for "
"function:%s\n",
n_basic_blocks_for_fn (cfun), function_name (cfun));
/* Phase 1 - Compute the local demanded infos within each block.
The data-flow analysis within each block is a backward analysis.  */
if (dump_file)
fprintf (dump_file, "\nPhase 1: Compute local backward vector infos\n");
for (const bb_info *bb : crtl->ssa->bbs ())
compute_local_backward_infos (bb);
if (dump_file)
m_vector_manager->dump (dump_file);
/* Phase 2 - Emit vsetvl instructions within each basic block according to
demand, compute and save ANTLOC && AVLOC of each block. */
if (dump_file)
fprintf (dump_file,
"\nPhase 2: Emit vsetvl instruction within each block\n");
for (const bb_info *bb : crtl->ssa->bbs ())
emit_local_forward_vsetvls (bb);
if (dump_file)
m_vector_manager->dump (dump_file);
/* Phase 3 - Propagate demanded info across blocks. */
if (dump_file)
fprintf (dump_file, "\nPhase 3: Demands propagation across blocks\n");
demand_fusion ();
if (dump_file)
m_vector_manager->dump (dump_file);
/* Phase 4 - Lazy code motion. */
if (dump_file)
fprintf (dump_file, "\nPhase 4: PRE vsetvl by Lazy code motion (LCM)\n");
pre_vsetvl ();
/* Phase 5 - Cleanup AVL && VL operand of RVV instruction. */
if (dump_file)
fprintf (dump_file, "\nPhase 5: Cleanup AVL and VL operands\n");
cleanup_insns ();
/* Phase 6 - Rebuild RTL_SSA to propagate AVL between vsetvls. */
if (dump_file)
fprintf (dump_file,
"\nPhase 6: Rebuild RTL_SSA to propagate AVL between vsetvls\n");
propagate_avl ();
}
/* Main entry point for this pass. */
unsigned int
pass_vsetvl::execute (function *)
{
if (n_basic_blocks_for_fn (cfun) <= 0)
return 0;
/* RVV instructions are not stable: they may change after splitting.
We split all instructions here to avoid potential issues, since
the VSETVL pass is scheduled before the split pass.  */
split_all_insns ();
/* Return early if there are no vector instructions.  */
if (!has_vector_insn (cfun))
return 0;
init ();
if (!optimize)
simple_vsetvl ();
else
lazy_vsetvl ();
done ();
return 0;
}
rtl_opt_pass *
make_pass_vsetvl (gcc::context *ctxt)
{
return new pass_vsetvl (ctxt);
}