| // Late-stage instruction combination pass. |
| // Copyright (C) 2023-2025 Free Software Foundation, Inc. |
| // |
| // This file is part of GCC. |
| // |
| // GCC is free software; you can redistribute it and/or modify it under |
| // the terms of the GNU General Public License as published by the Free |
| // Software Foundation; either version 3, or (at your option) any later |
| // version. |
| // |
| // GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| // WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| // for more details. |
| // |
| // You should have received a copy of the GNU General Public License |
| // along with GCC; see the file COPYING3. If not see |
| // <http://www.gnu.org/licenses/>. |
| |
| // This pass currently has two purposes: |
| // |
| // - to substitute definitions into all uses, so that the definition |
| // can be removed. |
| // |
| // - to try to parallelise sets of condition-code registers with a |
| // related instruction (see combine_cc_setter for details). |
| // |
| // However, it could be extended to handle other combination-related |
| // optimizations in future. |
| // |
| // The pass can run before or after register allocation. When running |
| // before register allocation, it tries to avoid cases that are likely |
| // to increase register pressure. For the same reason, it avoids moving |
| // instructions around, even if doing so would allow an optimization to |
| // succeed. These limitations are removed when running after register |
| // allocation. |
| |
| #define INCLUDE_ALGORITHM |
| #define INCLUDE_FUNCTIONAL |
| #define INCLUDE_ARRAY |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "rtl.h" |
| #include "df.h" |
| #include "rtl-ssa.h" |
| #include "print-rtl.h" |
| #include "tree-pass.h" |
| #include "cfgcleanup.h" |
| #include "target.h" |
| #include "dbgcnt.h" |
| |
| using namespace rtl_ssa; |
| |
| namespace { |
| const pass_data pass_data_late_combine = |
| { |
| RTL_PASS, // type |
| "late_combine", // name |
| OPTGROUP_NONE, // optinfo_flags |
| TV_LATE_COMBINE, // tv_id |
| 0, // properties_required |
| 0, // properties_provided |
| 0, // properties_destroyed |
| 0, // todo_flags_start |
| TODO_df_finish, // todo_flags_finish |
| }; |
| |
| // Represents an attempt to substitute a single-set definition into all |
| // uses of the definition. |
| class insn_combination |
| { |
| public: |
| insn_combination (set_info *, rtx, rtx); |
| bool run (); |
| array_slice<insn_change *const> use_changes () const; |
| |
| private: |
| use_array get_new_uses (use_info *); |
| bool substitute_nondebug_use (use_info *); |
| bool substitute_nondebug_uses (set_info *); |
| bool try_to_preserve_debug_info (insn_change &, use_info *); |
| void substitute_debug_use (use_info *); |
| bool substitute_note (insn_info *, rtx, bool); |
| void substitute_notes (insn_info *, bool); |
| void substitute_note_uses (use_info *); |
| void substitute_optional_uses (set_info *); |
| |
| // Represents the state of the function's RTL at the start of this |
| // combination attempt. |
| insn_change_watermark m_rtl_watermark; |
| |
| // Represents the rtl-ssa state at the start of this combination attempt. |
| obstack_watermark m_attempt; |
| |
| // The instruction that contains the definition, and that we're trying |
| // to delete. |
| insn_info *m_def_insn; |
| |
| // The definition itself. |
| set_info *m_def; |
| |
| // The destination and source of the single set that defines m_def. |
| // The destination is known to be a plain REG. |
| rtx m_dest; |
| rtx m_src; |
| |
| // Contains the full list of changes that we want to make, in reverse |
| // postorder. |
| auto_vec<insn_change *> m_nondebug_changes; |
| }; |
| |
| // Class that represents one run of the pass. |
| class late_combine |
| { |
| public: |
| unsigned int execute (function *); |
| |
| private: |
| rtx optimizable_set (set_info *); |
| bool check_register_pressure (insn_info *, rtx); |
| bool check_uses (set_info *, rtx); |
| bool combine_into_uses (set_info *, rtx, insn_info *); |
| bool parallelize_insns (set_info *, rtx, set_info *, rtx); |
| bool combine_cc_setter (set_info *, rtx); |
| bool combine_insn (insn_info *, insn_info *); |
| |
| auto_vec<insn_info *> m_worklist; |
| |
| // A spare PARALLEL rtx, or null if none. |
| rtx m_parallel = NULL_RTX; |
| }; |
| |
| insn_combination::insn_combination (set_info *def, rtx dest, rtx src) |
| : m_rtl_watermark (), |
| m_attempt (crtl->ssa->new_change_attempt ()), |
| m_def_insn (def->insn ()), |
| m_def (def), |
| m_dest (dest), |
| m_src (src), |
| m_nondebug_changes () |
| { |
| } |
| |
| array_slice<insn_change *const> |
| insn_combination::use_changes () const |
| { |
| return { m_nondebug_changes.address () + 1, |
| m_nondebug_changes.length () - 1 }; |
| } |
| |
| // USE is a direct or indirect use of m_def. Return the list of uses |
| // that would be needed after substituting m_def into the instruction. |
| // The returned list is marked as invalid if USE's insn and m_def_insn |
| // use different definitions for the same resource (register or memory). |
| use_array |
| insn_combination::get_new_uses (use_info *use) |
| { |
| auto *def = use->def (); |
| auto *use_insn = use->insn (); |
| |
| use_array new_uses = use_insn->uses (); |
| new_uses = remove_uses_of_def (m_attempt, new_uses, def); |
| new_uses = merge_access_arrays (m_attempt, m_def_insn->uses (), new_uses); |
| if (new_uses.is_valid () && use->ebb () != m_def->ebb ()) |
| new_uses = crtl->ssa->make_uses_available (m_attempt, new_uses, use->bb (), |
| use_insn->is_debug_insn ()); |
| return new_uses; |
| } |
| |
| // Start the process of trying to replace USE by substitution, given that |
| // USE occurs in a non-debug instruction. Check: |
| // |
| // - that the substitution can be represented in RTL |
| // |
| // - that each use of a resource (register or memory) within the new |
| // instruction has a consistent definition |
| // |
| // - that the new instruction is a recognized pattern |
| // |
| // - that the instruction can be placed somewhere that makes all definitions |
| // and uses valid, and that permits any new hard-register clobbers added |
| // during the recognition process |
| // |
| // Return true on success. |
| bool |
| insn_combination::substitute_nondebug_use (use_info *use) |
| { |
| insn_info *use_insn = use->insn (); |
| rtx_insn *use_rtl = use_insn->rtl (); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| dump_insn_slim (dump_file, use->insn ()->rtl ()); |
| |
| // Reject second and subsequent uses if the target does not allow |
| // the defining instruction to be copied. |
| if (targetm.cannot_copy_insn_p |
| && m_nondebug_changes.length () >= 2 |
| && targetm.cannot_copy_insn_p (m_def_insn->rtl ())) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "-- The target does not allow multiple" |
| " copies of insn %d\n", m_def_insn->uid ()); |
| return false; |
| } |
| |
| // Check that we can change the instruction pattern. Leave recognition |
| // of the result till later. |
| insn_propagation prop (use_rtl, m_dest, m_src); |
| if (!prop.apply_to_pattern (&PATTERN (use_rtl)) |
| || prop.num_replacements == 0) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "-- RTL substitution failed\n"); |
| return false; |
| } |
| |
| use_array new_uses = get_new_uses (use); |
| if (!new_uses.is_valid ()) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "-- could not prove that all sources" |
| " are available\n"); |
| return false; |
| } |
| |
| // Create a tentative change for the use. |
| auto *where = XOBNEW (m_attempt, insn_change); |
| auto *use_change = new (where) insn_change (use_insn); |
| m_nondebug_changes.safe_push (use_change); |
| use_change->new_uses = new_uses; |
| |
| struct local_ignore : ignore_nothing |
| { |
| local_ignore (const set_info *def, const insn_info *use_insn) |
| : m_def (def), m_use_insn (use_insn) {} |
| |
| // We don't limit the number of insns per optimization, so ignoring all |
| // insns for all insns would lead to quadratic complexity. Just ignore |
| // the use and definition, which should be enough for most purposes. |
| bool |
| should_ignore_insn (const insn_info *insn) |
| { |
| return insn == m_def->insn () || insn == m_use_insn; |
| } |
| |
| // Ignore the definition that we're removing, and all uses of it. |
| bool should_ignore_def (const def_info *def) { return def == m_def; } |
| |
| const set_info *m_def; |
| const insn_info *m_use_insn; |
| }; |
| |
| auto ignore = local_ignore (m_def, use_insn); |
| |
| // Moving instructions before register allocation could increase |
| // register pressure. Only try moving them after RA. |
| if (reload_completed && can_move_insn_p (use_insn)) |
| use_change->move_range = { use_insn->bb ()->head_insn (), |
| use_insn->ebb ()->last_bb ()->end_insn () }; |
| if (!restrict_movement (*use_change, ignore)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "-- cannot satisfy all definitions and uses" |
| " in insn %d\n", INSN_UID (use_insn->rtl ())); |
| return false; |
| } |
| |
| if (!recog (m_attempt, *use_change, ignore)) |
| return false; |
| |
| return true; |
| } |
| |
| // Apply substitute_nondebug_use to all direct and indirect uses of DEF. |
| // There will be at most one level of indirection. |
| bool |
| insn_combination::substitute_nondebug_uses (set_info *def) |
| { |
| for (use_info *use : def->nondebug_insn_uses ()) |
| if (!use->is_live_out_use () |
| && !use->only_occurs_in_notes () |
| && !substitute_nondebug_use (use)) |
| return false; |
| |
| for (use_info *use : def->phi_uses ()) |
| if (!substitute_nondebug_uses (use->phi ())) |
| return false; |
| |
| return true; |
| } |
| |
| // USE_CHANGE.insn () is a debug instruction that uses m_def. Try to |
| // substitute the definition into the instruction and try to describe |
| // the result in USE_CHANGE. Return true on success. Failure means that |
| // the instruction must be reset instead. |
| bool |
| insn_combination::try_to_preserve_debug_info (insn_change &use_change, |
| use_info *use) |
| { |
| // Punt on unsimplified subregs of hard registers. In that case, |
| // propagation can succeed and create a wider reg than the one we |
| // started with. |
| if (HARD_REGISTER_NUM_P (use->regno ()) |
| && use->includes_subregs ()) |
| return false; |
| |
| insn_info *use_insn = use_change.insn (); |
| rtx_insn *use_rtl = use_insn->rtl (); |
| |
| use_change.new_uses = get_new_uses (use); |
| if (!use_change.new_uses.is_valid () |
| || !restrict_movement (use_change)) |
| return false; |
| |
| insn_propagation prop (use_rtl, m_dest, m_src); |
| return prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl)); |
| } |
| |
| // USE_INSN is a debug instruction that uses m_def. Update it to reflect |
| // the fact that m_def is going to disappear. Try to preserve the source |
| // value if possible, but reset the instruction if not. |
| void |
| insn_combination::substitute_debug_use (use_info *use) |
| { |
| auto *use_insn = use->insn (); |
| rtx_insn *use_rtl = use_insn->rtl (); |
| |
| auto use_change = insn_change (use_insn); |
| if (!try_to_preserve_debug_info (use_change, use)) |
| { |
| use_change.new_uses = {}; |
| use_change.move_range = use_change.insn (); |
| INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC (); |
| } |
| insn_change *changes[] = { &use_change }; |
| crtl->ssa->change_insns (changes); |
| } |
| |
| // NOTE is a reg note of USE_INSN, which previously used m_def. Update |
| // the note to reflect the fact that m_def is going to disappear. Return |
| // true on success, or false if the note must be deleted. |
| // |
| // CAN_PROPAGATE is true if m_dest can be replaced with m_use. |
| bool |
| insn_combination::substitute_note (insn_info *use_insn, rtx note, |
| bool can_propagate) |
| { |
| if (REG_NOTE_KIND (note) == REG_EQUAL |
| || REG_NOTE_KIND (note) == REG_EQUIV) |
| { |
| insn_propagation prop (use_insn->rtl (), m_dest, m_src); |
| return (prop.apply_to_note (&XEXP (note, 0)) |
| && (can_propagate || prop.num_replacements == 0)); |
| } |
| return true; |
| } |
| |
| // Update USE_INSN's notes after deciding to go ahead with the optimization. |
| // CAN_PROPAGATE is true if m_dest can be replaced with m_use. |
| void |
| insn_combination::substitute_notes (insn_info *use_insn, bool can_propagate) |
| { |
| rtx_insn *use_rtl = use_insn->rtl (); |
| rtx *ptr = ®_NOTES (use_rtl); |
| while (rtx note = *ptr) |
| { |
| if (substitute_note (use_insn, note, can_propagate)) |
| ptr = &XEXP (note, 1); |
| else |
| *ptr = XEXP (note, 1); |
| } |
| } |
| |
| // We've decided to go ahead with the substitution. Update all REG_NOTES |
| // involving USE. |
| void |
| insn_combination::substitute_note_uses (use_info *use) |
| { |
| insn_info *use_insn = use->insn (); |
| |
| bool can_propagate = true; |
| if (use->only_occurs_in_notes ()) |
| { |
| // The only uses are in notes. Try to keep the note if we can, |
| // but removing it is better than aborting the optimization. |
| insn_change use_change (use_insn); |
| use_change.new_uses = get_new_uses (use); |
| if (!use_change.new_uses.is_valid () |
| || !restrict_movement (use_change)) |
| { |
| use_change.move_range = use_insn; |
| use_change.new_uses = remove_uses_of_def (m_attempt, |
| use_insn->uses (), |
| use->def ()); |
| can_propagate = false; |
| } |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "%s notes in:\n", |
| can_propagate ? "updating" : "removing"); |
| dump_insn_slim (dump_file, use_insn->rtl ()); |
| } |
| substitute_notes (use_insn, can_propagate); |
| insn_change *changes[] = { &use_change }; |
| crtl->ssa->change_insns (changes); |
| } |
| else |
| // We've already decided to update the insn's pattern and know that m_src |
| // will be available at the insn's new location. Now update its notes. |
| substitute_notes (use_insn, can_propagate); |
| } |
| |
| // We've decided to go ahead with the substitution and we've dealt with |
| // all uses that occur in the patterns of non-debug insns. Update all |
| // other uses for the fact that m_def is about to disappear. |
| void |
| insn_combination::substitute_optional_uses (set_info *def) |
| { |
| if (auto insn_uses = def->all_insn_uses ()) |
| { |
| use_info *use = *insn_uses.begin (); |
| while (use) |
| { |
| use_info *next_use = use->next_any_insn_use (); |
| if (use->is_in_debug_insn ()) |
| substitute_debug_use (use); |
| else if (!use->is_live_out_use ()) |
| substitute_note_uses (use); |
| use = next_use; |
| } |
| } |
| for (use_info *use : def->phi_uses ()) |
| substitute_optional_uses (use->phi ()); |
| } |
| |
| // Try to perform the substitution. Return true on success. |
| bool |
| insn_combination::run () |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "\ntrying to combine definition of r%d in:\n", |
| m_def->regno ()); |
| dump_insn_slim (dump_file, m_def_insn->rtl ()); |
| fprintf (dump_file, "into:\n"); |
| } |
| |
| auto def_change = insn_change::delete_insn (m_def_insn); |
| m_nondebug_changes.safe_push (&def_change); |
| |
| if (!substitute_nondebug_uses (m_def) |
| || !changes_are_worthwhile (m_nondebug_changes) |
| || !crtl->ssa->verify_insn_changes (m_nondebug_changes)) |
| return false; |
| |
| // We've now decided that the optimization is valid and profitable. |
| // Allow it to be suppressed for bisection purposes. |
| if (!dbg_cnt (::late_combine)) |
| return false; |
| |
| substitute_optional_uses (m_def); |
| |
| confirm_change_group (); |
| crtl->ssa->change_insns (m_nondebug_changes); |
| return true; |
| } |
| |
| // DEF is the result of calling single_set_info on its instruction. |
| // See whether that instruction is a single_set that we can optimize. |
| // Return the set if so, otherwise return null. |
| rtx |
| late_combine::optimizable_set (set_info *def) |
| { |
| // For simplicity, don't try to handle sets of multiple hard registers. |
| // And for correctness, don't remove any assignments to the stack or |
| // frame pointers, since that would implicitly change the set of valid |
| // memory locations between this assignment and the next. |
| // |
| // Removing assignments to the hard frame pointer would invalidate |
| // backtraces. |
| if (!def->is_reg () |
| || def->regno () == STACK_POINTER_REGNUM |
| || def->regno () == FRAME_POINTER_REGNUM |
| || def->regno () == HARD_FRAME_POINTER_REGNUM) |
| return NULL_RTX; |
| |
| auto *insn = def->insn (); |
| if (!insn->can_be_optimized () |
| || insn->is_asm () |
| || insn->is_call () |
| || insn->has_volatile_refs () |
| || insn->has_pre_post_modify () |
| || !can_move_insn_p (insn)) |
| return NULL_RTX; |
| |
| rtx set = single_set (insn->rtl ()); |
| if (!set) |
| return NULL_RTX; |
| |
| // For simplicity, don't try to handle subreg destinations. |
| rtx dest = SET_DEST (set); |
| if (!REG_P (dest) || REG_NREGS (dest) != 1 || def->regno () != REGNO (dest)) |
| return NULL_RTX; |
| |
| return set; |
| } |
| |
| // Suppose that we can replace all uses of SET_DEST (SET) with SET_SRC (SET), |
| // where SET occurs in INSN. Return true if doing so is not likely to |
| // increase register pressure. |
| bool |
| late_combine::check_register_pressure (insn_info *insn, rtx set) |
| { |
| // Plain register-to-register moves do not establish a register class |
| // preference and have no well-defined effect on the register allocator. |
| // If changes in register class are needed, the register allocator is |
| // in the best position to place those changes. If no change in |
| // register class is needed, then the optimization reduces register |
| // pressure if SET_SRC (set) was already live at uses, otherwise the |
| // optimization is pressure-neutral. |
| rtx src = SET_SRC (set); |
| if (REG_P (src)) |
| return true; |
| |
| // On the same basis, substituting a SET_SRC that contains a single |
| // pseudo register either reduces pressure or is pressure-neutral, |
| // subject to the constraints below. We would need to do more |
| // analysis for SET_SRCs that use more than one pseudo register. |
| unsigned int nregs = 0; |
| for (auto *use : insn->uses ()) |
| if (use->is_reg () |
| && !HARD_REGISTER_NUM_P (use->regno ()) |
| && !use->only_occurs_in_notes ()) |
| if (++nregs > 1) |
| return false; |
| |
| // If there are no pseudo registers in SET_SRC then the optimization |
| // should improve register pressure. |
| if (nregs == 0) |
| return true; |
| |
| // We'd be substituting (set (reg R1) SRC) where SRC is known to |
| // contain a single pseudo register R2. Assume for simplicity that |
| // each new use of R2 would need to be in the same class C as the |
| // current use of R2. If, for a realistic allocation, C is a |
| // non-strict superset of the R1's register class, the effect on |
| // register pressure should be positive or neutral. If instead |
| // R1 occupies a different register class from R2, or if R1 has |
| // more allocation freedom than R2, then there's a higher risk that |
| // the effect on register pressure could be negative. |
| // |
| // First use constrain_operands to get the most likely choice of |
| // alternative. For simplicity, just handle the case where the |
| // output operand is operand 0. |
| extract_insn (insn->rtl ()); |
| rtx dest = SET_DEST (set); |
| if (recog_data.n_operands == 0 |
| || recog_data.operand[0] != dest) |
| return false; |
| |
| if (!constrain_operands (0, get_enabled_alternatives (insn->rtl ()))) |
| return false; |
| |
| preprocess_constraints (insn->rtl ()); |
| auto *alt = which_op_alt (); |
| auto dest_class = alt[0].cl; |
| |
| // Check operands 1 and above. |
| auto check_src = [&] (unsigned int i) |
| { |
| if (recog_data.is_operator[i]) |
| return true; |
| |
| rtx op = recog_data.operand[i]; |
| if (CONSTANT_P (op)) |
| return true; |
| |
| if (SUBREG_P (op)) |
| op = SUBREG_REG (op); |
| if (REG_P (op)) |
| { |
| // Ignore hard registers. We've already rejected uses of non-fixed |
| // hard registers in the SET_SRC. |
| if (HARD_REGISTER_P (op)) |
| return true; |
| |
| // Make sure that the source operand's class is at least as |
| // permissive as the destination operand's class. |
| auto src_class = alternative_class (alt, i); |
| if (dest_class != src_class) |
| { |
| auto extra_dest_regs = (reg_class_contents[dest_class] |
| & ~reg_class_contents[src_class] |
| & ~fixed_reg_set); |
| if (!hard_reg_set_empty_p (extra_dest_regs)) |
| return false; |
| } |
| |
| // Make sure that the source operand occupies no more hard |
| // registers than the destination operand. This mostly matters |
| // for subregs. |
| if (targetm.class_max_nregs (dest_class, GET_MODE (dest)) |
| < targetm.class_max_nregs (src_class, GET_MODE (op))) |
| return false; |
| |
| return true; |
| } |
| return false; |
| }; |
| for (int i = 1; i < recog_data.n_operands; ++i) |
| if (recog_data.operand_type[i] != OP_OUT && !check_src (i)) |
| return false; |
| |
| return true; |
| } |
| |
| // Check uses of DEF to see whether there is anything obvious that |
| // prevents the substitution of SET into uses of DEF. |
| bool |
| late_combine::check_uses (set_info *def, rtx set) |
| { |
| use_info *prev_use = nullptr; |
| for (use_info *use : def->nondebug_insn_uses ()) |
| { |
| insn_info *use_insn = use->insn (); |
| |
| if (use->is_live_out_use ()) |
| continue; |
| if (use->only_occurs_in_notes ()) |
| continue; |
| |
| // We cannot replace all uses if the value is live on exit. |
| if (use->is_artificial ()) |
| return false; |
| |
| // Avoid increasing the complexity of instructions that |
| // reference allocatable hard registers. |
| if (!REG_P (SET_SRC (set)) |
| && !reload_completed |
| && (accesses_include_nonfixed_hard_registers (use_insn->uses ()) |
| || accesses_include_nonfixed_hard_registers (use_insn->defs ()))) |
| return false; |
| |
| // Don't substitute into a non-local goto, since it can then be |
| // treated as a jump to local label, e.g. in shorten_branches. |
| // ??? But this shouldn't be necessary. |
| if (use_insn->is_jump () |
| && find_reg_note (use_insn->rtl (), REG_NON_LOCAL_GOTO, NULL_RTX)) |
| return false; |
| |
| // Reject cases where one of the uses is a function argument. |
| // The combine attempt should fail anyway, but this is a common |
| // case that is easy to check early. |
| if (use_insn->is_call () |
| && HARD_REGISTER_P (SET_DEST (set)) |
| && find_reg_fusage (use_insn->rtl (), USE, SET_DEST (set))) |
| return false; |
| |
| // We'll keep the uses in their original order, even if we move |
| // them relative to other instructions. Make sure that non-final |
| // uses do not change any values that occur in the SET_SRC. |
| if (prev_use && prev_use->ebb () == use->ebb ()) |
| { |
| def_info *ultimate_def = look_through_degenerate_phi (def); |
| if (insn_clobbers_resources (prev_use->insn (), |
| ultimate_def->insn ()->uses ())) |
| return false; |
| } |
| |
| prev_use = use; |
| } |
| |
| for (use_info *use : def->phi_uses ()) |
| if (!use->phi ()->is_degenerate () |
| || !check_uses (use->phi (), set)) |
| return false; |
| |
| return true; |
| } |
| |
| // Try to remove DEF's instruction by substituting DEF into all uses. |
| // SET is the rtx set associated with DEF. If the optimization moves any |
| // instructions before CURSOR, add those instructions to the end of m_worklist. |
| bool |
| late_combine::combine_into_uses (set_info *def, rtx set, insn_info *cursor) |
| { |
| auto *insn = def->insn (); |
| |
| // Don't prolong the live ranges of allocatable hard registers, or put |
| // them into more complicated instructions. Failing to prevent this |
| // could lead to spill failures, or at least to worst register allocation. |
| if (!reload_completed |
| && accesses_include_nonfixed_hard_registers (insn->uses ())) |
| return false; |
| |
| if (!reload_completed && !check_register_pressure (insn, set)) |
| return false; |
| |
| if (!check_uses (def, set)) |
| return false; |
| |
| insn_combination combination (def, SET_DEST (set), SET_SRC (set)); |
| if (!combination.run ()) |
| return false; |
| |
| for (auto *use_change : combination.use_changes ()) |
| if (*use_change->insn () < *cursor) |
| m_worklist.safe_push (use_change->insn ()); |
| else |
| break; |
| return true; |
| } |
| |
| // CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise |
| // a single_set that sets (only) OTHER_DEF. CC_SET is known to set a |
| // condition-code register and the instruction that contains CC_SET is |
| // known to use OTHER_DEF. Try to do CC_SET and OTHER_SET in parallel. |
| bool |
| late_combine::parallelize_insns (set_info *cc_def, rtx cc_set, |
| set_info *other_def, rtx other_set) |
| { |
| auto attempt = crtl->ssa->new_change_attempt (); |
| |
| insn_info *cc_insn = cc_def->insn (); |
| insn_info *other_insn = other_def->insn (); |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "trying to parallelize insn %d and insn %d\n", |
| other_insn->uid (), cc_insn->uid ()); |
| |
| // Try to substitute OTHER_SET into CC_INSN. |
| insn_change_watermark rtl_watermark; |
| rtx_insn *cc_rtl = cc_insn->rtl (); |
| insn_propagation prop (cc_rtl, SET_DEST (other_set), SET_SRC (other_set)); |
| if (!prop.apply_to_pattern (&PATTERN (cc_rtl)) |
| || prop.num_replacements == 0) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "-- failed to substitute all uses of r%d\n", |
| other_def->regno ()); |
| return false; |
| } |
| |
| // Restrict the uses to those outside notes. |
| use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ()); |
| use_array other_set_uses = remove_note_accesses (attempt, |
| other_insn->uses ()); |
| |
| // Remove the use of the substituted value. |
| cc_uses = remove_uses_of_def (attempt, cc_uses, other_def); |
| |
| // Get the list of uses for the new instruction. |
| insn_change cc_change (cc_insn); |
| cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses); |
| if (!cc_change.new_uses.is_valid ()) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "-- cannot merge uses\n"); |
| return false; |
| } |
| |
| // The instruction initially defines just two registers. recog can add |
| // extra clobbers if necessary. |
| auto_vec<access_info *, 2> new_defs; |
| new_defs.quick_push (cc_def); |
| new_defs.quick_push (other_def); |
| sort_accesses (new_defs); |
| cc_change.new_defs = def_array (access_array (new_defs)); |
| |
| // Make sure there is somewhere that the new instruction could live. |
| auto other_change = insn_change::delete_insn (other_insn); |
| insn_change *changes[] = { &other_change, &cc_change }; |
| cc_change.move_range = cc_insn->ebb ()->insn_range (); |
| if (!restrict_movement (cc_change, ignore_changing_insns (changes))) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "-- cannot satisfy all definitions and uses\n"); |
| return false; |
| } |
| |
| // Tentatively install the new pattern. By convention, the CC set |
| // must be first. |
| if (m_parallel) |
| { |
| XVECEXP (m_parallel, 0, 0) = cc_set; |
| XVECEXP (m_parallel, 0, 1) = other_set; |
| } |
| else |
| { |
| rtvec vec = gen_rtvec (2, cc_set, other_set); |
| m_parallel = gen_rtx_PARALLEL (VOIDmode, vec); |
| } |
| validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1); |
| |
| // These routines report failures themselves. |
| if (!recog (attempt, cc_change, ignore_changing_insns (changes)) |
| || !changes_are_worthwhile (changes) |
| || !crtl->ssa->verify_insn_changes (changes)) |
| return false; |
| |
| remove_reg_equal_equiv_notes (cc_rtl); |
| confirm_change_group (); |
| crtl->ssa->change_insns (changes); |
| m_parallel = NULL_RTX; |
| return true; |
| } |
| |
| // CC_SET is a single_set that sets (only) CC_DEF. See whether CC_DEF |
| // is a definition of a condition-code register and try to optimize it |
| // with related instructions if so. Return true if something changed. |
| // |
| // This function looks for sequences of the form: |
| // |
| // A: (set (reg R1) X1) |
| // B: ...instructions that might change the value of X1... |
| // C: (set (reg CC) X2) // X2 uses R1 |
| // |
| // and tries to change them to: |
| // |
| // C': [(set (reg CC) X2') |
| // (set (reg R1) X1)] |
| // B: ...instructions that might change the value of X1... |
| // |
| // where X2' is the result of replacing R1 with X1 in X2. |
| // |
| // Combine can handle this optimization if B doesn't exist and if A and |
| // C are in the same BB. This pass instead handles cases where B does |
| // exist and cases where A and C are in different BBs of the same EBB. |
| bool |
| late_combine::combine_cc_setter (set_info *cc_def, rtx cc_set) |
| { |
| // Check for a set of a CC register. This isn't needed for correctness; |
| // it's just a way of narrowing the search space. It could be relaxed if |
| // there are other situations that would benefit from the same optimization. |
| if (!HARD_REGISTER_NUM_P (cc_def->regno ()) |
| || GET_MODE_CLASS (cc_def->mode()) != MODE_CC) |
| return false; |
| |
| // Search the registers used by the CC setter for an easily-substitutable |
| // def-use chain. |
| for (use_info *other_use : cc_def->insn ()->uses ()) |
| if (auto *other_def = other_use->def ()) |
| if (other_use->regno () != cc_def->regno () |
| && other_def->ebb () == cc_def->ebb () |
| && other_def == single_set_info (other_def->insn ())) |
| if (rtx other_set = optimizable_set (other_def)) |
| if (parallelize_insns (cc_def, cc_set, other_def, other_set)) |
| return true; |
| |
| return false; |
| } |
| |
| // Try to optimize INSN in some way. If the optimization moves any |
| // instructions before CURSOR, and if further optimizations might be |
| // possible on those instructions, add them to the end of m_worklist. |
| bool |
| late_combine::combine_insn (insn_info *insn, insn_info *cursor) |
| { |
| if (set_info *def = single_set_info (insn)) |
| if (rtx set = optimizable_set (def)) |
| return (combine_into_uses (def, set, cursor) |
| || combine_cc_setter (def, set)); |
| |
| return false; |
| } |
| |
| // Run the pass on function FN. |
| unsigned int |
| late_combine::execute (function *fn) |
| { |
| // Initialization. |
| calculate_dominance_info (CDI_DOMINATORS); |
| df_analyze (); |
| crtl->ssa = new rtl_ssa::function_info (fn); |
| // Don't allow memory_operand to match volatile MEMs. |
| init_recog_no_volatile (); |
| |
| insn_info *insn = *crtl->ssa->nondebug_insns ().begin (); |
| while (insn) |
| { |
| if (!insn->is_artificial ()) |
| { |
| insn_info *prev = insn->prev_nondebug_insn (); |
| if (combine_insn (insn, prev)) |
| { |
| // Any instructions that get added to the worklist were |
| // previously after PREV. Thus if we were able to move |
| // an instruction X before PREV during one combination, |
| // X cannot depend on any instructions that we move before |
| // PREV during subsequent combinations. This means that |
| // the worklist should be free of backwards dependencies, |
| // even if it isn't necessarily in RPO. |
| for (unsigned int i = 0; i < m_worklist.length (); ++i) |
| combine_insn (m_worklist[i], prev); |
| m_worklist.truncate (0); |
| insn = prev; |
| } |
| } |
| insn = insn->next_nondebug_insn (); |
| } |
| |
| // Finalization. |
| if (crtl->ssa->perform_pending_updates ()) |
| cleanup_cfg (0); |
| |
| delete crtl->ssa; |
| crtl->ssa = nullptr; |
| |
| // Make the recognizer allow volatile MEMs again. |
| init_recog (); |
| free_dominance_info (CDI_DOMINATORS); |
| return 0; |
| } |
| |
| class pass_late_combine : public rtl_opt_pass |
| { |
| public: |
| pass_late_combine (gcc::context *ctxt) |
| : rtl_opt_pass (pass_data_late_combine, ctxt) |
| {} |
| |
| // opt_pass methods: |
| opt_pass *clone () override { return new pass_late_combine (m_ctxt); } |
| bool gate (function *) override; |
| unsigned int execute (function *) override; |
| }; |
| |
| bool |
| pass_late_combine::gate (function *) |
| { |
| return optimize > 0 && flag_late_combine_instructions; |
| } |
| |
| unsigned int |
| pass_late_combine::execute (function *fn) |
| { |
| return late_combine ().execute (fn); |
| } |
| |
| } // end namespace |
| |
| // Create a new CC fusion pass instance. |
| |
| rtl_opt_pass * |
| make_pass_late_combine (gcc::context *ctxt) |
| { |
| return new pass_late_combine (ctxt); |
| } |