gcc/late-combine.cc - gcc.git - Git at Google

 // Late-stage instruction combination pass.
 // Copyright (C) 2023-2025 Free Software Foundation, Inc.
 //
 // This file is part of GCC.
 //
 // GCC is free software; you can redistribute it and/or modify it under
 // the terms of the GNU General Public License as published by the Free
 // Software Foundation; either version 3, or (at your option) any later
 // version.
 //
 // GCC is distributed in the hope that it will be useful, but WITHOUT ANY
 // WARRANTY; without even the implied warranty of MERCHANTABILITY or
 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 // for more details.
 //
 // You should have received a copy of the GNU General Public License
 // along with GCC; see the file COPYING3.  If not see
 // <http://www.gnu.org/licenses/>.

 // This pass currently has two purposes:
 //
 // - to substitute definitions into all uses, so that the definition
 //   can be removed.
 //
 // - to try to parallelise sets of condition-code registers with a
 //   related instruction (see combine_cc_setter for details).
 //
 // However, it could be extended to handle other combination-related
 // optimizations in future.
 //
 // The pass can run before or after register allocation.  When running
 // before register allocation, it tries to avoid cases that are likely
 // to increase register pressure.  For the same reason, it avoids moving
 // instructions around, even if doing so would allow an optimization to
 // succeed.  These limitations are removed when running after register
 // allocation.

 #define INCLUDE_ALGORITHM
 #define INCLUDE_FUNCTIONAL
 #define INCLUDE_ARRAY
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
 #include "backend.h"
 #include "rtl.h"
 #include "df.h"
 #include "rtl-ssa.h"
 #include "print-rtl.h"
 #include "tree-pass.h"
 #include "cfgcleanup.h"
 #include "target.h"
 #include "dbgcnt.h"

 using namespace rtl_ssa;

 namespace {
 const pass_data pass_data_late_combine =
 {
   RTL_PASS, // type
   "late_combine", // name
   OPTGROUP_NONE, // optinfo_flags
   TV_LATE_COMBINE, // tv_id
   0, // properties_required
   0, // properties_provided
   0, // properties_destroyed
   0, // todo_flags_start
   TODO_df_finish, // todo_flags_finish
 };

 // Represents an attempt to substitute a single-set definition into all
 // uses of the definition.
 class insn_combination
 {
 public:
   insn_combination (set_info *, rtx, rtx);
   bool run ();
   array_slice<insn_change *const> use_changes () const;

 private:
   use_array get_new_uses (use_info *);
   bool substitute_nondebug_use (use_info *);
   bool substitute_nondebug_uses (set_info *);
   bool try_to_preserve_debug_info (insn_change &, use_info *);
   void substitute_debug_use (use_info *);
   bool substitute_note (insn_info *, rtx, bool);
   void substitute_notes (insn_info *, bool);
   void substitute_note_uses (use_info *);
   void substitute_optional_uses (set_info *);

   // Represents the state of the function's RTL at the start of this
   // combination attempt.
   insn_change_watermark m_rtl_watermark;

   // Represents the rtl-ssa state at the start of this combination attempt.
   obstack_watermark m_attempt;

   // The instruction that contains the definition, and that we're trying
   // to delete.
   insn_info *m_def_insn;

   // The definition itself.
   set_info *m_def;

   // The destination and source of the single set that defines m_def.
   // The destination is known to be a plain REG.
   rtx m_dest;
   rtx m_src;

   // Contains the full list of changes that we want to make, in reverse
   // postorder.
   auto_vec<insn_change *> m_nondebug_changes;
 };

 // Class that represents one run of the pass.
 class late_combine
 {
 public:
   unsigned int execute (function *);

 private:
   rtx optimizable_set (set_info *);
   bool check_register_pressure (insn_info *, rtx);
   bool check_uses (set_info *, rtx);
   bool combine_into_uses (set_info *, rtx, insn_info *);
   bool parallelize_insns (set_info *, rtx, set_info *, rtx);
   bool combine_cc_setter (set_info *, rtx);
   bool combine_insn (insn_info *, insn_info *);

   auto_vec<insn_info *> m_worklist;

   // A spare PARALLEL rtx, or null if none.
   rtx m_parallel = NULL_RTX;
 };

 insn_combination::insn_combination (set_info *def, rtx dest, rtx src)
   : m_rtl_watermark (),
     m_attempt (crtl->ssa->new_change_attempt ()),
     m_def_insn (def->insn ()),
     m_def (def),
     m_dest (dest),
     m_src (src),
     m_nondebug_changes ()
 {
 }

 array_slice<insn_change *const>
 insn_combination::use_changes () const
 {
   return { m_nondebug_changes.address () + 1,
 	   m_nondebug_changes.length () - 1 };
 }

 // USE is a direct or indirect use of m_def.  Return the list of uses
 // that would be needed after substituting m_def into the instruction.
 // The returned list is marked as invalid if USE's insn and m_def_insn
 // use different definitions for the same resource (register or memory).
 use_array
 insn_combination::get_new_uses (use_info *use)
 {
   auto *def = use->def ();
   auto *use_insn = use->insn ();

   use_array new_uses = use_insn->uses ();
   new_uses = remove_uses_of_def (m_attempt, new_uses, def);
   new_uses = merge_access_arrays (m_attempt, m_def_insn->uses (), new_uses);
   if (new_uses.is_valid () && use->ebb () != m_def->ebb ())
     new_uses = crtl->ssa->make_uses_available (m_attempt, new_uses, use->bb (),
 					       use_insn->is_debug_insn ());
   return new_uses;
 }

 // Start the process of trying to replace USE by substitution, given that
 // USE occurs in a non-debug instruction.  Check:
 //
 // - that the substitution can be represented in RTL
 //
 // - that each use of a resource (register or memory) within the new
 //   instruction has a consistent definition
 //
 // - that the new instruction is a recognized pattern
 //
 // - that the instruction can be placed somewhere that makes all definitions
 //   and uses valid, and that permits any new hard-register clobbers added
 //   during the recognition process
 //
 // Return true on success.
 bool
 insn_combination::substitute_nondebug_use (use_info *use)
 {
   insn_info *use_insn = use->insn ();
   rtx_insn *use_rtl = use_insn->rtl ();

   if (dump_file && (dump_flags & TDF_DETAILS))
     dump_insn_slim (dump_file, use->insn ()->rtl ());

   // Reject second and subsequent uses if the target does not allow
   // the defining instruction to be copied.
   if (targetm.cannot_copy_insn_p
       && m_nondebug_changes.length () >= 2
       && targetm.cannot_copy_insn_p (m_def_insn->rtl ()))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "-- The target does not allow multiple"
 		 " copies of insn %d\n", m_def_insn->uid ());
       return false;
     }

   // Check that we can change the instruction pattern.  Leave recognition
   // of the result till later.
   insn_propagation prop (use_rtl, m_dest, m_src);
   if (!prop.apply_to_pattern (&PATTERN (use_rtl))
       || prop.num_replacements == 0)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "-- RTL substitution failed\n");
       return false;
     }

   use_array new_uses = get_new_uses (use);
   if (!new_uses.is_valid ())
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "-- could not prove that all sources"
 		 " are available\n");
       return false;
     }

   // Create a tentative change for the use.
   auto *where = XOBNEW (m_attempt, insn_change);
   auto *use_change = new (where) insn_change (use_insn);
   m_nondebug_changes.safe_push (use_change);
   use_change->new_uses = new_uses;

   struct local_ignore : ignore_nothing
   {
     local_ignore (const set_info *def, const insn_info *use_insn)
       : m_def (def), m_use_insn (use_insn) {}

     // We don't limit the number of insns per optimization, so ignoring all
     // insns for all insns would lead to quadratic complexity.  Just ignore
     // the use and definition, which should be enough for most purposes.
     bool
     should_ignore_insn (const insn_info *insn)
     {
       return insn == m_def->insn () || insn == m_use_insn;
     }

     // Ignore the definition that we're removing, and all uses of it.
     bool should_ignore_def (const def_info *def) { return def == m_def; }

     const set_info *m_def;
     const insn_info *m_use_insn;
   };

   auto ignore = local_ignore (m_def, use_insn);

   // Moving instructions before register allocation could increase
   // register pressure.  Only try moving them after RA.
   if (reload_completed && can_move_insn_p (use_insn))
     use_change->move_range = { use_insn->bb ()->head_insn (),
 			       use_insn->ebb ()->last_bb ()->end_insn () };
   if (!restrict_movement (*use_change, ignore))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "-- cannot satisfy all definitions and uses"
 		 " in insn %d\n", INSN_UID (use_insn->rtl ()));
       return false;
     }

   if (!recog (m_attempt, *use_change, ignore))
     return false;

   return true;
 }

 // Apply substitute_nondebug_use to all direct and indirect uses of DEF.
 // There will be at most one level of indirection.
 bool
 insn_combination::substitute_nondebug_uses (set_info *def)
 {
   for (use_info *use : def->nondebug_insn_uses ())
     if (!use->is_live_out_use ()
 	&& !use->only_occurs_in_notes ()
 	&& !substitute_nondebug_use (use))
       return false;

   for (use_info *use : def->phi_uses ())
     if (!substitute_nondebug_uses (use->phi ()))
       return false;

   return true;
 }

 // USE_CHANGE.insn () is a debug instruction that uses m_def.  Try to
 // substitute the definition into the instruction and try to describe
 // the result in USE_CHANGE.  Return true on success.  Failure means that
 // the instruction must be reset instead.
 bool
 insn_combination::try_to_preserve_debug_info (insn_change &use_change,
 					      use_info *use)
 {
   // Punt on unsimplified subregs of hard registers.  In that case,
   // propagation can succeed and create a wider reg than the one we
   // started with.
   if (HARD_REGISTER_NUM_P (use->regno ())
       && use->includes_subregs ())
     return false;

   insn_info *use_insn = use_change.insn ();
   rtx_insn *use_rtl = use_insn->rtl ();

   use_change.new_uses = get_new_uses (use);
   if (!use_change.new_uses.is_valid ()
       || !restrict_movement (use_change))
     return false;

   insn_propagation prop (use_rtl, m_dest, m_src);
   return prop.apply_to_pattern (&INSN_VAR_LOCATION_LOC (use_rtl));
 }

 // USE_INSN is a debug instruction that uses m_def.  Update it to reflect
 // the fact that m_def is going to disappear.  Try to preserve the source
 // value if possible, but reset the instruction if not.
 void
 insn_combination::substitute_debug_use (use_info *use)
 {
   auto *use_insn = use->insn ();
   rtx_insn *use_rtl = use_insn->rtl ();

   auto use_change = insn_change (use_insn);
   if (!try_to_preserve_debug_info (use_change, use))
     {
       use_change.new_uses = {};
       use_change.move_range = use_change.insn ();
       INSN_VAR_LOCATION_LOC (use_rtl) = gen_rtx_UNKNOWN_VAR_LOC ();
     }
   insn_change *changes[] = { &use_change };
   crtl->ssa->change_insns (changes);
 }

 // NOTE is a reg note of USE_INSN, which previously used m_def.  Update
 // the note to reflect the fact that m_def is going to disappear.  Return
 // true on success, or false if the note must be deleted.
 //
 // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
 bool
 insn_combination::substitute_note (insn_info *use_insn, rtx note,
 				   bool can_propagate)
 {
   if (REG_NOTE_KIND (note) == REG_EQUAL
       || REG_NOTE_KIND (note) == REG_EQUIV)
     {
       insn_propagation prop (use_insn->rtl (), m_dest, m_src);
       return (prop.apply_to_note (&XEXP (note, 0))
 	      && (can_propagate || prop.num_replacements == 0));
     }
   return true;
 }

 // Update USE_INSN's notes after deciding to go ahead with the optimization.
 // CAN_PROPAGATE is true if m_dest can be replaced with m_use.
 void
 insn_combination::substitute_notes (insn_info *use_insn, bool can_propagate)
 {
   rtx_insn *use_rtl = use_insn->rtl ();
   rtx *ptr = &REG_NOTES (use_rtl);
   while (rtx note = *ptr)
     {
       if (substitute_note (use_insn, note, can_propagate))
 	ptr = &XEXP (note, 1);
       else
 	*ptr = XEXP (note, 1);
     }
 }

 // We've decided to go ahead with the substitution.  Update all REG_NOTES
 // involving USE.
 void
 insn_combination::substitute_note_uses (use_info *use)
 {
   insn_info *use_insn = use->insn ();

   bool can_propagate = true;
   if (use->only_occurs_in_notes ())
     {
       // The only uses are in notes.  Try to keep the note if we can,
       // but removing it is better than aborting the optimization.
       insn_change use_change (use_insn);
       use_change.new_uses = get_new_uses (use);
       if (!use_change.new_uses.is_valid ()
 	  || !restrict_movement (use_change))
 	{
 	  use_change.move_range = use_insn;
 	  use_change.new_uses = remove_uses_of_def (m_attempt,
 						    use_insn->uses (),
 						    use->def ());
 	  can_propagate = false;
 	}
       if (dump_file && (dump_flags & TDF_DETAILS))
 	{
 	  fprintf (dump_file, "%s notes in:\n",
 		   can_propagate ? "updating" : "removing");
 	  dump_insn_slim (dump_file, use_insn->rtl ());
 	}
       substitute_notes (use_insn, can_propagate);
       insn_change *changes[] = { &use_change };
       crtl->ssa->change_insns (changes);
     }
   else
     // We've already decided to update the insn's pattern and know that m_src
     // will be available at the insn's new location.  Now update its notes.
     substitute_notes (use_insn, can_propagate);
 }

 // We've decided to go ahead with the substitution and we've dealt with
 // all uses that occur in the patterns of non-debug insns.  Update all
 // other uses for the fact that m_def is about to disappear.
 void
 insn_combination::substitute_optional_uses (set_info *def)
 {
   if (auto insn_uses = def->all_insn_uses ())
     {
       use_info *use = *insn_uses.begin ();
       while (use)
 	{
 	  use_info *next_use = use->next_any_insn_use ();
 	  if (use->is_in_debug_insn ())
 	    substitute_debug_use (use);
 	  else if (!use->is_live_out_use ())
 	    substitute_note_uses (use);
 	  use = next_use;
 	}
     }
   for (use_info *use : def->phi_uses ())
     substitute_optional_uses (use->phi ());
 }

 // Try to perform the substitution.  Return true on success.
 bool
 insn_combination::run ()
 {
   if (dump_file && (dump_flags & TDF_DETAILS))
     {
       fprintf (dump_file, "\ntrying to combine definition of r%d in:\n",
 	       m_def->regno ());
       dump_insn_slim (dump_file, m_def_insn->rtl ());
       fprintf (dump_file, "into:\n");
     }

   auto def_change = insn_change::delete_insn (m_def_insn);
   m_nondebug_changes.safe_push (&def_change);

   if (!substitute_nondebug_uses (m_def)
       || !changes_are_worthwhile (m_nondebug_changes)
       || !crtl->ssa->verify_insn_changes (m_nondebug_changes))
     return false;

   // We've now decided that the optimization is valid and profitable.
   // Allow it to be suppressed for bisection purposes.
   if (!dbg_cnt (::late_combine))
     return false;

   substitute_optional_uses (m_def);

   confirm_change_group ();
   crtl->ssa->change_insns (m_nondebug_changes);
   return true;
 }

 // DEF is the result of calling single_set_info on its instruction.
 // See whether that instruction is a single_set that we can optimize.
 // Return the set if so, otherwise return null.
 rtx
 late_combine::optimizable_set (set_info *def)
 {
   // For simplicity, don't try to handle sets of multiple hard registers.
   // And for correctness, don't remove any assignments to the stack or
   // frame pointers, since that would implicitly change the set of valid
   // memory locations between this assignment and the next.
   //
   // Removing assignments to the hard frame pointer would invalidate
   // backtraces.
   if (!def->is_reg ()
       || def->regno () == STACK_POINTER_REGNUM
       || def->regno () == FRAME_POINTER_REGNUM
       || def->regno () == HARD_FRAME_POINTER_REGNUM)
     return NULL_RTX;

   auto *insn = def->insn ();
   if (!insn->can_be_optimized ()
       || insn->is_asm ()
       || insn->is_call ()
       || insn->has_volatile_refs ()
       || insn->has_pre_post_modify ()
       || !can_move_insn_p (insn))
     return NULL_RTX;

   rtx set = single_set (insn->rtl ());
   if (!set)
     return NULL_RTX;

   // For simplicity, don't try to handle subreg destinations.
   rtx dest = SET_DEST (set);
   if (!REG_P (dest) || REG_NREGS (dest) != 1 || def->regno () != REGNO (dest))
     return NULL_RTX;

   return set;
 }

 // Suppose that we can replace all uses of SET_DEST (SET) with SET_SRC (SET),
 // where SET occurs in INSN.  Return true if doing so is not likely to
 // increase register pressure.
 bool
 late_combine::check_register_pressure (insn_info *insn, rtx set)
 {
   // Plain register-to-register moves do not establish a register class
   // preference and have no well-defined effect on the register allocator.
   // If changes in register class are needed, the register allocator is
   // in the best position to place those changes.  If no change in
   // register class is needed, then the optimization reduces register
   // pressure if SET_SRC (set) was already live at uses, otherwise the
   // optimization is pressure-neutral.
   rtx src = SET_SRC (set);
   if (REG_P (src))
     return true;

   // On the same basis, substituting a SET_SRC that contains a single
   // pseudo register either reduces pressure or is pressure-neutral,
   // subject to the constraints below.  We would need to do more
   // analysis for SET_SRCs that use more than one pseudo register.
   unsigned int nregs = 0;
   for (auto *use : insn->uses ())
     if (use->is_reg ()
 	&& !HARD_REGISTER_NUM_P (use->regno ())
 	&& !use->only_occurs_in_notes ())
       if (++nregs > 1)
 	return false;

   // If there are no pseudo registers in SET_SRC then the optimization
   // should improve register pressure.
   if (nregs == 0)
     return true;

   // We'd be substituting (set (reg R1) SRC) where SRC is known to
   // contain a single pseudo register R2.  Assume for simplicity that
   // each new use of R2 would need to be in the same class C as the
   // current use of R2.  If, for a realistic allocation, C is a
   // non-strict superset of the R1's register class, the effect on
   // register pressure should be positive or neutral.  If instead
   // R1 occupies a different register class from R2, or if R1 has
   // more allocation freedom than R2, then there's a higher risk that
   // the effect on register pressure could be negative.
   //
   // First use constrain_operands to get the most likely choice of
   // alternative.  For simplicity, just handle the case where the
   // output operand is operand 0.
   extract_insn (insn->rtl ());
   rtx dest = SET_DEST (set);
   if (recog_data.n_operands == 0
       || recog_data.operand[0] != dest)
     return false;

   if (!constrain_operands (0, get_enabled_alternatives (insn->rtl ())))
     return false;

   preprocess_constraints (insn->rtl ());
   auto *alt = which_op_alt ();
   auto dest_class = alt[0].cl;

   // Check operands 1 and above.
   auto check_src = [&] (unsigned int i)
     {
       if (recog_data.is_operator[i])
 	return true;

       rtx op = recog_data.operand[i];
       if (CONSTANT_P (op))
 	return true;

       if (SUBREG_P (op))
 	op = SUBREG_REG (op);
       if (REG_P (op))
 	{
 	  // Ignore hard registers.  We've already rejected uses of non-fixed
 	  // hard registers in the SET_SRC.
 	  if (HARD_REGISTER_P (op))
 	    return true;

 	  // Make sure that the source operand's class is at least as
 	  // permissive as the destination operand's class.
 	  auto src_class = alternative_class (alt, i);
 	  if (dest_class != src_class)
 	    {
 	      auto extra_dest_regs = (reg_class_contents[dest_class]
 				      & ~reg_class_contents[src_class]
 				      & ~fixed_reg_set);
 	      if (!hard_reg_set_empty_p (extra_dest_regs))
 		return false;
 	    }

 	  // Make sure that the source operand occupies no more hard
 	  // registers than the destination operand.  This mostly matters
 	  // for subregs.
 	  if (targetm.class_max_nregs (dest_class, GET_MODE (dest))
 	      < targetm.class_max_nregs (src_class, GET_MODE (op)))
 	    return false;

 	  return true;
 	}
       return false;
     };
   for (int i = 1; i < recog_data.n_operands; ++i)
     if (recog_data.operand_type[i] != OP_OUT && !check_src (i))
       return false;

   return true;
 }

 // Check uses of DEF to see whether there is anything obvious that
 // prevents the substitution of SET into uses of DEF.
 bool
 late_combine::check_uses (set_info *def, rtx set)
 {
   use_info *prev_use = nullptr;
   for (use_info *use : def->nondebug_insn_uses ())
     {
       insn_info *use_insn = use->insn ();

       if (use->is_live_out_use ())
 	continue;
       if (use->only_occurs_in_notes ())
 	continue;

       // We cannot replace all uses if the value is live on exit.
       if (use->is_artificial ())
 	return false;

       // Avoid increasing the complexity of instructions that
       // reference allocatable hard registers.
       if (!REG_P (SET_SRC (set))
 	  && !reload_completed
 	  && (accesses_include_nonfixed_hard_registers (use_insn->uses ())
 	      || accesses_include_nonfixed_hard_registers (use_insn->defs ())))
 	return false;

       // Don't substitute into a non-local goto, since it can then be
       // treated as a jump to local label, e.g. in shorten_branches.
       // ??? But this shouldn't be necessary.
       if (use_insn->is_jump ()
 	  && find_reg_note (use_insn->rtl (), REG_NON_LOCAL_GOTO, NULL_RTX))
 	return false;

       // Reject cases where one of the uses is a function argument.
       // The combine attempt should fail anyway, but this is a common
       // case that is easy to check early.
       if (use_insn->is_call ()
 	  && HARD_REGISTER_P (SET_DEST (set))
 	  && find_reg_fusage (use_insn->rtl (), USE, SET_DEST (set)))
 	return false;

       // We'll keep the uses in their original order, even if we move
       // them relative to other instructions.  Make sure that non-final
       // uses do not change any values that occur in the SET_SRC.
       if (prev_use && prev_use->ebb () == use->ebb ())
 	{
 	  def_info *ultimate_def = look_through_degenerate_phi (def);
 	  if (insn_clobbers_resources (prev_use->insn (),
 				       ultimate_def->insn ()->uses ()))
 	    return false;
 	}

       prev_use = use;
     }

   for (use_info *use : def->phi_uses ())
     if (!use->phi ()->is_degenerate ()
 	|| !check_uses (use->phi (), set))
       return false;

   return true;
 }

 // Try to remove DEF's instruction by substituting DEF into all uses.
 // SET is the rtx set associated with DEF.  If the optimization moves any
 // instructions before CURSOR, add those instructions to the end of m_worklist.
 bool
 late_combine::combine_into_uses (set_info *def, rtx set, insn_info *cursor)
 {
   auto *insn = def->insn ();

   // Don't prolong the live ranges of allocatable hard registers, or put
   // them into more complicated instructions.  Failing to prevent this
   // could lead to spill failures, or at least to worst register allocation.
   if (!reload_completed
       && accesses_include_nonfixed_hard_registers (insn->uses ()))
     return false;

   if (!reload_completed && !check_register_pressure (insn, set))
     return false;

   if (!check_uses (def, set))
     return false;

   insn_combination combination (def, SET_DEST (set), SET_SRC (set));
   if (!combination.run ())
     return false;

   for (auto *use_change : combination.use_changes ())
     if (*use_change->insn () < *cursor)
       m_worklist.safe_push (use_change->insn ());
     else
       break;
   return true;
 }

 // CC_SET is a single_set that sets (only) CC_DEF; OTHER_SET is likewise
 // a single_set that sets (only) OTHER_DEF.  CC_SET is known to set a
 // condition-code register and the instruction that contains CC_SET is
 // known to use OTHER_DEF.  Try to do CC_SET and OTHER_SET in parallel.
 bool
 late_combine::parallelize_insns (set_info *cc_def, rtx cc_set,
 				 set_info *other_def, rtx other_set)
 {
   auto attempt = crtl->ssa->new_change_attempt ();

   insn_info *cc_insn = cc_def->insn ();
   insn_info *other_insn = other_def->insn ();
   if (dump_file && (dump_flags & TDF_DETAILS))
     fprintf (dump_file, "trying to parallelize insn %d and insn %d\n",
 	     other_insn->uid (), cc_insn->uid ());

   // Try to substitute OTHER_SET into CC_INSN.
   insn_change_watermark rtl_watermark;
   rtx_insn *cc_rtl = cc_insn->rtl ();
   insn_propagation prop (cc_rtl, SET_DEST (other_set), SET_SRC (other_set));
   if (!prop.apply_to_pattern (&PATTERN (cc_rtl))
       || prop.num_replacements == 0)
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "-- failed to substitute all uses of r%d\n",
 		 other_def->regno ());
       return false;
     }

   // Restrict the uses to those outside notes.
   use_array cc_uses = remove_note_accesses (attempt, cc_insn->uses ());
   use_array other_set_uses = remove_note_accesses (attempt,
 						   other_insn->uses ());

   // Remove the use of the substituted value.
   cc_uses = remove_uses_of_def (attempt, cc_uses, other_def);

   // Get the list of uses for the new instruction.
   insn_change cc_change (cc_insn);
   cc_change.new_uses = merge_access_arrays (attempt, other_set_uses, cc_uses);
   if (!cc_change.new_uses.is_valid ())
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "-- cannot merge uses\n");
       return false;
     }

   // The instruction initially defines just two registers.  recog can add
   // extra clobbers if necessary.
   auto_vec<access_info *, 2> new_defs;
   new_defs.quick_push (cc_def);
   new_defs.quick_push (other_def);
   sort_accesses (new_defs);
   cc_change.new_defs = def_array (access_array (new_defs));

   // Make sure there is somewhere that the new instruction could live.
   auto other_change = insn_change::delete_insn (other_insn);
   insn_change *changes[] = { &other_change, &cc_change };
   cc_change.move_range = cc_insn->ebb ()->insn_range ();
   if (!restrict_movement (cc_change, ignore_changing_insns (changes)))
     {
       if (dump_file && (dump_flags & TDF_DETAILS))
 	fprintf (dump_file, "-- cannot satisfy all definitions and uses\n");
       return false;
     }

   // Tentatively install the new pattern.  By convention, the CC set
   // must be first.
   if (m_parallel)
     {
       XVECEXP (m_parallel, 0, 0) = cc_set;
       XVECEXP (m_parallel, 0, 1) = other_set;
     }
   else
     {
       rtvec vec = gen_rtvec (2, cc_set, other_set);
       m_parallel = gen_rtx_PARALLEL (VOIDmode, vec);
     }
   validate_change (cc_rtl, &PATTERN (cc_rtl), m_parallel, 1);

   // These routines report failures themselves.
   if (!recog (attempt, cc_change, ignore_changing_insns (changes))
       || !changes_are_worthwhile (changes)
       || !crtl->ssa->verify_insn_changes (changes))
     return false;

   remove_reg_equal_equiv_notes (cc_rtl);
   confirm_change_group ();
   crtl->ssa->change_insns (changes);
   m_parallel = NULL_RTX;
   return true;
 }

 // CC_SET is a single_set that sets (only) CC_DEF.  See whether CC_DEF
 // is a definition of a condition-code register and try to optimize it
 // with related instructions if so.  Return true if something changed.
 //
 // This function looks for sequences of the form:
 //
 //    A: (set (reg R1) X1)
 //    B: ...instructions that might change the value of X1...
 //    C: (set (reg CC) X2) // X2 uses R1
 //
 // and tries to change them to:
 //
 //    C': [(set (reg CC) X2')
 //         (set (reg R1) X1)]
 //    B: ...instructions that might change the value of X1...
 //
 // where X2' is the result of replacing R1 with X1 in X2.
 //
 // Combine can handle this optimization if B doesn't exist and if A and
 // C are in the same BB.  This pass instead handles cases where B does
 // exist and cases where A and C are in different BBs of the same EBB.
 bool
 late_combine::combine_cc_setter (set_info *cc_def, rtx cc_set)
 {
   // Check for a set of a CC register.  This isn't needed for correctness;
   // it's just a way of narrowing the search space.  It could be relaxed if
   // there are other situations that would benefit from the same optimization.
   if (!HARD_REGISTER_NUM_P (cc_def->regno ())
       || GET_MODE_CLASS (cc_def->mode()) != MODE_CC)
     return false;

   // Search the registers used by the CC setter for an easily-substitutable
   // def-use chain.
   for (use_info *other_use : cc_def->insn ()->uses ())
     if (auto *other_def = other_use->def ())
       if (other_use->regno () != cc_def->regno ()
 	  && other_def->ebb () == cc_def->ebb ()
 	  && other_def == single_set_info (other_def->insn ()))
 	if (rtx other_set = optimizable_set (other_def))
 	  if (parallelize_insns (cc_def, cc_set, other_def, other_set))
 	    return true;

   return false;
 }

 // Try to optimize INSN in some way.  If the optimization moves any
 // instructions before CURSOR, and if further optimizations might be
 // possible on those instructions, add them to the end of m_worklist.
 bool
 late_combine::combine_insn (insn_info *insn, insn_info *cursor)
 {
   if (set_info *def = single_set_info (insn))
     if (rtx set = optimizable_set (def))
       return (combine_into_uses (def, set, cursor)
 	      || combine_cc_setter (def, set));

   return false;
 }

 // Run the pass on function FN.
 unsigned int
 late_combine::execute (function *fn)
 {
   // Initialization.
   calculate_dominance_info (CDI_DOMINATORS);
   df_analyze ();
   crtl->ssa = new rtl_ssa::function_info (fn);
   // Don't allow memory_operand to match volatile MEMs.
   init_recog_no_volatile ();

   insn_info *insn = *crtl->ssa->nondebug_insns ().begin ();
   while (insn)
     {
       if (!insn->is_artificial ())
 	{
 	  insn_info *prev = insn->prev_nondebug_insn ();
 	  if (combine_insn (insn, prev))
 	    {
 	      // Any instructions that get added to the worklist were
 	      // previously after PREV.  Thus if we were able to move
 	      // an instruction X before PREV during one combination,
 	      // X cannot depend on any instructions that we move before
 	      // PREV during subsequent combinations.  This means that
 	      // the worklist should be free of backwards dependencies,
 	      // even if it isn't necessarily in RPO.
 	      for (unsigned int i = 0; i < m_worklist.length (); ++i)
 		combine_insn (m_worklist[i], prev);
 	      m_worklist.truncate (0);
 	      insn = prev;
 	    }
 	}
       insn = insn->next_nondebug_insn ();
     }

   // Finalization.
   if (crtl->ssa->perform_pending_updates ())
     cleanup_cfg (0);

   delete crtl->ssa;
   crtl->ssa = nullptr;

   // Make the recognizer allow volatile MEMs again.
   init_recog ();
   free_dominance_info (CDI_DOMINATORS);
   return 0;
 }

 class pass_late_combine : public rtl_opt_pass
 {
 public:
   pass_late_combine (gcc::context *ctxt)
     : rtl_opt_pass (pass_data_late_combine, ctxt)
   {}

   // opt_pass methods:
   opt_pass *clone () override { return new pass_late_combine (m_ctxt); }
   bool gate (function *) override;
   unsigned int execute (function *) override;
 };

 bool
 pass_late_combine::gate (function *)
 {
   return optimize > 0 && flag_late_combine_instructions;
 }

 unsigned int
 pass_late_combine::execute (function *fn)
 {
   return late_combine ().execute (fn);
 }

 } // end namespace

 // Create a new CC fusion pass instance.

 rtl_opt_pass *
 make_pass_late_combine (gcc::context *ctxt)
 {
   return new pass_late_combine (ctxt);
 }