| /* Shrink-wrapping related optimizations. |
| Copyright (C) 1987-2015 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* This file handles shrink-wrapping related optimizations. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "rtl-error.h" |
| #include "hash-set.h" |
| #include "machmode.h" |
| #include "vec.h" |
| #include "double-int.h" |
| #include "input.h" |
| #include "alias.h" |
| #include "symtab.h" |
| #include "wide-int.h" |
| #include "inchash.h" |
| #include "tree.h" |
| #include "fold-const.h" |
| #include "stor-layout.h" |
| #include "varasm.h" |
| #include "stringpool.h" |
| #include "flags.h" |
| #include "except.h" |
| #include "hard-reg-set.h" |
| #include "function.h" |
| #include "hashtab.h" |
| #include "rtl.h" |
| #include "statistics.h" |
| #include "real.h" |
| #include "fixed-value.h" |
| #include "insn-config.h" |
| #include "expmed.h" |
| #include "dojump.h" |
| #include "explow.h" |
| #include "calls.h" |
| #include "emit-rtl.h" |
| #include "stmt.h" |
| #include "expr.h" |
| #include "insn-codes.h" |
| #include "optabs.h" |
| #include "libfuncs.h" |
| #include "regs.h" |
| #include "recog.h" |
| #include "output.h" |
| #include "tm_p.h" |
| #include "langhooks.h" |
| #include "target.h" |
| #include "common/common-target.h" |
| #include "gimple-expr.h" |
| #include "gimplify.h" |
| #include "tree-pass.h" |
| #include "predict.h" |
| #include "dominance.h" |
| #include "cfg.h" |
| #include "cfgrtl.h" |
| #include "basic-block.h" |
| #include "df.h" |
| #include "params.h" |
| #include "bb-reorder.h" |
| #include "shrink-wrap.h" |
| #include "regcprop.h" |
| #include "rtl-iter.h" |
| #include "valtrack.h" |
| |
| #ifdef HAVE_simple_return |
| |
| /* Return true if INSN requires the stack frame to be set up. |
| PROLOGUE_USED contains the hard registers used in the function |
| prologue. SET_UP_BY_PROLOGUE is the set of registers we expect the |
| prologue to set up for the function. */ |
| bool |
| requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used, |
| HARD_REG_SET set_up_by_prologue) |
| { |
| df_ref def, use; |
| HARD_REG_SET hardregs; |
| unsigned regno; |
| |
| if (CALL_P (insn)) |
| return !SIBLING_CALL_P (insn); |
| |
| /* We need a frame to get the unique CFA expected by the unwinder. */ |
| if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn)) |
| return true; |
| |
| CLEAR_HARD_REG_SET (hardregs); |
| FOR_EACH_INSN_DEF (def, insn) |
| { |
| rtx dreg = DF_REF_REG (def); |
| |
| if (!REG_P (dreg)) |
| continue; |
| |
| add_to_hard_reg_set (&hardregs, GET_MODE (dreg), |
| REGNO (dreg)); |
| } |
| if (hard_reg_set_intersect_p (hardregs, prologue_used)) |
| return true; |
| AND_COMPL_HARD_REG_SET (hardregs, call_used_reg_set); |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (TEST_HARD_REG_BIT (hardregs, regno) |
| && df_regs_ever_live_p (regno)) |
| return true; |
| |
| FOR_EACH_INSN_USE (use, insn) |
| { |
| rtx reg = DF_REF_REG (use); |
| |
| if (!REG_P (reg)) |
| continue; |
| |
| add_to_hard_reg_set (&hardregs, GET_MODE (reg), |
| REGNO (reg)); |
| } |
| if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue)) |
| return true; |
| |
| return false; |
| } |
| |
| /* See whether there has a single live edge from BB, which dest uses |
| [REGNO, END_REGNO). Return the live edge if its dest bb has |
| one or two predecessors. Otherwise return NULL. */ |
| |
| static edge |
| live_edge_for_reg (basic_block bb, int regno, int end_regno) |
| { |
| edge e, live_edge; |
| edge_iterator ei; |
| bitmap live; |
| int i; |
| |
| live_edge = NULL; |
| FOR_EACH_EDGE (e, ei, bb->succs) |
| { |
| live = df_get_live_in (e->dest); |
| for (i = regno; i < end_regno; i++) |
| if (REGNO_REG_SET_P (live, i)) |
| { |
| if (live_edge && live_edge != e) |
| return NULL; |
| live_edge = e; |
| } |
| } |
| |
| /* We can sometimes encounter dead code. Don't try to move it |
| into the exit block. */ |
| if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
| return NULL; |
| |
| /* Reject targets of abnormal edges. This is needed for correctness |
| on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on |
| exception edges even though it is generally treated as call-saved |
| for the majority of the compilation. Moving across abnormal edges |
| isn't going to be interesting for shrink-wrap usage anyway. */ |
| if (live_edge->flags & EDGE_ABNORMAL) |
| return NULL; |
| |
| /* When live_edge->dest->preds == 2, we can create a new block on |
| the edge to make it meet the requirement. */ |
| if (EDGE_COUNT (live_edge->dest->preds) > 2) |
| return NULL; |
| |
| return live_edge; |
| } |
| |
| /* Try to move INSN from BB to a successor. Return true on success. |
| USES and DEFS are the set of registers that are used and defined |
| after INSN in BB. SPLIT_P indicates whether a live edge from BB |
| is splitted or not. */ |
| |
| static bool |
| move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn, |
| const HARD_REG_SET uses, |
| const HARD_REG_SET defs, |
| bool *split_p, |
| struct dead_debug_local *debug) |
| { |
| rtx set, src, dest; |
| bitmap live_out, live_in, bb_uses, bb_defs; |
| unsigned int i, dregno, end_dregno; |
| unsigned int sregno = FIRST_PSEUDO_REGISTER; |
| unsigned int end_sregno = FIRST_PSEUDO_REGISTER; |
| basic_block next_block; |
| edge live_edge; |
| rtx_insn *dinsn; |
| df_ref def; |
| |
| /* Look for a simple register assignment. We don't use single_set here |
| because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary |
| destinations. */ |
| if (!INSN_P (insn)) |
| return false; |
| set = PATTERN (insn); |
| if (GET_CODE (set) != SET) |
| return false; |
| src = SET_SRC (set); |
| dest = SET_DEST (set); |
| |
| /* For the destination, we want only a register. Also disallow STACK |
| or FRAME related adjustments. They are likely part of the prologue, |
| so keep them in the entry block. */ |
| if (!REG_P (dest) |
| || dest == stack_pointer_rtx |
| || dest == frame_pointer_rtx |
| || dest == hard_frame_pointer_rtx) |
| return false; |
| |
| /* For the source, we want one of: |
| (1) A (non-overlapping) register |
| (2) A constant, |
| (3) An expression involving no more than one register. |
| |
| That last point comes from the code following, which was originally |
| written to handle only register move operations, and still only handles |
| a single source register when checking for overlaps. Happily, the |
| same checks can be applied to expressions like (plus reg const). */ |
| |
| if (CONSTANT_P (src)) |
| ; |
| else if (!REG_P (src)) |
| { |
| rtx src_inner = NULL_RTX; |
| |
| if (can_throw_internal (insn)) |
| return false; |
| |
| subrtx_var_iterator::array_type array; |
| FOR_EACH_SUBRTX_VAR (iter, array, src, ALL) |
| { |
| rtx x = *iter; |
| switch (GET_RTX_CLASS (GET_CODE (x))) |
| { |
| case RTX_CONST_OBJ: |
| case RTX_COMPARE: |
| case RTX_COMM_COMPARE: |
| case RTX_BIN_ARITH: |
| case RTX_COMM_ARITH: |
| case RTX_UNARY: |
| case RTX_TERNARY: |
| /* Constant or expression. Continue. */ |
| break; |
| |
| case RTX_OBJ: |
| case RTX_EXTRA: |
| switch (GET_CODE (x)) |
| { |
| case UNSPEC: |
| case SUBREG: |
| case STRICT_LOW_PART: |
| case PC: |
| case LO_SUM: |
| /* Ok. Continue. */ |
| break; |
| |
| case REG: |
| /* Fail if we see a second inner register. */ |
| if (src_inner != NULL) |
| return false; |
| src_inner = x; |
| break; |
| |
| default: |
| return false; |
| } |
| break; |
| |
| default: |
| return false; |
| } |
| } |
| |
| if (src_inner != NULL) |
| src = src_inner; |
| } |
| |
| /* Make sure that the source register isn't defined later in BB. */ |
| if (REG_P (src)) |
| { |
| sregno = REGNO (src); |
| end_sregno = END_REGNO (src); |
| if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno)) |
| return false; |
| } |
| |
| /* Make sure that the destination register isn't referenced later in BB. */ |
| dregno = REGNO (dest); |
| end_dregno = END_REGNO (dest); |
| if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno) |
| || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno)) |
| return false; |
| |
| /* See whether there is a successor block to which we could move INSN. */ |
| live_edge = live_edge_for_reg (bb, dregno, end_dregno); |
| if (!live_edge) |
| return false; |
| |
| next_block = live_edge->dest; |
| /* Create a new basic block on the edge. */ |
| if (EDGE_COUNT (next_block->preds) == 2) |
| { |
| /* split_edge for a block with only one successor is meaningless. */ |
| if (EDGE_COUNT (bb->succs) == 1) |
| return false; |
| |
| /* If DF_LIVE doesn't exist, i.e. at -O1, just give up. */ |
| if (!df_live) |
| return false; |
| |
| basic_block old_dest = live_edge->dest; |
| next_block = split_edge (live_edge); |
| |
| /* We create a new basic block. Call df_grow_bb_info to make sure |
| all data structures are allocated. */ |
| df_grow_bb_info (df_live); |
| |
| bitmap_and (df_get_live_in (next_block), df_get_live_out (bb), |
| df_get_live_in (old_dest)); |
| df_set_bb_dirty (next_block); |
| |
| /* We should not split more than once for a function. */ |
| if (*split_p) |
| return false; |
| |
| *split_p = true; |
| } |
| |
| /* At this point we are committed to moving INSN, but let's try to |
| move it as far as we can. */ |
| do |
| { |
| if (MAY_HAVE_DEBUG_INSNS) |
| { |
| FOR_BB_INSNS_REVERSE (bb, dinsn) |
| if (DEBUG_INSN_P (dinsn)) |
| { |
| df_ref use; |
| FOR_EACH_INSN_USE (use, dinsn) |
| if (refers_to_regno_p (dregno, end_dregno, |
| DF_REF_REG (use), (rtx *) NULL)) |
| dead_debug_add (debug, use, DF_REF_REGNO (use)); |
| } |
| else if (dinsn == insn) |
| break; |
| } |
| live_out = df_get_live_out (bb); |
| live_in = df_get_live_in (next_block); |
| bb = next_block; |
| |
| /* Check whether BB uses DEST or clobbers DEST. We need to add |
| INSN to BB if so. Either way, DEST is no longer live on entry, |
| except for any part that overlaps SRC (next loop). */ |
| bb_uses = &DF_LR_BB_INFO (bb)->use; |
| bb_defs = &DF_LR_BB_INFO (bb)->def; |
| if (df_live) |
| { |
| for (i = dregno; i < end_dregno; i++) |
| { |
| if (*split_p |
| || REGNO_REG_SET_P (bb_uses, i) |
| || REGNO_REG_SET_P (bb_defs, i) |
| || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i)) |
| next_block = NULL; |
| CLEAR_REGNO_REG_SET (live_out, i); |
| CLEAR_REGNO_REG_SET (live_in, i); |
| } |
| |
| /* Check whether BB clobbers SRC. We need to add INSN to BB if so. |
| Either way, SRC is now live on entry. */ |
| for (i = sregno; i < end_sregno; i++) |
| { |
| if (*split_p |
| || REGNO_REG_SET_P (bb_defs, i) |
| || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i)) |
| next_block = NULL; |
| SET_REGNO_REG_SET (live_out, i); |
| SET_REGNO_REG_SET (live_in, i); |
| } |
| } |
| else |
| { |
| /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and |
| DF_REF_CONDITIONAL defs. So if DF_LIVE doesn't exist, i.e. |
| at -O1, just give up searching NEXT_BLOCK. */ |
| next_block = NULL; |
| for (i = dregno; i < end_dregno; i++) |
| { |
| CLEAR_REGNO_REG_SET (live_out, i); |
| CLEAR_REGNO_REG_SET (live_in, i); |
| } |
| |
| for (i = sregno; i < end_sregno; i++) |
| { |
| SET_REGNO_REG_SET (live_out, i); |
| SET_REGNO_REG_SET (live_in, i); |
| } |
| } |
| |
| /* If we don't need to add the move to BB, look for a single |
| successor block. */ |
| if (next_block) |
| { |
| live_edge = live_edge_for_reg (next_block, dregno, end_dregno); |
| if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1) |
| break; |
| next_block = live_edge->dest; |
| } |
| } |
| while (next_block); |
| |
| /* For the new created basic block, there is no dataflow info at all. |
| So skip the following dataflow update and check. */ |
| if (!(*split_p)) |
| { |
| /* BB now defines DEST. It only uses the parts of DEST that overlap SRC |
| (next loop). */ |
| for (i = dregno; i < end_dregno; i++) |
| { |
| CLEAR_REGNO_REG_SET (bb_uses, i); |
| SET_REGNO_REG_SET (bb_defs, i); |
| } |
| |
| /* BB now uses SRC. */ |
| for (i = sregno; i < end_sregno; i++) |
| SET_REGNO_REG_SET (bb_uses, i); |
| } |
| |
| /* Insert debug temps for dead REGs used in subsequent debug insns. */ |
| if (debug->used && !bitmap_empty_p (debug->used)) |
| FOR_EACH_INSN_DEF (def, insn) |
| dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn, |
| DEBUG_TEMP_BEFORE_WITH_VALUE); |
| |
| emit_insn_after (PATTERN (insn), bb_note (bb)); |
| delete_insn (insn); |
| return true; |
| } |
| |
| /* Look for register copies in the first block of the function, and move |
| them down into successor blocks if the register is used only on one |
| path. This exposes more opportunities for shrink-wrapping. These |
| kinds of sets often occur when incoming argument registers are moved |
| to call-saved registers because their values are live across one or |
| more calls during the function. */ |
| |
| void |
| prepare_shrink_wrap (basic_block entry_block) |
| { |
| rtx_insn *insn, *curr; |
| rtx x; |
| HARD_REG_SET uses, defs; |
| df_ref def, use; |
| bool split_p = false; |
| unsigned int i; |
| struct dead_debug_local debug; |
| |
| if (JUMP_P (BB_END (entry_block))) |
| { |
| /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries |
| to sink the copies from parameter to callee saved register out of |
| entry block. copyprop_hardreg_forward_bb_without_debug_insn is called |
| to release some dependences. */ |
| copyprop_hardreg_forward_bb_without_debug_insn (entry_block); |
| } |
| |
| dead_debug_local_init (&debug, NULL, NULL); |
| CLEAR_HARD_REG_SET (uses); |
| CLEAR_HARD_REG_SET (defs); |
| |
| FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr) |
| if (NONDEBUG_INSN_P (insn) |
| && !move_insn_for_shrink_wrap (entry_block, insn, uses, defs, |
| &split_p, &debug)) |
| { |
| /* Add all defined registers to DEFs. */ |
| FOR_EACH_INSN_DEF (def, insn) |
| { |
| x = DF_REF_REG (def); |
| if (REG_P (x) && HARD_REGISTER_P (x)) |
| for (i = REGNO (x); i < END_REGNO (x); i++) |
| SET_HARD_REG_BIT (defs, i); |
| } |
| |
| /* Add all used registers to USESs. */ |
| FOR_EACH_INSN_USE (use, insn) |
| { |
| x = DF_REF_REG (use); |
| if (REG_P (x) && HARD_REGISTER_P (x)) |
| for (i = REGNO (x); i < END_REGNO (x); i++) |
| SET_HARD_REG_BIT (uses, i); |
| } |
| } |
| |
| dead_debug_local_finish (&debug, NULL); |
| } |
| |
| /* Create a copy of BB instructions and insert at BEFORE. Redirect |
| preds of BB to COPY_BB if they don't appear in NEED_PROLOGUE. */ |
| void |
| dup_block_and_redirect (basic_block bb, basic_block copy_bb, rtx_insn *before, |
| bitmap_head *need_prologue) |
| { |
| edge_iterator ei; |
| edge e; |
| rtx_insn *insn = BB_END (bb); |
| |
| /* We know BB has a single successor, so there is no need to copy a |
| simple jump at the end of BB. */ |
| if (simplejump_p (insn)) |
| insn = PREV_INSN (insn); |
| |
| start_sequence (); |
| duplicate_insn_chain (BB_HEAD (bb), insn); |
| if (dump_file) |
| { |
| unsigned count = 0; |
| for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) |
| if (active_insn_p (insn)) |
| ++count; |
| fprintf (dump_file, "Duplicating bb %d to bb %d, %u active insns.\n", |
| bb->index, copy_bb->index, count); |
| } |
| insn = get_insns (); |
| end_sequence (); |
| emit_insn_before (insn, before); |
| |
| /* Redirect all the paths that need no prologue into copy_bb. */ |
| for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));) |
| if (!bitmap_bit_p (need_prologue, e->src->index)) |
| { |
| int freq = EDGE_FREQUENCY (e); |
| copy_bb->count += e->count; |
| copy_bb->frequency += EDGE_FREQUENCY (e); |
| e->dest->count -= e->count; |
| if (e->dest->count < 0) |
| e->dest->count = 0; |
| e->dest->frequency -= freq; |
| if (e->dest->frequency < 0) |
| e->dest->frequency = 0; |
| redirect_edge_and_branch_force (e, copy_bb); |
| continue; |
| } |
| else |
| ei_next (&ei); |
| } |
| |
| |
| /* Try to perform a kind of shrink-wrapping, making sure the |
| prologue/epilogue is emitted only around those parts of the |
| function that require it. */ |
| |
| void |
| try_shrink_wrapping (edge *entry_edge, edge orig_entry_edge, |
| bitmap_head *bb_flags, rtx_insn *prologue_seq) |
| { |
| edge e; |
| edge_iterator ei; |
| bool nonempty_prologue = false; |
| unsigned max_grow_size; |
| rtx_insn *seq; |
| |
| for (seq = prologue_seq; seq; seq = NEXT_INSN (seq)) |
| if (!NOTE_P (seq) || NOTE_KIND (seq) != NOTE_INSN_PROLOGUE_END) |
| { |
| nonempty_prologue = true; |
| break; |
| } |
| |
| if (flag_shrink_wrap && HAVE_simple_return |
| && (targetm.profile_before_prologue () || !crtl->profile) |
| && nonempty_prologue && !crtl->calls_eh_return) |
| { |
| HARD_REG_SET prologue_clobbered, prologue_used, live_on_edge; |
| struct hard_reg_set_container set_up_by_prologue; |
| rtx_insn *p_insn; |
| vec<basic_block> vec; |
| basic_block bb; |
| bitmap_head bb_antic_flags; |
| bitmap_head bb_on_list; |
| bitmap_head bb_tail; |
| |
| if (dump_file) |
| fprintf (dump_file, "Attempting shrink-wrapping optimization.\n"); |
| |
| /* Compute the registers set and used in the prologue. */ |
| CLEAR_HARD_REG_SET (prologue_clobbered); |
| CLEAR_HARD_REG_SET (prologue_used); |
| for (p_insn = prologue_seq; p_insn; p_insn = NEXT_INSN (p_insn)) |
| { |
| HARD_REG_SET this_used; |
| if (!NONDEBUG_INSN_P (p_insn)) |
| continue; |
| |
| CLEAR_HARD_REG_SET (this_used); |
| note_uses (&PATTERN (p_insn), record_hard_reg_uses, |
| &this_used); |
| AND_COMPL_HARD_REG_SET (this_used, prologue_clobbered); |
| IOR_HARD_REG_SET (prologue_used, this_used); |
| note_stores (PATTERN (p_insn), record_hard_reg_sets, |
| &prologue_clobbered); |
| } |
| |
| prepare_shrink_wrap ((*entry_edge)->dest); |
| |
| bitmap_initialize (&bb_antic_flags, &bitmap_default_obstack); |
| bitmap_initialize (&bb_on_list, &bitmap_default_obstack); |
| bitmap_initialize (&bb_tail, &bitmap_default_obstack); |
| |
| /* Find the set of basic blocks that require a stack frame, |
| and blocks that are too big to be duplicated. */ |
| |
| vec.create (n_basic_blocks_for_fn (cfun)); |
| |
| CLEAR_HARD_REG_SET (set_up_by_prologue.set); |
| add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, |
| STACK_POINTER_REGNUM); |
| add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM); |
| if (frame_pointer_needed) |
| add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, |
| HARD_FRAME_POINTER_REGNUM); |
| if (pic_offset_table_rtx |
| && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM) |
| add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, |
| PIC_OFFSET_TABLE_REGNUM); |
| if (crtl->drap_reg) |
| add_to_hard_reg_set (&set_up_by_prologue.set, |
| GET_MODE (crtl->drap_reg), |
| REGNO (crtl->drap_reg)); |
| if (targetm.set_up_by_prologue) |
| targetm.set_up_by_prologue (&set_up_by_prologue); |
| |
| /* We don't use a different max size depending on |
| optimize_bb_for_speed_p because increasing shrink-wrapping |
| opportunities by duplicating tail blocks can actually result |
| in an overall decrease in code size. */ |
| max_grow_size = get_uncond_jump_length (); |
| max_grow_size *= PARAM_VALUE (PARAM_MAX_GROW_COPY_BB_INSNS); |
| |
| FOR_EACH_BB_FN (bb, cfun) |
| { |
| rtx_insn *insn; |
| unsigned size = 0; |
| |
| FOR_BB_INSNS (bb, insn) |
| if (NONDEBUG_INSN_P (insn)) |
| { |
| if (requires_stack_frame_p (insn, prologue_used, |
| set_up_by_prologue.set)) |
| { |
| if (bb == (*entry_edge)->dest) |
| goto fail_shrinkwrap; |
| bitmap_set_bit (bb_flags, bb->index); |
| vec.quick_push (bb); |
| break; |
| } |
| else if (size <= max_grow_size) |
| { |
| size += get_attr_min_length (insn); |
| if (size > max_grow_size) |
| bitmap_set_bit (&bb_on_list, bb->index); |
| } |
| } |
| } |
| |
| /* Blocks that really need a prologue, or are too big for tails. */ |
| bitmap_ior_into (&bb_on_list, bb_flags); |
| |
| /* For every basic block that needs a prologue, mark all blocks |
| reachable from it, so as to ensure they are also seen as |
| requiring a prologue. */ |
| while (!vec.is_empty ()) |
| { |
| basic_block tmp_bb = vec.pop (); |
| |
| FOR_EACH_EDGE (e, ei, tmp_bb->succs) |
| if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) |
| && bitmap_set_bit (bb_flags, e->dest->index)) |
| vec.quick_push (e->dest); |
| } |
| |
| /* Find the set of basic blocks that need no prologue, have a |
| single successor, can be duplicated, meet a max size |
| requirement, and go to the exit via like blocks. */ |
| vec.quick_push (EXIT_BLOCK_PTR_FOR_FN (cfun)); |
| while (!vec.is_empty ()) |
| { |
| basic_block tmp_bb = vec.pop (); |
| |
| FOR_EACH_EDGE (e, ei, tmp_bb->preds) |
| if (single_succ_p (e->src) |
| && !bitmap_bit_p (&bb_on_list, e->src->index) |
| && can_duplicate_block_p (e->src)) |
| { |
| edge pe; |
| edge_iterator pei; |
| |
| /* If there is predecessor of e->src which doesn't |
| need prologue and the edge is complex, |
| we might not be able to redirect the branch |
| to a copy of e->src. */ |
| FOR_EACH_EDGE (pe, pei, e->src->preds) |
| if ((pe->flags & EDGE_COMPLEX) != 0 |
| && !bitmap_bit_p (bb_flags, pe->src->index)) |
| break; |
| if (pe == NULL && bitmap_set_bit (&bb_tail, e->src->index)) |
| vec.quick_push (e->src); |
| } |
| } |
| |
| /* Now walk backwards from every block that is marked as needing |
| a prologue to compute the bb_antic_flags bitmap. Exclude |
| tail blocks; They can be duplicated to be used on paths not |
| needing a prologue. */ |
| bitmap_clear (&bb_on_list); |
| bitmap_and_compl (&bb_antic_flags, bb_flags, &bb_tail); |
| FOR_EACH_BB_FN (bb, cfun) |
| { |
| if (!bitmap_bit_p (&bb_antic_flags, bb->index)) |
| continue; |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| if (!bitmap_bit_p (&bb_antic_flags, e->src->index) |
| && bitmap_set_bit (&bb_on_list, e->src->index)) |
| vec.quick_push (e->src); |
| } |
| while (!vec.is_empty ()) |
| { |
| basic_block tmp_bb = vec.pop (); |
| bool all_set = true; |
| |
| bitmap_clear_bit (&bb_on_list, tmp_bb->index); |
| FOR_EACH_EDGE (e, ei, tmp_bb->succs) |
| if (!bitmap_bit_p (&bb_antic_flags, e->dest->index)) |
| { |
| all_set = false; |
| break; |
| } |
| |
| if (all_set) |
| { |
| bitmap_set_bit (&bb_antic_flags, tmp_bb->index); |
| FOR_EACH_EDGE (e, ei, tmp_bb->preds) |
| if (!bitmap_bit_p (&bb_antic_flags, e->src->index) |
| && bitmap_set_bit (&bb_on_list, e->src->index)) |
| vec.quick_push (e->src); |
| } |
| } |
| /* Find exactly one edge that leads to a block in ANTIC from |
| a block that isn't. */ |
| if (!bitmap_bit_p (&bb_antic_flags, (*entry_edge)->dest->index)) |
| FOR_EACH_BB_FN (bb, cfun) |
| { |
| if (!bitmap_bit_p (&bb_antic_flags, bb->index)) |
| continue; |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| if (!bitmap_bit_p (&bb_antic_flags, e->src->index)) |
| { |
| if (*entry_edge != orig_entry_edge) |
| { |
| *entry_edge = orig_entry_edge; |
| if (dump_file) |
| fprintf (dump_file, "More than one candidate edge.\n"); |
| goto fail_shrinkwrap; |
| } |
| if (dump_file) |
| fprintf (dump_file, "Found candidate edge for " |
| "shrink-wrapping, %d->%d.\n", e->src->index, |
| e->dest->index); |
| *entry_edge = e; |
| } |
| } |
| |
| if (*entry_edge != orig_entry_edge) |
| { |
| /* Test whether the prologue is known to clobber any register |
| (other than FP or SP) which are live on the edge. */ |
| CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM); |
| if (frame_pointer_needed) |
| CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM); |
| REG_SET_TO_HARD_REG_SET (live_on_edge, |
| df_get_live_in ((*entry_edge)->dest)); |
| if (hard_reg_set_intersect_p (live_on_edge, prologue_clobbered)) |
| { |
| *entry_edge = orig_entry_edge; |
| if (dump_file) |
| fprintf (dump_file, |
| "Shrink-wrapping aborted due to clobber.\n"); |
| } |
| } |
| if (*entry_edge != orig_entry_edge) |
| { |
| crtl->shrink_wrapped = true; |
| if (dump_file) |
| fprintf (dump_file, "Performing shrink-wrapping.\n"); |
| |
| /* Find tail blocks reachable from both blocks needing a |
| prologue and blocks not needing a prologue. */ |
| if (!bitmap_empty_p (&bb_tail)) |
| FOR_EACH_BB_FN (bb, cfun) |
| { |
| bool some_pro, some_no_pro; |
| if (!bitmap_bit_p (&bb_tail, bb->index)) |
| continue; |
| some_pro = some_no_pro = false; |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| { |
| if (bitmap_bit_p (bb_flags, e->src->index)) |
| some_pro = true; |
| else |
| some_no_pro = true; |
| } |
| if (some_pro && some_no_pro) |
| vec.quick_push (bb); |
| else |
| bitmap_clear_bit (&bb_tail, bb->index); |
| } |
| /* Find the head of each tail. */ |
| while (!vec.is_empty ()) |
| { |
| basic_block tbb = vec.pop (); |
| |
| if (!bitmap_bit_p (&bb_tail, tbb->index)) |
| continue; |
| |
| while (single_succ_p (tbb)) |
| { |
| tbb = single_succ (tbb); |
| bitmap_clear_bit (&bb_tail, tbb->index); |
| } |
| } |
| /* Now duplicate the tails. */ |
| if (!bitmap_empty_p (&bb_tail)) |
| FOR_EACH_BB_REVERSE_FN (bb, cfun) |
| { |
| basic_block copy_bb, tbb; |
| rtx_insn *insert_point; |
| int eflags; |
| |
| if (!bitmap_clear_bit (&bb_tail, bb->index)) |
| continue; |
| |
| /* Create a copy of BB, instructions and all, for |
| use on paths that don't need a prologue. |
| Ideal placement of the copy is on a fall-thru edge |
| or after a block that would jump to the copy. */ |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| if (!bitmap_bit_p (bb_flags, e->src->index) |
| && single_succ_p (e->src)) |
| break; |
| if (e) |
| { |
| /* Make sure we insert after any barriers. */ |
| rtx_insn *end = get_last_bb_insn (e->src); |
| copy_bb = create_basic_block (NEXT_INSN (end), |
| NULL_RTX, e->src); |
| BB_COPY_PARTITION (copy_bb, e->src); |
| } |
| else |
| { |
| /* Otherwise put the copy at the end of the function. */ |
| copy_bb = create_basic_block (NULL_RTX, NULL_RTX, |
| EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb); |
| BB_COPY_PARTITION (copy_bb, bb); |
| } |
| |
| insert_point = emit_note_after (NOTE_INSN_DELETED, |
| BB_END (copy_bb)); |
| emit_barrier_after (BB_END (copy_bb)); |
| |
| tbb = bb; |
| while (1) |
| { |
| dup_block_and_redirect (tbb, copy_bb, insert_point, |
| bb_flags); |
| tbb = single_succ (tbb); |
| if (tbb == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
| break; |
| e = split_block (copy_bb, PREV_INSN (insert_point)); |
| copy_bb = e->dest; |
| } |
| |
| /* Quiet verify_flow_info by (ab)using EDGE_FAKE. |
| We have yet to add a simple_return to the tails, |
| as we'd like to first convert_jumps_to_returns in |
| case the block is no longer used after that. */ |
| eflags = EDGE_FAKE; |
| if (CALL_P (PREV_INSN (insert_point)) |
| && SIBLING_CALL_P (PREV_INSN (insert_point))) |
| eflags = EDGE_SIBCALL | EDGE_ABNORMAL; |
| make_single_succ_edge (copy_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), |
| eflags); |
| |
| /* verify_flow_info doesn't like a note after a |
| sibling call. */ |
| delete_insn (insert_point); |
| if (bitmap_empty_p (&bb_tail)) |
| break; |
| } |
| } |
| |
| fail_shrinkwrap: |
| bitmap_clear (&bb_tail); |
| bitmap_clear (&bb_antic_flags); |
| bitmap_clear (&bb_on_list); |
| vec.release (); |
| } |
| } |
| |
| /* If we're allowed to generate a simple return instruction, then by |
| definition we don't need a full epilogue. If the last basic |
| block before the exit block does not contain active instructions, |
| examine its predecessors and try to emit (conditional) return |
| instructions. */ |
| |
| edge |
| get_unconverted_simple_return (edge exit_fallthru_edge, bitmap_head bb_flags, |
| vec<edge> *unconverted_simple_returns, |
| rtx_insn **returnjump) |
| { |
| if (optimize) |
| { |
| unsigned i, last; |
| |
| /* convert_jumps_to_returns may add to preds of the exit block |
| (but won't remove). Stop at end of current preds. */ |
| last = EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds); |
| for (i = 0; i < last; i++) |
| { |
| edge e = EDGE_I (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds, i); |
| if (LABEL_P (BB_HEAD (e->src)) |
| && !bitmap_bit_p (&bb_flags, e->src->index) |
| && !active_insn_between (BB_HEAD (e->src), BB_END (e->src))) |
| *unconverted_simple_returns |
| = convert_jumps_to_returns (e->src, true, |
| *unconverted_simple_returns); |
| } |
| } |
| |
| if (exit_fallthru_edge != NULL |
| && EDGE_COUNT (exit_fallthru_edge->src->preds) != 0 |
| && !bitmap_bit_p (&bb_flags, exit_fallthru_edge->src->index)) |
| { |
| basic_block last_bb; |
| |
| last_bb = emit_return_for_exit (exit_fallthru_edge, true); |
| *returnjump = BB_END (last_bb); |
| exit_fallthru_edge = NULL; |
| } |
| return exit_fallthru_edge; |
| } |
| |
| /* If there were branches to an empty LAST_BB which we tried to |
| convert to conditional simple_returns, but couldn't for some |
| reason, create a block to hold a simple_return insn and redirect |
| those remaining edges. */ |
| |
| void |
| convert_to_simple_return (edge entry_edge, edge orig_entry_edge, |
| bitmap_head bb_flags, rtx_insn *returnjump, |
| vec<edge> unconverted_simple_returns) |
| { |
| edge e; |
| edge_iterator ei; |
| |
| if (!unconverted_simple_returns.is_empty ()) |
| { |
| basic_block simple_return_block_hot = NULL; |
| basic_block simple_return_block_cold = NULL; |
| edge pending_edge_hot = NULL; |
| edge pending_edge_cold = NULL; |
| basic_block exit_pred; |
| int i; |
| |
| gcc_assert (entry_edge != orig_entry_edge); |
| |
| /* See if we can reuse the last insn that was emitted for the |
| epilogue. */ |
| if (returnjump != NULL_RTX |
| && JUMP_LABEL (returnjump) == simple_return_rtx) |
| { |
| e = split_block (BLOCK_FOR_INSN (returnjump), PREV_INSN (returnjump)); |
| if (BB_PARTITION (e->src) == BB_HOT_PARTITION) |
| simple_return_block_hot = e->dest; |
| else |
| simple_return_block_cold = e->dest; |
| } |
| |
| /* Also check returns we might need to add to tail blocks. */ |
| FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
| if (EDGE_COUNT (e->src->preds) != 0 |
| && (e->flags & EDGE_FAKE) != 0 |
| && !bitmap_bit_p (&bb_flags, e->src->index)) |
| { |
| if (BB_PARTITION (e->src) == BB_HOT_PARTITION) |
| pending_edge_hot = e; |
| else |
| pending_edge_cold = e; |
| } |
| |
| /* Save a pointer to the exit's predecessor BB for use in |
| inserting new BBs at the end of the function. Do this |
| after the call to split_block above which may split |
| the original exit pred. */ |
| exit_pred = EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb; |
| |
| FOR_EACH_VEC_ELT (unconverted_simple_returns, i, e) |
| { |
| basic_block *pdest_bb; |
| edge pending; |
| |
| if (BB_PARTITION (e->src) == BB_HOT_PARTITION) |
| { |
| pdest_bb = &simple_return_block_hot; |
| pending = pending_edge_hot; |
| } |
| else |
| { |
| pdest_bb = &simple_return_block_cold; |
| pending = pending_edge_cold; |
| } |
| |
| if (*pdest_bb == NULL && pending != NULL) |
| { |
| emit_return_into_block (true, pending->src); |
| pending->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE); |
| *pdest_bb = pending->src; |
| } |
| else if (*pdest_bb == NULL) |
| { |
| basic_block bb; |
| rtx_insn *start; |
| |
| bb = create_basic_block (NULL, NULL, exit_pred); |
| BB_COPY_PARTITION (bb, e->src); |
| start = emit_jump_insn_after (gen_simple_return (), |
| BB_END (bb)); |
| JUMP_LABEL (start) = simple_return_rtx; |
| emit_barrier_after (start); |
| |
| *pdest_bb = bb; |
| make_edge (bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0); |
| } |
| redirect_edge_and_branch_force (e, *pdest_bb); |
| } |
| unconverted_simple_returns.release (); |
| } |
| |
| if (entry_edge != orig_entry_edge) |
| { |
| FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) |
| if (EDGE_COUNT (e->src->preds) != 0 |
| && (e->flags & EDGE_FAKE) != 0 |
| && !bitmap_bit_p (&bb_flags, e->src->index)) |
| { |
| emit_return_into_block (true, e->src); |
| e->flags &= ~(EDGE_FALLTHRU | EDGE_FAKE); |
| } |
| } |
| } |
| |
| #endif |