| /* Code to analyze doloop loops in order for targets to perform late |
| optimizations converting doloops to other forms of hardware loops. |
| Copyright (C) 2011-2020 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "rtl.h" |
| #include "df.h" |
| #include "insn-config.h" |
| #include "regs.h" |
| #include "memmodel.h" |
| #include "emit-rtl.h" |
| #include "recog.h" |
| #include "cfgrtl.h" |
| #include "hw-doloop.h" |
| #include "dumpfile.h" |
| |
| /* Dump information collected in LOOPS. */ |
| static void |
| dump_hwloops (hwloop_info loops) |
| { |
| hwloop_info loop; |
| |
| for (loop = loops; loop; loop = loop->next) |
| { |
| hwloop_info i; |
| basic_block b; |
| unsigned ix; |
| |
| fprintf (dump_file, ";; loop %d: ", loop->loop_no); |
| if (loop->bad) |
| fprintf (dump_file, "(bad) "); |
| fprintf (dump_file, "{head:%d, depth:%d, reg:%u}", |
| loop->head == NULL ? -1 : loop->head->index, |
| loop->depth, REGNO (loop->iter_reg)); |
| |
| fprintf (dump_file, " blocks: [ "); |
| for (ix = 0; loop->blocks.iterate (ix, &b); ix++) |
| fprintf (dump_file, "%d ", b->index); |
| fprintf (dump_file, "] "); |
| |
| fprintf (dump_file, " inner loops: [ "); |
| for (ix = 0; loop->loops.iterate (ix, &i); ix++) |
| fprintf (dump_file, "%d ", i->loop_no); |
| fprintf (dump_file, "]\n"); |
| } |
| fprintf (dump_file, "\n"); |
| } |
| |
| /* Return true if BB is part of LOOP. */ |
| static bool |
| bb_in_loop_p (hwloop_info loop, basic_block bb) |
| { |
| return bitmap_bit_p (loop->block_bitmap, bb->index); |
| } |
| |
| /* Scan the blocks of LOOP (and its inferiors), and record things such as |
| hard registers set, jumps out of and within the loop. */ |
| static void |
| scan_loop (hwloop_info loop) |
| { |
| unsigned ix; |
| basic_block bb; |
| |
| if (loop->bad) |
| return; |
| |
| if (REGNO_REG_SET_P (df_get_live_in (loop->successor), |
| REGNO (loop->iter_reg))) |
| loop->iter_reg_used_outside = true; |
| |
| for (ix = 0; loop->blocks.iterate (ix, &bb); ix++) |
| { |
| rtx_insn *insn; |
| edge e; |
| edge_iterator ei; |
| |
| if (bb != loop->tail) |
| FOR_EACH_EDGE (e, ei, bb->succs) |
| { |
| if (bb_in_loop_p (loop, e->dest)) |
| { |
| if (!(e->flags & EDGE_FALLTHRU)) |
| loop->jumps_within = true; |
| } |
| else |
| { |
| loop->jumps_outof = true; |
| if (!loop->bad) |
| gcc_assert (!REGNO_REG_SET_P (df_get_live_in (e->dest), |
| REGNO (loop->iter_reg))); |
| } |
| } |
| |
| for (insn = BB_HEAD (bb); |
| insn != NEXT_INSN (BB_END (bb)); |
| insn = NEXT_INSN (insn)) |
| { |
| df_ref def; |
| HARD_REG_SET set_this_insn; |
| |
| if (!NONDEBUG_INSN_P (insn)) |
| continue; |
| |
| if (recog_memoized (insn) < 0 |
| && (GET_CODE (PATTERN (insn)) == ASM_INPUT |
| || asm_noperands (PATTERN (insn)) >= 0)) |
| loop->has_asm = true; |
| |
| CLEAR_HARD_REG_SET (set_this_insn); |
| FOR_EACH_INSN_DEF (def, insn) |
| { |
| rtx dreg = DF_REF_REG (def); |
| |
| if (!REG_P (dreg)) |
| continue; |
| |
| add_to_hard_reg_set (&set_this_insn, GET_MODE (dreg), |
| REGNO (dreg)); |
| } |
| |
| if (insn == loop->loop_end) |
| CLEAR_HARD_REG_BIT (set_this_insn, REGNO (loop->iter_reg)); |
| else if (reg_mentioned_p (loop->iter_reg, PATTERN (insn))) |
| loop->iter_reg_used = true; |
| loop->regs_set_in_loop |= set_this_insn; |
| } |
| } |
| } |
| |
| /* Compute the incoming_dest and incoming_src members of LOOP by |
| identifying the edges that jump into the loop. If there is more |
| than one block that jumps into the loop, incoming_src will be set |
| to NULL; likewise, if there is more than one block in the loop that |
| is the destination of an incoming edge, incoming_dest will be NULL. |
| |
| Return true if either of these two fields is nonnull, false |
| otherwise. */ |
| static bool |
| process_incoming_edges (hwloop_info loop) |
| { |
| edge e; |
| edge_iterator ei; |
| bool first = true; |
| |
| FOR_EACH_EDGE (e, ei, loop->incoming) |
| { |
| if (first) |
| { |
| loop->incoming_src = e->src; |
| loop->incoming_dest = e->dest; |
| first = false; |
| } |
| else |
| { |
| if (e->dest != loop->incoming_dest) |
| loop->incoming_dest = NULL; |
| if (e->src != loop->incoming_src) |
| loop->incoming_src = NULL; |
| } |
| } |
| if (loop->incoming_src == NULL && loop->incoming_dest == NULL) |
| return false; |
| |
| return true; |
| } |
| |
| /* Try to identify a forwarder block that jump into LOOP, and add it to |
| the set of blocks in the loop, updating the vector of incoming blocks as |
| well. This transformation gives a second chance to loops we couldn't |
| otherwise handle by increasing the chance that we'll end up with one |
| incoming_src block. |
| Return true if we made a change, false otherwise. */ |
| static bool |
| add_forwarder_blocks (hwloop_info loop) |
| { |
| edge e; |
| edge_iterator ei; |
| |
| FOR_EACH_EDGE (e, ei, loop->incoming) |
| { |
| if (forwarder_block_p (e->src)) |
| { |
| edge e2; |
| edge_iterator ei2; |
| |
| if (dump_file) |
| fprintf (dump_file, |
| ";; Adding forwarder block %d to loop %d and retrying\n", |
| e->src->index, loop->loop_no); |
| loop->blocks.safe_push (e->src); |
| bitmap_set_bit (loop->block_bitmap, e->src->index); |
| FOR_EACH_EDGE (e2, ei2, e->src->preds) |
| vec_safe_push (loop->incoming, e2); |
| loop->incoming->unordered_remove (ei.index); |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /* Called from reorg_loops when a potential loop end is found. LOOP is |
| a newly set up structure describing the loop, it is this function's |
| responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the |
| loop_end insn and its enclosing basic block. REG is the loop counter |
| register. |
| For our purposes, a loop is defined by the set of blocks reachable from |
| the loop head in which the loop counter register is live. This matches |
| the expected use; targets that call into this code usually replace the |
| loop counter with a different special register. */ |
| static void |
| discover_loop (hwloop_info loop, basic_block tail_bb, rtx_insn *tail_insn, rtx reg) |
| { |
| bool found_tail; |
| unsigned dwork = 0; |
| basic_block bb; |
| |
| loop->tail = tail_bb; |
| loop->loop_end = tail_insn; |
| loop->iter_reg = reg; |
| vec_alloc (loop->incoming, 2); |
| loop->start_label = as_a <rtx_insn *> (JUMP_LABEL (tail_insn)); |
| |
| if (EDGE_COUNT (tail_bb->succs) != 2) |
| { |
| loop->bad = true; |
| return; |
| } |
| loop->head = BRANCH_EDGE (tail_bb)->dest; |
| loop->successor = FALLTHRU_EDGE (tail_bb)->dest; |
| |
| auto_vec<basic_block, 20> works; |
| works.safe_push (loop->head); |
| |
| found_tail = false; |
| for (dwork = 0; works.iterate (dwork, &bb); dwork++) |
| { |
| edge e; |
| edge_iterator ei; |
| if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
| { |
| /* We've reached the exit block. The loop must be bad. */ |
| if (dump_file) |
| fprintf (dump_file, |
| ";; Loop is bad - reached exit block while scanning\n"); |
| loop->bad = true; |
| break; |
| } |
| |
| if (bitmap_bit_p (loop->block_bitmap, bb->index)) |
| continue; |
| |
| /* We've not seen this block before. Add it to the loop's |
| list and then add each successor to the work list. */ |
| |
| loop->blocks.safe_push (bb); |
| bitmap_set_bit (loop->block_bitmap, bb->index); |
| |
| if (bb == tail_bb) |
| found_tail = true; |
| else |
| { |
| FOR_EACH_EDGE (e, ei, bb->succs) |
| { |
| basic_block succ = EDGE_SUCC (bb, ei.index)->dest; |
| if (REGNO_REG_SET_P (df_get_live_in (succ), |
| REGNO (loop->iter_reg))) |
| works.safe_push (succ); |
| } |
| } |
| } |
| |
| if (!found_tail) |
| loop->bad = true; |
| |
| /* Find the predecessor, and make sure nothing else jumps into this loop. */ |
| if (!loop->bad) |
| { |
| FOR_EACH_VEC_ELT (loop->blocks, dwork, bb) |
| { |
| edge e; |
| edge_iterator ei; |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| { |
| basic_block pred = e->src; |
| |
| if (!bb_in_loop_p (loop, pred)) |
| { |
| if (dump_file) |
| fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n", |
| loop->loop_no, pred->index, |
| e->dest->index); |
| vec_safe_push (loop->incoming, e); |
| } |
| } |
| } |
| |
| if (!process_incoming_edges (loop)) |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| ";; retrying loop %d with forwarder blocks\n", |
| loop->loop_no); |
| if (!add_forwarder_blocks (loop)) |
| { |
| if (dump_file) |
| fprintf (dump_file, ";; No forwarder blocks found\n"); |
| loop->bad = true; |
| } |
| else if (!process_incoming_edges (loop)) |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| ";; can't find suitable entry for loop %d\n", |
| loop->loop_no); |
| } |
| } |
| } |
| } |
| |
| /* Analyze the structure of the loops in the current function. Use |
| LOOP_STACK for bitmap allocations. Returns all the valid candidates for |
| hardware loops found in this function. HOOKS is the argument |
| passed to reorg_loops, used here to find the iteration registers |
| from a loop_end pattern. */ |
| static hwloop_info |
| discover_loops (bitmap_obstack *loop_stack, struct hw_doloop_hooks *hooks) |
| { |
| hwloop_info loops = NULL; |
| hwloop_info loop; |
| basic_block bb; |
| int nloops = 0; |
| |
| /* Find all the possible loop tails. This means searching for every |
| loop_end instruction. For each one found, create a hwloop_info |
| structure and add the head block to the work list. */ |
| FOR_EACH_BB_FN (bb, cfun) |
| { |
| rtx_insn *tail = BB_END (bb); |
| rtx_insn *insn; |
| rtx reg; |
| |
| while (tail && NOTE_P (tail) && tail != BB_HEAD (bb)) |
| tail = PREV_INSN (tail); |
| |
| if (tail == NULL_RTX) |
| continue; |
| |
| if (!JUMP_P (tail)) |
| continue; |
| reg = hooks->end_pattern_reg (tail); |
| if (reg == NULL_RTX) |
| continue; |
| |
| /* A possible loop end */ |
| |
| /* There's a degenerate case we can handle - an empty loop consisting |
| of only a back branch. Handle that by deleting the branch. */ |
| insn = JUMP_LABEL_AS_INSN (tail); |
| while (insn && !NONDEBUG_INSN_P (insn)) |
| insn = NEXT_INSN (insn); |
| if (insn == tail) |
| { |
| basic_block succ = FALLTHRU_EDGE (bb)->dest; |
| if (dump_file) |
| { |
| fprintf (dump_file, ";; degenerate loop ending at\n"); |
| print_rtl_single (dump_file, tail); |
| } |
| if (!REGNO_REG_SET_P (df_get_live_in (succ), REGNO (reg))) |
| { |
| if (dump_file) |
| fprintf (dump_file, ";; deleting it\n"); |
| delete_insn_and_edges (tail); |
| continue; |
| } |
| } |
| |
| loop = XCNEW (struct hwloop_info_d); |
| loop->next = loops; |
| loops = loop; |
| loop->loop_no = nloops++; |
| loop->blocks.create (20); |
| loop->block_bitmap = BITMAP_ALLOC (loop_stack); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, ";; potential loop %d ending at\n", |
| loop->loop_no); |
| print_rtl_single (dump_file, tail); |
| } |
| |
| discover_loop (loop, bb, tail, reg); |
| } |
| |
| /* Compute loop nestings. Given two loops A and B, either the sets |
| of their blocks don't intersect at all, or one is the subset of |
| the other, or the blocks don't form a good nesting structure. */ |
| for (loop = loops; loop; loop = loop->next) |
| { |
| hwloop_info other; |
| |
| if (loop->bad) |
| continue; |
| |
| for (other = loops; other; other = other->next) |
| { |
| if (other->bad) |
| continue; |
| |
| if (!bitmap_intersect_p (other->block_bitmap, loop->block_bitmap)) |
| continue; |
| if (!bitmap_intersect_compl_p (other->block_bitmap, |
| loop->block_bitmap)) |
| loop->loops.safe_push (other); |
| else if (!bitmap_intersect_compl_p (loop->block_bitmap, |
| other->block_bitmap)) |
| other->loops.safe_push (loop); |
| else |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| ";; can't find suitable nesting for loops %d and %d\n", |
| loop->loop_no, other->loop_no); |
| loop->bad = other->bad = true; |
| } |
| } |
| } |
| |
| if (dump_file) |
| dump_hwloops (loops); |
| |
| return loops; |
| } |
| |
| /* Free up the loop structures in LOOPS. */ |
| static void |
| free_loops (hwloop_info loops) |
| { |
| while (loops) |
| { |
| hwloop_info loop = loops; |
| loops = loop->next; |
| loop->loops.release (); |
| loop->blocks.release (); |
| BITMAP_FREE (loop->block_bitmap); |
| XDELETE (loop); |
| } |
| } |
| |
| #define BB_AUX_INDEX(BB) ((intptr_t) (BB)->aux) |
| |
| /* Initialize the aux fields to give ascending indices to basic blocks. */ |
| static void |
| set_bb_indices (void) |
| { |
| basic_block bb; |
| intptr_t index; |
| |
| index = 0; |
| FOR_EACH_BB_FN (bb, cfun) |
| bb->aux = (void *) index++; |
| } |
| |
| /* The taken-branch edge from the loop end can actually go forward. |
| If the target's hardware loop support requires that the loop end be |
| after the loop start, try to reorder a loop's basic blocks when we |
| find such a case. |
| This is not very aggressive; it only moves at most one block. It |
| does not introduce new branches into loops; it may remove them, or |
| it may switch fallthru/jump edges. */ |
| static void |
| reorder_loops (hwloop_info loops) |
| { |
| basic_block bb; |
| hwloop_info loop; |
| |
| cfg_layout_initialize (0); |
| |
| set_bb_indices (); |
| |
| for (loop = loops; loop; loop = loop->next) |
| { |
| edge e; |
| edge_iterator ei; |
| |
| if (loop->bad) |
| continue; |
| |
| if (BB_AUX_INDEX (loop->head) <= BB_AUX_INDEX (loop->tail)) |
| continue; |
| |
| FOR_EACH_EDGE (e, ei, loop->head->succs) |
| { |
| if (bitmap_bit_p (loop->block_bitmap, e->dest->index) |
| && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail)) |
| { |
| basic_block start_bb = e->dest; |
| basic_block start_prev_bb = start_bb->prev_bb; |
| |
| if (dump_file) |
| fprintf (dump_file, ";; Moving block %d before block %d\n", |
| loop->head->index, start_bb->index); |
| loop->head->prev_bb->next_bb = loop->head->next_bb; |
| loop->head->next_bb->prev_bb = loop->head->prev_bb; |
| |
| loop->head->prev_bb = start_prev_bb; |
| loop->head->next_bb = start_bb; |
| start_prev_bb->next_bb = start_bb->prev_bb = loop->head; |
| |
| set_bb_indices (); |
| break; |
| } |
| } |
| loops = loops->next; |
| } |
| |
| FOR_EACH_BB_FN (bb, cfun) |
| { |
| if (bb->next_bb != EXIT_BLOCK_PTR_FOR_FN (cfun)) |
| bb->aux = bb->next_bb; |
| else |
| bb->aux = NULL; |
| } |
| cfg_layout_finalize (); |
| clear_aux_for_blocks (); |
| df_analyze (); |
| } |
| |
| /* Call the OPT function for LOOP and all of its sub-loops. This is |
| done in a depth-first search; innermost loops are visited first. |
| OPTIMIZE and FAIL are the functions passed to reorg_loops by the |
| target's reorg pass. */ |
| static void |
| optimize_loop (hwloop_info loop, struct hw_doloop_hooks *hooks) |
| { |
| int ix; |
| hwloop_info inner; |
| int inner_depth = 0; |
| |
| if (loop->visited) |
| return; |
| |
| loop->visited = 1; |
| |
| if (loop->bad) |
| { |
| if (dump_file) |
| fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no); |
| goto bad_loop; |
| } |
| |
| /* Every loop contains in its list of inner loops every loop nested inside |
| it, even if there are intermediate loops. This works because we're doing |
| a depth-first search here and never visit a loop more than once. |
| Recursion depth is effectively limited by the number of available |
| hardware registers. */ |
| for (ix = 0; loop->loops.iterate (ix, &inner); ix++) |
| { |
| optimize_loop (inner, hooks); |
| |
| if (!inner->bad && inner_depth < inner->depth) |
| inner_depth = inner->depth; |
| /* The set of registers may be changed while optimizing the inner |
| loop. */ |
| loop->regs_set_in_loop |= inner->regs_set_in_loop; |
| } |
| |
| loop->depth = inner_depth + 1; |
| |
| if (hooks->opt (loop)) |
| return; |
| |
| bad_loop: |
| if (dump_file) |
| fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no); |
| |
| loop->bad = true; |
| hooks->fail (loop); |
| } |
| |
| /* This function can be used from a port's machine_dependent_reorg to |
| find and analyze loops that end in loop_end instructions. It uses |
| a set of function pointers in HOOKS to call back into the |
| target-specific functions to perform the actual machine-specific |
| transformations. |
| |
| Such transformations typically involve additional set-up |
| instructions before the loop, to define loop bounds or set up a |
| special loop counter register. |
| |
| DO_REORDER should be set to true if we should try to use the |
| reorder_loops function to ensure the loop end occurs after the loop |
| start. This is for use by targets where the loop hardware requires |
| this condition. |
| |
| HOOKS is used to pass in target specific hooks; see |
| hw-doloop.h. */ |
| void |
| reorg_loops (bool do_reorder, struct hw_doloop_hooks *hooks) |
| { |
| hwloop_info loops = NULL; |
| hwloop_info loop; |
| bitmap_obstack loop_stack; |
| |
| df_live_add_problem (); |
| df_live_set_all_dirty (); |
| df_analyze (); |
| |
| bitmap_obstack_initialize (&loop_stack); |
| |
| if (dump_file) |
| fprintf (dump_file, ";; Find loops, first pass\n\n"); |
| |
| loops = discover_loops (&loop_stack, hooks); |
| |
| /* We can't enter cfglayout mode anymore if basic block partitioning |
| already happened. */ |
| if (do_reorder && !crtl->has_bb_partition) |
| { |
| reorder_loops (loops); |
| free_loops (loops); |
| |
| if (dump_file) |
| fprintf (dump_file, ";; Find loops, second pass\n\n"); |
| |
| loops = discover_loops (&loop_stack, hooks); |
| } |
| |
| for (loop = loops; loop; loop = loop->next) |
| scan_loop (loop); |
| |
| /* Now apply the optimizations. */ |
| for (loop = loops; loop; loop = loop->next) |
| optimize_loop (loop, hooks); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, ";; After hardware loops optimization:\n\n"); |
| dump_hwloops (loops); |
| } |
| |
| free_loops (loops); |
| bitmap_obstack_release (&loop_stack); |
| |
| if (dump_file) |
| print_rtl (dump_file, get_insns ()); |
| } |