| /* Instruction scheduling pass. |
| Copyright (C) 1992, 93-97, 1998 Free Software Foundation, Inc. |
| Contributed by Michael Tiemann (tiemann@cygnus.com). Enhanced by, |
| and currently maintained by, Jim Wilson (wilson@cygnus.com). |
| |
| This file is part of GNU CC. |
| |
| GNU CC is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| GNU CC is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GNU CC; see the file COPYING. If not, write to the Free |
| Software Foundation, 59 Temple Place - Suite 330, |
| Boston, MA 02111-1307, USA. */ |
| |
| |
| /* Instruction scheduling pass. |
| |
| This pass implements list scheduling within basic blocks. It is |
| run twice: (1) after flow analysis, but before register allocation, |
| and (2) after register allocation. |
| |
| The first run performs interblock scheduling, moving insns between |
| different blocks in the same "region", and the second runs only |
| basic block scheduling. |
| |
| Interblock motions performed are useful motions and speculative |
| motions, including speculative loads. Motions requiring code |
| duplication are not supported. The identification of motion type |
| and the check for validity of speculative motions requires |
| construction and analysis of the function's control flow graph. |
| The scheduler works as follows: |
| |
| We compute insn priorities based on data dependencies. Flow |
| analysis only creates a fraction of the data-dependencies we must |
| observe: namely, only those dependencies which the combiner can be |
| expected to use. For this pass, we must therefore create the |
| remaining dependencies ourselves; register dependencies, memory |
| dependencies, dependencies needed to keep function calls in order, |
| and the dependence between a conditional branch and the setting of |
| condition codes are all dealt with here. |
| |
| The scheduler first traverses the data flow graph, starting with |
| the last instruction, and proceeding to the first, assigning values |
| to insn_priority as it goes. This sorts the instructions |
| topologically by data dependence. |
| |
| Once priorities have been established, we order the insns using |
| list scheduling. This works as follows: starting with a list of |
| all the ready insns, and sorted according to priority number, we |
| schedule the insn from the end of the list by placing its |
| predecessors in the list according to their priority order. We |
| consider this insn scheduled by setting the pointer to the "end" of |
| the list to point to the previous insn. When an insn has no |
| predecessors, we either queue it until sufficient time has elapsed |
| or add it to the ready list. As the instructions are scheduled or |
| when stalls are introduced, the queue advances and dumps insns into |
| the ready list. When all insns down to the lowest priority have |
| been scheduled, the critical path of the basic block has been made |
| as short as possible. The remaining insns are then scheduled in |
| remaining slots. |
| |
| Function unit conflicts are resolved during forward list scheduling |
| by tracking the time when each insn is committed to the schedule |
| and from that, the time the function units it uses must be free. |
| As insns on the ready list are considered for scheduling, those |
| that would result in a blockage of the already committed insns are |
| queued until no blockage will result. |
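| |
| As a rough outline only (the precise details live in schedule_block |
| and queue_to_ready), one pass of forward list scheduling looks like: |
| |
| clock = 0; |
| while (unscheduled insns remain) |
| { |
| move insns whose delay has expired from the queue to the ready list; |
| sort the ready list by the tie-breaking rules below; |
| issue the ready insns that fit in this cycle, sending any insn that |
| would block an already committed function unit back to the queue; |
| clock++; |
| } |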
| |
| The following list shows the order in which we want to break ties |
| among insns in the ready list: |
| |
| 1. choose insn with the longest path to end of bb, ties |
| broken by |
| 2. choose insn with least contribution to register pressure, |
| ties broken by |
| 3. prefer in-block over interblock motion, ties broken by |
| 4. prefer useful over speculative motion, ties broken by |
| 5. choose insn with largest control flow probability, ties |
| broken by |
| 6. choose insn with the fewest dependences upon the previously |
| scheduled insn, ties broken by |
| 7. choose the insn which has the most insns dependent on it, or finally |
| 8. choose insn with lowest UID. |
| |
| Memory references complicate matters. Only if we can be certain |
| that memory references are not part of the data dependency graph |
| (via true, anti, or output dependence), can we move operations past |
| memory references. To a first approximation, reads can be done |
| independently, while writes introduce dependencies. Better |
| approximations will yield fewer dependencies. |
| |
| Before reload, an extended analysis of interblock data dependences |
| is required for interblock scheduling. This is performed in |
| compute_block_backward_dependences (). |
| |
| Dependencies set up by memory references are treated in exactly the |
| same way as other dependencies, by using LOG_LINKS backward |
| dependences. LOG_LINKS are translated into INSN_DEPEND forward |
| dependences for the purpose of forward list scheduling. |
| |
| Having optimized the critical path, we may have also unduly |
| extended the lifetimes of some registers. If an operation requires |
| that constants be loaded into registers, it is certainly desirable |
| to load those constants as early as necessary, but no earlier. |
| I.e., it will not do to load up a bunch of registers at the |
| beginning of a basic block only to use them at the end, if they |
| could be loaded later, since this may result in excessive register |
| utilization. |
| |
| Note that since branches never appear in the middle of a basic block, |
| but only at its end, this pass will not move branches. But that is ok, |
| since we can use GNU's delayed branch scheduling pass to take care |
| of this case. |
| |
| Also note that no further optimizations based on algebraic |
| identities are performed, so this pass would be a good one to |
| perform instruction splitting, such as breaking up a multiply |
| instruction into shifts and adds where that is profitable. |
| |
| Given the memory aliasing analysis that this pass should perform, |
| it should be possible to remove redundant stores to memory, and to |
| load values from registers instead of hitting memory. |
| |
| Before reload, speculative insns are moved only if a 'proof' exists |
| that no exception will be caused by this, and if no live registers |
| exist that inhibit the motion (live register constraints are not |
| represented by data dependence edges). |
| |
| This pass must update information that subsequent passes expect to |
| be correct. Namely: reg_n_refs, reg_n_sets, reg_n_deaths, |
| reg_n_calls_crossed, and reg_live_length. Also, basic_block_head, |
| basic_block_end. |
| |
| The information in the line number notes is carefully retained by |
| this pass. Notes that refer to the starting and ending of |
| exception regions are also carefully retained by this pass. All |
| other NOTE insns are grouped in their same relative order at the |
| beginning of basic blocks and regions that have been scheduled. |
| |
| The main entry point for this pass is schedule_insns(), called for |
| each function. The work of the scheduler is organized in three |
| levels: (1) function level: insns are subject to splitting, |
| control-flow-graph is constructed, regions are computed (after |
| reload, each region is of one block), (2) region level: control |
| flow graph attributes required for interblock scheduling are |
| computed (dominators, reachability, etc.), data dependences and |
| priorities are computed, and (3) block level: insns in the block |
| are actually scheduled. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "rtl.h" |
| #include "basic-block.h" |
| #include "regs.h" |
| #include "hard-reg-set.h" |
| #include "flags.h" |
| #include "insn-config.h" |
| #include "insn-attr.h" |
| #include "except.h" |
| #include "toplev.h" |
| |
| extern char *reg_known_equiv_p; |
| extern rtx *reg_known_value; |
| |
| #ifdef INSN_SCHEDULING |
| |
| /* The target_units bitmask has one bit set for each unit in the cpu. |
| It should be possible to compute this variable from the machine |
| description. But currently it is computed by examining the insn list. |
| Since this is only needed for visualization, it seems an acceptable |
| solution. (For understanding the mapping of bits to units, see |
| definition of function_units[] in "insn-attrtab.c") */ |
| |
| static int target_units = 0; |
| |
| /* issue_rate is the number of insns that can be scheduled in the same |
| machine cycle. It can be defined in the config/mach/mach.h file, |
| otherwise we set it to 1. */ |
| |
| static int issue_rate; |
| |
| #ifndef ISSUE_RATE |
| #define ISSUE_RATE 1 |
| #endif |
| |
| /* sched-verbose controls the amount of debugging output the |
| scheduler prints. It is controlled by -fsched-verbose-N: |
| N>0 and no -dSR: the output is directed to stderr. |
| N>=10 will direct the printouts to stderr (regardless of -dSR). |
| N=1: same as -dSR. |
| N=2: bb's probabilities, detailed ready list info, unit/insn info. |
| N=3: rtl at abort point, control-flow, regions info. |
| N=5: dependences info. */ |
| |
| #define MAX_RGN_BLOCKS 10 |
| #define MAX_RGN_INSNS 100 |
| |
| static int sched_verbose_param = 0; |
| static int sched_verbose = 0; |
| |
| /* nr_inter and nr_spec count interblock and speculative motions for the function. */ |
| static int nr_inter, nr_spec; |
| |
| |
| /* Debugging file. All printouts are sent to dump, which is always set, |
| either to stderr, or to the dump listing file (-dSR). */ |
| static FILE *dump = 0; |
| |
| /* fix_sched_param() is called from toplev.c upon detection |
| of the -fsched-***-N options. */ |
| |
| void |
| fix_sched_param (param, val) |
| char *param, *val; |
| { |
| if (!strcmp (param, "verbose")) |
| sched_verbose_param = atoi (val); |
| else |
| warning ("fix_sched_param: unknown param: %s", param); |
| } |
| |
| |
| /* Arrays set up by scheduling for the same respective purposes as |
| similar-named arrays set up by flow analysis. We work with these |
| arrays during the scheduling pass so we can compare values against |
| unscheduled code. |
| |
| Values of these arrays are copied at the end of this pass into the |
| arrays set up by flow analysis. */ |
| static int *sched_reg_n_calls_crossed; |
| static int *sched_reg_live_length; |
| static int *sched_reg_basic_block; |
| |
| /* We need to know the current block number during the post scheduling |
| update of live register information so that we can also update |
| REG_BASIC_BLOCK if a register changes blocks. */ |
| static int current_block_num; |
| |
| /* Element N is the next insn that sets (hard or pseudo) register |
| N within the current basic block; or zero, if there is no |
| such insn. Needed for new registers which may be introduced |
| by splitting insns. */ |
| static rtx *reg_last_uses; |
| static rtx *reg_last_sets; |
| static regset reg_pending_sets; |
| static int reg_pending_sets_all; |
| |
| /* Vector indexed by INSN_UID giving the original ordering of the insns. */ |
| static int *insn_luid; |
| #define INSN_LUID(INSN) (insn_luid[INSN_UID (INSN)]) |
| |
| /* Vector indexed by INSN_UID giving each instruction a priority. */ |
| static int *insn_priority; |
| #define INSN_PRIORITY(INSN) (insn_priority[INSN_UID (INSN)]) |
| |
| static short *insn_costs; |
| #define INSN_COST(INSN) insn_costs[INSN_UID (INSN)] |
| |
| /* Vector indexed by INSN_UID giving an encoding of the function units |
| used. */ |
| static short *insn_units; |
| #define INSN_UNIT(INSN) insn_units[INSN_UID (INSN)] |
| |
| /* Vector indexed by INSN_UID giving each instruction a register-weight. |
| This weight is an estimate of the insn's contribution to register pressure. */ |
| static int *insn_reg_weight; |
| #define INSN_REG_WEIGHT(INSN) (insn_reg_weight[INSN_UID (INSN)]) |
| |
| /* Vector indexed by INSN_UID giving list of insns which |
| depend upon INSN. Unlike LOG_LINKS, it represents forward dependences. */ |
| static rtx *insn_depend; |
| #define INSN_DEPEND(INSN) insn_depend[INSN_UID (INSN)] |
| |
| /* Vector indexed by INSN_UID. Initialized to the number of incoming |
| edges in the forward dependence graph (= number of LOG_LINKS). As |
| scheduling proceeds, dependence counts are decreased. An |
| instruction moves to the ready list when its counter is zero. */ |
| static int *insn_dep_count; |
| #define INSN_DEP_COUNT(INSN) (insn_dep_count[INSN_UID (INSN)]) |
| |
| /* Vector indexed by INSN_UID giving an encoding of the blockage range |
| function. The unit and the range are encoded. */ |
| static unsigned int *insn_blockage; |
| #define INSN_BLOCKAGE(INSN) insn_blockage[INSN_UID (INSN)] |
| #define UNIT_BITS 5 |
| #define BLOCKAGE_MASK ((1 << BLOCKAGE_BITS) - 1) |
| #define ENCODE_BLOCKAGE(U, R) \ |
| ((((U) << UNIT_BITS) << BLOCKAGE_BITS \ |
| | MIN_BLOCKAGE_COST (R)) << BLOCKAGE_BITS \ |
| | MAX_BLOCKAGE_COST (R)) |
| #define UNIT_BLOCKED(B) ((B) >> (2 * BLOCKAGE_BITS)) |
| #define BLOCKAGE_RANGE(B) \ |
| (((((B) >> BLOCKAGE_BITS) & BLOCKAGE_MASK) << (HOST_BITS_PER_INT / 2)) \ |
| | ((B) & BLOCKAGE_MASK)) |
| |
| /* Encodings of the `<name>_unit_blockage_range' function. */ |
| #define MIN_BLOCKAGE_COST(R) ((R) >> (HOST_BITS_PER_INT / 2)) |
| #define MAX_BLOCKAGE_COST(R) ((R) & ((1 << (HOST_BITS_PER_INT / 2)) - 1)) |
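| |
| /* For illustration only, a hedged sketch (never compiled or used by the |
| pass) of how the encoding above round-trips. BLOCKAGE_BITS comes from |
| the generated insn-attr.h; the unit number and costs below are |
| arbitrary and assumed to fit in their fields. */ |
| #if 0 |
| static void |
| example_blockage_encoding () |
| { |
| /* Pack unit 3 with a range whose minimum cost is 2 and maximum cost |
| is 5; the range itself is packed min-high/max-low, as |
| MIN_BLOCKAGE_COST and MAX_BLOCKAGE_COST expect. */ |
| unsigned int range = (2 << (HOST_BITS_PER_INT / 2)) | 5; |
| unsigned int b = ENCODE_BLOCKAGE (3, range); |
| |
| /* UNIT_BLOCKED recovers the unit (still shifted left by UNIT_BITS), |
| and BLOCKAGE_RANGE recovers the packed min/max pair. */ |
| if (UNIT_BLOCKED (b) != (3 << UNIT_BITS) |
| || BLOCKAGE_RANGE (b) != range) |
| abort (); |
| } |
| #endif |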
| |
| #define DONE_PRIORITY -1 |
| #define MAX_PRIORITY 0x7fffffff |
| #define TAIL_PRIORITY 0x7ffffffe |
| #define LAUNCH_PRIORITY 0x7f000001 |
| #define DONE_PRIORITY_P(INSN) (INSN_PRIORITY (INSN) < 0) |
| #define LOW_PRIORITY_P(INSN) ((INSN_PRIORITY (INSN) & 0x7f000000) == 0) |
| |
| /* Vector indexed by INSN_UID giving number of insns referring to this insn. */ |
| static int *insn_ref_count; |
| #define INSN_REF_COUNT(INSN) (insn_ref_count[INSN_UID (INSN)]) |
| |
| /* Vector indexed by INSN_UID giving line-number note in effect for each |
| insn. For line-number notes, this indicates whether the note may be |
| reused. */ |
| static rtx *line_note; |
| #define LINE_NOTE(INSN) (line_note[INSN_UID (INSN)]) |
| |
| /* Vector indexed by basic block number giving the starting line-number |
| for each basic block. */ |
| static rtx *line_note_head; |
| |
| /* List of important notes we must keep around. This is a pointer to the |
| last element in the list. */ |
| static rtx note_list; |
| |
| /* Regset telling whether a given register is live before the last |
| scheduled insn. Must scan the instructions once before scheduling to |
| determine what registers are live or dead at the end of the block. */ |
| static regset bb_live_regs; |
| |
| /* Regset telling whether a given register is live after the insn currently |
| being scheduled. Before processing an insn, this is equal to bb_live_regs |
| above. This is used so that we can find registers that are newly born/dead |
| after processing an insn. */ |
| static regset old_live_regs; |
| |
| /* The chain of REG_DEAD notes. REG_DEAD notes are removed from all insns |
| during the initial scan and reused later. If there are not exactly as |
| many REG_DEAD notes in the post scheduled code as there were in the |
| prescheduled code then we trigger an abort because this indicates a bug. */ |
| static rtx dead_notes; |
| |
| /* Queues, etc. */ |
| |
| /* An instruction is ready to be scheduled when all insns preceding it |
| have already been scheduled. It is important to ensure that all |
| insns which use its result will not be executed until its result |
| has been computed. An insn is maintained in one of four structures: |
| |
| (P) the "Pending" set of insns which cannot be scheduled until |
| their dependencies have been satisfied. |
| (Q) the "Queued" set of insns that can be scheduled when sufficient |
| time has passed. |
| (R) the "Ready" list of unscheduled, uncommitted insns. |
| (S) the "Scheduled" list of insns. |
| |
| Initially, all insns are either "Pending" or "Ready" depending on |
| whether their dependencies are satisfied. |
| |
| Insns move from the "Ready" list to the "Scheduled" list as they |
| are committed to the schedule. As this occurs, the insns in the |
| "Pending" list have their dependencies satisfied and move to either |
| the "Ready" list or the "Queued" set depending on whether |
| sufficient time has passed to make them ready. As time passes, |
| insns move from the "Queued" set to the "Ready" list. Insns may |
| move from the "Ready" list to the "Queued" set if they are blocked |
| due to a function unit conflict. |
| |
| The "Pending" list (P) are the insns in the INSN_DEPEND of the unscheduled |
| insns, i.e., those that are ready, queued, and pending. |
| The "Queued" set (Q) is implemented by the variable `insn_queue'. |
| The "Ready" list (R) is implemented by the variables `ready' and |
| `n_ready'. |
| The "Scheduled" list (S) is the new insn chain built by this pass. |
| |
| The transition (R->S) is implemented in the scheduling loop in |
| `schedule_block' when the best insn to schedule is chosen. |
| The transition (R->Q) is implemented in `queue_insn' when an |
| insn is found to have a function unit conflict with the already |
| committed insns. |
| The transitions (P->R and P->Q) are implemented in `schedule_insn' as |
| insns move from the ready list to the scheduled list. |
| The transition (Q->R) is implemented in 'queue_to_ready' as time |
| passes or stalls are introduced. */ |
| |
| /* Implement a circular buffer to delay instructions until sufficient |
| time has passed. INSN_QUEUE_SIZE is a power of two larger than |
| MAX_BLOCKAGE and MAX_READY_COST computed by genattr.c. This is the |
| longest time an insn may be queued. */ |
| static rtx insn_queue[INSN_QUEUE_SIZE]; |
| static int q_ptr = 0; |
| static int q_size = 0; |
| #define NEXT_Q(X) (((X)+1) & (INSN_QUEUE_SIZE-1)) |
| #define NEXT_Q_AFTER(X, C) (((X)+C) & (INSN_QUEUE_SIZE-1)) |
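| |
| /* For example, if INSN_QUEUE_SIZE were 32, NEXT_Q (31) would wrap to 0 |
| and NEXT_Q_AFTER (30, 5) would wrap to 3; the indices simply walk the |
| circular buffer modulo its size. */ |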
| |
| /* Vector indexed by INSN_UID giving the minimum clock tick at which |
| the insn becomes ready. This is used to note timing constraints for |
| insns in the pending list. */ |
| static int *insn_tick; |
| #define INSN_TICK(INSN) (insn_tick[INSN_UID (INSN)]) |
| |
| /* Data structure for keeping track of register information |
| during that register's life. */ |
| |
| struct sometimes |
| { |
| int regno; |
| int live_length; |
| int calls_crossed; |
| }; |
| |
| /* Forward declarations. */ |
| static void add_dependence PROTO ((rtx, rtx, enum reg_note)); |
| static void remove_dependence PROTO ((rtx, rtx)); |
| static rtx find_insn_list PROTO ((rtx, rtx)); |
| static int insn_unit PROTO ((rtx)); |
| static unsigned int blockage_range PROTO ((int, rtx)); |
| static void clear_units PROTO ((void)); |
| static int actual_hazard_this_instance PROTO ((int, int, rtx, int, int)); |
| static void schedule_unit PROTO ((int, rtx, int)); |
| static int actual_hazard PROTO ((int, rtx, int, int)); |
| static int potential_hazard PROTO ((int, rtx, int)); |
| static int insn_cost PROTO ((rtx, rtx, rtx)); |
| static int priority PROTO ((rtx)); |
| static void free_pending_lists PROTO ((void)); |
| static void add_insn_mem_dependence PROTO ((rtx *, rtx *, rtx, rtx)); |
| static void flush_pending_lists PROTO ((rtx, int)); |
| static void sched_analyze_1 PROTO ((rtx, rtx)); |
| static void sched_analyze_2 PROTO ((rtx, rtx)); |
| static void sched_analyze_insn PROTO ((rtx, rtx, rtx)); |
| static void sched_analyze PROTO ((rtx, rtx)); |
| static void sched_note_set PROTO ((rtx, int)); |
| static int rank_for_schedule PROTO ((const GENERIC_PTR, const GENERIC_PTR)); |
| static void swap_sort PROTO ((rtx *, int)); |
| static void queue_insn PROTO ((rtx, int)); |
| static int schedule_insn PROTO ((rtx, rtx *, int, int)); |
| static void create_reg_dead_note PROTO ((rtx, rtx)); |
| static void attach_deaths PROTO ((rtx, rtx, int)); |
| static void attach_deaths_insn PROTO ((rtx)); |
| static int new_sometimes_live PROTO ((struct sometimes *, int, int)); |
| static void finish_sometimes_live PROTO ((struct sometimes *, int)); |
| static int schedule_block PROTO ((int, int)); |
| static rtx regno_use_in PROTO ((int, rtx)); |
| static void split_hard_reg_notes PROTO ((rtx, rtx, rtx)); |
| static void new_insn_dead_notes PROTO ((rtx, rtx, rtx, rtx)); |
| static void update_n_sets PROTO ((rtx, int)); |
| static void update_flow_info PROTO ((rtx, rtx, rtx, rtx)); |
| static char *safe_concat PROTO ((char *, char *, char *)); |
| static int insn_issue_delay PROTO ((rtx)); |
| static int birthing_insn_p PROTO ((rtx)); |
| static void adjust_priority PROTO ((rtx)); |
| |
| /* Mapping of insns to their original block prior to scheduling. */ |
| static int *insn_orig_block; |
| #define INSN_BLOCK(insn) (insn_orig_block[INSN_UID (insn)]) |
| |
| /* Some insns (e.g. call) are not allowed to move across blocks. */ |
| static char *cant_move; |
| #define CANT_MOVE(insn) (cant_move[INSN_UID (insn)]) |
| |
| /* Control flow graph edges are kept in circular lists. */ |
| typedef struct |
| { |
| int from_block; |
| int to_block; |
| int next_in; |
| int next_out; |
| } |
| edge; |
| static edge *edge_table; |
| |
| #define NEXT_IN(edge) (edge_table[edge].next_in) |
| #define NEXT_OUT(edge) (edge_table[edge].next_out) |
| #define FROM_BLOCK(edge) (edge_table[edge].from_block) |
| #define TO_BLOCK(edge) (edge_table[edge].to_block) |
| |
| /* Number of edges in the control flow graph. (in fact larger than |
| that by 1, since edge 0 is unused.) */ |
| static int nr_edges; |
| |
| /* Circular list of incoming/outgoing edges of a block */ |
| static int *in_edges; |
| static int *out_edges; |
| |
| #define IN_EDGES(block) (in_edges[block]) |
| #define OUT_EDGES(block) (out_edges[block]) |
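| |
| /* Illustrative only: a hedged sketch (never compiled) of how the circular |
| edge lists are walked; new_edge below builds them, and the same idiom is |
| used there to check for duplicates. */ |
| #if 0 |
| static void |
| example_walk_out_edges (block) |
| int block; |
| { |
| int fst_edge, curr_edge; |
| |
| fst_edge = curr_edge = OUT_EDGES (block); |
| while (curr_edge) |
| { |
| /* FROM_BLOCK (curr_edge) is BLOCK; TO_BLOCK (curr_edge) is one of |
| its successors. */ |
| curr_edge = NEXT_OUT (curr_edge); |
| if (curr_edge == fst_edge) |
| break; |
| } |
| } |
| #endif |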
| |
| /* List of labels which cannot be deleted, needed for control |
| flow graph construction. */ |
| extern rtx forced_labels; |
| |
| |
| static int is_cfg_nonregular PROTO ((void)); |
| static int build_control_flow PROTO ((int_list_ptr *, int_list_ptr *, |
| int *, int *)); |
| static void new_edge PROTO ((int, int)); |
| |
| |
| /* A region is the main entity for interblock scheduling: insns |
| are allowed to move between blocks in the same region, along |
| control flow graph edges, in the 'up' direction. */ |
| typedef struct |
| { |
| int rgn_nr_blocks; /* number of blocks in region */ |
| int rgn_blocks; /* blocks in the region (actually index in rgn_bb_table) */ |
| } |
| region; |
| |
| /* Number of regions in the procedure */ |
| static int nr_regions; |
| |
| /* Table of region descriptions */ |
| static region *rgn_table; |
| |
| /* Array of lists of regions' blocks */ |
| static int *rgn_bb_table; |
| |
| /* Topological order of blocks in the region (if b2 is reachable from |
| b1, block_to_bb[b2] > block_to_bb[b1]). |
| Note: A basic block is always referred to by either block or b, |
| while its topological order name (in the region) is referred to by |
| bb. |
| */ |
| static int *block_to_bb; |
| |
| /* The number of the region containing a block. */ |
| static int *containing_rgn; |
| |
| #define RGN_NR_BLOCKS(rgn) (rgn_table[rgn].rgn_nr_blocks) |
| #define RGN_BLOCKS(rgn) (rgn_table[rgn].rgn_blocks) |
| #define BLOCK_TO_BB(block) (block_to_bb[block]) |
| #define CONTAINING_RGN(block) (containing_rgn[block]) |
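| |
| /* For example, if RGN_BLOCKS (rgn) is 4 and RGN_NR_BLOCKS (rgn) is 3, |
| the blocks of that region are rgn_bb_table[4], rgn_bb_table[5] and |
| rgn_bb_table[6], in topological order, and BLOCK_TO_BB maps them back |
| to bb numbers 0, 1 and 2 respectively. */ |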
| |
| void debug_regions PROTO ((void)); |
| static void find_single_block_region PROTO ((void)); |
| static void find_rgns PROTO ((int_list_ptr *, int_list_ptr *, |
| int *, int *, sbitmap *)); |
| static int too_large PROTO ((int, int *, int *)); |
| |
| extern void debug_live PROTO ((int, int)); |
| |
| /* Blocks of the current region being scheduled. */ |
| static int current_nr_blocks; |
| static int current_blocks; |
| |
| /* The mapping from bb to block */ |
| #define BB_TO_BLOCK(bb) (rgn_bb_table[current_blocks + (bb)]) |
| |
| |
| /* Bit vectors and bitset operations are needed for computations on |
| the control flow graph. */ |
| |
| typedef unsigned HOST_WIDE_INT *bitset; |
| typedef struct |
| { |
| int *first_member; /* pointer to the list start in bitlst_table. */ |
| int nr_members; /* the number of members of the bit list. */ |
| } |
| bitlst; |
| |
| static int bitlst_table_last; |
| static int bitlst_table_size; |
| static int *bitlst_table; |
| |
| static char bitset_member PROTO ((bitset, int, int)); |
| static void extract_bitlst PROTO ((bitset, int, bitlst *)); |
| |
| /* target info declarations. |
| |
| The block currently being scheduled is referred to as the "target" block, |
| while other blocks in the region from which insns can be moved to the |
| target are called "source" blocks. The candidate structure holds info |
| about such sources: are they valid? Speculative? Etc. */ |
| typedef bitlst bblst; |
| typedef struct |
| { |
| char is_valid; |
| char is_speculative; |
| int src_prob; |
| bblst split_bbs; |
| bblst update_bbs; |
| } |
| candidate; |
| |
| static candidate *candidate_table; |
| |
| /* A speculative motion requires checking live information on the path |
| from 'source' to 'target'. The split blocks are those to be checked. |
| After a speculative motion, live information should be modified in |
| the 'update' blocks. |
| |
| Lists of split and update blocks for each candidate of the current |
| target are in array bblst_table */ |
| static int *bblst_table, bblst_size, bblst_last; |
| |
| #define IS_VALID(src) ( candidate_table[src].is_valid ) |
| #define IS_SPECULATIVE(src) ( candidate_table[src].is_speculative ) |
| #define SRC_PROB(src) ( candidate_table[src].src_prob ) |
| |
| /* The bb being currently scheduled. */ |
| static int target_bb; |
| |
| /* List of edges. */ |
| typedef bitlst edgelst; |
| |
| /* target info functions */ |
| static void split_edges PROTO ((int, int, edgelst *)); |
| static void compute_trg_info PROTO ((int)); |
| void debug_candidate PROTO ((int)); |
| void debug_candidates PROTO ((int)); |
| |
| |
| /* Bit-set of bbs, where bit 'i' stands for bb 'i'. */ |
| typedef bitset bbset; |
| |
| /* Number of words of the bbset. */ |
| static int bbset_size; |
| |
| /* Dominators array: dom[i] contains the bbset of dominators of |
| bb i in the region. */ |
| static bbset *dom; |
| |
| /* bb 0 is the only region entry */ |
| #define IS_RGN_ENTRY(bb) (!bb) |
| |
| /* Is bb_src dominated by bb_trg. */ |
| #define IS_DOMINATED(bb_src, bb_trg) \ |
| ( bitset_member (dom[bb_src], bb_trg, bbset_size) ) |
| |
| /* Probability: Prob[i] is a float in [0, 1] which is the probability |
| of bb i relative to the region entry. */ |
| static float *prob; |
| |
| /* The probability of bb_src, relative to bb_trg. Note, that while the |
| 'prob[bb]' is a float in [0, 1], this macro returns an integer |
| in [0, 100]. */ |
| #define GET_SRC_PROB(bb_src, bb_trg) ((int) (100.0 * (prob[bb_src] / \ |
| prob[bb_trg]))) |
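| |
| /* For example, if prob[bb_src] is 0.25 and prob[bb_trg] is 0.50, then |
| GET_SRC_PROB (bb_src, bb_trg) is 50: the estimated probability, as a |
| percentage, of reaching bb_src relative to bb_trg. */ |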
| |
| /* Bit-set of edges, where bit i stands for edge i. */ |
| typedef bitset edgeset; |
| |
| /* Number of edges in the region. */ |
| static int rgn_nr_edges; |
| |
| /* Array of size rgn_nr_edges. */ |
| static int *rgn_edges; |
| |
| /* Number of words in an edgeset. */ |
| static int edgeset_size; |
| |
| /* Mapping from each edge in the graph to its number in the rgn. */ |
| static int *edge_to_bit; |
| #define EDGE_TO_BIT(edge) (edge_to_bit[edge]) |
| |
| /* The split edges of a source bb are different for each target |
| bb. In order to compute this efficiently, the 'potential-split edges' |
| are computed for each bb prior to scheduling a region. These are actually |
| the split edges of each bb relative to the region entry. |
| |
| pot_split[bb] is the set of potential split edges of bb. */ |
| static edgeset *pot_split; |
| |
| /* For every bb, a set of its ancestor edges. */ |
| static edgeset *ancestor_edges; |
| |
| static void compute_dom_prob_ps PROTO ((int)); |
| |
| #define ABS_VALUE(x) (((x)<0)?(-(x)):(x)) |
| #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (INSN_BLOCK (INSN)))) |
| #define IS_SPECULATIVE_INSN(INSN) (IS_SPECULATIVE (BLOCK_TO_BB (INSN_BLOCK (INSN)))) |
| #define INSN_BB(INSN) (BLOCK_TO_BB (INSN_BLOCK (INSN))) |
| |
| /* parameters affecting the decision of rank_for_schedule() */ |
| #define MIN_DIFF_PRIORITY 2 |
| #define MIN_PROBABILITY 40 |
| #define MIN_PROB_DIFF 10 |
| |
| /* speculative scheduling functions */ |
| static int check_live_1 PROTO ((int, rtx)); |
| static void update_live_1 PROTO ((int, rtx)); |
| static int check_live PROTO ((rtx, int)); |
| static void update_live PROTO ((rtx, int)); |
| static void set_spec_fed PROTO ((rtx)); |
| static int is_pfree PROTO ((rtx, int, int)); |
| static int find_conditional_protection PROTO ((rtx, int)); |
| static int is_conditionally_protected PROTO ((rtx, int, int)); |
| static int may_trap_exp PROTO ((rtx, int)); |
| static int haifa_classify_insn PROTO ((rtx)); |
| static int is_prisky PROTO ((rtx, int, int)); |
| static int is_exception_free PROTO ((rtx, int, int)); |
| |
| static char find_insn_mem_list PROTO ((rtx, rtx, rtx, rtx)); |
| static void compute_block_forward_dependences PROTO ((int)); |
| static void init_rgn_data_dependences PROTO ((int)); |
| static void add_branch_dependences PROTO ((rtx, rtx)); |
| static void compute_block_backward_dependences PROTO ((int)); |
| void debug_dependencies PROTO ((void)); |
| |
| /* Notes handling mechanism: |
| ========================= |
| Generally, NOTES are saved before scheduling and restored after scheduling. |
| The scheduler distinguishes between three types of notes: |
| |
| (1) LINE_NUMBER notes, generated and used for debugging. Here, |
| before scheduling a region, a pointer to the LINE_NUMBER note is |
| added to the insn following it (in save_line_notes()), and the note |
| is removed (in rm_line_notes() and unlink_line_notes()). After |
| scheduling the region, this pointer is used for regeneration of |
| the LINE_NUMBER note (in restore_line_notes()). |
| |
| (2) LOOP_BEGIN, LOOP_END, SETJMP, EHREGION_BEG, EHREGION_END notes: |
| Before scheduling a region, a pointer to the note is added to the insn |
| that follows or precedes it. (This happens as part of the data dependence |
| computation). After scheduling an insn, the pointer contained in it is |
| used for regenerating the corresponding note (in reemit_notes). |
| |
| (3) All other notes (e.g. INSN_DELETED): Before scheduling a block, |
| these notes are put in a list (in rm_other_notes() and |
| unlink_other_notes ()). After scheduling the block, these notes are |
| inserted at the beginning of the block (in schedule_block()). */ |
| |
| static rtx unlink_other_notes PROTO ((rtx, rtx)); |
| static rtx unlink_line_notes PROTO ((rtx, rtx)); |
| static void rm_line_notes PROTO ((int)); |
| static void save_line_notes PROTO ((int)); |
| static void restore_line_notes PROTO ((int)); |
| static void rm_redundant_line_notes PROTO ((void)); |
| static void rm_other_notes PROTO ((rtx, rtx)); |
| static rtx reemit_notes PROTO ((rtx, rtx)); |
| |
| static void get_block_head_tail PROTO ((int, rtx *, rtx *)); |
| |
| static void find_pre_sched_live PROTO ((int)); |
| static void find_post_sched_live PROTO ((int)); |
| static void update_reg_usage PROTO ((void)); |
| static int queue_to_ready PROTO ((rtx [], int)); |
| |
| static void debug_ready_list PROTO ((rtx[], int)); |
| static void init_target_units PROTO ((void)); |
| static void insn_print_units PROTO ((rtx)); |
| static int get_visual_tbl_length PROTO ((void)); |
| static void init_block_visualization PROTO ((void)); |
| static void print_block_visualization PROTO ((int, char *)); |
| static void visualize_scheduled_insns PROTO ((int, int)); |
| static void visualize_no_unit PROTO ((rtx)); |
| static void visualize_stall_cycles PROTO ((int, int)); |
| static void print_exp PROTO ((char *, rtx, int)); |
| static void print_value PROTO ((char *, rtx, int)); |
| static void print_pattern PROTO ((char *, rtx, int)); |
| static void print_insn PROTO ((char *, rtx, int)); |
| void debug_reg_vector PROTO ((regset)); |
| |
| static rtx move_insn1 PROTO ((rtx, rtx)); |
| static rtx move_insn PROTO ((rtx, rtx)); |
| static rtx group_leader PROTO ((rtx)); |
| static int set_priorities PROTO ((int)); |
| static void init_rtx_vector PROTO ((rtx **, rtx *, int, int)); |
| static void schedule_region PROTO ((int)); |
| static void split_block_insns PROTO ((int)); |
| |
| #endif /* INSN_SCHEDULING */ |
| |
| #define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X))) |
| |
| /* Helper functions for instruction scheduling. */ |
| |
| /* An INSN_LIST containing all INSN_LISTs allocated but currently unused. */ |
| static rtx unused_insn_list; |
| |
| /* An EXPR_LIST containing all EXPR_LISTs allocated but currently unused. */ |
| static rtx unused_expr_list; |
| |
| static void free_list PROTO ((rtx *, rtx *)); |
| static rtx alloc_INSN_LIST PROTO ((rtx, rtx)); |
| static rtx alloc_EXPR_LIST PROTO ((int, rtx, rtx)); |
| |
| static void |
| free_list (listp, unused_listp) |
| rtx *listp, *unused_listp; |
| { |
| register rtx link, prev_link; |
| |
| if (*listp == 0) |
| return; |
| |
| prev_link = *listp; |
| link = XEXP (prev_link, 1); |
| |
| while (link) |
| { |
| prev_link = link; |
| link = XEXP (link, 1); |
| } |
| |
| XEXP (prev_link, 1) = *unused_listp; |
| *unused_listp = *listp; |
| *listp = 0; |
| } |
| |
| static rtx |
| alloc_INSN_LIST (val, next) |
| rtx val, next; |
| { |
| rtx r; |
| |
| if (unused_insn_list) |
| { |
| r = unused_insn_list; |
| unused_insn_list = XEXP (r, 1); |
| XEXP (r, 0) = val; |
| XEXP (r, 1) = next; |
| PUT_REG_NOTE_KIND (r, VOIDmode); |
| } |
| else |
| r = gen_rtx_INSN_LIST (VOIDmode, val, next); |
| |
| return r; |
| } |
| |
| static rtx |
| alloc_EXPR_LIST (kind, val, next) |
| int kind; |
| rtx val, next; |
| { |
| rtx r; |
| |
| if (unused_expr_list) |
| { |
| r = unused_expr_list; |
| unused_expr_list = XEXP (r, 1); |
| XEXP (r, 0) = val; |
| XEXP (r, 1) = next; |
| PUT_REG_NOTE_KIND (r, kind); |
| } |
| else |
| r = gen_rtx_EXPR_LIST (kind, val, next); |
| |
| return r; |
| } |
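| |
| /* Illustrative only: a hedged sketch (never compiled) of how the |
| recycling allocators above are typically paired with free_list. */ |
| #if 0 |
| static void |
| example_insn_list_recycling (insn1, insn2) |
| rtx insn1, insn2; |
| { |
| rtx list = 0; |
| |
| list = alloc_INSN_LIST (insn1, list); |
| list = alloc_INSN_LIST (insn2, list); |
| |
| /* Hand the whole chain back; the links go onto unused_insn_list and |
| will be reused by later calls to alloc_INSN_LIST. */ |
| free_list (&list, &unused_insn_list); |
| } |
| #endif |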
| |
| /* Add ELEM wrapped in an INSN_LIST with reg note kind DEP_TYPE to the |
| LOG_LINKS of INSN, if not already there. DEP_TYPE indicates the type |
| of dependence that this link represents. */ |
| |
| static void |
| add_dependence (insn, elem, dep_type) |
| rtx insn; |
| rtx elem; |
| enum reg_note dep_type; |
| { |
| rtx link, next; |
| |
| /* Don't depend an insn on itself. */ |
| if (insn == elem) |
| return; |
| |
| /* If elem is part of a sequence that must be scheduled together, then |
| make the dependence point to the last insn of the sequence. |
| When HAVE_cc0, it is possible for NOTEs to exist between users and |
| setters of the condition codes, so we must skip past notes here. |
| Otherwise, NOTEs are impossible here. */ |
| |
| next = NEXT_INSN (elem); |
| |
| #ifdef HAVE_cc0 |
| while (next && GET_CODE (next) == NOTE) |
| next = NEXT_INSN (next); |
| #endif |
| |
| if (next && SCHED_GROUP_P (next) |
| && GET_CODE (next) != CODE_LABEL) |
| { |
| /* Notes will never intervene here though, so don't bother checking |
| for them. */ |
| /* We must reject CODE_LABELs, so that we don't get confused by one |
| that has LABEL_PRESERVE_P set, which is represented by the same |
| bit in the rtl as SCHED_GROUP_P. A CODE_LABEL can never be |
| SCHED_GROUP_P. */ |
| while (NEXT_INSN (next) && SCHED_GROUP_P (NEXT_INSN (next)) |
| && GET_CODE (NEXT_INSN (next)) != CODE_LABEL) |
| next = NEXT_INSN (next); |
| |
| /* Again, don't depend an insn on itself. */ |
| if (insn == next) |
| return; |
| |
| /* Make the dependence to NEXT, the last insn of the group, instead |
| of the original ELEM. */ |
| elem = next; |
| } |
| |
| #ifdef INSN_SCHEDULING |
| /* (This code is guarded by INSN_SCHEDULING, otherwise INSN_BB is undefined.) |
| No need for interblock dependences with calls, since |
| calls are not moved between blocks. Note: the edge where |
| elem is a CALL is still required. */ |
| if (GET_CODE (insn) == CALL_INSN |
| && (INSN_BB (elem) != INSN_BB (insn))) |
| return; |
| |
| #endif |
| |
| /* Check that we don't already have this dependence. */ |
| for (link = LOG_LINKS (insn); link; link = XEXP (link, 1)) |
| if (XEXP (link, 0) == elem) |
| { |
| /* If this is a more restrictive type of dependence than the existing |
| one, then change the existing dependence to this type. */ |
| if ((int) dep_type < (int) REG_NOTE_KIND (link)) |
| PUT_REG_NOTE_KIND (link, dep_type); |
| return; |
| } |
| /* Might want to check one level of transitivity to save conses. */ |
| |
| link = alloc_INSN_LIST (elem, LOG_LINKS (insn)); |
| LOG_LINKS (insn) = link; |
| |
| /* Insn dependency, not data dependency. */ |
| PUT_REG_NOTE_KIND (link, dep_type); |
| } |
| |
| /* Remove ELEM wrapped in an INSN_LIST from the LOG_LINKS |
| of INSN. Abort if not found. */ |
| |
| static void |
| remove_dependence (insn, elem) |
| rtx insn; |
| rtx elem; |
| { |
| rtx prev, link, next; |
| int found = 0; |
| |
| for (prev = 0, link = LOG_LINKS (insn); link; link = next) |
| { |
| next = XEXP (link, 1); |
| if (XEXP (link, 0) == elem) |
| { |
| if (prev) |
| XEXP (prev, 1) = next; |
| else |
| LOG_LINKS (insn) = next; |
| |
| XEXP (link, 1) = unused_insn_list; |
| unused_insn_list = link; |
| |
| found = 1; |
| } |
| else |
| prev = link; |
| } |
| |
| if (!found) |
| abort (); |
| return; |
| } |
| |
| #ifndef INSN_SCHEDULING |
| void |
| schedule_insns (dump_file) |
| FILE *dump_file; |
| { |
| } |
| #else |
| #ifndef __GNUC__ |
| #define __inline |
| #endif |
| |
| #ifndef HAIFA_INLINE |
| #define HAIFA_INLINE __inline |
| #endif |
| |
| /* Computation of memory dependencies. */ |
| |
| /* The *_insns and *_mems are paired lists. Each pending memory operation |
| will have a pointer to the MEM rtx on one list and a pointer to the |
| containing insn on the other list in the same place in the list. */ |
| |
| /* We can't use add_dependence like the old code did, because a single insn |
| may have multiple memory accesses, and hence needs to be on the list |
| once for each memory access. Add_dependence won't let you add an insn |
| to a list more than once. */ |
| |
| /* An INSN_LIST containing all insns with pending read operations. */ |
| static rtx pending_read_insns; |
| |
| /* An EXPR_LIST containing all MEM rtx's which are pending reads. */ |
| static rtx pending_read_mems; |
| |
| /* An INSN_LIST containing all insns with pending write operations. */ |
| static rtx pending_write_insns; |
| |
| /* An EXPR_LIST containing all MEM rtx's which are pending writes. */ |
| static rtx pending_write_mems; |
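| |
| /* Illustrative only: a hedged sketch (never compiled) showing how the |
| paired lists above are meant to be read, the I-th insn on one list |
| going with the I-th MEM on the other. */ |
| #if 0 |
| static int |
| example_count_pending_reads () |
| { |
| rtx insn_link, mem_link; |
| int n = 0; |
| |
| for (insn_link = pending_read_insns, mem_link = pending_read_mems; |
| insn_link != 0; |
| insn_link = XEXP (insn_link, 1), mem_link = XEXP (mem_link, 1)) |
| { |
| /* XEXP (insn_link, 0) is the reading insn; |
| XEXP (mem_link, 0) is the MEM it reads. */ |
| n++; |
| } |
| |
| return n; |
| } |
| #endif |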
| |
| /* Indicates the combined length of the two pending lists. We must prevent |
| these lists from ever growing too large since the number of dependencies |
| produced is at least O(N*N), and execution time is at least O(4*N*N), as |
| a function of the length of these pending lists. */ |
| |
| static int pending_lists_length; |
| |
| /* The last insn upon which all memory references must depend. |
| This is an insn which flushed the pending lists, creating a dependency |
| between it and all previously pending memory references. This creates |
| a barrier (or a checkpoint) which no memory reference is allowed to cross. |
| |
| This includes all non-constant CALL_INSNs. When we do interprocedural |
| alias analysis, this restriction can be relaxed. |
| This may also be an INSN that writes memory if the pending lists grow |
| too large. */ |
| |
| static rtx last_pending_memory_flush; |
| |
| /* The last function call we have seen. All hard regs, and, of course, |
| the last function call, must depend on this. */ |
| |
| static rtx last_function_call; |
| |
| /* The LOG_LINKS field of this is a list of insns which use a pseudo register |
| that does not already cross a call. We create dependencies between each |
| of those insns and the next call insn, to ensure that they won't cross a call |
| after scheduling is done. */ |
| |
| static rtx sched_before_next_call; |
| |
| /* Pointer to the last instruction scheduled. Used by rank_for_schedule, |
| so that insns independent of the last scheduled insn will be preferred |
| over dependent instructions. */ |
| |
| static rtx last_scheduled_insn; |
| |
| /* Data structures for the computation of data dependences in a region. We |
| keep one copy of each of the declared above variables for each bb in the |
| region. Before analyzing the data dependences for a bb, its variables |
| are initialized as a function of the variables of its predecessors. When |
| the analysis for a bb completes, we save the contents of each variable X |
| to a corresponding bb_X[bb] variable. For example, pending_read_insns is |
| copied to bb_pending_read_insns[bb]. Another change is that a few |
| variables are now a list of insns rather than a single insn: |
| last_pending_memory_flush, last_function_call, reg_last_sets. The |
| manipulation of these variables was changed appropriately. */ |
| |
| static rtx **bb_reg_last_uses; |
| static rtx **bb_reg_last_sets; |
| |
| static rtx *bb_pending_read_insns; |
| static rtx *bb_pending_read_mems; |
| static rtx *bb_pending_write_insns; |
| static rtx *bb_pending_write_mems; |
| static int *bb_pending_lists_length; |
| |
| static rtx *bb_last_pending_memory_flush; |
| static rtx *bb_last_function_call; |
| static rtx *bb_sched_before_next_call; |
| |
| /* functions for construction of the control flow graph. */ |
| |
| /* Return 1 if control flow graph should not be constructed, 0 otherwise. |
| |
| We decide not to build the control flow graph if there is possibly more |
| than one entry to the function, if computed branches exist, or if we |
| have nonlocal gotos. */ |
| |
| static int |
| is_cfg_nonregular () |
| { |
| int b; |
| rtx insn; |
| RTX_CODE code; |
| |
| /* If we have a label that could be the target of a nonlocal goto, then |
| the cfg is not well structured. */ |
| if (nonlocal_label_rtx_list () != NULL) |
| return 1; |
| |
| /* If we have any forced labels, then the cfg is not well structured. */ |
| if (forced_labels) |
| return 1; |
| |
| /* If this function has a computed jump, then we consider the cfg |
| not well structured. */ |
| if (current_function_has_computed_jump) |
| return 1; |
| |
| /* If we have exception handlers, then we consider the cfg not well |
| structured. ?!? We should be able to handle this now that flow.c |
| computes an accurate cfg for EH. */ |
| if (exception_handler_labels) |
| return 1; |
| |
| /* If we have non-jumping insns which refer to labels, then we consider |
| the cfg not well structured. */ |
| /* Check for labels referred to by something other than jumps. */ |
| for (b = 0; b < n_basic_blocks; b++) |
| for (insn = basic_block_head[b];; insn = NEXT_INSN (insn)) |
| { |
| code = GET_CODE (insn); |
| if (GET_RTX_CLASS (code) == 'i') |
| { |
| rtx note; |
| |
| for (note = REG_NOTES (insn); note; note = XEXP (note, 1)) |
| if (REG_NOTE_KIND (note) == REG_LABEL) |
| return 1; |
| } |
| |
| if (insn == basic_block_end[b]) |
| break; |
| } |
| |
| /* All the tests passed. Consider the cfg well structured. */ |
| return 0; |
| } |
| |
| /* Build the control flow graph and set nr_edges. |
| |
| Instead of trying to build a cfg ourselves, we rely on flow to |
| do it for us. Stamp out useless code (and bug) duplication. |
| |
| Return nonzero if an irregularity in the cfg is found which would |
| prevent cross block scheduling. */ |
| |
| static int |
| build_control_flow (s_preds, s_succs, num_preds, num_succs) |
| int_list_ptr *s_preds; |
| int_list_ptr *s_succs; |
| int *num_preds; |
| int *num_succs; |
| { |
| int i; |
| int_list_ptr succ; |
| int unreachable; |
| |
| /* Count the number of edges in the cfg. */ |
| nr_edges = 0; |
| unreachable = 0; |
| for (i = 0; i < n_basic_blocks; i++) |
| { |
| nr_edges += num_succs[i]; |
| |
| /* Unreachable loops with more than one basic block are detected |
| during the DFS traversal in find_rgns. |
| |
| Unreachable loops with a single block are detected here. This |
| test is redundant with the one in find_rgns, but it's much |
| cheaper to go ahead and catch the trivial case here. */ |
| if (num_preds[i] == 0 |
| || (num_preds[i] == 1 && INT_LIST_VAL (s_preds[i]) == i)) |
| unreachable = 1; |
| } |
| |
| /* Account for entry/exit edges. */ |
| nr_edges += 2; |
| |
| in_edges = (int *) xmalloc (n_basic_blocks * sizeof (int)); |
| out_edges = (int *) xmalloc (n_basic_blocks * sizeof (int)); |
| bzero ((char *) in_edges, n_basic_blocks * sizeof (int)); |
| bzero ((char *) out_edges, n_basic_blocks * sizeof (int)); |
| |
| edge_table = (edge *) xmalloc ((nr_edges) * sizeof (edge)); |
| bzero ((char *) edge_table, ((nr_edges) * sizeof (edge))); |
| |
| nr_edges = 0; |
| for (i = 0; i < n_basic_blocks; i++) |
| for (succ = s_succs[i]; succ; succ = succ->next) |
| { |
| if (INT_LIST_VAL (succ) != EXIT_BLOCK) |
| new_edge (i, INT_LIST_VAL (succ)); |
| } |
| |
| /* increment by 1, since edge 0 is unused. */ |
| nr_edges++; |
| |
| return unreachable; |
| } |
| |
| |
| /* Record an edge in the control flow graph from SOURCE to TARGET. |
| |
| In theory, this is redundant with the s_succs computed above, but |
| we have not converted all of haifa to use information from the |
| integer lists. */ |
| |
| static void |
| new_edge (source, target) |
| int source, target; |
| { |
| int e, next_edge; |
| int curr_edge, fst_edge; |
| |
| /* check for duplicates */ |
| fst_edge = curr_edge = OUT_EDGES (source); |
| while (curr_edge) |
| { |
| if (FROM_BLOCK (curr_edge) == source |
| && TO_BLOCK (curr_edge) == target) |
| { |
| return; |
| } |
| |
| curr_edge = NEXT_OUT (curr_edge); |
| |
| if (fst_edge == curr_edge) |
| break; |
| } |
| |
| e = ++nr_edges; |
| |
| FROM_BLOCK (e) = source; |
| TO_BLOCK (e) = target; |
| |
| if (OUT_EDGES (source)) |
| { |
| next_edge = NEXT_OUT (OUT_EDGES (source)); |
| NEXT_OUT (OUT_EDGES (source)) = e; |
| NEXT_OUT (e) = next_edge; |
| } |
| else |
| { |
| OUT_EDGES (source) = e; |
| NEXT_OUT (e) = e; |
| } |
| |
| if (IN_EDGES (target)) |
| { |
| next_edge = NEXT_IN (IN_EDGES (target)); |
| NEXT_IN (IN_EDGES (target)) = e; |
| NEXT_IN (e) = next_edge; |
| } |
| else |
| { |
| IN_EDGES (target) = e; |
| NEXT_IN (e) = e; |
| } |
| } |
| |
| |
| /* BITSET macros for operations on the control flow graph. */ |
| |
| /* Compute bitwise union of two bitsets. */ |
| #define BITSET_UNION(set1, set2, len) \ |
| do { register bitset tp = set1, sp = set2; \ |
| register int i; \ |
| for (i = 0; i < len; i++) \ |
| *(tp++) |= *(sp++); } while (0) |
| |
| /* Compute bitwise intersection of two bitsets. */ |
| #define BITSET_INTER(set1, set2, len) \ |
| do { register bitset tp = set1, sp = set2; \ |
| register int i; \ |
| for (i = 0; i < len; i++) \ |
| *(tp++) &= *(sp++); } while (0) |
| |
| /* Compute bitwise difference of two bitsets. */ |
| #define BITSET_DIFFER(set1, set2, len) \ |
| do { register bitset tp = set1, sp = set2; \ |
| register int i; \ |
| for (i = 0; i < len; i++) \ |
| *(tp++) &= ~*(sp++); } while (0) |
| |
| /* Inverts every bit of bitset 'set' */ |
| #define BITSET_INVERT(set, len) \ |
| do { register bitset tmpset = set; \ |
| register int i; \ |
| for (i = 0; i < len; i++, tmpset++) \ |
| *tmpset = ~*tmpset; } while (0) |
| |
| /* Turn on the index'th bit in bitset set. */ |
| #define BITSET_ADD(set, index, len) \ |
| { \ |
| if (index >= HOST_BITS_PER_WIDE_INT * len) \ |
| abort (); \ |
| else \ |
| set[index/HOST_BITS_PER_WIDE_INT] |= \ |
| (unsigned HOST_WIDE_INT) 1 << (index % HOST_BITS_PER_WIDE_INT); \ |
| } |
| |
| /* Turn off the index'th bit in set. */ |
| #define BITSET_REMOVE(set, index, len) \ |
| { \ |
| if (index >= HOST_BITS_PER_WIDE_INT * len) \ |
| abort (); \ |
| else \ |
| set[index/HOST_BITS_PER_WIDE_INT] &= \ |
| ~((unsigned HOST_WIDE_INT) 1 << (index % HOST_BITS_PER_WIDE_INT)); \ |
| } |
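| |
| /* Illustrative only: a hedged sketch (never compiled) of the bitset |
| macros above, e.g. recording in the dominators array that bb 3 |
| dominates bb 7. */ |
| #if 0 |
| static void |
| example_bitset_usage () |
| { |
| BITSET_ADD (dom[7], 3, bbset_size); |
| |
| if (! bitset_member (dom[7], 3, bbset_size)) |
| abort (); |
| |
| BITSET_REMOVE (dom[7], 3, bbset_size); |
| } |
| #endif |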
| |
| |
| /* Check if the index'th bit in bitset set is on. */ |
| |
| static char |
| bitset_member (set, index, len) |
| bitset set; |
| int index, len; |
| { |
| if (index >= HOST_BITS_PER_WIDE_INT * len) |
| abort (); |
| return (set[index / HOST_BITS_PER_WIDE_INT] |
| & ((unsigned HOST_WIDE_INT) 1 << (index % HOST_BITS_PER_WIDE_INT))) ? 1 : 0; |
| } |
| |
| |
| /* Translate a bit-set SET to a list BL of the bit-set members. */ |
| |
| static void |
| extract_bitlst (set, len, bl) |
| bitset set; |
| int len; |
| bitlst *bl; |
| { |
| int i, j, offset; |
| unsigned HOST_WIDE_INT word; |
| |
| /* The bitlst table space is reused in each call to extract_bitlst. */ |
| bitlst_table_last = 0; |
| |
| bl->first_member = &bitlst_table[bitlst_table_last]; |
| bl->nr_members = 0; |
| |
| for (i = 0; i < len; i++) |
| { |
| word = set[i]; |
| offset = i * HOST_BITS_PER_WIDE_INT; |
| for (j = 0; word; j++) |
| { |
| if (word & 1) |
| { |
| bitlst_table[bitlst_table_last++] = offset; |
| (bl->nr_members)++; |
| } |
| word >>= 1; |
| ++offset; |
| } |
| } |
| |
| } |
| |
| |
| /* functions for the construction of regions */ |
| |
| /* Print the regions, for debugging purposes. Callable from debugger. */ |
| |
| void |
| debug_regions () |
| { |
| int rgn, bb; |
| |
| fprintf (dump, "\n;; ------------ REGIONS ----------\n\n"); |
| for (rgn = 0; rgn < nr_regions; rgn++) |
| { |
| fprintf (dump, ";;\trgn %d nr_blocks %d:\n", rgn, |
| rgn_table[rgn].rgn_nr_blocks); |
| fprintf (dump, ";;\tbb/block: "); |
| |
| for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++) |
| { |
| current_blocks = RGN_BLOCKS (rgn); |
| |
| if (bb != BLOCK_TO_BB (BB_TO_BLOCK (bb))) |
| abort (); |
| |
| fprintf (dump, " %d/%d ", bb, BB_TO_BLOCK (bb)); |
| } |
| |
| fprintf (dump, "\n\n"); |
| } |
| } |
| |
| |
| /* Build a single block region for each basic block in the function. |
| This allows for using the same code for interblock and basic block |
| scheduling. */ |
| |
| static void |
| find_single_block_region () |
| { |
| int i; |
| |
| for (i = 0; i < n_basic_blocks; i++) |
| { |
| rgn_bb_table[i] = i; |
| RGN_NR_BLOCKS (i) = 1; |
| RGN_BLOCKS (i) = i; |
| CONTAINING_RGN (i) = i; |
| BLOCK_TO_BB (i) = 0; |
| } |
| nr_regions = n_basic_blocks; |
| } |
| |
| |
| /* Update number of blocks and the estimate for number of insns |
| in the region. Return 1 if the region is "too large" for interblock |
| scheduling (compile time considerations), otherwise return 0. */ |
| |
| static int |
| too_large (block, num_bbs, num_insns) |
| int block, *num_bbs, *num_insns; |
| { |
| (*num_bbs)++; |
| (*num_insns) += (INSN_LUID (basic_block_end[block]) - |
| INSN_LUID (basic_block_head[block])); |
| if ((*num_bbs > MAX_RGN_BLOCKS) || (*num_insns > MAX_RGN_INSNS)) |
| return 1; |
| else |
| return 0; |
| } |
| |
| |
| /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk] |
| is still an inner loop. Put in max_hdr[blk] the header of the innermost |
| loop containing blk. */ |
| #define UPDATE_LOOP_RELATIONS(blk, hdr) \ |
| { \ |
| if (max_hdr[blk] == -1) \ |
| max_hdr[blk] = hdr; \ |
| else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \ |
| RESET_BIT (inner, hdr); \ |
| else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \ |
| { \ |
| RESET_BIT (inner,max_hdr[blk]); \ |
| max_hdr[blk] = hdr; \ |
| } \ |
| } |
| |
| |
| /* Find regions for interblock scheduling. |
| |
| A region for scheduling can be: |
| |
| * A loop-free procedure, or |
| |
| * A reducible inner loop, or |
| |
| * A basic block not contained in any other region. |
| |
| |
| ?!? In theory we could build other regions based on extended basic |
| blocks or reverse extended basic blocks. Is it worth the trouble? |
| |
| Loop blocks that form a region are put into the region's block list |
| in topological order. |
| |
| This procedure stores its results into the following global (ick) variables |
| |
| * nr_regions |
| * rgn_table |
| * rgn_bb_table |
| * block_to_bb |
| * containing_rgn |
| |
| |
| We use dominator relationships to avoid making regions out of non-reducible |
| loops. |
| |
| This procedure needs to be converted to work on pred/succ lists instead |
| of edge tables. That would simplify it somewhat. */ |
| |
| static void |
| find_rgns (s_preds, s_succs, num_preds, num_succs, dom) |
| int_list_ptr *s_preds; |
| int_list_ptr *s_succs; |
| int *num_preds; |
| int *num_succs; |
| sbitmap *dom; |
| { |
| int *max_hdr, *dfs_nr, *stack, *queue, *degree; |
| char no_loops = 1; |
| int node, child, loop_head, i, head, tail; |
| int count = 0, sp, idx = 0, current_edge = out_edges[0]; |
| int num_bbs, num_insns, unreachable; |
| int too_large_failure; |
| |
| /* Note if an edge has been passed. */ |
| sbitmap passed; |
| |
| /* Note if a block is a natural loop header. */ |
| sbitmap header; |
| |
| /* Note if a block is a natural inner loop header. */ |
| sbitmap inner; |
| |
| /* Note if a block is in the block queue. */ |
| sbitmap in_queue; |
| |
| /* Note if a block is on the DFS stack. */ |
| sbitmap in_stack; |
| |
| /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops |
| and a mapping from block to its loop header (if the block is contained |
| in a loop, else -1). |
| |
| Store results in HEADER, INNER, and MAX_HDR respectively, these will |
| be used as inputs to the second traversal. |
| |
| STACK, SP and DFS_NR are only used during the first traversal. */ |
| |
| /* Allocate and initialize variables for the first traversal. */ |
| max_hdr = (int *) alloca (n_basic_blocks * sizeof (int)); |
| dfs_nr = (int *) alloca (n_basic_blocks * sizeof (int)); |
| bzero ((char *) dfs_nr, n_basic_blocks * sizeof (int)); |
| stack = (int *) alloca (nr_edges * sizeof (int)); |
| |
| inner = sbitmap_alloc (n_basic_blocks); |
| sbitmap_ones (inner); |
| |
| header = sbitmap_alloc (n_basic_blocks); |
| sbitmap_zero (header); |
| |
| passed = sbitmap_alloc (nr_edges); |
| sbitmap_zero (passed); |
| |
| in_queue = sbitmap_alloc (n_basic_blocks); |
| sbitmap_zero (in_queue); |
| |
| in_stack = sbitmap_alloc (n_basic_blocks); |
| sbitmap_zero (in_stack); |
| |
| for (i = 0; i < n_basic_blocks; i++) |
| max_hdr[i] = -1; |
| |
| /* DFS traversal to find inner loops in the cfg. */ |
| |
| sp = -1; |
| while (1) |
| { |
| if (current_edge == 0 || TEST_BIT (passed, current_edge)) |
| { |
| /* We have reached a leaf node or a node that was already |
| processed. Pop edges off the stack until we find |
| an edge that has not yet been processed. */ |
| while (sp >= 0 |
| && (current_edge == 0 || TEST_BIT (passed, current_edge))) |
| { |
| /* Pop entry off the stack. */ |
| current_edge = stack[sp--]; |
| node = FROM_BLOCK (current_edge); |
| child = TO_BLOCK (current_edge); |
| RESET_BIT (in_stack, child); |
| if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child])) |
| UPDATE_LOOP_RELATIONS (node, max_hdr[child]); |
| current_edge = NEXT_OUT (current_edge); |
| } |
| |
| /* See if we have finished the DFS tree traversal. */ |
| if (sp < 0 && TEST_BIT (passed, current_edge)) |
| break; |
| |
| /* Nope, continue the traversal with the popped node. */ |
| continue; |
| } |
| |
| /* Process a node. */ |
| node = FROM_BLOCK (current_edge); |
| child = TO_BLOCK (current_edge); |
| SET_BIT (in_stack, node); |
| dfs_nr[node] = ++count; |
| |
| /* If the successor is in the stack, then we've found a loop. |
| Mark the loop, if it is not a natural loop, then it will |
| be rejected during the second traversal. */ |
| if (TEST_BIT (in_stack, child)) |
| { |
| no_loops = 0; |
| SET_BIT (header, child); |
| UPDATE_LOOP_RELATIONS (node, child); |
| SET_BIT (passed, current_edge); |
| current_edge = NEXT_OUT (current_edge); |
| continue; |
| } |
| |
| /* If the child was already visited, then there is no need to visit |
| it again. Just update the loop relationships and restart |
| with a new edge. */ |
| if (dfs_nr[child]) |
| { |
| if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child])) |
| UPDATE_LOOP_RELATIONS (node, max_hdr[child]); |
| SET_BIT (passed, current_edge); |
| current_edge = NEXT_OUT (current_edge); |
| continue; |
| } |
| |
| /* Push an entry on the stack and continue DFS traversal. */ |
| stack[++sp] = current_edge; |
| SET_BIT (passed, current_edge); |
| current_edge = OUT_EDGES (child); |
| } |
| |
| /* Another check for unreachable blocks. The earlier test in |
| is_cfg_nonregular only finds unreachable blocks that do not |
| form a loop. |
| |
| The DFS traversal will mark every block that is reachable from |
| the entry node by placing a nonzero value in dfs_nr. Thus if |
| dfs_nr is zero for any block, then it must be unreachable. */ |
| unreachable = 0; |
| for (i = 0; i < n_basic_blocks; i++) |
| if (dfs_nr[i] == 0) |
| { |
| unreachable = 1; |
| break; |
| } |
| |
| /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array |
| to hold degree counts. */ |
| degree = dfs_nr; |
| |
| /* Compute the in-degree of every block in the graph */ |
| for (i = 0; i < n_basic_blocks; i++) |
| degree[i] = num_preds[i]; |
| |
| /* Do not perform region scheduling if there are any unreachable |
| blocks. */ |
| if (!unreachable) |
| { |
| if (no_loops) |
| SET_BIT (header, 0); |
| |
| /* Second traversal: find reducible inner loops and topologically sort |
| the blocks of each region. */ |
| |
| queue = (int *) alloca (n_basic_blocks * sizeof (int)); |
| |
| /* Find blocks which are inner loop headers. We still have non-reducible |
| loops to consider at this point. */ |
| for (i = 0; i < n_basic_blocks; i++) |
| { |
| if (TEST_BIT (header, i) && TEST_BIT (inner, i)) |
| { |
| int_list_ptr ps; |
| int j; |
| |
| /* Now check that the loop is reducible. We do this separately |
| from finding inner loops so that we do not find a reducible |
| loop which contains an inner non-reducible loop. |
| |
| A simple way to find reducible/natural loops is to verify |
| that each block in the loop is dominated by the loop |
| header. |
| |
| If there exists a block that is not dominated by the loop |
| header, then the block is reachable from outside the loop |
| and thus the loop is not a natural loop. */ |
| for (j = 0; j < n_basic_blocks; j++) |
| { |
| /* First identify blocks in the loop, except for the loop |
| entry block. */ |
| if (i == max_hdr[j] && i != j) |
| { |
| /* Now verify that the block is dominated by the loop |
| header. */ |
| if (!TEST_BIT (dom[j], i)) |
| break; |
| } |
| } |
| |
| /* If we exited the loop early, then I is the header of a |
| non-reducible loop and we should quit processing it now. */ |
| if (j != n_basic_blocks) |
| continue; |
| |
| /* I is a header of an inner loop, or block 0 in a subroutine |
| with no loops at all. */ |
| head = tail = -1; |
| too_large_failure = 0; |
| loop_head = max_hdr[i]; |
| |
| /* Decrease degree of all I's successors for topological |
| ordering. */ |
| for (ps = s_succs[i]; ps; ps = ps->next) |
| if (INT_LIST_VAL (ps) != EXIT_BLOCK |
| && INT_LIST_VAL (ps) != ENTRY_BLOCK) |
| --degree[INT_LIST_VAL(ps)]; |
| |
| /* Estimate # insns, and count # blocks in the region. */ |
| num_bbs = 1; |
| num_insns = (INSN_LUID (basic_block_end[i]) |
| - INSN_LUID (basic_block_head[i])); |
| |
| |
| /* Find all loop latches (blocks with back edges to the loop |
| header), or all the leaf blocks if the cfg has no loops. |
| |
| Place those blocks into the queue. */ |
| if (no_loops) |
| { |
| for (j = 0; j < n_basic_blocks; j++) |
| /* Leaf nodes have only a single successor which must |
| be EXIT_BLOCK. */ |
| if (num_succs[j] == 1 |
| && INT_LIST_VAL (s_succs[j]) == EXIT_BLOCK) |
| { |
| queue[++tail] = j; |
| SET_BIT (in_queue, j); |
| |
| if (too_large (j, &num_bbs, &num_insns)) |
| { |
| too_large_failure = 1; |
| break; |
| } |
| } |
| } |
| else |
| { |
| int_list_ptr ps; |
| |
| for (ps = s_preds[i]; ps; ps = ps->next) |
| { |
| node = INT_LIST_VAL (ps); |
| |
| if (node == ENTRY_BLOCK || node == EXIT_BLOCK) |
| continue; |
| |
| if (max_hdr[node] == loop_head && node != i) |
| { |
| /* This is a loop latch. */ |
| queue[++tail] = node; |
| SET_BIT (in_queue, node); |
| |
| if (too_large (node, &num_bbs, &num_insns)) |
| { |
| too_large_failure = 1; |
| break; |
| } |
| } |
| |
| } |
| } |
| |
| /* Now add all the blocks in the loop to the queue. |
| |
| We know the loop is a natural loop; however the algorithm |
| above will not always mark certain blocks as being in the |
| loop. Consider: |
| node children |
| a b,c |
| b c |
| c a,d |
| d b |
| |
| |
| The algorithm in the DFS traversal may not mark B & D as part |
| of the loop (i.e. they will not have max_hdr set to A). |
| |
| We know they cannot be loop latches (else they would have |
| had max_hdr set since they'd have a backedge to a dominator |
| block). So we don't need them on the initial queue. |
| |
| We know they are part of the loop because they are dominated |
| by the loop header and can be reached by a backwards walk of |
| the edges starting with nodes on the initial queue. |
| |
| It is safe and desirable to include those nodes in the |
| loop/scheduling region. To do so we would need to decrease |
| the degree of a node if it is the target of a backedge |
| within the loop itself as the node is placed in the queue. |
| |
| We do not do this because I'm not sure that the actual |
| scheduling code will properly handle this case. ?!? */ |
| |
| while (head < tail && !too_large_failure) |
| { |
| int_list_ptr ps; |
| child = queue[++head]; |
| |
| for (ps = s_preds[child]; ps; ps = ps->next) |
| { |
| node = INT_LIST_VAL (ps); |
| |
| /* See discussion above about nodes not marked as in |
| this loop during the initial DFS traversal. */ |
| if (node == ENTRY_BLOCK || node == EXIT_BLOCK |
| || max_hdr[node] != loop_head) |
| { |
| tail = -1; |
| break; |
| } |
| else if (!TEST_BIT (in_queue, node) && node != i) |
| { |
| queue[++tail] = node; |
| SET_BIT (in_queue, node); |
| |
| if (too_large (node, &num_bbs, &num_insns)) |
| { |
| too_large_failure = 1; |
| break; |
| } |
| } |
| } |
| } |
| |
| if (tail >= 0 && !too_large_failure) |
| { |
| /* Place the loop header into the list of region blocks. */ |
| degree[i] = -1; |
| rgn_bb_table[idx] = i; |
| RGN_NR_BLOCKS (nr_regions) = num_bbs; |
| RGN_BLOCKS (nr_regions) = idx++; |
| CONTAINING_RGN (i) = nr_regions; |
| BLOCK_TO_BB (i) = count = 0; |
| |
| /* Remove blocks from queue[] when their in-degree becomes |
| zero. Repeat until no blocks are left on the list. This |
| produces a topological list of blocks in the region. */ |
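| /* A block whose in-degree is not yet zero is skipped by decrementing |
| head; once head runs off the front of the queue it is reset to tail |
| and the remaining blocks are scanned again. */ |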
| while (tail >= 0) |
| { |
| int_list_ptr ps; |
| |
| if (head < 0) |
| head = tail; |
| child = queue[head]; |
| if (degree[child] == 0) |
| { |
| degree[child] = -1; |
| rgn_bb_table[idx++] = child; |
| BLOCK_TO_BB (child) = ++count; |
| CONTAINING_RGN (child) = nr_regions; |
| queue[head] = queue[tail--]; |
| |
| for (ps = s_succs[child]; ps; ps = ps->next) |
| if (INT_LIST_VAL (ps) != ENTRY_BLOCK |
| && INT_LIST_VAL (ps) != EXIT_BLOCK) |
| --degree[INT_LIST_VAL (ps)]; |
| } |
| else |
| --head; |
| } |
| ++nr_regions; |
| } |
| } |
| } |
| } |
| |
| /* Any block that did not end up in a region is placed into a region |
| by itself. */ |
| for (i = 0; i < n_basic_blocks; i++) |
| if (degree[i] >= 0) |
| { |
| rgn_bb_table[idx] = i; |
| RGN_NR_BLOCKS (nr_regions) = 1; |
| RGN_BLOCKS (nr_regions) = idx++; |
| CONTAINING_RGN (i) = nr_regions++; |
| BLOCK_TO_BB (i) = 0; |
| } |
| |
| free (passed); |
| free (header); |
| free (inner); |
| free (in_queue); |
| free (in_stack); |
| } |
| |
| |
| /* functions for region scheduling information */ |
| |
| /* Compute dominators, probability, and potential-split-edges of bb. |
| Assume that these values were already computed for bb's predecessors. */ |
| |
| static void |
| compute_dom_prob_ps (bb) |
| int bb; |
| { |
| int nxt_in_edge, fst_in_edge, pred; |
| int fst_out_edge, nxt_out_edge, nr_out_edges, nr_rgn_out_edges; |
| |
| prob[bb] = 0.0; |
| if (IS_RGN_ENTRY (bb)) |
| { |
| BITSET_ADD (dom[bb], 0, bbset_size); |
| prob[bb] = 1.0; |
| return; |
| } |
| |
| fst_in_edge = nxt_in_edge = IN_EDGES (BB_TO_BLOCK (bb)); |
| |
| /* initialize dom[bb] to '111..1' */ |
| BITSET_INVERT (dom[bb], bbset_size); |
| |
| do |
| { |
| pred = FROM_BLOCK (nxt_in_edge); |
| BITSET_INTER (dom[bb], dom[BLOCK_TO_BB (pred)], bbset_size); |
| |
| BITSET_UNION (ancestor_edges[bb], ancestor_edges[BLOCK_TO_BB (pred)], |
| edgeset_size); |
| |
| BITSET_ADD (ancestor_edges[bb], EDGE_TO_BIT (nxt_in_edge), edgeset_size); |
| |
| nr_out_edges = 1; |
| nr_rgn_out_edges = 0; |
| fst_out_edge = OUT_EDGES (pred); |
| nxt_out_edge = NEXT_OUT (fst_out_edge); |
| BITSET_UNION (pot_split[bb], pot_split[BLOCK_TO_BB (pred)], |
| edgeset_size); |
| |
| BITSET_ADD (pot_split[bb], EDGE_TO_BIT (fst_out_edge), edgeset_size); |
| |
| /* the successor doesn't belong to the region? */ |
| if (CONTAINING_RGN (TO_BLOCK (fst_out_edge)) != |
| CONTAINING_RGN (BB_TO_BLOCK (bb))) |
| ++nr_rgn_out_edges; |
| |
| while (fst_out_edge != nxt_out_edge) |
| { |
| ++nr_out_edges; |
| /* the successor doesn't belong to the region? */ |
| if (CONTAINING_RGN (TO_BLOCK (nxt_out_edge)) != |
| CONTAINING_RGN (BB_TO_BLOCK (bb))) |
| ++nr_rgn_out_edges; |
| BITSET_ADD (pot_split[bb], EDGE_TO_BIT (nxt_out_edge), edgeset_size); |
| nxt_out_edge = NEXT_OUT (nxt_out_edge); |
| |
| } |
| |
| /* now nr_rgn_out_edges is the number of region-exit edges from pred, |
| and nr_out_edges will be the number of pred out edges not leaving |
| the region. */ |
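| /* For example, if pred is reached with probability 0.5 and has three |
| out edges, one of which leaves the region, each of its two in-region |
| successors is credited 0.9 * 0.5 / 2 = 0.225 from pred. */ |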
| nr_out_edges -= nr_rgn_out_edges; |
| if (nr_rgn_out_edges > 0) |
| prob[bb] += 0.9 * prob[BLOCK_TO_BB (pred)] / nr_out_edges; |
| else |
| prob[bb] += prob[BLOCK_TO_BB (pred)] / nr_out_edges; |
| nxt_in_edge = NEXT_IN (nxt_in_edge); |
| } |
| while (fst_in_edge != nxt_in_edge); |
| |
| BITSET_ADD (dom[bb], bb, bbset_size); |
| BITSET_DIFFER (pot_split[bb], ancestor_edges[bb], edgeset_size); |
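| /* At this point pot_split[bb] holds the edges that branch off the |
| paths from the region entry to bb: all out edges of bb's ancestors |
| minus the ancestor edges themselves. */ |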
| |
| if (sched_verbose >= 2) |
| fprintf (dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb), (int) (100.0 * prob[bb])); |
| } /* compute_dom_prob_ps */ |
| |
| /* functions for target info */ |
| |
| /* Compute in BL the list of split-edges of bb_src relative to bb_trg. |
| Note that bb_trg dominates bb_src. */ |
| |
| static void |
| split_edges (bb_src, bb_trg, bl) |
| int bb_src; |
| int bb_trg; |
| edgelst *bl; |
| { |
| int es = edgeset_size; |
| edgeset src = (edgeset) alloca (es * sizeof (HOST_WIDE_INT)); |
| |
| while (es--) |
| src[es] = (pot_split[bb_src])[es]; |
| BITSET_DIFFER (src, pot_split[bb_trg], edgeset_size); |
| extract_bitlst (src, edgeset_size, bl); |
| } |
| |
| |
| /* Find the valid candidate-source-blocks for the target block TRG, compute |
| their probability, and check if they are speculative or not. |
| For speculative sources, compute their update-blocks and split-blocks. */ |
| |
| static void |
| compute_trg_info (trg) |
| int trg; |
| { |
| register candidate *sp; |
| edgelst el; |
| int check_block, update_idx; |
| int i, j, k, fst_edge, nxt_edge; |
| |
| /* define some of the fields for the target bb as well */ |
| sp = candidate_table + trg; |
| sp->is_valid = 1; |
| sp->is_speculative = 0; |
| sp->src_prob = 100; |
| |
| for (i = trg + 1; i < current_nr_blocks; i++) |
| { |
| sp = candidate_table + i; |
| |
| sp->is_valid = IS_DOMINATED (i, trg); |
| if (sp->is_valid) |
| { |
| sp->src_prob = GET_SRC_PROB (i, trg); |
| sp->is_valid = (sp->src_prob >= MIN_PROBABILITY); |
| } |
| |
| if (sp->is_valid) |
| { |
| split_edges (i, trg, &el); |
| sp->is_speculative = (el.nr_members) ? 1 : 0; |
| if (sp->is_speculative && !flag_schedule_speculative) |
| sp->is_valid = 0; |
| } |
| |
| if (sp->is_valid) |
| { |
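| /* The split blocks are the targets of the candidate's split edges: |
| blocks control may reach by branching off the paths from TRG to |
| the candidate block. Registers set by a speculatively moved insn |
| must not be live at the start of any of them (see check_live). */ |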
| sp->split_bbs.first_member = &bblst_table[bblst_last]; |
| sp->split_bbs.nr_members = el.nr_members; |
| for (j = 0; j < el.nr_members; bblst_last++, j++) |
| bblst_table[bblst_last] = |
| TO_BLOCK (rgn_edges[el.first_member[j]]); |
| sp->update_bbs.first_member = &bblst_table[bblst_last]; |
| update_idx = 0; |
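| /* The update blocks are successors of split-edge sources reached |
| through edges that are not themselves split edges; if an insn is |
| moved speculatively, the life information at the start of each |
| such block must be updated afterwards (see update_live). */ |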
| for (j = 0; j < el.nr_members; j++) |
| { |
| check_block = FROM_BLOCK (rgn_edges[el.first_member[j]]); |
| fst_edge = nxt_edge = OUT_EDGES (check_block); |
| do |
| { |
| for (k = 0; k < el.nr_members; k++) |
| if (EDGE_TO_BIT (nxt_edge) == el.first_member[k]) |
| break; |
| |
| if (k >= el.nr_members) |
| { |
| bblst_table[bblst_last++] = TO_BLOCK (nxt_edge); |
| update_idx++; |
| } |
| |
| nxt_edge = NEXT_OUT (nxt_edge); |
| } |
| while (fst_edge != nxt_edge); |
| } |
| sp->update_bbs.nr_members = update_idx; |
| |
| } |
| else |
| { |
| sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0; |
| |
| sp->is_speculative = 0; |
| sp->src_prob = 0; |
| } |
| } |
| } /* compute_trg_info */ |
| |
| |
| /* Print info about the candidate block I, for debugging purposes. Callable from debugger. */ |
| |
| void |
| debug_candidate (i) |
| int i; |
| { |
| if (!candidate_table[i].is_valid) |
| return; |
| |
| if (candidate_table[i].is_speculative) |
| { |
| int j; |
| fprintf (dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i); |
| |
| fprintf (dump, "split path: "); |
| for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++) |
| { |
| int b = candidate_table[i].split_bbs.first_member[j]; |
| |
| fprintf (dump, " %d ", b); |
| } |
| fprintf (dump, "\n"); |
| |
| fprintf (dump, "update path: "); |
| for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++) |
| { |
| int b = candidate_table[i].update_bbs.first_member[j]; |
| |
| fprintf (dump, " %d ", b); |
| } |
| fprintf (dump, "\n"); |
| } |
| else |
| { |
| fprintf (dump, " src %d equivalent\n", BB_TO_BLOCK (i)); |
| } |
| } |
| |
| |
| /* Print candidates info, for debugging purposes. Callable from debugger. */ |
| |
| void |
| debug_candidates (trg) |
| int trg; |
| { |
| int i; |
| |
| fprintf (dump, "----------- candidate table: target: b=%d bb=%d ---\n", |
| BB_TO_BLOCK (trg), trg); |
| for (i = trg + 1; i < current_nr_blocks; i++) |
| debug_candidate (i); |
| } |
| |
| |
| /* functions for speculative scheduling */ |
| |
| /* Return 0 if x is a set of a register alive at the beginning of one |
| of the split-blocks of src, otherwise return 1. */ |
| |
| static int |
| check_live_1 (src, x) |
| int src; |
| rtx x; |
| { |
| register int i; |
| register int regno; |
| register rtx reg = SET_DEST (x); |
| |
| if (reg == 0) |
| return 1; |
| |
| while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT |
| || GET_CODE (reg) == SIGN_EXTRACT |
| || GET_CODE (reg) == STRICT_LOW_PART) |
| reg = XEXP (reg, 0); |
| |
| if (GET_CODE (reg) != REG) |
| return 1; |
| |
| regno = REGNO (reg); |
| |
| if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno]) |
| { |
| /* Global registers are assumed live */ |
| return 0; |
| } |
| else |
| { |
| if (regno < FIRST_PSEUDO_REGISTER) |
| { |
| /* check for hard registers */ |
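| /* A multi-word hard register is checked word by word: return 0 if |
| any of its words is live at the start of some split block. */ |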
| int j = HARD_REGNO_NREGS (regno, GET_MODE (reg)); |
| while (--j >= 0) |
| { |
| for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++) |
| { |
| int b = candidate_table[src].split_bbs.first_member[i]; |
| |
| if (REGNO_REG_SET_P (basic_block_live_at_start[b], regno + j)) |
| { |
| return 0; |
| } |
| } |
| } |
| } |
| else |
| { |
| /* check for pseudo registers */ |
| for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++) |
| { |
| int b = candidate_table[src].split_bbs.first_member[i]; |
| |
| if (REGNO_REG_SET_P (basic_block_live_at_start[b], regno)) |
| { |
| return 0; |
| } |
| } |
| } |
| } |
| |
| return 1; |
| } |
| |
| |
| /* If x is a set of a register R, mark that R is alive in the beginning |
| of every update-block of src. */ |
| |
| static void |
| update_live_1 (src, x) |
| int src; |
| rtx x; |
| { |
| register int i; |
| register int regno; |
| register rtx reg = SET_DEST (x); |
| |
| if (reg == 0) |
| return; |
| |
| while (GET_CODE (reg) == SUBREG || GET_CODE (reg) == ZERO_EXTRACT |
| || GET_CODE (reg) == SIGN_EXTRACT |
| || GET_CODE (reg) == STRICT_LOW_PART) |
| reg = XEXP (reg, 0); |
| |
| if (GET_CODE (reg) != REG) |
| return; |
| |
| /* Global registers are always live, so the code below does not apply |
| to them. */ |
| |
| regno = REGNO (reg); |
| |
| if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno]) |
| { |
| if (regno < FIRST_PSEUDO_REGISTER) |
| { |
| int j = HARD_REGNO_NREGS (regno, GET_MODE (reg)); |
| while (--j >= 0) |
| { |
| for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++) |
| { |
| int b = candidate_table[src].update_bbs.first_member[i]; |
| |
| SET_REGNO_REG_SET (basic_block_live_at_start[b], regno + j); |
| } |
| } |
| } |
| else |
| { |
| for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++) |
| { |
| int b = candidate_table[src].update_bbs.first_member[i]; |
| |
| SET_REGNO_REG_SET (basic_block_live_at_start[b], regno); |
| } |
| } |
| } |
| } |
| |
| |
| /* Return 1 if insn can be speculatively moved from block src to trg, |
| otherwise return 0. Called before first insertion of insn to |
| ready-list or before the scheduling. */ |
| |
| static int |
| check_live (insn, src) |
| rtx insn; |
| int src; |
| { |
| /* find the registers set by the instruction */ |
| if (GET_CODE (PATTERN (insn)) == SET |
| || GET_CODE (PATTERN (insn)) == CLOBBER) |
| return check_live_1 (src, PATTERN (insn)); |
| else if (GET_CODE (PATTERN (insn)) == PARALLEL) |
| { |
| int j; |
| for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) |
| if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET |
| || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER) |
| && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j))) |
| return 0; |
| |
| return 1; |
| } |
| |
| return 1; |
| } |
| |
| |
| /* Update the live registers info after insn was moved speculatively from |
| block src to trg. */ |
| |
| static void |
| update_live (insn, src) |
| rtx insn; |
| int src; |
| { |
| /* find the registers set by the instruction */ |
| if (GET_CODE (PATTERN (insn)) == SET |
| || GET_CODE (PATTERN (insn)) == CLOBBER) |
| update_live_1 (src, PATTERN (insn)); |
| else if (GET_CODE (PATTERN (insn)) == PARALLEL) |
| { |
| int j; |
| for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) |
| if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET |
| || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER) |
| update_live_1 (src, XVECEXP (PATTERN (insn), 0, j)); |
| } |
| } |
| |
| /* Exception Free Loads: |
| |
| We define five classes of speculative loads: IFREE, IRISKY, |
| PFREE, PRISKY, and MFREE. |
| |
| IFREE loads are loads that are proved to be exception-free, just |
| by examining the load insn. Examples for such loads are loads |
| from TOC and loads of global data. |
| |
| IRISKY loads are loads that are proved to be exception-risky, |
| just by examining the load insn. Examples for such loads are |
| volatile loads and loads from shared memory. |
| |
| PFREE loads are loads for which we can prove, by examining other |
| insns, that they are exception-free. Currently, this class consists |
| of loads for which we are able to find a "similar load", either in |
| the target block, or, if only one split-block exists, in that split |
| block. Load2 is similar to load1 if both have the same single base |
| register. We identify only part of the similar loads, by finding |
| an insn upon which both load1 and load2 have a DEF-USE dependence. |
| |
| PRISKY loads are loads for which we can prove, by examining other |
| insns, that they are exception-risky. Currently we have two proofs for |
| such loads. The first proof detects loads that are probably guarded by a |
| test on the memory address. This proof is based on the |
| backward and forward data dependence information for the region. |
| Let load-insn be the examined load. |
| Load-insn is PRISKY iff ALL of the following hold (for some insns insn1 and test-insn): |
| |
| - insn1 is not in the same block as load-insn |
| - there is a DEF-USE dependence chain (insn1, ..., load-insn) |
| - test-insn is either a compare or a branch, not in the same block as load-insn |
| - load-insn is reachable from test-insn |
| - there is a DEF-USE dependence chain (insn1, ..., test-insn) |
| |
| This proof might fail when the compare and the load are fed |
| by an insn not in the region. To solve this, we will add to this |
| group all loads that have no input DEF-USE dependence. |
| |
| The second proof detects loads that are directly or indirectly |
| fed by a speculative load. This proof is affected by the |
| scheduling process. We will use the flag fed_by_spec_load. |
| Initially, all insns have this flag reset. After a speculative |
| motion of an insn, if insn is either a load, or marked as |
| fed_by_spec_load, we will also mark as fed_by_spec_load every |
| insn1 for which a DEF-USE dependence (insn, insn1) exists. A |
| load which is fed_by_spec_load is also PRISKY. |
| |
| MFREE (maybe-free) loads are all the remaining loads. They may be |
| exception-free, but we cannot prove it. |
| |
| Now, all loads in IFREE and PFREE classes are considered |
| exception-free, while all loads in IRISKY and PRISKY classes are |
| considered exception-risky. As for loads in the MFREE class, |
| these are considered either exception-free or exception-risky, |
| depending on whether we are pessimistic or optimistic. We have |
| to take the pessimistic approach to assure the safety of |
| speculative scheduling, but we can take the optimistic approach |
| by invoking the -fsched_spec_load_dangerous option. */ |
| |
| enum INSN_TRAP_CLASS |
| { |
| TRAP_FREE = 0, IFREE = 1, PFREE_CANDIDATE = 2, |
| PRISKY_CANDIDATE = 3, IRISKY = 4, TRAP_RISKY = 5 |
| }; |
| |
| #define WORST_CLASS(class1, class2) \ |
| ((class1 > class2) ? class1 : class2) |
| |
| /* Indexed by INSN_UID; set if there's a DEF-USE dependence between |
| some speculatively moved load insn and this one. */ |
| char *fed_by_spec_load; |
| char *is_load_insn; |
| |
| /* Non-zero if block bb_to is equal to, or reachable from, block bb_from. */ |
| #define IS_REACHABLE(bb_from, bb_to) \ |
| (bb_from == bb_to \ |
| || IS_RGN_ENTRY (bb_from) \ |
| || (bitset_member (ancestor_edges[bb_to], \ |
| EDGE_TO_BIT (IN_EDGES (BB_TO_BLOCK (bb_from))), \ |
| edgeset_size))) |
| #define FED_BY_SPEC_LOAD(insn) (fed_by_spec_load[INSN_UID (insn)]) |
| #define IS_LOAD_INSN(insn) (is_load_insn[INSN_UID (insn)]) |
| |
| /* Non-zero iff the address is comprised of at most one register. */ |
| #define CONST_BASED_ADDRESS_P(x) \ |
| (GET_CODE (x) == REG \ |
| || ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS \ |
| || (GET_CODE (x) == LO_SUM)) \ |
| && (GET_CODE (XEXP (x, 0)) == CONST_INT \ |
| || GET_CODE (XEXP (x, 1)) == CONST_INT))) |
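| /* For example, (reg 65) and (plus (reg 65) (const_int 8)) satisfy this |
| test, while (plus (reg 65) (reg 66)) does not. */ |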
| |
| /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */ |
| |
| static void |
| set_spec_fed (load_insn) |
| rtx load_insn; |
| { |
| rtx link; |
| |
| for (link = INSN_DEPEND (load_insn); link; link = XEXP (link, 1)) |
| if (GET_MODE (link) == VOIDmode) |
| FED_BY_SPEC_LOAD (XEXP (link, 0)) = 1; |
| } /* set_spec_fed */ |
| |
| /* On the path from the insn to load_insn_bb, find a conditional branch |
| depending on insn, that guards the speculative load. */ |
| |
| static int |
| find_conditional_protection (insn, load_insn_bb) |
| rtx insn; |
| int load_insn_bb; |
| { |
| rtx link; |
| |
| /* iterate through DEF-USE forward dependences */ |
| for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1)) |
| { |
| rtx next = XEXP (link, 0); |
| if ((CONTAINING_RGN (INSN_BLOCK (next)) == |
| CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb))) |
| && IS_REACHABLE (INSN_BB (next), load_insn_bb) |
| && load_insn_bb != INSN_BB (next) |
| && GET_MODE (link) == VOIDmode |
| && (GET_CODE (next) == JUMP_INSN |
| || find_conditional_protection (next, load_insn_bb))) |
| return 1; |
| } |
| return 0; |
| } /* find_conditional_protection */ |
| |
| /* Returns 1 if the same insn1 that participates in the computation |
| of load_insn's address is feeding a conditional branch that |
| guards load_insn. This is true if we find the two DEF-USE |
| chains: |
| insn1 -> ... -> conditional-branch |
| insn1 -> ... -> load_insn, |
| and if a flow path exists: |
| insn1 -> ... -> conditional-branch -> ... -> load_insn, |
| and if insn1 is on the path |
| region-entry -> ... -> bb_trg -> ... load_insn. |
| |
| Locate insn1 by climbing on LOG_LINKS from load_insn. |
| Locate the branch by following INSN_DEPEND from insn1. */ |
| |
| static int |
| is_conditionally_protected (load_insn, bb_src, bb_trg) |
| rtx load_insn; |
| int bb_src, bb_trg; |
| { |
| rtx link; |
| |
| for (link = LOG_LINKS (load_insn); link; link = XEXP (link, 1)) |
| { |
| rtx insn1 = XEXP (link, 0); |
| |
| /* must be a DEF-USE dependence upon non-branch */ |
| if (GET_MODE (link) != VOIDmode |
| || GET_CODE (insn1) == JUMP_INSN) |
| continue; |
| |
| /* there must exist a path: region-entry -> ... -> bb_trg -> ... load_insn */ |
| if (INSN_BB (insn1) == bb_src |
| || (CONTAINING_RGN (INSN_BLOCK (insn1)) |
| != CONTAINING_RGN (BB_TO_BLOCK (bb_src))) |
| || (!IS_REACHABLE (bb_trg, INSN_BB (insn1)) |
| && !IS_REACHABLE (INSN_BB (insn1), bb_trg))) |
| continue; |
| |
| /* now search for the conditional-branch */ |
| if (find_conditional_protection (insn1, bb_src)) |
| return 1; |
| |
| /* recursive step: search another insn1, "above" current insn1. */ |
| return is_conditionally_protected (insn1, bb_src, bb_trg); |
| } |
| |
| /* the chain does not exist */ |
| return 0; |
| } /* is_conditionally_protected */ |
| |
| /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence |
| load_insn can move speculatively from bb_src to bb_trg. All the |
| following must hold: |
| |
| (1) both loads have 1 base register (PFREE_CANDIDATEs). |
| (2) load_insn and load2 have a def-use dependence upon |
| the same insn 'insn1'. |
| (3) either load2 is in bb_trg, or: |
| - there's only one split-block, and |
| - load2 is on the escape path. |
| |
| From all these we can conclude that the two loads access memory |
| addresses that differ at most by a constant, and hence if moving |
| load_insn would cause an exception, it would have been caused by |
| load2 anyhow. */ |
| |
| static int |
| is_pfree (load_insn, bb_src, bb_trg) |
| rtx load_insn; |
| int bb_src, bb_trg; |
| { |
| rtx back_link; |
| register candidate *candp = candidate_table + bb_src; |
| |
| if (candp->split_bbs.nr_members != 1) |
| /* must have exactly one escape block */ |
| return 0; |
| |
| for (back_link = LOG_LINKS (load_insn); |
| back_link; back_link = XEXP (back_link, 1)) |
| { |
| rtx insn1 = XEXP (back_link, 0); |
| |
| if (GET_MODE (back_link) == VOIDmode) |
| { |
| /* found a DEF-USE dependence (insn1, load_insn) */ |
| rtx fore_link; |
| |
| for (fore_link = INSN_DEPEND (insn1); |
| fore_link; fore_link = XEXP (fore_link, 1)) |
| { |
| rtx insn2 = XEXP (fore_link, 0); |
| if (GET_MODE (fore_link) == VOIDmode) |
| { |
| /* found a DEF-USE dependence (insn1, insn2) */ |
| if (haifa_classify_insn (insn2) != PFREE_CANDIDATE) |
| /* insn2 not guaranteed to be a 1 base reg load */ |
| continue; |
| |
| if (INSN_BB (insn2) == bb_trg) |
| /* insn2 is the similar load, in the target block */ |
| return 1; |
| |
| if (*(candp->split_bbs.first_member) == INSN_BLOCK (insn2)) |
| /* insn2 is a similar load, in a split-block */ |
| return 1; |
| } |
| } |
| } |
| } |
| |
| /* couldn't find a similar load */ |
| return 0; |
| } /* is_pfree */ |
| |
| /* Returns a class that insn with GET_DEST(insn)=x may belong to, |
| as found by analyzing insn's expression. */ |
| |
| static int |
| may_trap_exp (x, is_store) |
| rtx x; |
| int is_store; |
| { |
| enum rtx_code code; |
| |
| if (x == 0) |
| return TRAP_FREE; |
| code = GET_CODE (x); |
| if (is_store) |
| { |
| if (code == MEM) |
| return TRAP_RISKY; |
| else |
| return TRAP_FREE; |
| } |
| if (code == MEM) |
| { |
| /* The insn uses memory */ |
| /* a volatile load */ |
| if (MEM_VOLATILE_P (x)) |
| return IRISKY; |
| /* an exception-free load */ |
| if (!may_trap_p (x)) |
| return IFREE; |
| /* a load with 1 base register, to be further checked */ |
| if (CONST_BASED_ADDRESS_P (XEXP (x, 0))) |
| return PFREE_CANDIDATE; |
| /* no info on the load, to be further checked */ |
| return PRISKY_CANDIDATE; |
| } |
| else |
| { |
| char *fmt; |
| int i, insn_class = TRAP_FREE; |
| |
| /* neither store nor load, check if it may cause a trap */ |
| if (may_trap_p (x)) |
| return TRAP_RISKY; |
| /* recursive step: walk the insn... */ |
| fmt = GET_RTX_FORMAT (code); |
| for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'e') |
| { |
| int tmp_class = may_trap_exp (XEXP (x, i), is_store); |
| insn_class = WORST_CLASS (insn_class, tmp_class); |
| } |
| else if (fmt[i] == 'E') |
| { |
| int j; |
| for (j = 0; j < XVECLEN (x, i); j++) |
| { |
| int tmp_class = may_trap_exp (XVECEXP (x, i, j), is_store); |
| insn_class = WORST_CLASS (insn_class, tmp_class); |
| if (insn_class == TRAP_RISKY || insn_class == IRISKY) |
| break; |
| } |
| } |
| if (insn_class == TRAP_RISKY || insn_class == IRISKY) |
| break; |
| } |
| return insn_class; |
| } |
| } /* may_trap_exp */ |
| |
| |
| /* Classifies insn for the purpose of verifying that it can be |
| moved speculatively, by examining its pattern, returning: |
| TRAP_RISKY: store, or risky non-load insn (e.g. division by variable). |
| TRAP_FREE: non-load insn. |
| IFREE: load from a globally safe location. |
| IRISKY: volatile load. |
| PFREE_CANDIDATE, PRISKY_CANDIDATE: loads that need to be checked for |
| being either PFREE or PRISKY. */ |
| |
| static int |
| haifa_classify_insn (insn) |
| rtx insn; |
| { |
| rtx pat = PATTERN (insn); |
| int tmp_class = TRAP_FREE; |
| int insn_class = TRAP_FREE; |
| enum rtx_code code; |
| |
| if (GET_CODE (pat) == PARALLEL) |
| { |
| int i, len = XVECLEN (pat, 0); |
| |
| for (i = len - 1; i >= 0; i--) |
| { |
| code = GET_CODE (XVECEXP (pat, 0, i)); |
| switch (code) |
| { |
| case CLOBBER: |
| /* test if it is a 'store' */ |
| tmp_class = may_trap_exp (XEXP (XVECEXP (pat, 0, i), 0), 1); |
| break; |
| case SET: |
| /* test if it is a store */ |
| tmp_class = may_trap_exp (SET_DEST (XVECEXP (pat, 0, i)), 1); |
| if (tmp_class == TRAP_RISKY) |
| break; |
| /* test if it is a load */ |
| tmp_class = |
| WORST_CLASS (tmp_class, |
| may_trap_exp (SET_SRC (XVECEXP (pat, 0, i)), 0)); |
| break; |
| case TRAP_IF: |
| tmp_class = TRAP_RISKY; |
| break; |
| default:; |
| } |
| insn_class = WORST_CLASS (insn_class, tmp_class); |
| if (insn_class == TRAP_RISKY || insn_class == IRISKY) |
| break; |
| } |
| } |
| else |
| { |
| code = GET_CODE (pat); |
| switch (code) |
| { |
| case CLOBBER: |
| /* test if it is a 'store' */ |
| tmp_class = may_trap_exp (XEXP (pat, 0), 1); |
| break; |
| case SET: |
| /* test if it is a store */ |
| tmp_class = may_trap_exp (SET_DEST (pat), 1); |
| if (tmp_class == TRAP_RISKY) |
| break; |
| /* test if it is a load */ |
| tmp_class = |
| WORST_CLASS (tmp_class, |
| may_trap_exp (SET_SRC (pat), 0)); |
| break; |
| case TRAP_IF: |
| tmp_class = TRAP_RISKY; |
| break; |
| default:; |
| } |
| insn_class = tmp_class; |
| } |
| |
| return insn_class; |
| |
| } /* haifa_classify_insn */ |
| |
| /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by |
| a load moved speculatively, or if load_insn is protected by |
| a compare on load_insn's address). */ |
| |
| static int |
| is_prisky (load_insn, bb_src, bb_trg) |
| rtx load_insn; |
| int bb_src, bb_trg; |
| { |
| if (FED_BY_SPEC_LOAD (load_insn)) |
| return 1; |
| |
| if (LOG_LINKS (load_insn) == NULL) |
| /* dependence may 'hide' out of the region. */ |
| return 1; |
| |
| if (is_conditionally_protected (load_insn, bb_src, bb_trg)) |
| return 1; |
| |
| return 0; |
| } /* is_prisky */ |
| |
| /* Insn is a candidate to be moved speculatively from bb_src to bb_trg. |
| Return 1 if insn is exception-free (and the motion is valid) |
| and 0 otherwise. */ |
| |
| static int |
| is_exception_free (insn, bb_src, bb_trg) |
| rtx insn; |
| int bb_src, bb_trg; |
| { |
| int insn_class = haifa_classify_insn (insn); |
| |
| /* handle non-load insns */ |
| switch (insn_class) |
| { |
| case TRAP_FREE: |
| return 1; |
| case TRAP_RISKY: |
| return 0; |
| default:; |
| } |
| |
| /* handle loads */ |
| if (!flag_schedule_speculative_load) |
| return 0; |
| IS_LOAD_INSN (insn) = 1; |
| switch (insn_class) |
| { |
| case IFREE: |
| return (1); |
| case IRISKY: |
| return 0; |
| case PFREE_CANDIDATE: |
| if (is_pfree (insn, bb_src, bb_trg)) |
| return 1; |
| /* don't 'break' here: PFREE-candidate is also PRISKY-candidate */ |
| case PRISKY_CANDIDATE: |
| if (!flag_schedule_speculative_load_dangerous |
| || is_prisky (insn, bb_src, bb_trg)) |
| return 0; |
| break; |
| default:; |
| } |
| |
| return flag_schedule_speculative_load_dangerous; |
| } /* is_exception_free */ |
| |
| |
| /* Process an insn's memory dependencies. There are four kinds of |
| dependencies: |
| |
| (0) read dependence: read follows read |
| (1) true dependence: read follows write |
| (2) anti dependence: write follows read |
| (3) output dependence: write follows write |
| |
| We are careful to build only dependencies which actually exist, and |
| use transitivity to avoid building too many links. */ |
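| /* For example, given 'r1 = *p; *p = r2; *p = r3; r4 = *p;', the first |
| store has an anti dependence on the first load, the second store has |
| an output dependence on the first store, and the final load has a |
| true dependence on the second store. */ |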
| |
| /* Return the INSN_LIST containing INSN in LIST, or NULL |
| if LIST does not contain INSN. */ |
| |
| HAIFA_INLINE static rtx |
| find_insn_list (insn, list) |
| rtx insn; |
| rtx list; |
| { |
| while (list) |
| { |
| if (XEXP (list, 0) == insn) |
| return list; |
| list = XEXP (list, 1); |
| } |
| return 0; |
| } |
| |
| |
| /* Return 1 if the pair (insn, x) is found in (LIST, LIST1), or 0 otherwise. */ |
| |
| HAIFA_INLINE static char |
| find_insn_mem_list (insn, x, list, list1) |
| rtx insn, x; |
| rtx list, list1; |
| { |
| while (list) |
| { |
| if (XEXP (list, 0) == insn |
| && XEXP (list1, 0) == x) |
| return 1; |
| list = XEXP (list, 1); |
| list1 = XEXP (list1, 1); |
| } |
| return 0; |
| } |
| |
| |
| /* Compute the function units used by INSN. This caches the value |
| returned by function_units_used. A function unit is encoded as the |
| unit number if the value is non-negative and the complement of a |
| mask if the value is negative. A function unit index is the |
| non-negative encoding. */ |
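| /* For example, if function_units_used reports unit 2, the value 3 is |
| cached (non-negative results are biased by one so that a cached zero |
| can mean 'not yet computed') and insn_unit returns 2; a negative |
| result such as ~3, the complement of the mask for units 0 and 1, is |
| returned unchanged and is cached only when it fits in the 16-bit |
| INSN_UNIT field. */ |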
| |
| HAIFA_INLINE static int |
| insn_unit (insn) |
| rtx insn; |
| { |
| register int unit = INSN_UNIT (insn); |
| |
| if (unit == 0) |
| { |
| recog_memoized (insn); |
| |
| /* A USE insn, or something else we don't need to understand. |
| We can't pass these directly to function_units_used because it will |
| trigger a fatal error for unrecognizable insns. */ |
| if (INSN_CODE (insn) < 0) |
| unit = -1; |
| else |
| { |
| unit = function_units_used (insn); |
| /* Increment non-negative values so we can cache zero. */ |
| if (unit >= 0) |
| unit++; |
| } |
| /* We only cache 16 bits of the result, so if the value is out of |
| range, don't cache it. */ |
| if (FUNCTION_UNITS_SIZE < HOST_BITS_PER_SHORT |
| || unit >= 0 |
| || (~unit & ((1 << (HOST_BITS_PER_SHORT - 1)) - 1)) == 0) |
| INSN_UNIT (insn) = unit; |
| } |
| return (unit > 0 ? unit - 1 : unit); |
| } |
| |
| /* Compute the blockage range for executing INSN on UNIT. This caches |
| the value returned by the blockage_range_function for the unit. |
| These values are encoded in an int where the upper half gives the |
| minimum value and the lower half gives the maximum value. */ |
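| /* The unit number is stored biased by one so that a zero INSN_BLOCKAGE |
| means nothing has been cached; the UNIT_BLOCKED test below checks |
| whether the cached range belongs to the unit being asked about. */ |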
| |
| HAIFA_INLINE static unsigned int |
| blockage_range (unit, insn) |
| int unit; |
| rtx insn; |
| { |
| unsigned int blockage = INSN_BLOCKAGE (insn); |
| unsigned int range; |
| |
| if (UNIT_BLOCKED (blockage) != unit + 1) |
| { |
| range = function_units[unit].blockage_range_function (insn); |
| /* We only cache the blockage range for one unit and then only if |
| the values fit. */ |
| if (HOST_BITS_PER_INT >= UNIT_BITS + 2 * BLOCKAGE_BITS) |
| INSN_BLOCKAGE (insn) = ENCODE_BLOCKAGE (unit + 1, range); |
| } |
| else |
| range = BLOCKAGE_RANGE (blockage); |
| |
| return range; |
| } |
| |
| /* A vector indexed by function unit instance giving the last insn to use |
| the unit. The value of the function unit instance index for unit U |
| instance I is (U + I * FUNCTION_UNITS_SIZE). */ |
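| /* With FUNCTION_UNITS_SIZE of 4, for example, instance 2 of unit 1 |
| occupies slot 1 + 2 * 4 = 9 in these vectors. */ |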
| static rtx unit_last_insn[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY]; |
| |
| /* A vector indexed by function unit instance giving the minimum time when |
| the unit will unblock based on the maximum blockage cost. */ |
| static int unit_tick[FUNCTION_UNITS_SIZE * MAX_MULTIPLICITY]; |
| |
| /* A vector indexed by function unit number giving the number of insns |
| that remain to use the unit. */ |
| static int unit_n_insns[FUNCTION_UNITS_SIZE]; |
| |
| /* Reset the function unit state to the null state. */ |
| |
| static void |
| clear_units () |
| { |
| bzero ((char *) unit_last_insn, sizeof (unit_last_insn)); |
| bzero ((char *) unit_tick, sizeof (unit_tick)); |
| bzero ((char *) unit_n_insns, sizeof (unit_n_insns)); |
| } |
| |
| /* Return the issue-delay of an insn */ |
| |
| HAIFA_INLINE static int |
| insn_issue_delay (insn) |
| rtx insn; |
| { |
| int i, delay = 0; |
| int unit = insn_unit (insn); |
| |
| /* efficiency note: in fact, we are working 'hard' to compute a |
| value that was available in the md file, and is not available in |
| the function_units[] structure. It would be nice to have this |
| value there, too. */ |
| if (unit >= 0) |
| { |
| if (function_units[unit].blockage_range_function && |
| function_units[unit].blockage_function) |
| delay = function_units[unit].blockage_function (insn, insn); |
| } |
| else |
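| /* A negative unit encodes the complement of a mask of units; walk |
| the mask bit by bit and keep the worst delay found. */ |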
| for (i = 0, unit = ~unit; unit; i++, unit >>= 1) |
| if ((unit & 1) != 0 && function_units[i].blockage_range_function |
| && function_units[i].blockage_function) |
| delay = MAX (delay, function_units[i].blockage_function (insn, insn)); |
| |
| return delay; |
| } |
| |
| /* Return the actual hazard cost of executing INSN on the unit UNIT, |
| instance INSTANCE at time CLOCK if the previous actual hazard cost |
| was COST. */ |
| |
| HAIFA_INLINE static int |
| actual_hazard_this_instance (unit, instance, insn, clock, cost) |
| int unit, instance, clock, cost; |
| rtx insn; |
| { |
| int tick = unit_tick[instance]; /* time the unit unblocks, based on the last issued insn */ |
| |
| if (tick - clock > cost) |
| { |
| /* The scheduler is operating forward, so unit's last insn is the |
| executing insn and INSN is the candidate insn. We want a |
| more exact measure of the blockage if we execute INSN at CLOCK |
| given when we committed the execution of the unit's last insn. |
| |
| The blockage value is given by either the unit's max blockage |
| constant, blockage range function, or blockage function. Use |
| the most exact form for the given unit. */ |
| |
| if (function_units[unit].blockage_range_function) |
| { |
| if (function_units[unit].blockage_function) |
| tick += (function_units[unit].blockage_function |
| (unit_last_insn[instance], insn) |
| - function_units[unit].max_blockage); |
| else |
| tick += ((int) MAX_BLOCKAGE_COST (blockage_range (unit, insn)) |
| - function_units[unit].max_blockage); |
| } |
| if (tick - clock > cost) |
| cost = tick - clock; |
| } |
| return cost; |
| } |
| |
| /* Record INSN as having begun execution on the units encoded by UNIT at |
| time CLOCK. */ |
| |
| HAIFA_INLINE static void |
| schedule_unit (unit, insn, clock) |
| int unit, clock; |
| rtx insn; |
| { |
| int i; |
| |
| if (unit >= 0) |
| { |
| int instance = unit; |
| #if MAX_MULTIPLICITY > 1 |
| /* Find the first free instance of the function unit and use that |
| one. We assume that one is free. */ |
| for (i = function_units[unit].multiplicity - 1; i > 0; i--) |
| { |
| if (!actual_hazard_this_instance (unit, instance, insn, clock, 0)) |
| break; |
| instance += FUNCTION_UNITS_SIZE; |
| } |
| #endif |
| unit_last_insn[instance] = insn; |
| unit_tick[instance] = (clock + function_units[unit].max_blockage); |
| } |
| else |
| for (i = 0, unit = ~unit; unit; i++, unit >>= 1) |
| if ((unit & 1) != 0) |
| schedule_unit (i, insn, clock); |
| } |
| |
| /* Return the actual hazard cost of executing INSN on the units encoded by |
| UNIT at time CLOCK if the previous actual hazard cost was COST. */ |
| |
| HAIFA_INLINE static int |
| actual_hazard (unit, insn, clock, cost) |
| int unit, clock, cost; |
| rtx insn; |
| { |
| int i; |
| |
| if (unit >= 0) |
| { |
| /* Find the instance of the function unit with the minimum hazard. */ |
| int instance = unit; |
| int best_cost = actual_hazard_this_instance (unit, instance, insn, |
| clock, cost); |
| int this_cost; |
| |
| #if MAX_MULTIPLICITY > 1 |
| if (best_cost > cost) |
| { |
| for (i = function_units[unit].multiplicity - 1; i > 0; i--) |
| { |
| instance += FUNCTION_UNITS_SIZE; |
| this_cost = actual_hazard_this_instance (unit, instance, insn, |
| clock, cost); |
| if (this_cost < best_cost) |
| { |
| best_cost = this_cost; |
| if (this_cost <= cost) |
| break; |
| } |
| } |
| } |
| #endif |
| cost = MAX (cost, best_cost); |
| } |
| else |
| for (i = 0, unit = ~unit; unit; i++, unit >>= 1) |
| if ((unit & 1) != 0) |
| cost = actual_hazard (i, insn, clock, cost); |
| |
| return cost; |
| } |
| |
| /* Return the potential hazard cost of executing an instruction on the |
| units encoded by UNIT if the previous potential hazard cost was COST. |
| An insn with a large blockage time is chosen in preference to one |
| with a smaller time; an insn that uses a unit that is more likely |
| to be used is chosen in preference to one with a unit that is less |
| used. We are trying to minimize a subsequent actual hazard. */ |
| |
| HAIFA_INLINE static int |
| potential_hazard (unit, insn, cost) |
| int unit, cost; |
| rtx insn; |
| { |
| int i, ncost; |
| unsigned int minb, maxb; |
| |
| if (unit >= 0) |
| { |
| minb = maxb = function_units[unit].max_blockage; |
| if (maxb > 1) |
| { |
| if (function_units[unit].blockage_range_function) |
| { |
| maxb = minb = blockage_range (unit, insn); |
| maxb = MAX_BLOCKAGE_COST (maxb); |
| minb = MIN_BLOCKAGE_COST (minb); |
| } |
| |
| if (maxb > 1) |
| { |
| /* Make the number of instructions left dominate. Make the |
| minimum delay dominate the maximum delay. If all these |
| are the same, use the unit number to add an arbitrary |
| ordering. Other terms can be added. */ |
| ncost = minb * 0x40 + maxb; |
| ncost *= (unit_n_insns[unit] - 1) * 0x1000 + unit; |
| if (ncost > cost) |
| cost = ncost; |
| } |
| } |
| } |
| else |
| for (i = 0, unit = ~unit; unit; i++, unit >>= 1) |
| if ((unit & 1) != 0) |
| cost = potential_hazard (i, insn, cost); |
| |
| return cost; |
| } |
| |
| /* Compute cost of executing INSN given the dependence LINK on the insn USED. |
| This is the number of cycles between instruction issue and |
| instruction results. */ |
| |
| HAIFA_INLINE static int |
| insn_cost (insn, link, used) |
| rtx insn, link, used; |
| { |
| register int cost = INSN_COST (insn); |
| |
| if (cost == 0) |
| { |
| recog_memoized (insn); |
| |
| /* A USE insn, or something else we don't need to understand. |
| We can't pass these directly to result_ready_cost because it will |
| trigger a fatal error for unrecognizable insns. */ |
| if (INSN_CODE (insn) < 0) |
| { |
| INSN_COST (insn) = 1; |
| return 1; |
| } |
| else |
| { |
| cost = result_ready_cost (insn); |
| |
| if (cost < 1) |
| cost = 1; |
| |
| INSN_COST (insn) = cost; |
| } |
| } |
| |
| /* in this case estimate cost without caring how insn is used. */ |
| if (link == 0 && used == 0) |
| return cost; |
| |
| /* A USE insn should never require the value used to be computed. This |
| allows the computation of a function's result and parameter values to |
| overlap the return and call. */ |
| recog_memoized (used); |
| if (INSN_CODE (used) < 0) |
| LINK_COST_FREE (link) = 1; |
| |
| /* If some dependencies vary the cost, compute the adjustment. Most |
| commonly, the adjustment is complete: either the cost is ignored |
| (in the case of an output- or anti-dependence), or the cost is |
| unchanged. These values are cached in the link as LINK_COST_FREE |
| and LINK_COST_ZERO. */ |
| |
| if (LINK_COST_FREE (link)) |
| cost = 1; |
| #ifdef ADJUST_COST |
| else if (!LINK_COST_ZERO (link)) |
| { |
| int ncost = cost; |
| |
| ADJUST_COST (used, link, insn, ncost); |
| if (ncost <= 1) |
| LINK_COST_FREE (link) = ncost = 1; |
| if (cost == ncost) |
| LINK_COST_ZERO (link) = 1; |
| cost = ncost; |
| } |
| #endif |
| return cost; |
| } |
| |
| /* Compute the priority number for INSN. */ |
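| /* Roughly, the priority is the number of cycles on the longest chain of |
| forward dependences from INSN to an insn with no successors in the same |
| block; it is computed recursively and cached in INSN_PRIORITY. */ |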
| |
| static int |
| priority (insn) |
| rtx insn; |
| { |
| int this_priority; |
| rtx link; |
| |
| if (GET_RTX_CLASS (GET_CODE (insn)) != 'i') |
| return 0; |
| |
| if ((this_priority = INSN_PRIORITY (insn)) == 0) |
| { |
| if (INSN_DEPEND (insn) == 0) |
| this_priority = insn_cost (insn, 0, 0); |
| else |
| for (link = INSN_DEPEND (insn); link; link = XEXP (link, 1)) |
| { |
| rtx next; |
| int next_priority; |
| |
| if (RTX_INTEGRATED_P (link)) |
| continue; |
| |
| next = XEXP (link, 0); |
| |
| /* critical path is meaningful within block boundaries only */ |
| if (INSN_BLOCK (next) != INSN_BLOCK (insn)) |
| continue; |
| |
| next_priority = insn_cost (insn, link, next) + priority (next); |
| if (next_priority > this_priority) |
| this_priority = next_priority; |
| } |
| INSN_PRIORITY (insn) = this_priority; |
| } |
| return this_priority; |
| } |
| |
| |
| /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add |
| them to the unused_*_list variables, so that they can be reused. */ |
| |
| static void |
| free_pending_lists () |
| { |
| if (current_nr_blocks <= 1) |
| { |
| free_list (&pending_read_insns, &unused_insn_list); |
| free_list (&pending_write_insns, &unused_insn_list); |
| free_list (&pending_read_mems, &unused_expr_list); |
| free_list (&pending_write_mems, &unused_expr_list); |
| } |
| else |
| { |
| /* interblock scheduling */ |
| int bb; |
| |
| for (bb = 0; bb < current_nr_blocks; bb++) |
| { |
| free_list (&bb_pending_read_insns[bb], &unused_insn_list); |
| free_list (&bb_pending_write_insns[bb], &unused_insn_list); |
| free_list (&bb_pending_read_mems[bb], &unused_expr_list); |
| free_list (&bb_pending_write_mems[bb], &unused_expr_list); |
| } |
| } |
| } |
| |
| /* Add an INSN and MEM reference pair to a pending INSN_LIST and MEM_LIST. |
| The MEM is a memory reference contained within INSN, which we are saving |
| so that we can do memory aliasing on it. */ |
| |
| static void |
| add_insn_mem_dependence (insn_list, mem_list, insn, mem) |
| rtx *insn_list, *mem_list, insn, mem; |
| { |
| register rtx link; |
| |
| link = alloc_INSN_LIST (insn, *insn_list); |
| *insn_list = link; |
| |
| link = alloc_EXPR_LIST (VOIDmode, mem, *mem_list); |
| *mem_list = link; |
| |
| pending_lists_length++; |
| } |
| |
| |
| /* Make a dependency between every memory reference on the pending lists |
| and INSN, thus flushing the pending lists. If ONLY_WRITE, don't flush |
| the read list. */ |
| |
| static void |
| flush_pending_lists (insn, only_write) |
| rtx insn; |
| int only_write; |
| { |
| rtx u; |
| rtx link; |
| |
| while (pending_read_insns && ! only_write) |
| { |
| add_dependence (insn, XEXP (pending_read_insns, 0), REG_DEP_ANTI); |
| |
| link = pending_read_insns; |
| pending_read_insns = XEXP (pending_read_insns, 1); |
| XEXP (link, 1) = unused_insn_list; |
| unused_insn_list = link; |
| |
| link = pending_read_mems; |
| pending_read_mems = XEXP (pending_read_mems, 1); |
| XEXP (link, 1) = unused_expr_list; |
| unused_expr_list = link; |
| } |
| while (pending_write_insns) |
| { |
| add_dependence (insn, XEXP (pending_write_insns, 0), REG_DEP_ANTI); |
| |
| link = pending_write_insns; |
| pending_write_insns = XEXP (pending_write_insns, 1); |
| XEXP (link, 1) = unused_insn_list |