| /* Straight-line strength reduction. |
| Copyright (C) 2012-2013 Free Software Foundation, Inc. |
| Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com> |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* There are many algorithms for performing strength reduction on |
| loops. This is not one of them. IVOPTS handles strength reduction |
| of induction variables just fine. This pass is intended to pick |
| up the crumbs it leaves behind, by considering opportunities for |
| strength reduction along dominator paths. |
| |
| Strength reduction will be implemented in four stages, gradually |
| adding more complex candidates: |
| |
| 1) Explicit multiplies, known constant multipliers, no |
| conditional increments. (complete) |
| 2) Explicit multiplies, unknown constant multipliers, |
| no conditional increments. (complete) |
| 3) Implicit multiplies in addressing expressions. (complete) |
| 4) Explicit multiplies, conditional increments. (pending) |
| |
| It would also be possible to apply strength reduction to divisions |
| and modulos, but such opportunities are relatively uncommon. |
| |
| Strength reduction is also currently restricted to integer operations. |
| If desired, it could be extended to floating-point operations under |
| control of something like -funsafe-math-optimizations. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tree.h" |
| #include "gimple.h" |
| #include "basic-block.h" |
| #include "tree-pass.h" |
| #include "cfgloop.h" |
| #include "gimple-pretty-print.h" |
| #include "tree-flow.h" |
| #include "domwalk.h" |
| #include "pointer-set.h" |
| #include "expmed.h" |
| #include "params.h" |
| |
| /* Information about a strength reduction candidate. Each statement |
| in the candidate table represents an expression of one of the |
| following forms (the special case of CAND_REF will be described |
| later): |
| |
| (CAND_MULT) S1: X = (B + i) * S |
| (CAND_ADD) S1: X = B + (i * S) |
| |
| Here X and B are SSA names, i is an integer constant, and S is |
| either an SSA name or a constant. We call B the "base," i the |
| "index", and S the "stride." |
| |
| Any statement S0 that dominates S1 and is of the form: |
| |
| (CAND_MULT) S0: Y = (B + i') * S |
| (CAND_ADD) S0: Y = B + (i' * S) |
| |
| is called a "basis" for S1. In both cases, S1 may be replaced by |
| |
| S1': X = Y + (i - i') * S, |
| |
| where (i - i') * S is folded to the extent possible. |
| |
| All gimple statements are visited in dominator order, and each |
| statement that may contribute to one of the forms of S1 above is |
| given at least one entry in the candidate table. Such statements |
| include addition, pointer addition, subtraction, multiplication, |
| negation, copies, and nontrivial type casts. If a statement may |
| represent more than one expression of the forms of S1 above, |
| multiple "interpretations" are stored in the table and chained |
| together. Examples: |
| |
| * An add of two SSA names may treat either operand as the base. |
| * A multiply of two SSA names, likewise. |
| * A copy or cast may be thought of as either a CAND_MULT with |
| i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0. |
| |
| Candidate records are allocated from an obstack. They are addressed |
| both from a hash table keyed on S1, and from a vector of candidate |
| pointers arranged in predominator order. |
| |
| Opportunity note |
| ---------------- |
| Currently we don't recognize: |
| |
| S0: Y = (S * i') - B |
| S1: X = (S * i) - B |
| |
| as a strength reduction opportunity, even though this S1 would |
| also be replaceable by the S1' above. This can be added if it |
| comes up in practice. |
| |
| Strength reduction in addressing |
| -------------------------------- |
| There is another kind of candidate known as CAND_REF. A CAND_REF |
| describes a statement containing a memory reference having |
| complex addressing that might benefit from strength reduction. |
| Specifically, we are interested in references for which |
| get_inner_reference returns a base address, offset, and bitpos as |
| follows: |
| |
| base: MEM_REF (T1, C1) |
| offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3) |
| bitpos: C4 * BITS_PER_UNIT |
| |
| Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are |
| arbitrary integer constants. Note that C2 may be zero, in which |
| case the offset will be MULT_EXPR (T2, C3). |
| |
| When this pattern is recognized, the original memory reference |
| can be replaced with: |
| |
| MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)), |
| C1 + (C2 * C3) + C4) |
| |
| which distributes the multiply to allow constant folding. When |
| two or more addressing expressions can be represented by MEM_REFs |
| of this form, differing only in the constants C1, C2, and C4, |
| making this substitution produces more efficient addressing during |
| the RTL phases. When there are not at least two expressions with |
| the same values of T1, T2, and C3, there is nothing to be gained |
| by the replacement. |
| |
| Strength reduction of CAND_REFs uses the same infrastructure as |
| that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B) |
| field, MULT_EXPR (T2, C3) in the stride (S) field, and |
| C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF |
| is thus another CAND_REF with the same B and S values. When at |
| least two CAND_REFs are chained together using the basis relation, |
| each of them is replaced as above, resulting in improved code |
| generation for addressing. */ |
| |
| |
| /* Index into the candidate vector, offset by 1. VECs are zero-based, |
| while cand_idx's are one-based, with zero indicating null. */ |
| typedef unsigned cand_idx; |
| |
| /* The kind of candidate. */ |
| enum cand_kind |
| { |
| CAND_MULT, |
| CAND_ADD, |
| CAND_REF |
| }; |
| |
| struct slsr_cand_d |
| { |
| /* The candidate statement S1. */ |
| gimple cand_stmt; |
| |
| /* The base expression B: often an SSA name, but not always. */ |
| tree base_expr; |
| |
| /* The stride S. */ |
| tree stride; |
| |
| /* The index constant i. */ |
| double_int index; |
| |
| /* The type of the candidate. This is normally the type of base_expr, |
| but casts may have occurred when combining feeding instructions. |
| A candidate can only be a basis for candidates of the same final type. |
| (For CAND_REFs, this is the type to be used for operand 1 of the |
| replacement MEM_REF.) */ |
| tree cand_type; |
| |
| /* The kind of candidate (CAND_MULT, etc.). */ |
| enum cand_kind kind; |
| |
| /* Index of this candidate in the candidate vector. */ |
| cand_idx cand_num; |
| |
| /* Index of the next candidate record for the same statement. |
| A statement may be useful in more than one way (e.g., due to |
| commutativity). So we can have multiple "interpretations" |
| of a statement. */ |
| cand_idx next_interp; |
| |
| /* Index of the basis statement S0, if any, in the candidate vector. */ |
| cand_idx basis; |
| |
| /* First candidate for which this candidate is a basis, if one exists. */ |
| cand_idx dependent; |
| |
| /* Next candidate having the same basis as this one. */ |
| cand_idx sibling; |
| |
| /* If this is a conditional candidate, the defining PHI statement |
| for the base SSA name B. For future use; always NULL for now. */ |
| gimple def_phi; |
| |
| /* Savings that can be expected from eliminating dead code if this |
| candidate is replaced. */ |
| int dead_savings; |
| }; |
| |
| typedef struct slsr_cand_d slsr_cand, *slsr_cand_t; |
| typedef const struct slsr_cand_d *const_slsr_cand_t; |
| |
| /* Pointers to candidates are chained together as part of a mapping |
| from base expressions to the candidates that use them. */ |
| |
| struct cand_chain_d |
| { |
| /* Base expression for the chain of candidates: often, but not |
| always, an SSA name. */ |
| tree base_expr; |
| |
| /* Pointer to a candidate. */ |
| slsr_cand_t cand; |
| |
| /* Chain pointer. */ |
| struct cand_chain_d *next; |
| |
| }; |
| |
| typedef struct cand_chain_d cand_chain, *cand_chain_t; |
| typedef const struct cand_chain_d *const_cand_chain_t; |
| |
| /* Information about a unique "increment" associated with candidates |
| having an SSA name for a stride. An increment is the difference |
| between the index of the candidate and the index of its basis, |
| i.e., (i - i') as discussed in the module commentary. |
| |
| When we are not going to generate address arithmetic we treat |
| increments that differ only in sign as the same, allowing sharing |
| of the cost of initializers. The absolute value of the increment |
| is stored in the incr_info. */ |
| |
| struct incr_info_d |
| { |
| /* The increment that relates a candidate to its basis. */ |
| double_int incr; |
| |
| /* How many times the increment occurs in the candidate tree. */ |
| unsigned count; |
| |
| /* Cost of replacing candidates using this increment. Negative and |
| zero costs indicate replacement should be performed. */ |
| int cost; |
| |
| /* If this increment is profitable but is not -1, 0, or 1, it requires |
| an initializer T_0 = stride * incr to be found or introduced in the |
| nearest common dominator of all candidates. This field holds T_0 |
| for subsequent use. */ |
| tree initializer; |
| |
| /* If the initializer was found to already exist, this is the block |
| where it was found. */ |
| basic_block init_bb; |
| }; |
| |
| typedef struct incr_info_d incr_info, *incr_info_t; |
| |
| /* Candidates are maintained in a vector. If candidate X dominates |
| candidate Y, then X appears before Y in the vector; but the |
| converse does not necessarily hold. */ |
| static vec<slsr_cand_t> cand_vec; |
| |
| enum cost_consts |
| { |
| COST_NEUTRAL = 0, |
| COST_INFINITE = 1000 |
| }; |
| |
| /* Pointer map embodying a mapping from statements to candidates. */ |
| static struct pointer_map_t *stmt_cand_map; |
| |
| /* Obstack for candidates. */ |
| static struct obstack cand_obstack; |
| |
| /* Hash table embodying a mapping from base exprs to chains of candidates. */ |
| static htab_t base_cand_map; |
| |
| /* Obstack for candidate chains. */ |
| static struct obstack chain_obstack; |
| |
| /* An array INCR_VEC of incr_infos is used during analysis of related |
| candidates having an SSA name for a stride. INCR_VEC_LEN describes |
| its current length. */ |
| static incr_info_t incr_vec; |
| static unsigned incr_vec_len; |
| |
| /* For a chain of candidates with unknown stride, indicates whether or not |
| we must generate pointer arithmetic when replacing statements. */ |
| static bool address_arithmetic_p; |
| |
| /* Produce a pointer to the IDX'th candidate in the candidate vector. */ |
| |
| static slsr_cand_t |
| lookup_cand (cand_idx idx) |
| { |
| return cand_vec[idx - 1]; |
| } |
| |
| /* Callback to produce a hash value for a candidate chain header. */ |
| |
| static hashval_t |
| base_cand_hash (const void *p) |
| { |
| tree base_expr = ((const_cand_chain_t) p)->base_expr; |
| return iterative_hash_expr (base_expr, 0); |
| } |
| |
| /* Callback when an element is removed from the hash table. |
| We never remove entries until the entire table is released. */ |
| |
| static void |
| base_cand_free (void *p ATTRIBUTE_UNUSED) |
| { |
| } |
| |
| /* Callback to return true if two candidate chain headers are equal. */ |
| |
| static int |
| base_cand_eq (const void *p1, const void *p2) |
| { |
| const_cand_chain_t const chain1 = (const_cand_chain_t) p1; |
| const_cand_chain_t const chain2 = (const_cand_chain_t) p2; |
| return operand_equal_p (chain1->base_expr, chain2->base_expr, 0); |
| } |
| |
| /* Use the base expr from candidate C to look for possible candidates |
| that can serve as a basis for C. Each potential basis must also |
| appear in a block that dominates the candidate statement and have |
| the same stride and type. If more than one possible basis exists, |
| the one with highest index in the vector is chosen; this will be |
| the most immediately dominating basis. */ |
| |
| static int |
| find_basis_for_candidate (slsr_cand_t c) |
| { |
| cand_chain mapping_key; |
| cand_chain_t chain; |
| slsr_cand_t basis = NULL; |
| |
| // Limit potential of N^2 behavior for long candidate chains. |
| int iters = 0; |
| int max_iters = PARAM_VALUE (PARAM_MAX_SLSR_CANDIDATE_SCAN); |
| |
| mapping_key.base_expr = c->base_expr; |
| chain = (cand_chain_t) htab_find (base_cand_map, &mapping_key); |
| |
| for (; chain && iters < max_iters; chain = chain->next, ++iters) |
| { |
| slsr_cand_t one_basis = chain->cand; |
| |
| if (one_basis->kind != c->kind |
| || one_basis->cand_stmt == c->cand_stmt |
| || !operand_equal_p (one_basis->stride, c->stride, 0) |
| || !types_compatible_p (one_basis->cand_type, c->cand_type) |
| || !dominated_by_p (CDI_DOMINATORS, |
| gimple_bb (c->cand_stmt), |
| gimple_bb (one_basis->cand_stmt))) |
| continue; |
| |
| if (!basis || basis->cand_num < one_basis->cand_num) |
| basis = one_basis; |
| } |
| |
| if (basis) |
| { |
| c->sibling = basis->dependent; |
| basis->dependent = c->cand_num; |
| return basis->cand_num; |
| } |
| |
| return 0; |
| } |
| |
| /* Record a mapping from the base expression of C to C itself, indicating that |
| C may potentially serve as a basis using that base expression. */ |
| |
| static void |
| record_potential_basis (slsr_cand_t c) |
| { |
| cand_chain_t node; |
| void **slot; |
| |
| node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain)); |
| node->base_expr = c->base_expr; |
| node->cand = c; |
| node->next = NULL; |
| slot = htab_find_slot (base_cand_map, node, INSERT); |
| |
| if (*slot) |
| { |
| cand_chain_t head = (cand_chain_t) (*slot); |
| node->next = head->next; |
| head->next = node; |
| } |
| else |
| *slot = node; |
| } |
| |
| /* Allocate storage for a new candidate and initialize its fields. |
| Attempt to find a basis for the candidate. */ |
| |
| static slsr_cand_t |
| alloc_cand_and_find_basis (enum cand_kind kind, gimple gs, tree base, |
| double_int index, tree stride, tree ctype, |
| unsigned savings) |
| { |
| slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack, |
| sizeof (slsr_cand)); |
| c->cand_stmt = gs; |
| c->base_expr = base; |
| c->stride = stride; |
| c->index = index; |
| c->cand_type = ctype; |
| c->kind = kind; |
| c->cand_num = cand_vec.length () + 1; |
| c->next_interp = 0; |
| c->dependent = 0; |
| c->sibling = 0; |
| c->def_phi = NULL; |
| c->dead_savings = savings; |
| |
| cand_vec.safe_push (c); |
| c->basis = find_basis_for_candidate (c); |
| record_potential_basis (c); |
| |
| return c; |
| } |
| |
| /* Determine the target cost of statement GS when compiling according |
| to SPEED. */ |
| |
| static int |
| stmt_cost (gimple gs, bool speed) |
| { |
| tree lhs, rhs1, rhs2; |
| enum machine_mode lhs_mode; |
| |
| gcc_assert (is_gimple_assign (gs)); |
| lhs = gimple_assign_lhs (gs); |
| rhs1 = gimple_assign_rhs1 (gs); |
| lhs_mode = TYPE_MODE (TREE_TYPE (lhs)); |
| |
| switch (gimple_assign_rhs_code (gs)) |
| { |
| case MULT_EXPR: |
| rhs2 = gimple_assign_rhs2 (gs); |
| |
| if (host_integerp (rhs2, 0)) |
| return mult_by_coeff_cost (TREE_INT_CST_LOW (rhs2), lhs_mode, speed); |
| |
| gcc_assert (TREE_CODE (rhs1) != INTEGER_CST); |
| return mul_cost (speed, lhs_mode); |
| |
| case PLUS_EXPR: |
| case POINTER_PLUS_EXPR: |
| case MINUS_EXPR: |
| return add_cost (speed, lhs_mode); |
| |
| case NEGATE_EXPR: |
| return neg_cost (speed, lhs_mode); |
| |
| case NOP_EXPR: |
| return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed); |
| |
| /* Note that we don't assign costs to copies that in most cases |
| will go away. */ |
| default: |
| ; |
| } |
| |
| gcc_unreachable (); |
| return 0; |
| } |
| |
| /* Look up the defining statement for BASE_IN and return a pointer |
| to its candidate in the candidate table, if any; otherwise NULL. |
| Only CAND_ADD and CAND_MULT candidates are returned. */ |
| |
| static slsr_cand_t |
| base_cand_from_table (tree base_in) |
| { |
| slsr_cand_t *result; |
| |
| gimple def = SSA_NAME_DEF_STMT (base_in); |
| if (!def) |
| return (slsr_cand_t) NULL; |
| |
| result = (slsr_cand_t *) pointer_map_contains (stmt_cand_map, def); |
| |
| if (result && (*result)->kind != CAND_REF) |
| return *result; |
| |
| return (slsr_cand_t) NULL; |
| } |
| |
| /* Add an entry to the statement-to-candidate mapping. */ |
| |
| static void |
| add_cand_for_stmt (gimple gs, slsr_cand_t c) |
| { |
| void **slot = pointer_map_insert (stmt_cand_map, gs); |
| gcc_assert (!*slot); |
| *slot = c; |
| } |
| |
| /* Look for the following pattern: |
| |
| *PBASE: MEM_REF (T1, C1) |
| |
| *POFFSET: MULT_EXPR (T2, C3) [C2 is zero] |
| or |
| MULT_EXPR (PLUS_EXPR (T2, C2), C3) |
| or |
| MULT_EXPR (MINUS_EXPR (T2, -C2), C3) |
| |
| *PINDEX: C4 * BITS_PER_UNIT |
| |
| If not present, leave the input values unchanged and return FALSE. |
| Otherwise, modify the input values as follows and return TRUE: |
| |
| *PBASE: T1 |
| *POFFSET: MULT_EXPR (T2, C3) |
| *PINDEX: C1 + (C2 * C3) + C4 */ |
| |
| static bool |
| restructure_reference (tree *pbase, tree *poffset, double_int *pindex, |
| tree *ptype) |
| { |
| tree base = *pbase, offset = *poffset; |
| double_int index = *pindex; |
| double_int bpu = double_int::from_uhwi (BITS_PER_UNIT); |
| tree mult_op0, mult_op1, t1, t2, type; |
| double_int c1, c2, c3, c4; |
| |
| if (!base |
| || !offset |
| || TREE_CODE (base) != MEM_REF |
| || TREE_CODE (offset) != MULT_EXPR |
| || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST |
| || !index.umod (bpu, FLOOR_MOD_EXPR).is_zero ()) |
| return false; |
| |
| t1 = TREE_OPERAND (base, 0); |
| c1 = mem_ref_offset (base); |
| type = TREE_TYPE (TREE_OPERAND (base, 1)); |
| |
| mult_op0 = TREE_OPERAND (offset, 0); |
| mult_op1 = TREE_OPERAND (offset, 1); |
| |
| c3 = tree_to_double_int (mult_op1); |
| |
| if (TREE_CODE (mult_op0) == PLUS_EXPR) |
| |
| if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST) |
| { |
| t2 = TREE_OPERAND (mult_op0, 0); |
| c2 = tree_to_double_int (TREE_OPERAND (mult_op0, 1)); |
| } |
| else |
| return false; |
| |
| else if (TREE_CODE (mult_op0) == MINUS_EXPR) |
| |
| if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST) |
| { |
| t2 = TREE_OPERAND (mult_op0, 0); |
| c2 = -tree_to_double_int (TREE_OPERAND (mult_op0, 1)); |
| } |
| else |
| return false; |
| |
| else |
| { |
| t2 = mult_op0; |
| c2 = double_int_zero; |
| } |
| |
| c4 = index.udiv (bpu, FLOOR_DIV_EXPR); |
| |
| *pbase = t1; |
| *poffset = fold_build2 (MULT_EXPR, sizetype, t2, |
| double_int_to_tree (sizetype, c3)); |
| *pindex = c1 + c2 * c3 + c4; |
| *ptype = type; |
| |
| return true; |
| } |
| |
| /* Given GS which contains a data reference, create a CAND_REF entry in |
| the candidate table and attempt to find a basis. */ |
| |
| static void |
| slsr_process_ref (gimple gs) |
| { |
| tree ref_expr, base, offset, type; |
| HOST_WIDE_INT bitsize, bitpos; |
| enum machine_mode mode; |
| int unsignedp, volatilep; |
| double_int index; |
| slsr_cand_t c; |
| |
| if (gimple_vdef (gs)) |
| ref_expr = gimple_assign_lhs (gs); |
| else |
| ref_expr = gimple_assign_rhs1 (gs); |
| |
| if (!handled_component_p (ref_expr) |
| || TREE_CODE (ref_expr) == BIT_FIELD_REF |
| || (TREE_CODE (ref_expr) == COMPONENT_REF |
| && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1)))) |
| return; |
| |
| base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode, |
| &unsignedp, &volatilep, false); |
| index = double_int::from_uhwi (bitpos); |
| |
| if (!restructure_reference (&base, &offset, &index, &type)) |
| return; |
| |
| c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset, |
| type, 0); |
| |
| /* Add the candidate to the statement-candidate mapping. */ |
| add_cand_for_stmt (gs, c); |
| } |
| |
| /* Create a candidate entry for a statement GS, where GS multiplies |
| two SSA names BASE_IN and STRIDE_IN. Propagate any known information |
| about the two SSA names into the new candidate. Return the new |
| candidate. */ |
| |
| static slsr_cand_t |
| create_mul_ssa_cand (gimple gs, tree base_in, tree stride_in, bool speed) |
| { |
| tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE; |
| double_int index; |
| unsigned savings = 0; |
| slsr_cand_t c; |
| slsr_cand_t base_cand = base_cand_from_table (base_in); |
| |
| /* Look at all interpretations of the base candidate, if necessary, |
| to find information to propagate into this candidate. */ |
| while (base_cand && !base) |
| { |
| |
| if (base_cand->kind == CAND_MULT |
| && operand_equal_p (base_cand->stride, integer_one_node, 0)) |
| { |
| /* Y = (B + i') * 1 |
| X = Y * Z |
| ================ |
| X = (B + i') * Z */ |
| base = base_cand->base_expr; |
| index = base_cand->index; |
| stride = stride_in; |
| ctype = base_cand->cand_type; |
| if (has_single_use (base_in)) |
| savings = (base_cand->dead_savings |
| + stmt_cost (base_cand->cand_stmt, speed)); |
| } |
| else if (base_cand->kind == CAND_ADD |
| && TREE_CODE (base_cand->stride) == INTEGER_CST) |
| { |
| /* Y = B + (i' * S), S constant |
| X = Y * Z |
| ============================ |
| X = B + ((i' * S) * Z) */ |
| base = base_cand->base_expr; |
| index = base_cand->index * tree_to_double_int (base_cand->stride); |
| stride = stride_in; |
| ctype = base_cand->cand_type; |
| if (has_single_use (base_in)) |
| savings = (base_cand->dead_savings |
| + stmt_cost (base_cand->cand_stmt, speed)); |
| } |
| |
| if (base_cand->next_interp) |
| base_cand = lookup_cand (base_cand->next_interp); |
| else |
| base_cand = NULL; |
| } |
| |
| if (!base) |
| { |
| /* No interpretations had anything useful to propagate, so |
| produce X = (Y + 0) * Z. */ |
| base = base_in; |
| index = double_int_zero; |
| stride = stride_in; |
| ctype = TREE_TYPE (base_in); |
| } |
| |
| c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride, |
| ctype, savings); |
| return c; |
| } |
| |
| /* Create a candidate entry for a statement GS, where GS multiplies |
| SSA name BASE_IN by constant STRIDE_IN. Propagate any known |
| information about BASE_IN into the new candidate. Return the new |
| candidate. */ |
| |
| static slsr_cand_t |
| create_mul_imm_cand (gimple gs, tree base_in, tree stride_in, bool speed) |
| { |
| tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE; |
| double_int index, temp; |
| unsigned savings = 0; |
| slsr_cand_t c; |
| slsr_cand_t base_cand = base_cand_from_table (base_in); |
| |
| /* Look at all interpretations of the base candidate, if necessary, |
| to find information to propagate into this candidate. */ |
| while (base_cand && !base) |
| { |
| if (base_cand->kind == CAND_MULT |
| && TREE_CODE (base_cand->stride) == INTEGER_CST) |
| { |
| /* Y = (B + i') * S, S constant |
| X = Y * c |
| ============================ |
| X = (B + i') * (S * c) */ |
| temp = tree_to_double_int (base_cand->stride) |
| * tree_to_double_int (stride_in); |
| if (double_int_fits_to_tree_p (TREE_TYPE (stride_in), temp)) |
| { |
| base = base_cand->base_expr; |
| index = base_cand->index; |
| stride = double_int_to_tree (TREE_TYPE (stride_in), temp); |
| ctype = base_cand->cand_type; |
| if (has_single_use (base_in)) |
| savings = (base_cand->dead_savings |
| + stmt_cost (base_cand->cand_stmt, speed)); |
| } |
| } |
| else if (base_cand->kind == CAND_ADD |
| && operand_equal_p (base_cand->stride, integer_one_node, 0)) |
| { |
| /* Y = B + (i' * 1) |
| X = Y * c |
| =========================== |
| X = (B + i') * c */ |
| base = base_cand->base_expr; |
| index = base_cand->index; |
| stride = stride_in; |
| ctype = base_cand->cand_type; |
| if (has_single_use (base_in)) |
| savings = (base_cand->dead_savings |
| + stmt_cost (base_cand->cand_stmt, speed)); |
| } |
| else if (base_cand->kind == CAND_ADD |
| && base_cand->index.is_one () |
| && TREE_CODE (base_cand->stride) == INTEGER_CST) |
| { |
| /* Y = B + (1 * S), S constant |
| X = Y * c |
| =========================== |
| X = (B + S) * c */ |
| base = base_cand->base_expr; |
| index = tree_to_double_int (base_cand->stride); |
| stride = stride_in; |
| ctype = base_cand->cand_type; |
| if (has_single_use (base_in)) |
| savings = (base_cand->dead_savings |
| + stmt_cost (base_cand->cand_stmt, speed)); |
| } |
| |
| if (base_cand->next_interp) |
| base_cand = lookup_cand (base_cand->next_interp); |
| else |
| base_cand = NULL; |
| } |
| |
| if (!base) |
| { |
| /* No interpretations had anything useful to propagate, so |
| produce X = (Y + 0) * c. */ |
| base = base_in; |
| index = double_int_zero; |
| stride = stride_in; |
| ctype = TREE_TYPE (base_in); |
| } |
| |
| c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride, |
| ctype, savings); |
| return c; |
| } |
| |
| /* Given GS which is a multiply of scalar integers, make an appropriate |
| entry in the candidate table. If this is a multiply of two SSA names, |
| create two CAND_MULT interpretations and attempt to find a basis for |
| each of them. Otherwise, create a single CAND_MULT and attempt to |
| find a basis. */ |
| |
| static void |
| slsr_process_mul (gimple gs, tree rhs1, tree rhs2, bool speed) |
| { |
| slsr_cand_t c, c2; |
| |
| /* If this is a multiply of an SSA name with itself, it is highly |
| unlikely that we will get a strength reduction opportunity, so |
| don't record it as a candidate. This simplifies the logic for |
| finding a basis, so if this is removed that must be considered. */ |
| if (rhs1 == rhs2) |
| return; |
| |
| if (TREE_CODE (rhs2) == SSA_NAME) |
| { |
| /* Record an interpretation of this statement in the candidate table |
| assuming RHS1 is the base expression and RHS2 is the stride. */ |
| c = create_mul_ssa_cand (gs, rhs1, rhs2, speed); |
| |
| /* Add the first interpretation to the statement-candidate mapping. */ |
| add_cand_for_stmt (gs, c); |
| |
| /* Record another interpretation of this statement assuming RHS1 |
| is the stride and RHS2 is the base expression. */ |
| c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed); |
| c->next_interp = c2->cand_num; |
| } |
| else |
| { |
| /* Record an interpretation for the multiply-immediate. */ |
| c = create_mul_imm_cand (gs, rhs1, rhs2, speed); |
| |
| /* Add the interpretation to the statement-candidate mapping. */ |
| add_cand_for_stmt (gs, c); |
| } |
| } |
| |
| /* Create a candidate entry for a statement GS, where GS adds two |
| SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and |
| subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known |
| information about the two SSA names into the new candidate. |
| Return the new candidate. */ |
| |
| static slsr_cand_t |
| create_add_ssa_cand (gimple gs, tree base_in, tree addend_in, |
| bool subtract_p, bool speed) |
| { |
| tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL; |
| double_int index; |
| unsigned savings = 0; |
| slsr_cand_t c; |
| slsr_cand_t base_cand = base_cand_from_table (base_in); |
| slsr_cand_t addend_cand = base_cand_from_table (addend_in); |
| |
| /* The most useful transformation is a multiply-immediate feeding |
| an add or subtract. Look for that first. */ |
| while (addend_cand && !base) |
| { |
| if (addend_cand->kind == CAND_MULT |
| && addend_cand->index.is_zero () |
| && TREE_CODE (addend_cand->stride) == INTEGER_CST) |
| { |
| /* Z = (B + 0) * S, S constant |
| X = Y +/- Z |
| =========================== |
| X = Y + ((+/-1 * S) * B) */ |
| base = base_in; |
| index = tree_to_double_int (addend_cand->stride); |
| if (subtract_p) |
| index = -index; |
| stride = addend_cand->base_expr; |
| ctype = TREE_TYPE (base_in); |
| if (has_single_use (addend_in)) |
| savings = (addend_cand->dead_savings |
| + stmt_cost (addend_cand->cand_stmt, speed)); |
| } |
| |
| if (addend_cand->next_interp) |
| addend_cand = lookup_cand (addend_cand->next_interp); |
| else |
| addend_cand = NULL; |
| } |
| |
| while (base_cand && !base) |
| { |
| if (base_cand->kind == CAND_ADD |
| && (base_cand->index.is_zero () |
| || operand_equal_p (base_cand->stride, |
| integer_zero_node, 0))) |
| { |
| /* Y = B + (i' * S), i' * S = 0 |
| X = Y +/- Z |
| ============================ |
| X = B + (+/-1 * Z) */ |
| base = base_cand->base_expr; |
| index = subtract_p ? double_int_minus_one : double_int_one; |
| stride = addend_in; |
| ctype = base_cand->cand_type; |
| if (has_single_use (base_in)) |
| savings = (base_cand->dead_savings |
| + stmt_cost (base_cand->cand_stmt, speed)); |
| } |
| else if (subtract_p) |
| { |
| slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in); |
| |
| while (subtrahend_cand && !base) |
| { |
| if (subtrahend_cand->kind == CAND_MULT |
| && subtrahend_cand->index.is_zero () |
| && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST) |
| { |
| /* Z = (B + 0) * S, S constant |
| X = Y - Z |
| =========================== |
| Value: X = Y + ((-1 * S) * B) */ |
| base = base_in; |
| index = tree_to_double_int (subtrahend_cand->stride); |
| index = -index; |
| stride = subtrahend_cand->base_expr; |
| ctype = TREE_TYPE (base_in); |
| if (has_single_use (addend_in)) |
| savings = (subtrahend_cand->dead_savings |
| + stmt_cost (subtrahend_cand->cand_stmt, speed)); |
| } |
| |
| if (subtrahend_cand->next_interp) |
| subtrahend_cand = lookup_cand (subtrahend_cand->next_interp); |
| else |
| subtrahend_cand = NULL; |
| } |
| } |
| |
| if (base_cand->next_interp) |
| base_cand = lookup_cand (base_cand->next_interp); |
| else |
| base_cand = NULL; |
| } |
| |
| if (!base) |
| { |
| /* No interpretations had anything useful to propagate, so |
| produce X = Y + (1 * Z). */ |
| base = base_in; |
| index = subtract_p ? double_int_minus_one : double_int_one; |
| stride = addend_in; |
| ctype = TREE_TYPE (base_in); |
| } |
| |
| c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride, |
| ctype, savings); |
| return c; |
| } |
| |
| /* Create a candidate entry for a statement GS, where GS adds SSA |
| name BASE_IN to constant INDEX_IN. Propagate any known information |
| about BASE_IN into the new candidate. Return the new candidate. */ |
| |
| static slsr_cand_t |
| create_add_imm_cand (gimple gs, tree base_in, double_int index_in, bool speed) |
| { |
| enum cand_kind kind = CAND_ADD; |
| tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE; |
| double_int index, multiple; |
| unsigned savings = 0; |
| slsr_cand_t c; |
| slsr_cand_t base_cand = base_cand_from_table (base_in); |
| |
| while (base_cand && !base) |
| { |
| bool unsigned_p = TYPE_UNSIGNED (TREE_TYPE (base_cand->stride)); |
| |
| if (TREE_CODE (base_cand->stride) == INTEGER_CST |
| && index_in.multiple_of (tree_to_double_int (base_cand->stride), |
| unsigned_p, &multiple)) |
| { |
| /* Y = (B + i') * S, S constant, c = kS for some integer k |
| X = Y + c |
| ============================ |
| X = (B + (i'+ k)) * S |
| OR |
| Y = B + (i' * S), S constant, c = kS for some integer k |
| X = Y + c |
| ============================ |
| X = (B + (i'+ k)) * S */ |
| kind = base_cand->kind; |
| base = base_cand->base_expr; |
| index = base_cand->index + multiple; |
| stride = base_cand->stride; |
| ctype = base_cand->cand_type; |
| if (has_single_use (base_in)) |
| savings = (base_cand->dead_savings |
| + stmt_cost (base_cand->cand_stmt, speed)); |
| } |
| |
| if (base_cand->next_interp) |
| base_cand = lookup_cand (base_cand->next_interp); |
| else |
| base_cand = NULL; |
| } |
| |
| if (!base) |
| { |
| /* No interpretations had anything useful to propagate, so |
| produce X = Y + (c * 1). */ |
| kind = CAND_ADD; |
| base = base_in; |
| index = index_in; |
| stride = integer_one_node; |
| ctype = TREE_TYPE (base_in); |
| } |
| |
| c = alloc_cand_and_find_basis (kind, gs, base, index, stride, |
| ctype, savings); |
| return c; |
| } |
| |
| /* Given GS which is an add or subtract of scalar integers or pointers, |
| make at least one appropriate entry in the candidate table. */ |
| |
| static void |
| slsr_process_add (gimple gs, tree rhs1, tree rhs2, bool speed) |
| { |
| bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR; |
| slsr_cand_t c = NULL, c2; |
| |
| if (TREE_CODE (rhs2) == SSA_NAME) |
| { |
| /* First record an interpretation assuming RHS1 is the base expression |
| and RHS2 is the stride. But it doesn't make sense for the |
| stride to be a pointer, so don't record a candidate in that case. */ |
| if (!POINTER_TYPE_P (TREE_TYPE (rhs2))) |
| { |
| c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed); |
| |
| /* Add the first interpretation to the statement-candidate |
| mapping. */ |
| add_cand_for_stmt (gs, c); |
| } |
| |
| /* If the two RHS operands are identical, or this is a subtract, |
| we're done. */ |
| if (operand_equal_p (rhs1, rhs2, 0) || subtract_p) |
| return; |
| |
| /* Otherwise, record another interpretation assuming RHS2 is the |
| base expression and RHS1 is the stride, again provided that the |
| stride is not a pointer. */ |
| if (!POINTER_TYPE_P (TREE_TYPE (rhs1))) |
| { |
| c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed); |
| if (c) |
| c->next_interp = c2->cand_num; |
| else |
| add_cand_for_stmt (gs, c2); |
| } |
| } |
| else |
| { |
| double_int index; |
| |
| /* Record an interpretation for the add-immediate. */ |
| index = tree_to_double_int (rhs2); |
| if (subtract_p) |
| index = -index; |
| |
| c = create_add_imm_cand (gs, rhs1, index, speed); |
| |
| /* Add the interpretation to the statement-candidate mapping. */ |
| add_cand_for_stmt (gs, c); |
| } |
| } |
| |
| /* Given GS which is a negate of a scalar integer, make an appropriate |
| entry in the candidate table. A negate is equivalent to a multiply |
| by -1. */ |
| |
| static void |
| slsr_process_neg (gimple gs, tree rhs1, bool speed) |
| { |
| /* Record a CAND_MULT interpretation for the multiply by -1. */ |
| slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed); |
| |
| /* Add the interpretation to the statement-candidate mapping. */ |
| add_cand_for_stmt (gs, c); |
| } |
| |
| /* Help function for legal_cast_p, operating on two trees. Checks |
| whether it's allowable to cast from RHS to LHS. See legal_cast_p |
| for more details. */ |
| |
| static bool |
| legal_cast_p_1 (tree lhs, tree rhs) |
| { |
| tree lhs_type, rhs_type; |
| unsigned lhs_size, rhs_size; |
| bool lhs_wraps, rhs_wraps; |
| |
| lhs_type = TREE_TYPE (lhs); |
| rhs_type = TREE_TYPE (rhs); |
| lhs_size = TYPE_PRECISION (lhs_type); |
| rhs_size = TYPE_PRECISION (rhs_type); |
| lhs_wraps = TYPE_OVERFLOW_WRAPS (lhs_type); |
| rhs_wraps = TYPE_OVERFLOW_WRAPS (rhs_type); |
| |
| if (lhs_size < rhs_size |
| || (rhs_wraps && !lhs_wraps) |
| || (rhs_wraps && lhs_wraps && rhs_size != lhs_size)) |
| return false; |
| |
| return true; |
| } |
| |
| /* Return TRUE if GS is a statement that defines an SSA name from |
| a conversion and is legal for us to combine with an add and multiply |
| in the candidate table. For example, suppose we have: |
| |
| A = B + i; |
| C = (type) A; |
| D = C * S; |
| |
| Without the type-cast, we would create a CAND_MULT for D with base B, |
| index i, and stride S. We want to record this candidate only if it |
| is equivalent to apply the type cast following the multiply: |
| |
| A = B + i; |
| E = A * S; |
| D = (type) E; |
| |
| We will record the type with the candidate for D. This allows us |
| to use a similar previous candidate as a basis. If we have earlier seen |
| |
| A' = B + i'; |
| C' = (type) A'; |
| D' = C' * S; |
| |
| we can replace D with |
| |
| D = D' + (i - i') * S; |
| |
| But if moving the type-cast would change semantics, we mustn't do this. |
| |
| This is legitimate for casts from a non-wrapping integral type to |
| any integral type of the same or larger size. It is not legitimate |
| to convert a wrapping type to a non-wrapping type, or to a wrapping |
| type of a different size. I.e., with a wrapping type, we must |
| assume that the addition B + i could wrap, in which case performing |
| the multiply before or after one of the "illegal" type casts will |
| have different semantics. */ |
| |
| static bool |
| legal_cast_p (gimple gs, tree rhs) |
| { |
| if (!is_gimple_assign (gs) |
| || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs))) |
| return false; |
| |
| return legal_cast_p_1 (gimple_assign_lhs (gs), rhs); |
| } |
| |
| /* Given GS which is a cast to a scalar integer type, determine whether |
| the cast is legal for strength reduction. If so, make at least one |
| appropriate entry in the candidate table. */ |
| |
| static void |
| slsr_process_cast (gimple gs, tree rhs1, bool speed) |
| { |
| tree lhs, ctype; |
| slsr_cand_t base_cand, c, c2; |
| unsigned savings = 0; |
| |
| if (!legal_cast_p (gs, rhs1)) |
| return; |
| |
| lhs = gimple_assign_lhs (gs); |
| base_cand = base_cand_from_table (rhs1); |
| ctype = TREE_TYPE (lhs); |
| |
| if (base_cand) |
| { |
| while (base_cand) |
| { |
| /* Propagate all data from the base candidate except the type, |
| which comes from the cast, and the base candidate's cast, |
| which is no longer applicable. */ |
| if (has_single_use (rhs1)) |
| savings = (base_cand->dead_savings |
| + stmt_cost (base_cand->cand_stmt, speed)); |
| |
| c = alloc_cand_and_find_basis (base_cand->kind, gs, |
| base_cand->base_expr, |
| base_cand->index, base_cand->stride, |
| ctype, savings); |
| if (base_cand->next_interp) |
| base_cand = lookup_cand (base_cand->next_interp); |
| else |
| base_cand = NULL; |
| } |
| } |
| else |
| { |
| /* If nothing is known about the RHS, create fresh CAND_ADD and |
| CAND_MULT interpretations: |
| |
| X = Y + (0 * 1) |
| X = (Y + 0) * 1 |
| |
| The first of these is somewhat arbitrary, but the choice of |
| 1 for the stride simplifies the logic for propagating casts |
| into their uses. */ |
| c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero, |
| integer_one_node, ctype, 0); |
| c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero, |
| integer_one_node, ctype, 0); |
| c->next_interp = c2->cand_num; |
| } |
| |
| /* Add the first (or only) interpretation to the statement-candidate |
| mapping. */ |
| add_cand_for_stmt (gs, c); |
| } |
| |
| /* Given GS which is a copy of a scalar integer type, make at least one |
| appropriate entry in the candidate table. |
| |
| This interface is included for completeness, but is unnecessary |
| if this pass immediately follows a pass that performs copy |
| propagation, such as DOM. */ |
| |
| static void |
| slsr_process_copy (gimple gs, tree rhs1, bool speed) |
| { |
| slsr_cand_t base_cand, c, c2; |
| unsigned savings = 0; |
| |
| base_cand = base_cand_from_table (rhs1); |
| |
| if (base_cand) |
| { |
| while (base_cand) |
| { |
| /* Propagate all data from the base candidate. */ |
| if (has_single_use (rhs1)) |
| savings = (base_cand->dead_savings |
| + stmt_cost (base_cand->cand_stmt, speed)); |
| |
| c = alloc_cand_and_find_basis (base_cand->kind, gs, |
| base_cand->base_expr, |
| base_cand->index, base_cand->stride, |
| base_cand->cand_type, savings); |
| if (base_cand->next_interp) |
| base_cand = lookup_cand (base_cand->next_interp); |
| else |
| base_cand = NULL; |
| } |
| } |
| else |
| { |
| /* If nothing is known about the RHS, create fresh CAND_ADD and |
| CAND_MULT interpretations: |
| |
| X = Y + (0 * 1) |
| X = (Y + 0) * 1 |
| |
| The first of these is somewhat arbitrary, but the choice of |
| 1 for the stride simplifies the logic for propagating casts |
| into their uses. */ |
| c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, double_int_zero, |
| integer_one_node, TREE_TYPE (rhs1), 0); |
| c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, double_int_zero, |
| integer_one_node, TREE_TYPE (rhs1), 0); |
| c->next_interp = c2->cand_num; |
| } |
| |
| /* Add the first (or only) interpretation to the statement-candidate |
| mapping. */ |
| add_cand_for_stmt (gs, c); |
| } |
| |
| /* Find strength-reduction candidates in block BB. */ |
| |
| static void |
| find_candidates_in_block (struct dom_walk_data *walk_data ATTRIBUTE_UNUSED, |
| basic_block bb) |
| { |
| bool speed = optimize_bb_for_speed_p (bb); |
| gimple_stmt_iterator gsi; |
| |
| for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
| { |
| gimple gs = gsi_stmt (gsi); |
| |
| if (gimple_vuse (gs) && gimple_assign_single_p (gs)) |
| slsr_process_ref (gs); |
| |
| else if (is_gimple_assign (gs) |
| && SCALAR_INT_MODE_P |
| (TYPE_MODE (TREE_TYPE (gimple_assign_lhs (gs))))) |
| { |
| tree rhs1 = NULL_TREE, rhs2 = NULL_TREE; |
| |
| switch (gimple_assign_rhs_code (gs)) |
| { |
| case MULT_EXPR: |
| case PLUS_EXPR: |
| rhs1 = gimple_assign_rhs1 (gs); |
| rhs2 = gimple_assign_rhs2 (gs); |
| /* Should never happen, but currently some buggy situations |
| in earlier phases put constants in rhs1. */ |
| if (TREE_CODE (rhs1) != SSA_NAME) |
| continue; |
| break; |
| |
| /* Possible future opportunity: rhs1 of a ptr+ can be |
| an ADDR_EXPR. */ |
| case POINTER_PLUS_EXPR: |
| case MINUS_EXPR: |
| rhs2 = gimple_assign_rhs2 (gs); |
| /* Fall-through. */ |
| |
| case NOP_EXPR: |
| case MODIFY_EXPR: |
| case NEGATE_EXPR: |
| rhs1 = gimple_assign_rhs1 (gs); |
| if (TREE_CODE (rhs1) != SSA_NAME) |
| continue; |
| break; |
| |
| default: |
| ; |
| } |
| |
| switch (gimple_assign_rhs_code (gs)) |
| { |
| case MULT_EXPR: |
| slsr_process_mul (gs, rhs1, rhs2, speed); |
| break; |
| |
| case PLUS_EXPR: |
| case POINTER_PLUS_EXPR: |
| case MINUS_EXPR: |
| slsr_process_add (gs, rhs1, rhs2, speed); |
| break; |
| |
| case NEGATE_EXPR: |
| slsr_process_neg (gs, rhs1, speed); |
| break; |
| |
| case NOP_EXPR: |
| slsr_process_cast (gs, rhs1, speed); |
| break; |
| |
| case MODIFY_EXPR: |
| slsr_process_copy (gs, rhs1, speed); |
| break; |
| |
| default: |
| ; |
| } |
| } |
| } |
| } |
| |
| /* Dump a candidate for debug. */ |
| |
| static void |
| dump_candidate (slsr_cand_t c) |
| { |
| fprintf (dump_file, "%3d [%d] ", c->cand_num, |
| gimple_bb (c->cand_stmt)->index); |
| print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); |
| switch (c->kind) |
| { |
| case CAND_MULT: |
| fputs (" MULT : (", dump_file); |
| print_generic_expr (dump_file, c->base_expr, 0); |
| fputs (" + ", dump_file); |
| dump_double_int (dump_file, c->index, false); |
| fputs (") * ", dump_file); |
| print_generic_expr (dump_file, c->stride, 0); |
| fputs (" : ", dump_file); |
| break; |
| case CAND_ADD: |
| fputs (" ADD : ", dump_file); |
| print_generic_expr (dump_file, c->base_expr, 0); |
| fputs (" + (", dump_file); |
| dump_double_int (dump_file, c->index, false); |
| fputs (" * ", dump_file); |
| print_generic_expr (dump_file, c->stride, 0); |
| fputs (") : ", dump_file); |
| break; |
| case CAND_REF: |
| fputs (" REF : ", dump_file); |
| print_generic_expr (dump_file, c->base_expr, 0); |
| fputs (" + (", dump_file); |
| print_generic_expr (dump_file, c->stride, 0); |
| fputs (") + ", dump_file); |
| dump_double_int (dump_file, c->index, false); |
| fputs (" : ", dump_file); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| print_generic_expr (dump_file, c->cand_type, 0); |
| fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n", |
| c->basis, c->dependent, c->sibling); |
| fprintf (dump_file, " next-interp: %d dead-savings: %d\n", |
| c->next_interp, c->dead_savings); |
| if (c->def_phi) |
| { |
| fputs (" phi: ", dump_file); |
| print_gimple_stmt (dump_file, c->def_phi, 0, 0); |
| } |
| fputs ("\n", dump_file); |
| } |
| |
| /* Dump the candidate vector for debug. */ |
| |
| static void |
| dump_cand_vec (void) |
| { |
| unsigned i; |
| slsr_cand_t c; |
| |
| fprintf (dump_file, "\nStrength reduction candidate vector:\n\n"); |
| |
| FOR_EACH_VEC_ELT (cand_vec, i, c) |
| dump_candidate (c); |
| } |
| |
| /* Callback used to dump the candidate chains hash table. */ |
| |
| static int |
| base_cand_dump_callback (void **slot, void *ignored ATTRIBUTE_UNUSED) |
| { |
| const_cand_chain_t chain = *((const_cand_chain_t *) slot); |
| cand_chain_t p; |
| |
| print_generic_expr (dump_file, chain->base_expr, 0); |
| fprintf (dump_file, " -> %d", chain->cand->cand_num); |
| |
| for (p = chain->next; p; p = p->next) |
| fprintf (dump_file, " -> %d", p->cand->cand_num); |
| |
| fputs ("\n", dump_file); |
| return 1; |
| } |
| |
| /* Dump the candidate chains. */ |
| |
| static void |
| dump_cand_chains (void) |
| { |
| fprintf (dump_file, "\nStrength reduction candidate chains:\n\n"); |
| htab_traverse_noresize (base_cand_map, base_cand_dump_callback, NULL); |
| fputs ("\n", dump_file); |
| } |
| |
| /* Dump the increment vector for debug. */ |
| |
| static void |
| dump_incr_vec (void) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| unsigned i; |
| |
| fprintf (dump_file, "\nIncrement vector:\n\n"); |
| |
| for (i = 0; i < incr_vec_len; i++) |
| { |
| fprintf (dump_file, "%3d increment: ", i); |
| dump_double_int (dump_file, incr_vec[i].incr, false); |
| fprintf (dump_file, "\n count: %d", incr_vec[i].count); |
| fprintf (dump_file, "\n cost: %d", incr_vec[i].cost); |
| fputs ("\n initializer: ", dump_file); |
| print_generic_expr (dump_file, incr_vec[i].initializer, 0); |
| fputs ("\n\n", dump_file); |
| } |
| } |
| } |
| |
| /* Recursive helper for unconditional_cands_with_known_stride_p. |
| Returns TRUE iff C, its siblings, and its dependents are all |
| unconditional candidates. */ |
| |
| static bool |
| unconditional_cands (slsr_cand_t c) |
| { |
| if (c->def_phi) |
| return false; |
| |
| if (c->sibling && !unconditional_cands (lookup_cand (c->sibling))) |
| return false; |
| |
| if (c->dependent && !unconditional_cands (lookup_cand (c->dependent))) |
| return false; |
| |
| return true; |
| } |
| |
| /* Determine whether or not the tree of candidates rooted at |
| ROOT consists entirely of unconditional increments with |
| an INTEGER_CST stride. */ |
| |
| static bool |
| unconditional_cands_with_known_stride_p (slsr_cand_t root) |
| { |
| /* The stride is identical for all related candidates, so |
| check it once. */ |
| if (TREE_CODE (root->stride) != INTEGER_CST) |
| return false; |
| |
| return unconditional_cands (lookup_cand (root->dependent)); |
| } |
| |
| /* Replace *EXPR in candidate C with an equivalent strength-reduced |
| data reference. */ |
| |
| static void |
| replace_ref (tree *expr, slsr_cand_t c) |
| { |
| tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr); |
| unsigned HOST_WIDE_INT misalign; |
| unsigned align; |
| |
| /* Ensure the memory reference carries the minimum alignment |
| requirement for the data type. See PR58041. */ |
| get_object_alignment_1 (*expr, &align, &misalign); |
| if (misalign != 0) |
| align = (misalign & -misalign); |
| if (align < TYPE_ALIGN (acc_type)) |
| acc_type = build_aligned_type (acc_type, align); |
| |
| add_expr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (c->base_expr), |
| c->base_expr, c->stride); |
| mem_ref = fold_build2 (MEM_REF, acc_type, add_expr, |
| double_int_to_tree (c->cand_type, c->index)); |
| |
| /* Gimplify the base addressing expression for the new MEM_REF tree. */ |
| gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); |
| TREE_OPERAND (mem_ref, 0) |
| = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0), |
| /*simple_p=*/true, NULL, |
| /*before=*/true, GSI_SAME_STMT); |
| copy_ref_info (mem_ref, *expr); |
| *expr = mem_ref; |
| update_stmt (c->cand_stmt); |
| } |
| |
| /* Replace CAND_REF candidate C, each sibling of candidate C, and each |
| dependent of candidate C with an equivalent strength-reduced data |
| reference. */ |
| |
| static void |
| replace_refs (slsr_cand_t c) |
| { |
| if (gimple_vdef (c->cand_stmt)) |
| { |
| tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt); |
| replace_ref (lhs, c); |
| } |
| else |
| { |
| tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt); |
| replace_ref (rhs, c); |
| } |
| |
| if (c->sibling) |
| replace_refs (lookup_cand (c->sibling)); |
| |
| if (c->dependent) |
| replace_refs (lookup_cand (c->dependent)); |
| } |
| |
| /* Calculate the increment required for candidate C relative to |
| its basis. */ |
| |
| static double_int |
| cand_increment (slsr_cand_t c) |
| { |
| slsr_cand_t basis; |
| |
| /* If the candidate doesn't have a basis, just return its own |
| index. This is useful in record_increments to help us find |
| an existing initializer. */ |
| if (!c->basis) |
| return c->index; |
| |
| basis = lookup_cand (c->basis); |
| gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0)); |
| return c->index - basis->index; |
| } |
| |
| /* Calculate the increment required for candidate C relative to |
| its basis. If we aren't going to generate pointer arithmetic |
| for this candidate, return the absolute value of that increment |
| instead. */ |
| |
| static inline double_int |
| cand_abs_increment (slsr_cand_t c) |
| { |
| double_int increment = cand_increment (c); |
| |
| if (!address_arithmetic_p && increment.is_negative ()) |
| increment = -increment; |
| |
| return increment; |
| } |
| |
| /* If *VAR is NULL or is not of a compatible type with TYPE, create a |
| new temporary reg of type TYPE and store it in *VAR. */ |
| |
| static inline void |
| lazy_create_slsr_reg (tree *var, tree type) |
| { |
| if (!*var || !types_compatible_p (TREE_TYPE (*var), type)) |
| *var = create_tmp_reg (type, "slsr"); |
| } |
| |
| /* Return TRUE iff candidate C has already been replaced under |
| another interpretation. */ |
| |
| static inline bool |
| cand_already_replaced (slsr_cand_t c) |
| { |
| return (gimple_bb (c->cand_stmt) == 0); |
| } |
| |
| /* Helper routine for replace_dependents, doing the work for a |
| single candidate C. */ |
| |
| static void |
| replace_dependent (slsr_cand_t c, enum tree_code cand_code) |
| { |
| double_int stride = tree_to_double_int (c->stride); |
| double_int bump = cand_increment (c) * stride; |
| gimple stmt_to_print = NULL; |
| slsr_cand_t basis; |
| tree basis_name, incr_type, bump_tree; |
| enum tree_code code; |
| |
| /* It is highly unlikely, but possible, that the resulting |
| bump doesn't fit in a HWI. Abandon the replacement |
| in this case. Restriction to signed HWI is conservative |
| for unsigned types but allows for safe negation without |
| twisted logic. */ |
| if (!bump.fits_shwi ()) |
| return; |
| |
| basis = lookup_cand (c->basis); |
| basis_name = gimple_assign_lhs (basis->cand_stmt); |
| if (cand_code == POINTER_PLUS_EXPR) |
| { |
| incr_type = sizetype; |
| code = cand_code; |
| } |
| else |
| { |
| incr_type = TREE_TYPE (gimple_assign_rhs1 (c->cand_stmt)); |
| code = PLUS_EXPR; |
| } |
| |
| if (bump.is_negative () |
| && cand_code != POINTER_PLUS_EXPR) |
| { |
| code = MINUS_EXPR; |
| bump = -bump; |
| } |
| |
| bump_tree = double_int_to_tree (incr_type, bump); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fputs ("Replacing: ", dump_file); |
| print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); |
| } |
| |
| if (bump.is_zero ()) |
| { |
| tree lhs = gimple_assign_lhs (c->cand_stmt); |
| gimple copy_stmt = gimple_build_assign (lhs, basis_name); |
| gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); |
| gimple_set_location (copy_stmt, gimple_location (c->cand_stmt)); |
| gsi_replace (&gsi, copy_stmt, false); |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| stmt_to_print = copy_stmt; |
| } |
| else |
| { |
| tree rhs1 = gimple_assign_rhs1 (c->cand_stmt); |
| tree rhs2 = gimple_assign_rhs2 (c->cand_stmt); |
| if (cand_code != NEGATE_EXPR |
| && ((operand_equal_p (rhs1, basis_name, 0) |
| && operand_equal_p (rhs2, bump_tree, 0)) |
| || (operand_equal_p (rhs1, bump_tree, 0) |
| && operand_equal_p (rhs2, basis_name, 0)))) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fputs ("(duplicate, not actually replacing)", dump_file); |
| stmt_to_print = c->cand_stmt; |
| } |
| } |
| else |
| { |
| gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); |
| gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree); |
| update_stmt (gsi_stmt (gsi)); |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| stmt_to_print = gsi_stmt (gsi); |
| } |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fputs ("With: ", dump_file); |
| print_gimple_stmt (dump_file, stmt_to_print, 0, 0); |
| fputs ("\n", dump_file); |
| } |
| } |
| |
| /* Replace candidate C, each sibling of candidate C, and each |
| dependent of candidate C with an add or subtract. Note that we |
| only operate on CAND_MULTs with known strides, so we will never |
| generate a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is |
| replaced by X = Y + ((i - i') * S), as described in the module |
| commentary. The folded value ((i - i') * S) is referred to here |
| as the "bump." */ |
| |
| static void |
| replace_dependents (slsr_cand_t c) |
| { |
| enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt); |
| |
| /* It is not useful to replace casts, copies, or adds of an SSA name |
| and a constant. Also skip candidates that have already been |
| replaced under another interpretation. */ |
| if (cand_code != MODIFY_EXPR |
| && cand_code != NOP_EXPR |
| && c->kind == CAND_MULT |
| && !cand_already_replaced (c)) |
| replace_dependent (c, cand_code); |
| |
| if (c->sibling) |
| replace_dependents (lookup_cand (c->sibling)); |
| |
| if (c->dependent) |
| replace_dependents (lookup_cand (c->dependent)); |
| } |
| |
| /* Return the index in the increment vector of the given INCREMENT. */ |
| |
| static inline unsigned |
| incr_vec_index (double_int increment) |
| { |
| unsigned i; |
| |
| for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++) |
| ; |
| |
| gcc_assert (i < incr_vec_len); |
| return i; |
| } |
| |
| /* Count the number of candidates in the tree rooted at C that have |
| not already been replaced under other interpretations. */ |
| |
| static unsigned |
| count_candidates (slsr_cand_t c) |
| { |
| unsigned count = cand_already_replaced (c) ? 0 : 1; |
| |
| if (c->sibling) |
| count += count_candidates (lookup_cand (c->sibling)); |
| |
| if (c->dependent) |
| count += count_candidates (lookup_cand (c->dependent)); |
| |
| return count; |
| } |
| |
| /* Increase the count of INCREMENT by one in the increment vector. |
| INCREMENT is associated with candidate C. If an initializer |
| T_0 = stride * I is provided by a candidate that dominates all |
| candidates with the same increment, also record T_0 for subsequent use. */ |
| |
| static void |
| record_increment (slsr_cand_t c, double_int increment) |
| { |
| bool found = false; |
| unsigned i; |
| |
| /* Treat increments that differ only in sign as identical so as to |
| share initializers, unless we are generating pointer arithmetic. */ |
| if (!address_arithmetic_p && increment.is_negative ()) |
| increment = -increment; |
| |
| for (i = 0; i < incr_vec_len; i++) |
| { |
| if (incr_vec[i].incr == increment) |
| { |
| incr_vec[i].count++; |
| found = true; |
| |
| /* If we previously recorded an initializer that doesn't |
| dominate this candidate, it's not going to be useful to |
| us after all. */ |
| if (incr_vec[i].initializer |
| && !dominated_by_p (CDI_DOMINATORS, |
| gimple_bb (c->cand_stmt), |
| incr_vec[i].init_bb)) |
| { |
| incr_vec[i].initializer = NULL_TREE; |
| incr_vec[i].init_bb = NULL; |
| } |
| |
| break; |
| } |
| } |
| |
| if (!found) |
| { |
| /* The first time we see an increment, create the entry for it. |
| If this is the root candidate which doesn't have a basis, set |
| the count to zero. We're only processing it so it can possibly |
| provide an initializer for other candidates. */ |
| incr_vec[incr_vec_len].incr = increment; |
| incr_vec[incr_vec_len].count = c->basis ? 1 : 0; |
| incr_vec[incr_vec_len].cost = COST_INFINITE; |
| |
| /* Optimistically record the first occurrence of this increment |
| as providing an initializer (if it does); we will revise this |
| opinion later if it doesn't dominate all other occurrences. |
| Exception: increments of -1, 0, 1 never need initializers. */ |
| if (c->kind == CAND_ADD |
| && c->index == increment |
| && (increment.sgt (double_int_one) |
| || increment.slt (double_int_minus_one)) |
| && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR |
| || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR)) |
| { |
| tree t0 = NULL_TREE; |
| tree rhs1 = gimple_assign_rhs1 (c->cand_stmt); |
| tree rhs2 = gimple_assign_rhs2 (c->cand_stmt); |
| if (operand_equal_p (rhs1, c->base_expr, 0)) |
| t0 = rhs2; |
| else if (operand_equal_p (rhs2, c->base_expr, 0)) |
| t0 = rhs1; |
| if (t0 |
| && SSA_NAME_DEF_STMT (t0) |
| && gimple_bb (SSA_NAME_DEF_STMT (t0))) |
| { |
| incr_vec[incr_vec_len].initializer = t0; |
| incr_vec[incr_vec_len++].init_bb |
| = gimple_bb (SSA_NAME_DEF_STMT (t0)); |
| } |
| else |
| { |
| incr_vec[incr_vec_len].initializer = NULL_TREE; |
| incr_vec[incr_vec_len++].init_bb = NULL; |
| } |
| } |
| else |
| { |
| incr_vec[incr_vec_len].initializer = NULL_TREE; |
| incr_vec[incr_vec_len++].init_bb = NULL; |
| } |
| } |
| } |
| |
| /* Determine how many times each unique increment occurs in the set |
| of candidates rooted at C's parent, recording the data in the |
| increment vector. For each unique increment I, if an initializer |
| T_0 = stride * I is provided by a candidate that dominates all |
| candidates with the same increment, also record T_0 for subsequent |
| use. */ |
| |
| static void |
| record_increments (slsr_cand_t c) |
| { |
| if (!cand_already_replaced (c)) |
| record_increment (c, cand_increment (c)); |
| |
| if (c->sibling) |
| record_increments (lookup_cand (c->sibling)); |
| |
| if (c->dependent) |
| record_increments (lookup_cand (c->dependent)); |
| } |
| |
| /* Return the first candidate in the tree rooted at C that has not |
| already been replaced, favoring siblings over dependents. */ |
| |
| static slsr_cand_t |
| unreplaced_cand_in_tree (slsr_cand_t c) |
| { |
| if (!cand_already_replaced (c)) |
| return c; |
| |
| if (c->sibling) |
| { |
| slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling)); |
| if (sib) |
| return sib; |
| } |
| |
| if (c->dependent) |
| { |
| slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent)); |
| if (dep) |
| return dep; |
| } |
| |
| return NULL; |
| } |
| |
| /* Return TRUE if the candidates in the tree rooted at C should be |
| optimized for speed, else FALSE. We estimate this based on the block |
| containing the most dominant candidate in the tree that has not yet |
| been replaced. */ |
| |
| static bool |
| optimize_cands_for_speed_p (slsr_cand_t c) |
| { |
| slsr_cand_t c2 = unreplaced_cand_in_tree (c); |
| gcc_assert (c2); |
| return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt)); |
| } |
| |
| /* Add COST_IN to the lowest cost of any dependent path starting at |
| candidate C or any of its siblings, counting only candidates along |
| such paths with increment INCR. Assume that replacing a candidate |
| reduces cost by REPL_SAVINGS. Also account for savings from any |
| statements that would go dead. */ |
| |
| static int |
| lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c, double_int incr) |
| { |
| int local_cost, sib_cost; |
| double_int cand_incr = cand_abs_increment (c); |
| |
| if (cand_already_replaced (c)) |
| local_cost = cost_in; |
| else if (incr == cand_incr) |
| local_cost = cost_in - repl_savings - c->dead_savings; |
| else |
| local_cost = cost_in - c->dead_savings; |
| |
| if (c->dependent) |
| local_cost = lowest_cost_path (local_cost, repl_savings, |
| lookup_cand (c->dependent), incr); |
| |
| if (c->sibling) |
| { |
| sib_cost = lowest_cost_path (cost_in, repl_savings, |
| lookup_cand (c->sibling), incr); |
| local_cost = MIN (local_cost, sib_cost); |
| } |
| |
| return local_cost; |
| } |
| |
| /* Compute the total savings that would accrue from all replacements |
| in the candidate tree rooted at C, counting only candidates with |
| increment INCR. Assume that replacing a candidate reduces cost |
| by REPL_SAVINGS. Also account for savings from statements that |
| would go dead. */ |
| |
| static int |
| total_savings (int repl_savings, slsr_cand_t c, double_int incr) |
| { |
| int savings = 0; |
| double_int cand_incr = cand_abs_increment (c); |
| |
| if (incr == cand_incr && !cand_already_replaced (c)) |
| savings += repl_savings + c->dead_savings; |
| |
| if (c->dependent) |
| savings += total_savings (repl_savings, lookup_cand (c->dependent), incr); |
| |
| if (c->sibling) |
| savings += total_savings (repl_savings, lookup_cand (c->sibling), incr); |
| |
| return savings; |
| } |
| |
| /* Use target-specific costs to determine and record which increments |
| in the current candidate tree are profitable to replace, assuming |
| MODE and SPEED. FIRST_DEP is the first dependent of the root of |
| the candidate tree. |
| |
| One slight limitation here is that we don't account for the possible |
| introduction of casts in some cases. See replace_one_candidate for |
| the cases where these are introduced. This should probably be cleaned |
| up sometime. */ |
| |
| static void |
| analyze_increments (slsr_cand_t first_dep, enum machine_mode mode, bool speed) |
| { |
| unsigned i; |
| |
| for (i = 0; i < incr_vec_len; i++) |
| { |
| HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi (); |
| |
| /* If somehow this increment is bigger than a HWI, we won't |
| be optimizing candidates that use it. And if the increment |
| has a count of zero, nothing will be done with it. */ |
| if (!incr_vec[i].incr.fits_shwi () || !incr_vec[i].count) |
| incr_vec[i].cost = COST_INFINITE; |
| |
| /* Increments of 0, 1, and -1 are always profitable to replace, |
| because they always replace a multiply or add with an add or |
| copy, and may cause one or more existing instructions to go |
| dead. Exception: -1 can't be assumed to be profitable for |
| pointer addition. */ |
| else if (incr == 0 |
| || incr == 1 |
| || (incr == -1 |
| && (gimple_assign_rhs_code (first_dep->cand_stmt) |
| != POINTER_PLUS_EXPR))) |
| incr_vec[i].cost = COST_NEUTRAL; |
| |
| /* FORNOW: If we need to add an initializer, give up if a cast from |
| the candidate's type to its stride's type can lose precision. |
| This could eventually be handled better by expressly retaining the |
| result of a cast to a wider type in the stride. Example: |
| |
| short int _1; |
| _2 = (int) _1; |
| _3 = _2 * 10; |
| _4 = x + _3; ADD: x + (10 * _1) : int |
| _5 = _2 * 15; |
| _6 = x + _3; ADD: x + (15 * _1) : int |
| |
| Right now replacing _6 would cause insertion of an initializer |
| of the form "short int T = _1 * 5;" followed by a cast to |
| int, which could overflow incorrectly. Had we recorded _2 or |
| (int)_1 as the stride, this wouldn't happen. However, doing |
| this breaks other opportunities, so this will require some |
| care. */ |
| else if (!incr_vec[i].initializer |
| && TREE_CODE (first_dep->stride) != INTEGER_CST |
| && !legal_cast_p_1 (first_dep->stride, |
| gimple_assign_lhs (first_dep->cand_stmt))) |
| |
| incr_vec[i].cost = COST_INFINITE; |
| |
| /* If we need to add an initializer, make sure we don't introduce |
| a multiply by a pointer type, which can happen in certain cast |
| scenarios. FIXME: When cleaning up these cast issues, we can |
| afford to introduce the multiply provided we cast out to an |
| unsigned int of appropriate size. */ |
| else if (!incr_vec[i].initializer |
| && TREE_CODE (first_dep->stride) != INTEGER_CST |
| && POINTER_TYPE_P (TREE_TYPE (first_dep->stride))) |
| |
| incr_vec[i].cost = COST_INFINITE; |
| |
| /* For any other increment, if this is a multiply candidate, we |
| must introduce a temporary T and initialize it with |
| T_0 = stride * increment. When optimizing for speed, walk the |
| candidate tree to calculate the best cost reduction along any |
| path; if it offsets the fixed cost of inserting the initializer, |
| replacing the increment is profitable. When optimizing for |
| size, instead calculate the total cost reduction from replacing |
| all candidates with this increment. */ |
| else if (first_dep->kind == CAND_MULT) |
| { |
| int cost = mult_by_coeff_cost (incr, mode, speed); |
| int repl_savings = mul_cost (speed, mode) - add_cost (speed, mode); |
| if (speed) |
| cost = lowest_cost_path (cost, repl_savings, first_dep, |
| incr_vec[i].incr); |
| else |
| cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr); |
| |
| incr_vec[i].cost = cost; |
| } |
| |
| /* If this is an add candidate, the initializer may already |
| exist, so only calculate the cost of the initializer if it |
| doesn't. We are replacing one add with another here, so the |
| known replacement savings is zero. We will account for removal |
| of dead instructions in lowest_cost_path or total_savings. */ |
| else |
| { |
| int cost = 0; |
| if (!incr_vec[i].initializer) |
| cost = mult_by_coeff_cost (incr, mode, speed); |
| |
| if (speed) |
| cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr); |
| else |
| cost -= total_savings (0, first_dep, incr_vec[i].incr); |
| |
| incr_vec[i].cost = cost; |
| } |
| } |
| } |
| |
| /* Return the nearest common dominator of BB1 and BB2. If the blocks |
| are identical, return the earlier of C1 and C2 in *WHERE. Otherwise, |
| if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2, |
| return C2 in *WHERE; and if the NCD matches neither, return NULL in |
| *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */ |
| |
| static basic_block |
| ncd_for_two_cands (basic_block bb1, basic_block bb2, |
| slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where) |
| { |
| basic_block ncd; |
| |
| if (!bb1) |
| { |
| *where = c2; |
| return bb2; |
| } |
| |
| if (!bb2) |
| { |
| *where = c1; |
| return bb1; |
| } |
| |
| ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2); |
| |
| /* If both candidates are in the same block, the earlier |
| candidate wins. */ |
| if (bb1 == ncd && bb2 == ncd) |
| { |
| if (!c1 || (c2 && c2->cand_num < c1->cand_num)) |
| *where = c2; |
| else |
| *where = c1; |
| } |
| |
| /* Otherwise, if one of them produced a candidate in the |
| dominator, that one wins. */ |
| else if (bb1 == ncd) |
| *where = c1; |
| |
| else if (bb2 == ncd) |
| *where = c2; |
| |
| /* If neither matches the dominator, neither wins. */ |
| else |
| *where = NULL; |
| |
| return ncd; |
| } |
| |
| /* Consider all candidates in the tree rooted at C for which INCR |
| represents the required increment of C relative to its basis. |
| Find and return the basic block that most nearly dominates all |
| such candidates. If the returned block contains one or more of |
| the candidates, return the earliest candidate in the block in |
| *WHERE. */ |
| |
| static basic_block |
| nearest_common_dominator_for_cands (slsr_cand_t c, double_int incr, |
| slsr_cand_t *where) |
| { |
| basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd; |
| slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where; |
| double_int cand_incr; |
| |
| /* First find the NCD of all siblings and dependents. */ |
| if (c->sibling) |
| sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling), |
| incr, &sib_where); |
| if (c->dependent) |
| dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent), |
| incr, &dep_where); |
| if (!sib_ncd && !dep_ncd) |
| { |
| new_where = NULL; |
| ncd = NULL; |
| } |
| else if (sib_ncd && !dep_ncd) |
| { |
| new_where = sib_where; |
| ncd = sib_ncd; |
| } |
| else if (dep_ncd && !sib_ncd) |
| { |
| new_where = dep_where; |
| ncd = dep_ncd; |
| } |
| else |
| ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where, |
| dep_where, &new_where); |
| |
| /* If the candidate's increment doesn't match the one we're interested |
| in, then the result depends only on siblings and dependents. */ |
| cand_incr = cand_abs_increment (c); |
| |
| if (cand_incr != incr || cand_already_replaced (c)) |
| { |
| *where = new_where; |
| return ncd; |
| } |
| |
| /* Otherwise, compare this candidate with the result from all siblings |
| and dependents. */ |
| this_where = c; |
| this_ncd = gimple_bb (c->cand_stmt); |
| ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where); |
| |
| return ncd; |
| } |
| |
| /* Return TRUE if the increment indexed by INDEX is profitable to replace. */ |
| |
| static inline bool |
| profitable_increment_p (unsigned index) |
| { |
| return (incr_vec[index].cost <= COST_NEUTRAL); |
| } |
| |
| /* For each profitable increment in the increment vector not equal to |
| 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common |
| dominator of all statements in the candidate chain rooted at C |
| that require that increment, and insert an initializer |
| T_0 = stride * increment at that location. Record T_0 with the |
| increment record. */ |
| |
| static void |
| insert_initializers (slsr_cand_t c) |
| { |
| unsigned i; |
| tree new_var = NULL_TREE; |
| |
| for (i = 0; i < incr_vec_len; i++) |
| { |
| basic_block bb; |
| slsr_cand_t where = NULL; |
| gimple init_stmt; |
| tree stride_type, new_name, incr_tree; |
| double_int incr = incr_vec[i].incr; |
| |
| if (!profitable_increment_p (i) |
| || incr.is_one () |
| || (incr.is_minus_one () |
| && gimple_assign_rhs_code (c->cand_stmt) != POINTER_PLUS_EXPR) |
| || incr.is_zero ()) |
| continue; |
| |
| /* We may have already identified an existing initializer that |
| will suffice. */ |
| if (incr_vec[i].initializer) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fputs ("Using existing initializer: ", dump_file); |
| print_gimple_stmt (dump_file, |
| SSA_NAME_DEF_STMT (incr_vec[i].initializer), |
| 0, 0); |
| } |
| continue; |
| } |
| |
| /* Find the block that most closely dominates all candidates |
| with this increment. If there is at least one candidate in |
| that block, the earliest one will be returned in WHERE. */ |
| bb = nearest_common_dominator_for_cands (c, incr, &where); |
| |
| /* Create a new SSA name to hold the initializer's value. */ |
| stride_type = TREE_TYPE (c->stride); |
| lazy_create_slsr_reg (&new_var, stride_type); |
| new_name = make_ssa_name (new_var, NULL); |
| incr_vec[i].initializer = new_name; |
| |
| /* Create the initializer and insert it in the latest possible |
| dominating position. */ |
| incr_tree = double_int_to_tree (stride_type, incr); |
| init_stmt = gimple_build_assign_with_ops (MULT_EXPR, new_name, |
| c->stride, incr_tree); |
| if (where) |
| { |
| gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt); |
| gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT); |
| gimple_set_location (init_stmt, gimple_location (where->cand_stmt)); |
| } |
| else |
| { |
| gimple_stmt_iterator gsi = gsi_last_bb (bb); |
| gimple basis_stmt = lookup_cand (c->basis)->cand_stmt; |
| |
| if (!gsi_end_p (gsi) && is_ctrl_stmt (gsi_stmt (gsi))) |
| gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT); |
| else |
| gsi_insert_after (&gsi, init_stmt, GSI_SAME_STMT); |
| |
| gimple_set_location (init_stmt, gimple_location (basis_stmt)); |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fputs ("Inserting initializer: ", dump_file); |
| print_gimple_stmt (dump_file, init_stmt, 0, 0); |
| } |
| } |
| } |
| |
| /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of |
| type TO_TYPE, and insert it in front of the statement represented |
| by candidate C. Use *NEW_VAR to create the new SSA name. Return |
| the new SSA name. */ |
| |
| static tree |
| introduce_cast_before_cand (slsr_cand_t c, tree to_type, |
| tree from_expr, tree *new_var) |
| { |
| tree cast_lhs; |
| gimple cast_stmt; |
| gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); |
| |
| lazy_create_slsr_reg (new_var, to_type); |
| cast_lhs = make_ssa_name (*new_var, NULL); |
| cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, cast_lhs, |
| from_expr, NULL_TREE); |
| gimple_set_location (cast_stmt, gimple_location (c->cand_stmt)); |
| gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fputs (" Inserting: ", dump_file); |
| print_gimple_stmt (dump_file, cast_stmt, 0, 0); |
| } |
| |
| return cast_lhs; |
| } |
| |
| /* Replace the RHS of the statement represented by candidate C with |
| NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't |
| leave C unchanged or just interchange its operands. The original |
| operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2. |
| If the replacement was made and we are doing a details dump, |
| return the revised statement, else NULL. */ |
| |
| static gimple |
| replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2, |
| enum tree_code old_code, tree old_rhs1, tree old_rhs2, |
| slsr_cand_t c) |
| { |
| if (new_code != old_code |
| || ((!operand_equal_p (new_rhs1, old_rhs1, 0) |
| || !operand_equal_p (new_rhs2, old_rhs2, 0)) |
| && (!operand_equal_p (new_rhs1, old_rhs2, 0) |
| || !operand_equal_p (new_rhs2, old_rhs1, 0)))) |
| { |
| gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); |
| gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2); |
| update_stmt (gsi_stmt (gsi)); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| return gsi_stmt (gsi); |
| } |
| |
| else if (dump_file && (dump_flags & TDF_DETAILS)) |
| fputs (" (duplicate, not actually replacing)\n", dump_file); |
| |
| return NULL; |
| } |
| |
| /* Strength-reduce the statement represented by candidate C by replacing |
| it with an equivalent addition or subtraction. I is the index into |
| the increment vector identifying C's increment. NEW_VAR is used to |
| create a new SSA name if a cast needs to be introduced. BASIS_NAME |
| is the rhs1 to use in creating the add/subtract. */ |
| |
| static void |
| replace_one_candidate (slsr_cand_t c, unsigned i, tree *new_var, |
| tree basis_name) |
| { |
| gimple stmt_to_print = NULL; |
| tree orig_rhs1, orig_rhs2; |
| tree rhs2; |
| enum tree_code orig_code, repl_code; |
| double_int cand_incr; |
| |
| orig_code = gimple_assign_rhs_code (c->cand_stmt); |
| orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt); |
| orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt); |
| cand_incr = cand_increment (c); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fputs ("Replacing: ", dump_file); |
| print_gimple_stmt (dump_file, c->cand_stmt, 0, 0); |
| stmt_to_print = c->cand_stmt; |
| } |
| |
| if (address_arithmetic_p) |
| repl_code = POINTER_PLUS_EXPR; |
| else |
| repl_code = PLUS_EXPR; |
| |
| /* If the increment has an initializer T_0, replace the candidate |
| statement with an add of the basis name and the initializer. */ |
| if (incr_vec[i].initializer) |
| { |
| tree init_type = TREE_TYPE (incr_vec[i].initializer); |
| tree orig_type = TREE_TYPE (orig_rhs2); |
| |
| if (types_compatible_p (orig_type, init_type)) |
| rhs2 = incr_vec[i].initializer; |
| else |
| rhs2 = introduce_cast_before_cand (c, orig_type, |
| incr_vec[i].initializer, |
| new_var); |
| |
| if (incr_vec[i].incr != cand_incr) |
| { |
| gcc_assert (repl_code == PLUS_EXPR); |
| repl_code = MINUS_EXPR; |
| } |
| |
| stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2, |
| orig_code, orig_rhs1, orig_rhs2, |
| c); |
| } |
| |
| /* Otherwise, the increment is one of -1, 0, and 1. Replace |
| with a subtract of the stride from the basis name, a copy |
| from the basis name, or an add of the stride to the basis |
| name, respectively. It may be necessary to introduce a |
| cast (or reuse an existing cast). */ |
| else if (cand_incr.is_one ()) |
| { |
| tree stride_type = TREE_TYPE (c->stride); |
| tree orig_type = TREE_TYPE (orig_rhs2); |
| |
| if (types_compatible_p (orig_type, stride_type)) |
| rhs2 = c->stride; |
| else |
| rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var); |
| |
| stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2, |
| orig_code, orig_rhs1, orig_rhs2, |
| c); |
| } |
| |
| else if (cand_incr.is_minus_one ()) |
| { |
| tree stride_type = TREE_TYPE (c->stride); |
| tree orig_type = TREE_TYPE (orig_rhs2); |
| gcc_assert (repl_code != POINTER_PLUS_EXPR); |
| |
| if (types_compatible_p (orig_type, stride_type)) |
| rhs2 = c->stride; |
| else |
| rhs2 = introduce_cast_before_cand (c, orig_type, c->stride, new_var); |
| |
| if (orig_code != MINUS_EXPR |
| || !operand_equal_p (basis_name, orig_rhs1, 0) |
| || !operand_equal_p (rhs2, orig_rhs2, 0)) |
| { |
| gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); |
| gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2); |
| update_stmt (gsi_stmt (gsi)); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| stmt_to_print = gsi_stmt (gsi); |
| } |
| else if (dump_file && (dump_flags & TDF_DETAILS)) |
| fputs (" (duplicate, not actually replacing)\n", dump_file); |
| } |
| |
| else if (cand_incr.is_zero ()) |
| { |
| tree lhs = gimple_assign_lhs (c->cand_stmt); |
| tree lhs_type = TREE_TYPE (lhs); |
| tree basis_type = TREE_TYPE (basis_name); |
| |
| if (types_compatible_p (lhs_type, basis_type)) |
| { |
| gimple copy_stmt = gimple_build_assign (lhs, basis_name); |
| gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); |
| gimple_set_location (copy_stmt, gimple_location (c->cand_stmt)); |
| gsi_replace (&gsi, copy_stmt, false); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| stmt_to_print = copy_stmt; |
| } |
| else |
| { |
| gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt); |
| gimple cast_stmt = gimple_build_assign_with_ops (NOP_EXPR, lhs, |
| basis_name, |
| NULL_TREE); |
| gimple_set_location (cast_stmt, gimple_location (c->cand_stmt)); |
| gsi_replace (&gsi, cast_stmt, false); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| stmt_to_print = cast_stmt; |
| } |
| } |
| else |
| gcc_unreachable (); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print) |
| { |
| fputs ("With: ", dump_file); |
| print_gimple_stmt (dump_file, stmt_to_print, 0, 0); |
| fputs ("\n", dump_file); |
| } |
| } |
| |
| /* For each candidate in the tree rooted at C, replace it with |
| an increment if such has been shown to be profitable. */ |
| |
| static void |
| replace_profitable_candidates (slsr_cand_t c) |
| { |
| if (!cand_already_replaced (c)) |
| { |
| double_int increment = cand_abs_increment (c); |
| tree new_var = NULL; |
| enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt); |
| unsigned i; |
| |
| i = incr_vec_index (increment); |
| |
| /* Only process profitable increments. Nothing useful can be done |
| to a cast or copy. */ |
| if (profitable_increment_p (i) |
| && orig_code != MODIFY_EXPR |
| && orig_code != NOP_EXPR) |
| { |
| slsr_cand_t basis = lookup_cand (c->basis); |
| tree basis_name = gimple_assign_lhs (basis->cand_stmt); |
| replace_one_candidate (c, i, &new_var, basis_name); |
| } |
| } |
| |
| if (c->sibling) |
| replace_profitable_candidates (lookup_cand (c->sibling)); |
| |
| if (c->dependent) |
| replace_profitable_candidates (lookup_cand (c->dependent)); |
| } |
| |
| /* Analyze costs of related candidates in the candidate vector, |
| and make beneficial replacements. */ |
| |
| static void |
| analyze_candidates_and_replace (void) |
| { |
| unsigned i; |
| slsr_cand_t c; |
| |
| /* Each candidate that has a null basis and a non-null |
| dependent is the root of a tree of related statements. |
| Analyze each tree to determine a subset of those |
| statements that can be replaced with maximum benefit. */ |
| FOR_EACH_VEC_ELT (cand_vec, i, c) |
| { |
| slsr_cand_t first_dep; |
| |
| if (c->basis != 0 || c->dependent == 0) |
| continue; |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n", |
| c->cand_num); |
| |
| first_dep = lookup_cand (c->dependent); |
| |
| /* If this is a chain of CAND_REFs, unconditionally replace |
| each of them with a strength-reduced data reference. */ |
| if (c->kind == CAND_REF) |
| replace_refs (c); |
| |
| /* If the common stride of all related candidates is a |
| known constant, and none of these has a phi-dependence, |
| then all replacements are considered profitable. |
| Each replaces a multiply by a single add, with the |
| possibility that a feeding add also goes dead as a |
| result. */ |
| else if (unconditional_cands_with_known_stride_p (c)) |
| replace_dependents (first_dep); |
| |
| /* When the stride is an SSA name, it may still be profitable |
| to replace some or all of the dependent candidates, depending |
| on whether the introduced increments can be reused, or are |
| less expensive to calculate than the replaced statements. */ |
| else |
| { |
| unsigned length; |
| enum machine_mode mode; |
| bool speed; |
| |
| /* Determine whether we'll be generating pointer arithmetic |
| when replacing candidates. */ |
| address_arithmetic_p = (c->kind == CAND_ADD |
| && POINTER_TYPE_P (c->cand_type)); |
| |
| /* If all candidates have already been replaced under other |
| interpretations, nothing remains to be done. */ |
| length = count_candidates (c); |
| if (!length) |
| continue; |
| |
| /* Construct an array of increments for this candidate chain. */ |
| incr_vec = XNEWVEC (incr_info, length); |
| incr_vec_len = 0; |
| record_increments (c); |
| |
| /* Determine which increments are profitable to replace. */ |
| mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt))); |
| speed = optimize_cands_for_speed_p (c); |
| analyze_increments (first_dep, mode, speed); |
| |
| /* Insert initializers of the form T_0 = stride * increment |
| for use in profitable replacements. */ |
| insert_initializers (first_dep); |
| dump_incr_vec (); |
| |
| /* Perform the replacements. */ |
| replace_profitable_candidates (first_dep); |
| free (incr_vec); |
| } |
| |
| /* TODO: When conditional increments occur so that a |
| candidate is dependent upon a phi-basis, the cost of |
| introducing a temporary must be accounted for. */ |
| } |
| } |
| |
| static unsigned |
| execute_strength_reduction (void) |
| { |
| struct dom_walk_data walk_data; |
| |
| /* Create the obstack where candidates will reside. */ |
| gcc_obstack_init (&cand_obstack); |
| |
| /* Allocate the candidate vector. */ |
| cand_vec.create (128); |
| |
| /* Allocate the mapping from statements to candidate indices. */ |
| stmt_cand_map = pointer_map_create (); |
| |
| /* Create the obstack where candidate chains will reside. */ |
| gcc_obstack_init (&chain_obstack); |
| |
| /* Allocate the mapping from base expressions to candidate chains. */ |
| base_cand_map = htab_create (500, base_cand_hash, |
| base_cand_eq, base_cand_free); |
| |
| /* Initialize the loop optimizer. We need to detect flow across |
| back edges, and this gives us dominator information as well. */ |
| loop_optimizer_init (AVOID_CFG_MODIFICATIONS); |
| |
| /* Set up callbacks for the generic dominator tree walker. */ |
| walk_data.dom_direction = CDI_DOMINATORS; |
| walk_data.initialize_block_local_data = NULL; |
| walk_data.before_dom_children = find_candidates_in_block; |
| walk_data.after_dom_children = NULL; |
| walk_data.global_data = NULL; |
| walk_data.block_local_data_size = 0; |
| init_walk_dominator_tree (&walk_data); |
| |
| /* Walk the CFG in predominator order looking for strength reduction |
| candidates. */ |
| walk_dominator_tree (&walk_data, ENTRY_BLOCK_PTR); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| dump_cand_vec (); |
| dump_cand_chains (); |
| } |
| |
| /* Analyze costs and make appropriate replacements. */ |
| analyze_candidates_and_replace (); |
| |
| /* Free resources. */ |
| fini_walk_dominator_tree (&walk_data); |
| loop_optimizer_finalize (); |
| htab_delete (base_cand_map); |
| obstack_free (&chain_obstack, NULL); |
| pointer_map_destroy (stmt_cand_map); |
| cand_vec.release (); |
| obstack_free (&cand_obstack, NULL); |
| |
| return 0; |
| } |
| |
| static bool |
| gate_strength_reduction (void) |
| { |
| return flag_tree_slsr; |
| } |
| |
| struct gimple_opt_pass pass_strength_reduction = |
| { |
| { |
| GIMPLE_PASS, |
| "slsr", /* name */ |
| OPTGROUP_NONE, /* optinfo_flags */ |
| gate_strength_reduction, /* gate */ |
| execute_strength_reduction, /* execute */ |
| NULL, /* sub */ |
| NULL, /* next */ |
| 0, /* static_pass_number */ |
| TV_GIMPLE_SLSR, /* tv_id */ |
| PROP_cfg | PROP_ssa, /* properties_required */ |
| 0, /* properties_provided */ |
| 0, /* properties_destroyed */ |
| 0, /* todo_flags_start */ |
| TODO_verify_ssa /* todo_flags_finish */ |
| } |
| }; |