| /* Reassociation for trees. |
| Copyright (C) 2005-2013 Free Software Foundation, Inc. |
| Contributed by Daniel Berlin <dan@dberlin.org> |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "tree.h" |
| #include "basic-block.h" |
| #include "gimple-pretty-print.h" |
| #include "tree-inline.h" |
| #include "tree-flow.h" |
| #include "gimple.h" |
| #include "tree-iterator.h" |
| #include "tree-pass.h" |
| #include "alloc-pool.h" |
| #include "vec.h" |
| #include "langhooks.h" |
| #include "pointer-set.h" |
| #include "cfgloop.h" |
| #include "flags.h" |
| #include "target.h" |
| #include "params.h" |
| #include "diagnostic-core.h" |
| |
| /* This is a simple global reassociation pass. It is, in part, based |
| on the LLVM pass of the same name (They do some things more/less |
| than we do, in different orders, etc). |
| |
| It consists of five steps: |
| |
| 1. Breaking up subtract operations into addition + negate, where |
| it would promote the reassociation of adds. |
| |
| 2. Left linearization of the expression trees, so that (A+B)+(C+D) |
| becomes (((A+B)+C)+D), which is easier for us to rewrite later. |
| During linearization, we place the operands of the binary |
| expressions into a vector of operand_entry_t. |
| |
| 3. Optimization of the operand lists, eliminating things like a + |
| -a, a & a, etc. |
| |
| 3a. Combine repeated factors with the same occurrence counts |
| into a __builtin_powi call that will later be optimized into |
| an optimal number of multiplies. |
| |
| 4. Rewrite the expression trees we linearized and optimized so |
| they are in proper rank order. |
| |
| 5. Repropagate negates, as nothing else will clean them up ATM. |
| |
| A bit of theory on #4, since nobody seems to write anything down |
| about why it makes sense to do it the way they do it: |
| |
| We could do this much nicer theoretically, but don't (for reasons |
| explained after how to do it theoretically nice :P). |
| |
| In order to promote the most redundancy elimination, you want |
| binary expressions whose operands are the same rank (or |
| preferably, the same value) exposed to the redundancy eliminator, |
| for possible elimination. |
| |
| So the way to do this if we really cared, is to build the new op |
| tree from the leaves to the roots, merging as you go, and putting the |
| new op on the end of the worklist, until you are left with one |
| thing on the worklist. |
| |
| I.e., if you have to rewrite the following set of operands (listed with |
| rank in parentheses), with opcode PLUS_EXPR: |
| |
| a (1), b (1), c (1), d (2), e (2) |
| |
| |
| We start with our merge worklist empty, and the ops list with all of |
| those on it. |
| |
| You want to first merge all leaves of the same rank, as much as |
| possible. |
| |
| So first build a binary op of |
| |
| mergetmp = a + b, and put "mergetmp" on the merge worklist. |
| |
| Because there is no three operand form of PLUS_EXPR, c is not going to |
| be exposed to redundancy elimination as a rank 1 operand. |
| |
| So you might as well throw it on the merge worklist (you could also |
| consider it to now be a rank two operand, and merge it with d and e, |
| but in this case, you then have evicted e from a binary op. So at |
| least in this situation, you can't win.) |
| |
| Then build a binary op of d + e |
| mergetmp2 = d + e |
| |
| and put mergetmp2 on the merge worklist. |
| |
| so merge worklist = {mergetmp, c, mergetmp2} |
| |
| Continue building binary ops of these operations until you have only |
| one operation left on the worklist. |
| |
| So we have |
| |
| build binary op |
| mergetmp3 = mergetmp + c |
| |
| worklist = {mergetmp2, mergetmp3} |
| |
| mergetmp4 = mergetmp2 + mergetmp3 |
| |
| worklist = {mergetmp4} |
| |
| Because we have only one operation left, we can now just set the original |
| statement equal to the result of that operation. |
| |
| This will at least expose a + b and d + e to redundancy elimination |
| as binary operations. |
| |
| For extra points, you can reuse the old statements to build the |
| mergetmps, since you shouldn't run out. |
| |
| So why don't we do this? |
| |
| Because it's expensive, and rarely will help. Most trees we are |
| reassociating have 3 or fewer ops. If they have 2 ops, they already |
| will be written into a nice single binary op. If you have 3 ops, a |
| single simple check suffices to tell you whether the first two are of the |
| same rank. If so, you know to order it |
| |
| mergetmp = op1 + op2 |
| newstmt = mergetmp + op3 |
| |
| instead of |
| mergetmp = op2 + op3 |
| newstmt = mergetmp + op1 |
| |
| If all three are of the same rank, you can't expose them all in a |
| single binary operator anyway, so the above is *still* the best you |
| can do. |
| |
| Thus, this is what we do. When we have three ops left, we check to see |
| what order to put them in, and call it a day. As a nod to vector sum |
| reduction, we check if any of the ops are really a phi node that is a |
| destructive update for the associating op, and keep the destructive |
| update together for vector sum reduction recognition. */ |
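| |
| /* A purely illustrative sketch (not drawn from any particular testcase) |
| of steps 1-4 on GIMPLE-like pseudocode: a chain such as |
| |
| t1 = a - b; |
| t2 = t1 + c; |
| r = t2 - d; |
| |
| is first broken into additions of negates (step 1), |
| |
| t1 = a + (-b); |
| t2 = t1 + c; |
| r = t2 + (-d); |
| |
| then linearized into the PLUS_EXPR operand list {a, -b, c, -d} |
| (step 2), optimized (step 3, nothing to eliminate in this example), |
| and finally rewritten in rank order (step 4). Step 5 turns any |
| surviving negates back into subtracts where that gives better code. */ |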
| |
| |
| /* Statistics */ |
| static struct |
| { |
| int linearized; |
| int constants_eliminated; |
| int ops_eliminated; |
| int rewritten; |
| int pows_encountered; |
| int pows_created; |
| } reassociate_stats; |
| |
| /* Operator, rank pair. */ |
| typedef struct operand_entry |
| { |
| unsigned int rank; |
| int id; |
| tree op; |
| unsigned int count; |
| } *operand_entry_t; |
| |
| static alloc_pool operand_entry_pool; |
| |
| /* This is used to assign a unique ID to each struct operand_entry |
| so that qsort results are identical on different hosts. */ |
| static int next_operand_entry_id; |
| |
| /* Starting rank number for a given basic block, so that we can rank |
| operations using unmovable instructions in that BB based on the bb |
| depth. */ |
| static long *bb_rank; |
| |
| /* Operand->rank hashtable. */ |
| static struct pointer_map_t *operand_rank; |
| |
| /* Forward decls. */ |
| static long get_rank (tree); |
| |
| |
| /* Bias amount for loop-carried phis. We want this to be larger than |
| the depth of any reassociation tree we can see, but not larger than |
| the rank difference between two blocks. */ |
| #define PHI_LOOP_BIAS (1 << 15) |
| |
| /* Rank assigned to a phi statement. If STMT is a loop-carried phi of |
| an innermost loop, and the phi has only a single use which is inside |
| the loop, then the rank is the block rank of the loop latch plus an |
| extra bias for the loop-carried dependence. This causes expressions |
| calculated into an accumulator variable to be independent for each |
| iteration of the loop. If STMT is some other phi, the rank is the |
| block rank of its containing block. */ |
| static long |
| phi_rank (gimple stmt) |
| { |
| basic_block bb = gimple_bb (stmt); |
| struct loop *father = bb->loop_father; |
| tree res; |
| unsigned i; |
| use_operand_p use; |
| gimple use_stmt; |
| |
| /* We only care about real loops (those with a latch). */ |
| if (!father->latch) |
| return bb_rank[bb->index]; |
| |
| /* Interesting phis must be in headers of innermost loops. */ |
| if (bb != father->header |
| || father->inner) |
| return bb_rank[bb->index]; |
| |
| /* Ignore virtual SSA_NAMEs. */ |
| res = gimple_phi_result (stmt); |
| if (virtual_operand_p (res)) |
| return bb_rank[bb->index]; |
| |
| /* The phi definition must have a single use, and that use must be |
| within the loop. Otherwise this isn't an accumulator pattern. */ |
| if (!single_imm_use (res, &use, &use_stmt) |
| || gimple_bb (use_stmt)->loop_father != father) |
| return bb_rank[bb->index]; |
| |
| /* Look for phi arguments from within the loop. If found, bias this phi. */ |
| for (i = 0; i < gimple_phi_num_args (stmt); i++) |
| { |
| tree arg = gimple_phi_arg_def (stmt, i); |
| if (TREE_CODE (arg) == SSA_NAME |
| && !SSA_NAME_IS_DEFAULT_DEF (arg)) |
| { |
| gimple def_stmt = SSA_NAME_DEF_STMT (arg); |
| if (gimple_bb (def_stmt)->loop_father == father) |
| return bb_rank[father->latch->index] + PHI_LOOP_BIAS; |
| } |
| } |
| |
| /* Must be an uninteresting phi. */ |
| return bb_rank[bb->index]; |
| } |
| |
| /* If EXP is an SSA_NAME defined by a PHI statement that represents a |
| loop-carried dependence of an innermost loop, return TRUE; else |
| return FALSE. */ |
| static bool |
| loop_carried_phi (tree exp) |
| { |
| gimple phi_stmt; |
| long block_rank; |
| |
| if (TREE_CODE (exp) != SSA_NAME |
| || SSA_NAME_IS_DEFAULT_DEF (exp)) |
| return false; |
| |
| phi_stmt = SSA_NAME_DEF_STMT (exp); |
| |
| if (gimple_code (phi_stmt) != GIMPLE_PHI) |
| return false; |
| |
| /* Non-loop-carried phis have block rank. Loop-carried phis have |
| an additional bias added in. If this phi doesn't have block rank, |
| it's biased and should not be propagated. */ |
| block_rank = bb_rank[gimple_bb (phi_stmt)->index]; |
| |
| if (phi_rank (phi_stmt) != block_rank) |
| return true; |
| |
| return false; |
| } |
| |
| /* Return the maximum of RANK and the rank that should be propagated |
| from expression OP. For most operands, this is just the rank of OP. |
| For loop-carried phis, the value is zero to avoid undoing the bias |
| in favor of the phi. */ |
| static long |
| propagate_rank (long rank, tree op) |
| { |
| long op_rank; |
| |
| if (loop_carried_phi (op)) |
| return rank; |
| |
| op_rank = get_rank (op); |
| |
| return MAX (rank, op_rank); |
| } |
| |
| /* Look up the rank of expression E, or -1 if none has been recorded. */ |
| |
| static inline long |
| find_operand_rank (tree e) |
| { |
| void **slot = pointer_map_contains (operand_rank, e); |
| return slot ? (long) (intptr_t) *slot : -1; |
| } |
| |
| /* Insert {E,RANK} into the operand rank hashtable. */ |
| |
| static inline void |
| insert_operand_rank (tree e, long rank) |
| { |
| void **slot; |
| gcc_assert (rank > 0); |
| slot = pointer_map_insert (operand_rank, e); |
| gcc_assert (!*slot); |
| *slot = (void *) (intptr_t) rank; |
| } |
| |
| /* Given an expression E, return the rank of the expression. */ |
| |
| static long |
| get_rank (tree e) |
| { |
| /* Constants have rank 0. */ |
| if (is_gimple_min_invariant (e)) |
| return 0; |
| |
| /* SSA_NAME's have the rank of the expression they are the result |
| of. |
| For globals and uninitialized values, the rank is 0. |
| For function arguments, use the pre-setup rank. |
| For PHI nodes, stores, asm statements, etc, we use the rank of |
| the BB. |
| For simple operations, the rank is the maximum rank of any of |
| its operands, or the bb_rank, whichever is less. |
| I make no claims that this is optimal; however, it gives good |
| results. */ |
| |
| /* We make an exception to the normal ranking system to break |
| dependences of accumulator variables in loops. Suppose we |
| have a simple one-block loop containing: |
| |
| x_1 = phi(x_0, x_2) |
| b = a + x_1 |
| c = b + d |
| x_2 = c + e |
| |
| As shown, each iteration of the calculation into x is fully |
| dependent upon the iteration before it. We would prefer to |
| see this in the form: |
| |
| x_1 = phi(x_0, x_2) |
| b = a + d |
| c = b + e |
| x_2 = c + x_1 |
| |
| If the loop is unrolled, the calculations of b and c from |
| different iterations can be interleaved. |
| |
| To obtain this result during reassociation, we bias the rank |
| of the phi definition x_1 upward, when it is recognized as an |
| accumulator pattern. The artificial rank causes it to be |
| added last, providing the desired independence. */ |
| |
| if (TREE_CODE (e) == SSA_NAME) |
| { |
| gimple stmt; |
| long rank; |
| int i, n; |
| tree op; |
| |
| if (SSA_NAME_IS_DEFAULT_DEF (e)) |
| return find_operand_rank (e); |
| |
| stmt = SSA_NAME_DEF_STMT (e); |
| if (gimple_code (stmt) == GIMPLE_PHI) |
| return phi_rank (stmt); |
| |
| if (!is_gimple_assign (stmt) |
| || gimple_vdef (stmt)) |
| return bb_rank[gimple_bb (stmt)->index]; |
| |
| /* If we already have a rank for this expression, use that. */ |
| rank = find_operand_rank (e); |
| if (rank != -1) |
| return rank; |
| |
| /* Otherwise, find the maximum rank for the operands. As an |
| exception, remove the bias from loop-carried phis when propagating |
| the rank so that dependent operations are not also biased. */ |
| rank = 0; |
| if (gimple_assign_single_p (stmt)) |
| { |
| tree rhs = gimple_assign_rhs1 (stmt); |
| n = TREE_OPERAND_LENGTH (rhs); |
| if (n == 0) |
| rank = propagate_rank (rank, rhs); |
| else |
| { |
| for (i = 0; i < n; i++) |
| { |
| op = TREE_OPERAND (rhs, i); |
| |
| if (op != NULL_TREE) |
| rank = propagate_rank (rank, op); |
| } |
| } |
| } |
| else |
| { |
| n = gimple_num_ops (stmt); |
| for (i = 1; i < n; i++) |
| { |
| op = gimple_op (stmt, i); |
| gcc_assert (op); |
| rank = propagate_rank (rank, op); |
| } |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Rank for "); |
| print_generic_expr (dump_file, e, 0); |
| fprintf (dump_file, " is %ld\n", (rank + 1)); |
| } |
| |
| /* Note the rank in the hashtable so we don't recompute it. */ |
| insert_operand_rank (e, (rank + 1)); |
| return (rank + 1); |
| } |
| |
| /* Globals, etc., are rank 0. */ |
| return 0; |
| } |
| |
| |
| /* We want integer constants to end up last no matter what, since they are |
| the ones we can do the most with. */ |
| #define INTEGER_CONST_TYPE 1 << 3 |
| #define FLOAT_CONST_TYPE 1 << 2 |
| #define OTHER_CONST_TYPE 1 << 1 |
| |
| /* Classify an invariant tree into integer, float, or other, so that |
| we can sort them to be near other constants of the same type. */ |
| static inline int |
| constant_type (tree t) |
| { |
| if (INTEGRAL_TYPE_P (TREE_TYPE (t))) |
| return INTEGER_CONST_TYPE; |
| else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) |
| return FLOAT_CONST_TYPE; |
| else |
| return OTHER_CONST_TYPE; |
| } |
| |
| /* qsort comparison function to sort operand entries PA and PB by rank |
| so that the sorted array is ordered by rank in decreasing order. */ |
| static int |
| sort_by_operand_rank (const void *pa, const void *pb) |
| { |
| const operand_entry_t oea = *(const operand_entry_t *)pa; |
| const operand_entry_t oeb = *(const operand_entry_t *)pb; |
| |
| /* It's nicer for optimize_ops_list if constants that are likely |
| to fold when added/multiplied/whatever are put next to each |
| other. Since all constants have rank 0, order them by type. */ |
| if (oeb->rank == 0 && oea->rank == 0) |
| { |
| if (constant_type (oeb->op) != constant_type (oea->op)) |
| return constant_type (oeb->op) - constant_type (oea->op); |
| else |
| /* To make sorting result stable, we use unique IDs to determine |
| order. */ |
| return oeb->id - oea->id; |
| } |
| |
| /* Lastly, make sure the versions that are the same go next to each |
| other. We use SSA_NAME_VERSION because it's stable. */ |
| if ((oeb->rank - oea->rank == 0) |
| && TREE_CODE (oea->op) == SSA_NAME |
| && TREE_CODE (oeb->op) == SSA_NAME) |
| { |
| if (SSA_NAME_VERSION (oeb->op) != SSA_NAME_VERSION (oea->op)) |
| return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op); |
| else |
| return oeb->id - oea->id; |
| } |
| |
| if (oeb->rank != oea->rank) |
| return oeb->rank - oea->rank; |
| else |
| return oeb->id - oea->id; |
| } |
| |
| /* Add an operand entry to *OPS for the tree operand OP. */ |
| |
| static void |
| add_to_ops_vec (vec<operand_entry_t> *ops, tree op) |
| { |
| operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); |
| |
| oe->op = op; |
| oe->rank = get_rank (op); |
| oe->id = next_operand_entry_id++; |
| oe->count = 1; |
| ops->safe_push (oe); |
| } |
| |
| /* Add an operand entry to *OPS for the tree operand OP with repeat |
| count REPEAT. */ |
| |
| static void |
| add_repeat_to_ops_vec (vec<operand_entry_t> *ops, tree op, |
| HOST_WIDE_INT repeat) |
| { |
| operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); |
| |
| oe->op = op; |
| oe->rank = get_rank (op); |
| oe->id = next_operand_entry_id++; |
| oe->count = repeat; |
| ops->safe_push (oe); |
| |
| reassociate_stats.pows_encountered++; |
| } |
| |
| /* Return true if STMT is a reassociable operation containing a binary |
| operation with tree code CODE, and is inside LOOP. */ |
| |
| static bool |
| is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop) |
| { |
| basic_block bb = gimple_bb (stmt); |
| |
| if (bb == NULL) |
| return false; |
| |
| if (!flow_bb_inside_loop_p (loop, bb)) |
| return false; |
| |
| if (is_gimple_assign (stmt) |
| && gimple_assign_rhs_code (stmt) == code |
| && has_single_use (gimple_assign_lhs (stmt))) |
| return true; |
| |
| return false; |
| } |
| |
| |
| /* Given NAME, if NAME is defined by a unary operation OPCODE, return the |
| operand of that operation. Otherwise, return NULL_TREE. */ |
| |
| static tree |
| get_unary_op (tree name, enum tree_code opcode) |
| { |
| gimple stmt = SSA_NAME_DEF_STMT (name); |
| |
| if (!is_gimple_assign (stmt)) |
| return NULL_TREE; |
| |
| if (gimple_assign_rhs_code (stmt) == opcode) |
| return gimple_assign_rhs1 (stmt); |
| return NULL_TREE; |
| } |
| |
| /* If CURR and LAST are a pair of ops that OPCODE allows us to |
| eliminate through equivalences, do so, remove them from OPS, and |
| return true. Otherwise, return false. */ |
| |
| static bool |
| eliminate_duplicate_pair (enum tree_code opcode, |
| vec<operand_entry_t> *ops, |
| bool *all_done, |
| unsigned int i, |
| operand_entry_t curr, |
| operand_entry_t last) |
| { |
| |
| /* If we have two of the same op, and the opcode is &, |, min, or max, |
| we can eliminate one of them. |
| If we have two of the same op, and the opcode is ^, we can |
| eliminate both of them. */ |
| |
| if (last && last->op == curr->op) |
| { |
| switch (opcode) |
| { |
| case MAX_EXPR: |
| case MIN_EXPR: |
| case BIT_IOR_EXPR: |
| case BIT_AND_EXPR: |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Equivalence: "); |
| print_generic_expr (dump_file, curr->op, 0); |
| fprintf (dump_file, " [&|minmax] "); |
| print_generic_expr (dump_file, last->op, 0); |
| fprintf (dump_file, " -> "); |
| print_generic_stmt (dump_file, last->op, 0); |
| } |
| |
| ops->ordered_remove (i); |
| reassociate_stats.ops_eliminated ++; |
| |
| return true; |
| |
| case BIT_XOR_EXPR: |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Equivalence: "); |
| print_generic_expr (dump_file, curr->op, 0); |
| fprintf (dump_file, " ^ "); |
| print_generic_expr (dump_file, last->op, 0); |
| fprintf (dump_file, " -> nothing\n"); |
| } |
| |
| reassociate_stats.ops_eliminated += 2; |
| |
| if (ops->length () == 2) |
| { |
| ops->truncate (0); |
| add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (last->op))); |
| *all_done = true; |
| } |
| else |
| { |
| ops->ordered_remove (i-1); |
| ops->ordered_remove (i-1); |
| } |
| |
| return true; |
| |
| default: |
| break; |
| } |
| } |
| return false; |
| } |
| |
| static vec<tree> plus_negates; |
| |
| /* If OPCODE is PLUS_EXPR and CURR->OP is a negate expression or a bitwise |
| not expression, look in OPS for a corresponding positive operation to |
| cancel it out. If we find one, remove both entries from OPS, append a |
| zero or -1 constant, respectively, and return true. Otherwise, |
| return false. */ |
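| |
| /* For instance (purely illustrative), if a PLUS_EXPR operand list |
| contains both t_2 = -a_1 and a_1 itself, the pair cancels: both |
| entries are removed from OPS and a zero of the appropriate type is |
| appended. Likewise t_3 = ~a_1 next to a_1 folds to -1. */ |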
| |
| static bool |
| eliminate_plus_minus_pair (enum tree_code opcode, |
| vec<operand_entry_t> *ops, |
| unsigned int currindex, |
| operand_entry_t curr) |
| { |
| tree negateop; |
| tree notop; |
| unsigned int i; |
| operand_entry_t oe; |
| |
| if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) |
| return false; |
| |
| negateop = get_unary_op (curr->op, NEGATE_EXPR); |
| notop = get_unary_op (curr->op, BIT_NOT_EXPR); |
| if (negateop == NULL_TREE && notop == NULL_TREE) |
| return false; |
| |
| /* Any non-negated version will have a rank that is one less than |
| the current rank. So once we hit those ranks, if we don't find |
| one, we can stop. */ |
| |
| for (i = currindex + 1; |
| ops->iterate (i, &oe) |
| && oe->rank >= curr->rank - 1 ; |
| i++) |
| { |
| if (oe->op == negateop) |
| { |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Equivalence: "); |
| print_generic_expr (dump_file, negateop, 0); |
| fprintf (dump_file, " + -"); |
| print_generic_expr (dump_file, oe->op, 0); |
| fprintf (dump_file, " -> 0\n"); |
| } |
| |
| ops->ordered_remove (i); |
| add_to_ops_vec (ops, build_zero_cst (TREE_TYPE (oe->op))); |
| ops->ordered_remove (currindex); |
| reassociate_stats.ops_eliminated ++; |
| |
| return true; |
| } |
| else if (oe->op == notop) |
| { |
| tree op_type = TREE_TYPE (oe->op); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Equivalence: "); |
| print_generic_expr (dump_file, notop, 0); |
| fprintf (dump_file, " + ~"); |
| print_generic_expr (dump_file, oe->op, 0); |
| fprintf (dump_file, " -> -1\n"); |
| } |
| |
| ops->ordered_remove (i); |
| add_to_ops_vec (ops, build_int_cst_type (op_type, -1)); |
| ops->ordered_remove (currindex); |
| reassociate_stats.ops_eliminated ++; |
| |
| return true; |
| } |
| } |
| |
| /* CURR->OP is a negate expr in a plus expr: save it for later |
| inspection in repropagate_negates(). */ |
| if (negateop != NULL_TREE) |
| plus_negates.safe_push (curr->op); |
| |
| return false; |
| } |
| |
| /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR, and CURR->OP is really a |
| bitwise not expression, look in OPS for a corresponding operand to |
| cancel it out. If we find one, collapse OPS to a single constant, |
| 0 for BIT_AND_EXPR or -1 for BIT_IOR_EXPR, and return true. Otherwise, |
| return false. */ |
| |
| static bool |
| eliminate_not_pairs (enum tree_code opcode, |
| vec<operand_entry_t> *ops, |
| unsigned int currindex, |
| operand_entry_t curr) |
| { |
| tree notop; |
| unsigned int i; |
| operand_entry_t oe; |
| |
| if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) |
| || TREE_CODE (curr->op) != SSA_NAME) |
| return false; |
| |
| notop = get_unary_op (curr->op, BIT_NOT_EXPR); |
| if (notop == NULL_TREE) |
| return false; |
| |
| /* Any non-not version will have a rank that is one less than |
| the current rank. So once we hit those ranks, if we don't find |
| one, we can stop. */ |
| |
| for (i = currindex + 1; |
| ops->iterate (i, &oe) |
| && oe->rank >= curr->rank - 1; |
| i++) |
| { |
| if (oe->op == notop) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Equivalence: "); |
| print_generic_expr (dump_file, notop, 0); |
| if (opcode == BIT_AND_EXPR) |
| fprintf (dump_file, " & ~"); |
| else if (opcode == BIT_IOR_EXPR) |
| fprintf (dump_file, " | ~"); |
| print_generic_expr (dump_file, oe->op, 0); |
| if (opcode == BIT_AND_EXPR) |
| fprintf (dump_file, " -> 0\n"); |
| else if (opcode == BIT_IOR_EXPR) |
| fprintf (dump_file, " -> -1\n"); |
| } |
| |
| if (opcode == BIT_AND_EXPR) |
| oe->op = build_zero_cst (TREE_TYPE (oe->op)); |
| else if (opcode == BIT_IOR_EXPR) |
| oe->op = build_all_ones_cst (TREE_TYPE (oe->op)); |
| |
| reassociate_stats.ops_eliminated += ops->length () - 1; |
| ops->truncate (0); |
| ops->quick_push (oe); |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /* Use any constant value that may be present in OPS to try to eliminate |
| operands. Note that this function is only really used when we've |
| eliminated ops for other reasons, or merged constants. Across |
| single statements, fold already does all of this, plus more. There |
| is little point in duplicating logic, so I've only included the |
| identities that I could ever construct testcases to trigger. */ |
| |
| static void |
| eliminate_using_constants (enum tree_code opcode, |
| vec<operand_entry_t> *ops) |
| { |
| operand_entry_t oelast = ops->last (); |
| tree type = TREE_TYPE (oelast->op); |
| |
| if (oelast->rank == 0 |
| && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) |
| { |
| switch (opcode) |
| { |
| case BIT_AND_EXPR: |
| if (integer_zerop (oelast->op)) |
| { |
| if (ops->length () != 1) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "Found & 0, removing all other ops\n"); |
| |
| reassociate_stats.ops_eliminated += ops->length () - 1; |
| |
| ops->truncate (0); |
| ops->quick_push (oelast); |
| return; |
| } |
| } |
| else if (integer_all_onesp (oelast->op)) |
| { |
| if (ops->length () != 1) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "Found & -1, removing\n"); |
| ops->pop (); |
| reassociate_stats.ops_eliminated++; |
| } |
| } |
| break; |
| case BIT_IOR_EXPR: |
| if (integer_all_onesp (oelast->op)) |
| { |
| if (ops->length () != 1) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "Found | -1, removing all other ops\n"); |
| |
| reassociate_stats.ops_eliminated += ops->length () - 1; |
| |
| ops->truncate (0); |
| ops->quick_push (oelast); |
| return; |
| } |
| } |
| else if (integer_zerop (oelast->op)) |
| { |
| if (ops->length () != 1) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "Found | 0, removing\n"); |
| ops->pop (); |
| reassociate_stats.ops_eliminated++; |
| } |
| } |
| break; |
| case MULT_EXPR: |
| if (integer_zerop (oelast->op) |
| || (FLOAT_TYPE_P (type) |
| && !HONOR_NANS (TYPE_MODE (type)) |
| && !HONOR_SIGNED_ZEROS (TYPE_MODE (type)) |
| && real_zerop (oelast->op))) |
| { |
| if (ops->length () != 1) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "Found * 0, removing all other ops\n"); |
| |
| reassociate_stats.ops_eliminated += ops->length () - 1; |
| ops->truncate (0); |
| ops->quick_push (oelast); |
| return; |
| } |
| } |
| else if (integer_onep (oelast->op) |
| || (FLOAT_TYPE_P (type) |
| && !HONOR_SNANS (TYPE_MODE (type)) |
| && real_onep (oelast->op))) |
| { |
| if (ops->length () != 1) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "Found * 1, removing\n"); |
| ops->pop (); |
| reassociate_stats.ops_eliminated++; |
| return; |
| } |
| } |
| break; |
| case BIT_XOR_EXPR: |
| case PLUS_EXPR: |
| case MINUS_EXPR: |
| if (integer_zerop (oelast->op) |
| || (FLOAT_TYPE_P (type) |
| && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) |
| && fold_real_zero_addition_p (type, oelast->op, |
| opcode == MINUS_EXPR))) |
| { |
| if (ops->length () != 1) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "Found [|^+] 0, removing\n"); |
| ops->pop (); |
| reassociate_stats.ops_eliminated++; |
| return; |
| } |
| } |
| break; |
| default: |
| break; |
| } |
| } |
| } |
| |
| |
| static void linearize_expr_tree (vec<operand_entry_t> *, gimple, |
| bool, bool); |
| |
| /* Structure for tracking and counting operands. */ |
| typedef struct oecount_s { |
| int cnt; |
| int id; |
| enum tree_code oecode; |
| tree op; |
| } oecount; |
| |
| |
| /* The heap for the oecount hashtable and the sorted list of operands. */ |
| static vec<oecount> cvec; |
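| |
| /* Note: the hash table built over these counts (CTABLE in |
| undistribute_ops_list) does not store oecount structures directly. |
| Each slot holds an index into CVEC biased by 42 and cast to a |
| pointer, presumably so the encoded value can never be 0 or 1, which |
| libiberty's htab reserves for empty and deleted entries; oecount_hash |
| and oecount_eq below decode such a cookie back into a CVEC element. */ |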
| |
| /* Hash function for oecount. */ |
| |
| static hashval_t |
| oecount_hash (const void *p) |
| { |
| const oecount *c = &cvec[(size_t)p - 42]; |
| return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; |
| } |
| |
| /* Comparison function for oecount. */ |
| |
| static int |
| oecount_eq (const void *p1, const void *p2) |
| { |
| const oecount *c1 = &cvec[(size_t)p1 - 42]; |
| const oecount *c2 = &cvec[(size_t)p2 - 42]; |
| return (c1->oecode == c2->oecode |
| && c1->op == c2->op); |
| } |
| |
| /* Comparison function for qsort sorting oecount elements by count. */ |
| |
| static int |
| oecount_cmp (const void *p1, const void *p2) |
| { |
| const oecount *c1 = (const oecount *)p1; |
| const oecount *c2 = (const oecount *)p2; |
| if (c1->cnt != c2->cnt) |
| return c1->cnt - c2->cnt; |
| else |
| /* If counts are identical, use unique IDs to stabilize qsort. */ |
| return c1->id - c2->id; |
| } |
| |
| /* Return TRUE iff STMT represents a builtin call that raises OP |
| to some exponent. */ |
| |
| static bool |
| stmt_is_power_of_op (gimple stmt, tree op) |
| { |
| tree fndecl; |
| |
| if (!is_gimple_call (stmt)) |
| return false; |
| |
| fndecl = gimple_call_fndecl (stmt); |
| |
| if (!fndecl |
| || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) |
| return false; |
| |
| switch (DECL_FUNCTION_CODE (fndecl)) |
| { |
| CASE_FLT_FN (BUILT_IN_POW): |
| CASE_FLT_FN (BUILT_IN_POWI): |
| return (operand_equal_p (gimple_call_arg (stmt, 0), op, 0)); |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* Given STMT which is a __builtin_pow* call, decrement its exponent |
| in place and return the result. Assumes that stmt_is_power_of_op |
| was previously called for STMT and returned TRUE. */ |
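| |
| /* For example (illustrative only), pow (x, 5.0) is rewritten in place |
| to pow (x, 4.0) and 4 is returned; powi (x, 3) becomes powi (x, 2) |
| and 2 is returned. */ |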
| |
| static HOST_WIDE_INT |
| decrement_power (gimple stmt) |
| { |
| REAL_VALUE_TYPE c, cint; |
| HOST_WIDE_INT power; |
| tree arg1; |
| |
| switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) |
| { |
| CASE_FLT_FN (BUILT_IN_POW): |
| arg1 = gimple_call_arg (stmt, 1); |
| c = TREE_REAL_CST (arg1); |
| power = real_to_integer (&c) - 1; |
| real_from_integer (&cint, VOIDmode, power, 0, 0); |
| gimple_call_set_arg (stmt, 1, build_real (TREE_TYPE (arg1), cint)); |
| return power; |
| |
| CASE_FLT_FN (BUILT_IN_POWI): |
| arg1 = gimple_call_arg (stmt, 1); |
| power = TREE_INT_CST_LOW (arg1) - 1; |
| gimple_call_set_arg (stmt, 1, build_int_cst (TREE_TYPE (arg1), power)); |
| return power; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Find the single immediate use of STMT's LHS, and replace it |
| with OP. Remove STMT. If STMT's LHS is the same as *DEF, |
| replace *DEF with OP as well. */ |
| |
| static void |
| propagate_op_to_single_use (tree op, gimple stmt, tree *def) |
| { |
| tree lhs; |
| gimple use_stmt; |
| use_operand_p use; |
| gimple_stmt_iterator gsi; |
| |
| if (is_gimple_call (stmt)) |
| lhs = gimple_call_lhs (stmt); |
| else |
| lhs = gimple_assign_lhs (stmt); |
| |
| gcc_assert (has_single_use (lhs)); |
| single_imm_use (lhs, &use, &use_stmt); |
| if (lhs == *def) |
| *def = op; |
| SET_USE (use, op); |
| if (TREE_CODE (op) != SSA_NAME) |
| update_stmt (use_stmt); |
| gsi = gsi_for_stmt (stmt); |
| unlink_stmt_vdef (stmt); |
| gsi_remove (&gsi, true); |
| release_defs (stmt); |
| } |
| |
| /* Walks the linear chain with result *DEF, searching for an operation |
| with operand OP and code OPCODE, and removing it from the chain. *DEF |
| is updated if there is only one operand but no operation left. */ |
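| |
| /* For instance (hypothetical names, assuming the usual single-use |
| chain), with d_5 = t_4 * c and t_4 = a * b, calling |
| zero_one_operation (&d_5, MULT_EXPR, b) deletes the t_4 statement |
| and rewrites the chain so that d_5 = a * c. */ |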
| |
| static void |
| zero_one_operation (tree *def, enum tree_code opcode, tree op) |
| { |
| gimple stmt = SSA_NAME_DEF_STMT (*def); |
| |
| do |
| { |
| tree name; |
| |
| if (opcode == MULT_EXPR |
| && stmt_is_power_of_op (stmt, op)) |
| { |
| if (decrement_power (stmt) == 1) |
| propagate_op_to_single_use (op, stmt, def); |
| return; |
| } |
| |
| name = gimple_assign_rhs1 (stmt); |
| |
| /* If this is the operation we look for and one of the operands |
| is ours, simply propagate the other operand into the stmt's |
| single use. */ |
| if (gimple_assign_rhs_code (stmt) == opcode |
| && (name == op |
| || gimple_assign_rhs2 (stmt) == op)) |
| { |
| if (name == op) |
| name = gimple_assign_rhs2 (stmt); |
| propagate_op_to_single_use (name, stmt, def); |
| return; |
| } |
| |
| /* We might have a multiply of two __builtin_pow* calls, and |
| the operand might be hiding in the rightmost one. */ |
| if (opcode == MULT_EXPR |
| && gimple_assign_rhs_code (stmt) == opcode |
| && TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME |
| && has_single_use (gimple_assign_rhs2 (stmt))) |
| { |
| gimple stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); |
| if (stmt_is_power_of_op (stmt2, op)) |
| { |
| if (decrement_power (stmt2) == 1) |
| propagate_op_to_single_use (op, stmt2, def); |
| return; |
| } |
| } |
| |
| /* Continue walking the chain. */ |
| gcc_assert (name != op |
| && TREE_CODE (name) == SSA_NAME); |
| stmt = SSA_NAME_DEF_STMT (name); |
| } |
| while (1); |
| } |
| |
| /* Builds one statement of type TYPE performing OP1 OPCODE OP2, with |
| the result assigned to a new SSA name. Places the statement after |
| the definition of either OP1 or OP2. Returns the new statement. */ |
| |
| static gimple |
| build_and_add_sum (tree type, tree op1, tree op2, enum tree_code opcode) |
| { |
| gimple op1def = NULL, op2def = NULL; |
| gimple_stmt_iterator gsi; |
| tree op; |
| gimple sum; |
| |
| /* Create the addition statement. */ |
| op = make_ssa_name (type, NULL); |
| sum = gimple_build_assign_with_ops (opcode, op, op1, op2); |
| |
| /* Find an insertion place and insert. */ |
| if (TREE_CODE (op1) == SSA_NAME) |
| op1def = SSA_NAME_DEF_STMT (op1); |
| if (TREE_CODE (op2) == SSA_NAME) |
| op2def = SSA_NAME_DEF_STMT (op2); |
| if ((!op1def || gimple_nop_p (op1def)) |
| && (!op2def || gimple_nop_p (op2def))) |
| { |
| gsi = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR)); |
| gsi_insert_before (&gsi, sum, GSI_NEW_STMT); |
| } |
| else if ((!op1def || gimple_nop_p (op1def)) |
| || (op2def && !gimple_nop_p (op2def) |
| && stmt_dominates_stmt_p (op1def, op2def))) |
| { |
| if (gimple_code (op2def) == GIMPLE_PHI) |
| { |
| gsi = gsi_after_labels (gimple_bb (op2def)); |
| gsi_insert_before (&gsi, sum, GSI_NEW_STMT); |
| } |
| else |
| { |
| if (!stmt_ends_bb_p (op2def)) |
| { |
| gsi = gsi_for_stmt (op2def); |
| gsi_insert_after (&gsi, sum, GSI_NEW_STMT); |
| } |
| else |
| { |
| edge e; |
| edge_iterator ei; |
| |
| FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs) |
| if (e->flags & EDGE_FALLTHRU) |
| gsi_insert_on_edge_immediate (e, sum); |
| } |
| } |
| } |
| else |
| { |
| if (gimple_code (op1def) == GIMPLE_PHI) |
| { |
| gsi = gsi_after_labels (gimple_bb (op1def)); |
| gsi_insert_before (&gsi, sum, GSI_NEW_STMT); |
| } |
| else |
| { |
| if (!stmt_ends_bb_p (op1def)) |
| { |
| gsi = gsi_for_stmt (op1def); |
| gsi_insert_after (&gsi, sum, GSI_NEW_STMT); |
| } |
| else |
| { |
| edge e; |
| edge_iterator ei; |
| |
| FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs) |
| if (e->flags & EDGE_FALLTHRU) |
| gsi_insert_on_edge_immediate (e, sum); |
| } |
| } |
| } |
| update_stmt (sum); |
| |
| return sum; |
| } |
| |
| /* Perform un-distribution of divisions and multiplications. |
| A * X + B * X is transformed into (A + B) * X, and A / X + B / X |
| into (A + B) / X for real X. |
| |
| The algorithm is organized as follows. |
| |
| - First we walk the addition chain *OPS looking for summands that |
| are defined by a multiplication or a real division. This results |
| in the candidates bitmap with relevant indices into *OPS. |
| |
| - Second we build the chains of multiplications or divisions for |
| these candidates, counting the number of occurrences of (operand, code) |
| pairs in all of the candidates chains. |
| |
| - Third we sort the (operand, code) pairs by number of occurrence and |
| process them starting with the pair with the most uses. |
| |
| * For each such pair we walk the candidates again to build a |
| second candidate bitmap noting all multiplication/division chains |
| that have at least one occurrence of (operand, code). |
| |
| * We build an alternate addition chain only covering these |
| candidates with one (operand, code) operation removed from their |
| multiplication/division chain. |
| |
| * The first candidate gets replaced by the alternate addition chain |
| multiplied/divided by the operand. |
| |
| * All candidate chains get disabled for further processing and |
| processing of (operand, code) pairs continues. |
| |
| The alternate addition chains built are re-processed by the main |
| reassociation algorithm which allows optimizing a * x * y + b * y * x |
| to (a + b) * x * y in one invocation of the reassociation pass. */ |
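| |
| /* A small illustrative walk-through (hypothetical names): for the |
| addition chain {a*x, b*x, c} the candidates are a*x and b*x, and the |
| counting table records (x, MULT) twice and (a, MULT) and (b, MULT) |
| once each. Processing (x, MULT) strips one multiplication by x from |
| each candidate chain, builds t = a + b, replaces the first candidate |
| with t * x and the second with 0, and leaves {t*x, 0, c} for the |
| normal reassociation machinery to finish. */ |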
| |
| static bool |
| undistribute_ops_list (enum tree_code opcode, |
| vec<operand_entry_t> *ops, struct loop *loop) |
| { |
| unsigned int length = ops->length (); |
| operand_entry_t oe1; |
| unsigned i, j; |
| sbitmap candidates, candidates2; |
| unsigned nr_candidates, nr_candidates2; |
| sbitmap_iterator sbi0; |
| vec<operand_entry_t> *subops; |
| htab_t ctable; |
| bool changed = false; |
| int next_oecount_id = 0; |
| |
| if (length <= 1 |
| || opcode != PLUS_EXPR) |
| return false; |
| |
| /* Build a list of candidates to process. */ |
| candidates = sbitmap_alloc (length); |
| bitmap_clear (candidates); |
| nr_candidates = 0; |
| FOR_EACH_VEC_ELT (*ops, i, oe1) |
| { |
| enum tree_code dcode; |
| gimple oe1def; |
| |
| if (TREE_CODE (oe1->op) != SSA_NAME) |
| continue; |
| oe1def = SSA_NAME_DEF_STMT (oe1->op); |
| if (!is_gimple_assign (oe1def)) |
| continue; |
| dcode = gimple_assign_rhs_code (oe1def); |
| if ((dcode != MULT_EXPR |
| && dcode != RDIV_EXPR) |
| || !is_reassociable_op (oe1def, dcode, loop)) |
| continue; |
| |
| bitmap_set_bit (candidates, i); |
| nr_candidates++; |
| } |
| |
| if (nr_candidates < 2) |
| { |
| sbitmap_free (candidates); |
| return false; |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "searching for un-distribute opportunities "); |
| print_generic_expr (dump_file, |
| (*ops)[bitmap_first_set_bit (candidates)]->op, 0); |
| fprintf (dump_file, " %d\n", nr_candidates); |
| } |
| |
| /* Build linearized sub-operand lists and the counting table. */ |
| cvec.create (0); |
| ctable = htab_create (15, oecount_hash, oecount_eq, NULL); |
| /* ??? Macro arguments cannot have multi-argument template types in |
| them. This typedef is needed to work around that limitation. */ |
| typedef vec<operand_entry_t> vec_operand_entry_t_heap; |
| subops = XCNEWVEC (vec_operand_entry_t_heap, ops->length ()); |
| EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) |
| { |
| gimple oedef; |
| enum tree_code oecode; |
| unsigned j; |
| |
| oedef = SSA_NAME_DEF_STMT ((*ops)[i]->op); |
| oecode = gimple_assign_rhs_code (oedef); |
| linearize_expr_tree (&subops[i], oedef, |
| associative_tree_code (oecode), false); |
| |
| FOR_EACH_VEC_ELT (subops[i], j, oe1) |
| { |
| oecount c; |
| void **slot; |
| size_t idx; |
| c.oecode = oecode; |
| c.cnt = 1; |
| c.id = next_oecount_id++; |
| c.op = oe1->op; |
| cvec.safe_push (c); |
| idx = cvec.length () + 41; |
| slot = htab_find_slot (ctable, (void *)idx, INSERT); |
| if (!*slot) |
| { |
| *slot = (void *)idx; |
| } |
| else |
| { |
| cvec.pop (); |
| cvec[(size_t)*slot - 42].cnt++; |
| } |
| } |
| } |
| htab_delete (ctable); |
| |
| /* Sort the counting table. */ |
| cvec.qsort (oecount_cmp); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| oecount *c; |
| fprintf (dump_file, "Candidates:\n"); |
| FOR_EACH_VEC_ELT (cvec, j, c) |
| { |
| fprintf (dump_file, " %u %s: ", c->cnt, |
| c->oecode == MULT_EXPR |
| ? "*" : c->oecode == RDIV_EXPR ? "/" : "?"); |
| print_generic_expr (dump_file, c->op, 0); |
| fprintf (dump_file, "\n"); |
| } |
| } |
| |
| /* Process the (operand, code) pairs in order of most occurrences. */ |
| candidates2 = sbitmap_alloc (length); |
| while (!cvec.is_empty ()) |
| { |
| oecount *c = &cvec.last (); |
| if (c->cnt < 2) |
| break; |
| |
| /* Now collect the operands in the outer chain that contain |
| the common operand in their inner chain. */ |
| bitmap_clear (candidates2); |
| nr_candidates2 = 0; |
| EXECUTE_IF_SET_IN_BITMAP (candidates, 0, i, sbi0) |
| { |
| gimple oedef; |
| enum tree_code oecode; |
| unsigned j; |
| tree op = (*ops)[i]->op; |
| |
| /* If we undistributed in this chain already, this may be |
| a constant. */ |
| if (TREE_CODE (op) != SSA_NAME) |
| continue; |
| |
| oedef = SSA_NAME_DEF_STMT (op); |
| oecode = gimple_assign_rhs_code (oedef); |
| if (oecode != c->oecode) |
| continue; |
| |
| FOR_EACH_VEC_ELT (subops[i], j, oe1) |
| { |
| if (oe1->op == c->op) |
| { |
| bitmap_set_bit (candidates2, i); |
| ++nr_candidates2; |
| break; |
| } |
| } |
| } |
| |
| if (nr_candidates2 >= 2) |
| { |
| operand_entry_t oe1, oe2; |
| gimple prod; |
| int first = bitmap_first_set_bit (candidates2); |
| |
| /* Build the new addition chain. */ |
| oe1 = (*ops)[first]; |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Building ("); |
| print_generic_expr (dump_file, oe1->op, 0); |
| } |
| zero_one_operation (&oe1->op, c->oecode, c->op); |
| EXECUTE_IF_SET_IN_BITMAP (candidates2, first+1, i, sbi0) |
| { |
| gimple sum; |
| oe2 = (*ops)[i]; |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, " + "); |
| print_generic_expr (dump_file, oe2->op, 0); |
| } |
| zero_one_operation (&oe2->op, c->oecode, c->op); |
| sum = build_and_add_sum (TREE_TYPE (oe1->op), |
| oe1->op, oe2->op, opcode); |
| oe2->op = build_zero_cst (TREE_TYPE (oe2->op)); |
| oe2->rank = 0; |
| oe1->op = gimple_get_lhs (sum); |
| } |
| |
| /* Apply the multiplication/division. */ |
| prod = build_and_add_sum (TREE_TYPE (oe1->op), |
| oe1->op, c->op, c->oecode); |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/"); |
| print_generic_expr (dump_file, c->op, 0); |
| fprintf (dump_file, "\n"); |
| } |
| |
| /* Record it in the addition chain and disable further |
| undistribution with this op. */ |
| oe1->op = gimple_assign_lhs (prod); |
| oe1->rank = get_rank (oe1->op); |
| subops[first].release (); |
| |
| changed = true; |
| } |
| |
| cvec.pop (); |
| } |
| |
| for (i = 0; i < ops->length (); ++i) |
| subops[i].release (); |
| free (subops); |
| cvec.release (); |
| sbitmap_free (candidates); |
| sbitmap_free (candidates2); |
| |
| return changed; |
| } |
| |
| /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison |
| expression, examine the other OPS to see if any of them are comparisons |
| of the same values, which we may be able to combine or eliminate. |
| For example, we can rewrite (a < b) | (a == b) as (a <= b). */ |
| |
| static bool |
| eliminate_redundant_comparison (enum tree_code opcode, |
| vec<operand_entry_t> *ops, |
| unsigned int currindex, |
| operand_entry_t curr) |
| { |
| tree op1, op2; |
| enum tree_code lcode, rcode; |
| gimple def1, def2; |
| int i; |
| operand_entry_t oe; |
| |
| if (opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) |
| return false; |
| |
| /* Check that CURR is a comparison. */ |
| if (TREE_CODE (curr->op) != SSA_NAME) |
| return false; |
| def1 = SSA_NAME_DEF_STMT (curr->op); |
| if (!is_gimple_assign (def1)) |
| return false; |
| lcode = gimple_assign_rhs_code (def1); |
| if (TREE_CODE_CLASS (lcode) != tcc_comparison) |
| return false; |
| op1 = gimple_assign_rhs1 (def1); |
| op2 = gimple_assign_rhs2 (def1); |
| |
| /* Now look for a similar comparison in the remaining OPS. */ |
| for (i = currindex + 1; ops->iterate (i, &oe); i++) |
| { |
| tree t; |
| |
| if (TREE_CODE (oe->op) != SSA_NAME) |
| continue; |
| def2 = SSA_NAME_DEF_STMT (oe->op); |
| if (!is_gimple_assign (def2)) |
| continue; |
| rcode = gimple_assign_rhs_code (def2); |
| if (TREE_CODE_CLASS (rcode) != tcc_comparison) |
| continue; |
| |
| /* If we got here, we have a match. See if we can combine the |
| two comparisons. */ |
| if (opcode == BIT_IOR_EXPR) |
| t = maybe_fold_or_comparisons (lcode, op1, op2, |
| rcode, gimple_assign_rhs1 (def2), |
| gimple_assign_rhs2 (def2)); |
| else |
| t = maybe_fold_and_comparisons (lcode, op1, op2, |
| rcode, gimple_assign_rhs1 (def2), |
| gimple_assign_rhs2 (def2)); |
| if (!t) |
| continue; |
| |
| /* maybe_fold_and_comparisons and maybe_fold_or_comparisons |
| always give us a boolean_type_node value back. If the original |
| BIT_AND_EXPR or BIT_IOR_EXPR was of a wider integer type, |
| we need to convert. */ |
| if (!useless_type_conversion_p (TREE_TYPE (curr->op), TREE_TYPE (t))) |
| t = fold_convert (TREE_TYPE (curr->op), t); |
| |
| if (TREE_CODE (t) != INTEGER_CST |
| && !operand_equal_p (t, curr->op, 0)) |
| { |
| enum tree_code subcode; |
| tree newop1, newop2; |
| if (!COMPARISON_CLASS_P (t)) |
| continue; |
| extract_ops_from_tree (t, &subcode, &newop1, &newop2); |
| STRIP_USELESS_TYPE_CONVERSION (newop1); |
| STRIP_USELESS_TYPE_CONVERSION (newop2); |
| if (!is_gimple_val (newop1) || !is_gimple_val (newop2)) |
| continue; |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Equivalence: "); |
| print_generic_expr (dump_file, curr->op, 0); |
| fprintf (dump_file, " %s ", op_symbol_code (opcode)); |
| print_generic_expr (dump_file, oe->op, 0); |
| fprintf (dump_file, " -> "); |
| print_generic_expr (dump_file, t, 0); |
| fprintf (dump_file, "\n"); |
| } |
| |
| /* Now we can delete oe, as it has been subsumed by the new combined |
| expression t. */ |
| ops->ordered_remove (i); |
| reassociate_stats.ops_eliminated ++; |
| |
| /* If t is the same as curr->op, we're done. Otherwise we must |
| replace curr->op with t. Special case is if we got a constant |
| back, in which case we add it to the end instead of in place of |
| the current entry. */ |
| if (TREE_CODE (t) == INTEGER_CST) |
| { |
| ops->ordered_remove (currindex); |
| add_to_ops_vec (ops, t); |
| } |
| else if (!operand_equal_p (t, curr->op, 0)) |
| { |
| gimple sum; |
| enum tree_code subcode; |
| tree newop1; |
| tree newop2; |
| gcc_assert (COMPARISON_CLASS_P (t)); |
| extract_ops_from_tree (t, &subcode, &newop1, &newop2); |
| STRIP_USELESS_TYPE_CONVERSION (newop1); |
| STRIP_USELESS_TYPE_CONVERSION (newop2); |
| gcc_checking_assert (is_gimple_val (newop1) |
| && is_gimple_val (newop2)); |
| sum = build_and_add_sum (TREE_TYPE (t), newop1, newop2, subcode); |
| curr->op = gimple_get_lhs (sum); |
| } |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Perform various identities and other optimizations on the list of |
| operand entries, stored in OPS. The tree code for the binary |
| operation between all the operands is OPCODE. */ |
| |
| static void |
| optimize_ops_list (enum tree_code opcode, |
| vec<operand_entry_t> *ops) |
| { |
| unsigned int length = ops->length (); |
| unsigned int i; |
| operand_entry_t oe; |
| operand_entry_t oelast = NULL; |
| bool iterate = false; |
| |
| if (length == 1) |
| return; |
| |
| oelast = ops->last (); |
| |
| /* If the last two are constants, pop the constants off, merge them |
| and try the next two. */ |
| if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op)) |
| { |
| operand_entry_t oelm1 = (*ops)[length - 2]; |
| |
| if (oelm1->rank == 0 |
| && is_gimple_min_invariant (oelm1->op) |
| && useless_type_conversion_p (TREE_TYPE (oelm1->op), |
| TREE_TYPE (oelast->op))) |
| { |
| tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op), |
| oelm1->op, oelast->op); |
| |
| if (folded && is_gimple_min_invariant (folded)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "Merging constants\n"); |
| |
| ops->pop (); |
| ops->pop (); |
| |
| add_to_ops_vec (ops, folded); |
| reassociate_stats.constants_eliminated++; |
| |
| optimize_ops_list (opcode, ops); |
| return; |
| } |
| } |
| } |
| |
| eliminate_using_constants (opcode, ops); |
| oelast = NULL; |
| |
| for (i = 0; ops->iterate (i, &oe);) |
| { |
| bool done = false; |
| |
| if (eliminate_not_pairs (opcode, ops, i, oe)) |
| return; |
| if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast) |
| || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)) |
| || (!done && eliminate_redundant_comparison (opcode, ops, i, oe))) |
| { |
| if (done) |
| return; |
| iterate = true; |
| oelast = NULL; |
| continue; |
| } |
| oelast = oe; |
| i++; |
| } |
| |
| length = ops->length (); |
| oelast = ops->last (); |
| |
| if (iterate) |
| optimize_ops_list (opcode, ops); |
| } |
| |
| /* The following functions are subroutines to optimize_range_tests and allow |
| it to try to change a logical combination of comparisons into a range |
| test. |
| |
| For example, both |
| X == 2 || X == 5 || X == 3 || X == 4 |
| and |
| X >= 2 && X <= 5 |
| are converted to |
| (unsigned) (X - 2) <= 3 |
| |
| For more information see the comments above fold_range_test in |
| fold-const.c; this implementation is for GIMPLE. */ |
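| |
| /* The transform works because subtracting the low bound maps the |
| accepted range onto [0, high - low]; in the example above X - 2 maps |
| [2, 5] onto [0, 3], and performing the comparison in an unsigned type |
| makes values below the low bound wrap to large numbers, so a single |
| unsigned <= test checks both bounds at once. */ |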
| |
| struct range_entry |
| { |
| tree exp; |
| tree low; |
| tree high; |
| bool in_p; |
| bool strict_overflow_p; |
| unsigned int idx, next; |
| }; |
| |
| /* This is similar to make_range in fold-const.c, but on top of |
| GIMPLE instead of trees. If EXP is non-NULL, it should be |
| an SSA_NAME and STMT argument is ignored, otherwise STMT |
| argument should be a GIMPLE_COND. */ |
| |
| static void |
| init_range_entry (struct range_entry *r, tree exp, gimple stmt) |
| { |
| int in_p; |
| tree low, high; |
| bool is_bool, strict_overflow_p; |
| |
| r->exp = NULL_TREE; |
| r->in_p = false; |
| r->strict_overflow_p = false; |
| r->low = NULL_TREE; |
| r->high = NULL_TREE; |
| if (exp != NULL_TREE |
| && (TREE_CODE (exp) != SSA_NAME || !INTEGRAL_TYPE_P (TREE_TYPE (exp)))) |
| return; |
| |
| /* Start with simply saying "EXP != 0" and then look at the code of EXP |
| and see if we can refine the range. Some of the cases below may not |
| happen, but it doesn't seem worth worrying about this. We "continue" |
| the outer loop when we've changed something; otherwise we "break" |
| the switch, which will "break" the while. */ |
| low = exp ? build_int_cst (TREE_TYPE (exp), 0) : boolean_false_node; |
| high = low; |
| in_p = 0; |
| strict_overflow_p = false; |
| is_bool = false; |
| if (exp == NULL_TREE) |
| is_bool = true; |
| else if (TYPE_PRECISION (TREE_TYPE (exp)) == 1) |
| { |
| if (TYPE_UNSIGNED (TREE_TYPE (exp))) |
| is_bool = true; |
| else |
| return; |
| } |
| else if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE) |
| is_bool = true; |
| |
| while (1) |
| { |
| enum tree_code code; |
| tree arg0, arg1, exp_type; |
| tree nexp; |
| location_t loc; |
| |
| if (exp != NULL_TREE) |
| { |
| if (TREE_CODE (exp) != SSA_NAME) |
| break; |
| |
| stmt = SSA_NAME_DEF_STMT (exp); |
| if (!is_gimple_assign (stmt)) |
| break; |
| |
| code = gimple_assign_rhs_code (stmt); |
| arg0 = gimple_assign_rhs1 (stmt); |
| arg1 = gimple_assign_rhs2 (stmt); |
| exp_type = TREE_TYPE (exp); |
| } |
| else |
| { |
| code = gimple_cond_code (stmt); |
| arg0 = gimple_cond_lhs (stmt); |
| arg1 = gimple_cond_rhs (stmt); |
| exp_type = boolean_type_node; |
| } |
| |
| if (TREE_CODE (arg0) != SSA_NAME) |
| break; |
| loc = gimple_location (stmt); |
| switch (code) |
| { |
| case BIT_NOT_EXPR: |
| if (TREE_CODE (TREE_TYPE (exp)) == BOOLEAN_TYPE |
| /* Ensure the range is either +[-,0], +[0,0], |
| -[-,0], -[0,0] or +[1,-], +[1,1], -[1,-] or |
| -[1,1]. If it is e.g. +[-,-] or -[-,-] |
| or similar expression of unconditional true or |
| false, it should not be negated. */ |
| && ((high && integer_zerop (high)) |
| || (low && integer_onep (low)))) |
| { |
| in_p = !in_p; |
| exp = arg0; |
| continue; |
| } |
| break; |
| case SSA_NAME: |
| exp = arg0; |
| continue; |
| CASE_CONVERT: |
| if (is_bool) |
| goto do_default; |
| if (TYPE_PRECISION (TREE_TYPE (arg0)) == 1) |
| { |
| if (TYPE_UNSIGNED (TREE_TYPE (arg0))) |
| is_bool = true; |
| else |
| return; |
| } |
| else if (TREE_CODE (TREE_TYPE (arg0)) == BOOLEAN_TYPE) |
| is_bool = true; |
| goto do_default; |
| case EQ_EXPR: |
| case NE_EXPR: |
| case LT_EXPR: |
| case LE_EXPR: |
| case GE_EXPR: |
| case GT_EXPR: |
| is_bool = true; |
| /* FALLTHRU */ |
| default: |
| if (!is_bool) |
| return; |
| do_default: |
| nexp = make_range_step (loc, code, arg0, arg1, exp_type, |
| &low, &high, &in_p, |
| &strict_overflow_p); |
| if (nexp != NULL_TREE) |
| { |
| exp = nexp; |
| gcc_assert (TREE_CODE (exp) == SSA_NAME); |
| continue; |
| } |
| break; |
| } |
| break; |
| } |
| if (is_bool) |
| { |
| r->exp = exp; |
| r->in_p = in_p; |
| r->low = low; |
| r->high = high; |
| r->strict_overflow_p = strict_overflow_p; |
| } |
| } |
| |
| /* Comparison function for qsort. Sort entries |
| without SSA_NAME exp first, then with SSA_NAMEs sorted |
| by increasing SSA_NAME_VERSION, and for the same SSA_NAMEs |
| by increasing ->low and if ->low is the same, by increasing |
| ->high. ->low == NULL_TREE means minimum, ->high == NULL_TREE |
| maximum. */ |
| |
| static int |
| range_entry_cmp (const void *a, const void *b) |
| { |
| const struct range_entry *p = (const struct range_entry *) a; |
| const struct range_entry *q = (const struct range_entry *) b; |
| |
| if (p->exp != NULL_TREE && TREE_CODE (p->exp) == SSA_NAME) |
| { |
| if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) |
| { |
| /* Group range_entries for the same SSA_NAME together. */ |
| if (SSA_NAME_VERSION (p->exp) < SSA_NAME_VERSION (q->exp)) |
| return -1; |
| else if (SSA_NAME_VERSION (p->exp) > SSA_NAME_VERSION (q->exp)) |
| return 1; |
| /* If ->low is different, NULL low goes first, then by |
| ascending low. */ |
| if (p->low != NULL_TREE) |
| { |
| if (q->low != NULL_TREE) |
| { |
| tree tem = fold_binary (LT_EXPR, boolean_type_node, |
| p->low, q->low); |
| if (tem && integer_onep (tem)) |
| return -1; |
| tem = fold_binary (GT_EXPR, boolean_type_node, |
| p->low, q->low); |
| if (tem && integer_onep (tem)) |
| return 1; |
| } |
| else |
| return 1; |
| } |
| else if (q->low != NULL_TREE) |
| return -1; |
| /* If ->high is different, NULL high goes last, before that by |
| ascending high. */ |
| if (p->high != NULL_TREE) |
| { |
| if (q->high != NULL_TREE) |
| { |
| tree tem = fold_binary (LT_EXPR, boolean_type_node, |
| p->high, q->high); |
| if (tem && integer_onep (tem)) |
| return -1; |
| tem = fold_binary (GT_EXPR, boolean_type_node, |
| p->high, q->high); |
| if (tem && integer_onep (tem)) |
| return 1; |
| } |
| else |
| return -1; |
| } |
| else if (q->high != NULL_TREE) |
| return 1; |
| /* If both ranges are the same, sort below by ascending idx. */ |
| } |
| else |
| return 1; |
| } |
| else if (q->exp != NULL_TREE && TREE_CODE (q->exp) == SSA_NAME) |
| return -1; |
| |
| if (p->idx < q->idx) |
| return -1; |
| else |
| { |
| gcc_checking_assert (p->idx > q->idx); |
| return 1; |
| } |
| } |
| |
| /* Helper routine of optimize_range_tests. |
| [EXP, IN_P, LOW, HIGH, STRICT_OVERFLOW_P] is a merged range for |
| RANGE and OTHERRANGE through OTHERRANGE + COUNT - 1 ranges, |
| OPCODE and OPS are arguments of optimize_range_tests. Return |
| true if the range merge has been successful. |
| If OPCODE is ERROR_MARK, this is called from within |
| maybe_optimize_range_tests and is performing inter-bb range optimization. |
| Changes should then be performed right away, and whether an op is |
| BIT_AND_EXPR or BIT_IOR_EXPR is found in oe->rank. */ |
| |
| static bool |
| update_range_test (struct range_entry *range, struct range_entry *otherrange, |
| unsigned int count, enum tree_code opcode, |
| vec<operand_entry_t> *ops, tree exp, bool in_p, |
| tree low, tree high, bool strict_overflow_p) |
| { |
| operand_entry_t oe = (*ops)[range->idx]; |
| tree op = oe->op; |
| gimple stmt = op ? SSA_NAME_DEF_STMT (op) : last_stmt (BASIC_BLOCK (oe->id)); |
| location_t loc = gimple_location (stmt); |
| tree optype = op ? TREE_TYPE (op) : boolean_type_node; |
| tree tem = build_range_check (loc, optype, exp, in_p, low, high); |
| enum warn_strict_overflow_code wc = WARN_STRICT_OVERFLOW_COMPARISON; |
| gimple_stmt_iterator gsi; |
| |
| if (tem == NULL_TREE) |
| return false; |
| |
| if (strict_overflow_p && issue_strict_overflow_warning (wc)) |
| warning_at (loc, OPT_Wstrict_overflow, |
| "assuming signed overflow does not occur " |
| "when simplifying range test"); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| struct range_entry *r; |
| fprintf (dump_file, "Optimizing range tests "); |
| print_generic_expr (dump_file, range->exp, 0); |
| fprintf (dump_file, " %c[", range->in_p ? '+' : '-'); |
| print_generic_expr (dump_file, range->low, 0); |
| fprintf (dump_file, ", "); |
| print_generic_expr (dump_file, range->high, 0); |
| fprintf (dump_file, "]"); |
| for (r = otherrange; r < otherrange + count; r++) |
| { |
| fprintf (dump_file, " and %c[", r->in_p ? '+' : '-'); |
| print_generic_expr (dump_file, r->low, 0); |
| fprintf (dump_file, ", "); |
| print_generic_expr (dump_file, r->high, 0); |
| fprintf (dump_file, "]"); |
| } |
| fprintf (dump_file, "\n into "); |
| print_generic_expr (dump_file, tem, 0); |
| fprintf (dump_file, "\n"); |
| } |
| |
| if (opcode == BIT_IOR_EXPR |
| || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) |
| tem = invert_truthvalue_loc (loc, tem); |
| |
| tem = fold_convert_loc (loc, optype, tem); |
| gsi = gsi_for_stmt (stmt); |
| /* In rare cases range->exp can be equal to lhs of stmt. |
| In that case we have to insert after the stmt rather than before |
| it. */ |
| if (op == range->exp) |
| tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, false, |
| GSI_SAME_STMT); |
| else |
| tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE, true, |
| GSI_SAME_STMT); |
| |
| /* If doing inter-bb range test optimization, update the |
| stmts immediately. Start with changing the first range test |
| immediate use to the new value (TEM), or, if the first range |
| test is a GIMPLE_COND stmt, change that condition. */ |
| if (opcode == ERROR_MARK) |
| { |
| if (op) |
| { |
| imm_use_iterator iter; |
| use_operand_p use_p; |
| gimple use_stmt; |
| |
| FOR_EACH_IMM_USE_STMT (use_stmt, iter, op) |
| { |
| if (is_gimple_debug (use_stmt)) |
| continue; |
| FOR_EACH_IMM_USE_ON_STMT (use_p, iter) |
| SET_USE (use_p, tem); |
| update_stmt (use_stmt); |
| } |
| } |
| else |
| { |
| gimple_cond_set_code (stmt, NE_EXPR); |
| gimple_cond_set_lhs (stmt, tem); |
| gimple_cond_set_rhs (stmt, boolean_false_node); |
| update_stmt (stmt); |
| } |
| } |
| oe->op = tem; |
| range->exp = exp; |
| range->low = low; |
| range->high = high; |
| range->in_p = in_p; |
| range->strict_overflow_p = false; |
| |
| for (range = otherrange; range < otherrange + count; range++) |
| { |
| oe = (*ops)[range->idx]; |
| /* Now change all the other range test immediate uses, so that |
| those tests will be optimized away. */ |
| if (opcode == ERROR_MARK) |
| { |
| if (oe->op) |
| { |
| imm_use_iterator iter; |
| use_operand_p use_p; |
| gimple use_stmt; |
| |
| FOR_EACH_IMM_USE_STMT (use_stmt, iter, oe->op) |
| { |
| if (is_gimple_debug (use_stmt)) |
| continue; |
| /* If imm use of _8 is a statement like _7 = _8 | _9;, |
| adjust it into _7 = _9;. */ |
| if (is_gimple_assign (use_stmt) |
| && gimple_assign_rhs_code (use_stmt) == oe->rank) |
| { |
| tree expr = NULL_TREE; |
| if (oe->op == gimple_assign_rhs1 (use_stmt)) |
| expr = gimple_assign_rhs2 (use_stmt); |
| else if (oe->op == gimple_assign_rhs2 (use_stmt)) |
| expr = gimple_assign_rhs1 (use_stmt); |
| if (expr |
| && expr != oe->op |
| && TREE_CODE (expr) == SSA_NAME) |
| { |
| gimple_stmt_iterator gsi2 = gsi_for_stmt (use_stmt); |
| gimple_assign_set_rhs_with_ops (&gsi2, SSA_NAME, |
| expr, NULL_TREE); |
| update_stmt (use_stmt); |
| continue; |
| } |
| } |
| /* If imm use of _8 is a statement like _7 = (int) _8;, |
| adjust it into _7 = 0; or _7 = 1;. */ |
| if (gimple_assign_cast_p (use_stmt) |
| && oe->op == gimple_assign_rhs1 (use_stmt)) |
| { |
| tree lhs = gimple_assign_lhs (use_stmt); |
| if (INTEGRAL_TYPE_P (TREE_TYPE (lhs))) |
| { |
| gimple_stmt_iterator gsi2 |
| = gsi_for_stmt (use_stmt); |
| tree expr = build_int_cst (TREE_TYPE (lhs), |
| oe->rank == BIT_IOR_EXPR |
| ? 0 : 1); |
| gimple_assign_set_rhs_with_ops (&gsi2, |
| INTEGER_CST, |
| expr, NULL_TREE); |
| update_stmt (use_stmt); |
| continue; |
| } |
| } |
| /* Otherwise replace the use with 0 or 1. */ |
| FOR_EACH_IMM_USE_ON_STMT (use_p, iter) |
| SET_USE (use_p, |
| build_int_cst (TREE_TYPE (oe->op), |
| oe->rank == BIT_IOR_EXPR |
| ? 0 : 1)); |
| update_stmt (use_stmt); |
| } |
| } |
| else |
| { |
| /* If range test was a GIMPLE_COND, simply change it |
| into an always false or always true condition. */ |
| stmt = last_stmt (BASIC_BLOCK (oe->id)); |
| if (oe->rank == BIT_IOR_EXPR) |
| gimple_cond_make_false (stmt); |
| else |
| gimple_cond_make_true (stmt); |
| update_stmt (stmt); |
| } |
| } |
| oe->op = error_mark_node; |
| range->exp = NULL_TREE; |
| } |
| return true; |
| } |
| |
| /* Optimize range tests, similarly to how fold_range_test optimizes |
| them on trees. The tree code for the binary |
| operation between all the operands is OPCODE. |
| If OPCODE is ERROR_MARK, optimize_range_tests is called from within |
| maybe_optimize_range_tests for inter-bb range optimization. |
| In that case, if oe->op is NULL, oe->id is the index of the basic block |
| whose GIMPLE_COND is && or ||ed into the test, and oe->rank says |
| the actual opcode. */ |
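| |
| /* A purely illustrative example (SSA names made up): given |
| _5 = x_1 == 2; _6 = x_1 == 3; _7 = _5 | _6; |
| the ranges recorded for x_1 are [2, 2] and [3, 3]; the first loop |
| below merges them into the single range [2, 3], which |
| update_range_test materializes as something like |
| (unsigned) x_1 - 2 <= 1, and the now-redundant operand is removed |
| from the ops vector at the end. */ |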
| |
| static void |
| optimize_range_tests (enum tree_code opcode, |
| vec<operand_entry_t> *ops) |
| { |
| unsigned int length = ops->length (), i, j, first; |
| operand_entry_t oe; |
| struct range_entry *ranges; |
| bool any_changes = false; |
| |
| if (length == 1) |
| return; |
| |
| ranges = XNEWVEC (struct range_entry, length); |
| for (i = 0; i < length; i++) |
| { |
| oe = (*ops)[i]; |
| ranges[i].idx = i; |
| init_range_entry (ranges + i, oe->op, |
| oe->op ? NULL : last_stmt (BASIC_BLOCK (oe->id))); |
| /* For |, invert it now; we will invert it again before emitting |
| the optimized expression. */ |
| if (opcode == BIT_IOR_EXPR |
| || (opcode == ERROR_MARK && oe->rank == BIT_IOR_EXPR)) |
| ranges[i].in_p = !ranges[i].in_p; |
| } |
| |
| qsort (ranges, length, sizeof (*ranges), range_entry_cmp); |
| for (i = 0; i < length; i++) |
| if (ranges[i].exp != NULL_TREE && TREE_CODE (ranges[i].exp) == SSA_NAME) |
| break; |
| |
| /* Try to merge ranges. */ |
| for (first = i; i < length; i++) |
| { |
| tree low = ranges[i].low; |
| tree high = ranges[i].high; |
| int in_p = ranges[i].in_p; |
| bool strict_overflow_p = ranges[i].strict_overflow_p; |
| int update_fail_count = 0; |
| |
| for (j = i + 1; j < length; j++) |
| { |
| if (ranges[i].exp != ranges[j].exp) |
| break; |
| if (!merge_ranges (&in_p, &low, &high, in_p, low, high, |
| ranges[j].in_p, ranges[j].low, ranges[j].high)) |
| break; |
| strict_overflow_p |= ranges[j].strict_overflow_p; |
| } |
| |
| if (j == i + 1) |
| continue; |
| |
| if (update_range_test (ranges + i, ranges + i + 1, j - i - 1, opcode, |
| ops, ranges[i].exp, in_p, low, high, |
| strict_overflow_p)) |
| { |
| i = j - 1; |
| any_changes = true; |
| } |
| /* Avoid quadratic complexity if all merge_ranges calls would succeed, |
| while update_range_test would fail. */ |
| else if (update_fail_count == 64) |
| i = j - 1; |
| else |
| ++update_fail_count; |
| } |
| |
| /* Optimize X == CST1 || X == CST2 |
| if popcount (CST1 ^ CST2) == 1 into |
| (X & ~(CST1 ^ CST2)) == (CST1 & ~(CST1 ^ CST2)). |
| Similarly for ranges. E.g. |
| X != 2 && X != 3 && X != 10 && X != 11 |
| will be transformed by the above loop into |
| (X - 2U) <= 1U && (X - 10U) <= 1U |
| and this loop can transform that into |
| ((X & ~8) - 2U) <= 1U. */ |
| for (i = first; i < length; i++) |
| { |
| tree lowi, highi, lowj, highj, type, lowxor, highxor, tem, exp; |
| |
| if (ranges[i].exp == NULL_TREE || ranges[i].in_p) |
| continue; |
| type = TREE_TYPE (ranges[i].exp); |
| if (!INTEGRAL_TYPE_P (type)) |
| continue; |
| lowi = ranges[i].low; |
| if (lowi == NULL_TREE) |
| lowi = TYPE_MIN_VALUE (type); |
| highi = ranges[i].high; |
| if (highi == NULL_TREE) |
| continue; |
| for (j = i + 1; j < length && j < i + 64; j++) |
| { |
| if (ranges[j].exp == NULL_TREE) |
| continue; |
| if (ranges[i].exp != ranges[j].exp) |
| break; |
| if (ranges[j].in_p) |
| continue; |
| lowj = ranges[j].low; |
| if (lowj == NULL_TREE) |
| continue; |
| highj = ranges[j].high; |
| if (highj == NULL_TREE) |
| highj = TYPE_MAX_VALUE (type); |
| tem = fold_binary (GT_EXPR, boolean_type_node, |
| lowj, highi); |
| if (tem == NULL_TREE || !integer_onep (tem)) |
| continue; |
| lowxor = fold_binary (BIT_XOR_EXPR, type, lowi, lowj); |
| if (lowxor == NULL_TREE || TREE_CODE (lowxor) != INTEGER_CST) |
| continue; |
| gcc_checking_assert (!integer_zerop (lowxor)); |
| tem = fold_binary (MINUS_EXPR, type, lowxor, |
| build_int_cst (type, 1)); |
| if (tem == NULL_TREE) |
| continue; |
| tem = fold_binary (BIT_AND_EXPR, type, lowxor, tem); |
| if (tem == NULL_TREE || !integer_zerop (tem)) |
| continue; |
| highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); |
| if (!tree_int_cst_equal (lowxor, highxor)) |
| continue; |
| tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); |
| exp = fold_build2 (BIT_AND_EXPR, type, ranges[i].exp, tem); |
| lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); |
| highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); |
| if (update_range_test (ranges + i, ranges + j, 1, opcode, ops, exp, |
| ranges[i].in_p, lowj, highj, |
| ranges[i].strict_overflow_p |
| || ranges[j].strict_overflow_p)) |
| { |
| any_changes = true; |
| break; |
| } |
| } |
| } |
| |
| if (any_changes && opcode != ERROR_MARK) |
| { |
| j = 0; |
| FOR_EACH_VEC_ELT (*ops, i, oe) |
| { |
| if (oe->op == error_mark_node) |
| continue; |
| else if (i != j) |
| (*ops)[j] = oe; |
| j++; |
| } |
| ops->truncate (j); |
| } |
| |
| XDELETEVEC (ranges); |
| } |
| |
| /* Return true if STMT is a cast like: |
| <bb N>: |
| ... |
| _123 = (int) _234; |
| |
| <bb M>: |
| # _345 = PHI <_123(N), 1(...), 1(...)> |
| where _234 has bool type, _123 has a single use and |
| bb N has a single successor M. This is commonly used in |
| the last block of a range test. */ |
| |
| static bool |
| final_range_test_p (gimple stmt) |
| { |
| basic_block bb, rhs_bb; |
| edge e; |
| tree lhs, rhs; |
| use_operand_p use_p; |
| gimple use_stmt; |
| |
| if (!gimple_assign_cast_p (stmt)) |
| return false; |
| bb = gimple_bb (stmt); |
| if (!single_succ_p (bb)) |
| return false; |
| e = single_succ_edge (bb); |
| if (e->flags & EDGE_COMPLEX) |
| return false; |
| |
| lhs = gimple_assign_lhs (stmt); |
| rhs = gimple_assign_rhs1 (stmt); |
| if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
| || TREE_CODE (rhs) != SSA_NAME |
| || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE) |
| return false; |
| |
| /* Test whether lhs is consumed only by a PHI in the only successor bb. */ |
| if (!single_imm_use (lhs, &use_p, &use_stmt)) |
| return false; |
| |
| if (gimple_code (use_stmt) != GIMPLE_PHI |
| || gimple_bb (use_stmt) != e->dest) |
| return false; |
| |
| /* And that the rhs is defined in the same loop. */ |
| rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs)); |
| if (rhs_bb == NULL |
| || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb)) |
| return false; |
| |
| return true; |
| } |
| |
| /* Return true if BB is a suitable basic block for inter-bb range test |
| optimization. If BACKWARD is true, BB should be the only predecessor |
| of TEST_BB, and *OTHER_BB is either NULL and filled in by the routine, |
| or compared against to find the common basic block to which all the |
| conditions branch if true resp. false. If BACKWARD is false, TEST_BB |
| should be the only predecessor of BB. */ |
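| |
| /* A sketch of the shape being matched (illustrative only): |
| <bb 3>: if (a_1 < 10) goto <bb 4>; else goto <bb 7>; |
| <bb 4>: if (b_2 != 0) goto <bb 5>; else goto <bb 7>; |
| With BACKWARD true, BB = bb 3 and TEST_BB = bb 4; *OTHER_BB is |
| filled in with bb 7, provided the PHI arguments in bb 7 for the |
| edges from bb 3 and bb 4 are equal, as checked below. */ |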
| |
| static bool |
| suitable_cond_bb (basic_block bb, basic_block test_bb, basic_block *other_bb, |
| bool backward) |
| { |
| edge_iterator ei, ei2; |
| edge e, e2; |
| gimple stmt; |
| gimple_stmt_iterator gsi; |
| bool other_edge_seen = false; |
| bool is_cond; |
| |
| if (test_bb == bb) |
| return false; |
| /* Check last stmt first. */ |
| stmt = last_stmt (bb); |
| if (stmt == NULL |
| || (gimple_code (stmt) != GIMPLE_COND |
| && (backward || !final_range_test_p (stmt))) |
| || gimple_visited_p (stmt) |
| || stmt_could_throw_p (stmt) |
| || *other_bb == bb) |
| return false; |
| is_cond = gimple_code (stmt) == GIMPLE_COND; |
| if (is_cond) |
| { |
| /* If last stmt is GIMPLE_COND, verify that one of the succ edges |
| goes to the next bb (if BACKWARD, it is TEST_BB), and the other |
| to *OTHER_BB (if not set yet, try to find it out). */ |
| if (EDGE_COUNT (bb->succs) != 2) |
| return false; |
| FOR_EACH_EDGE (e, ei, bb->succs) |
| { |
| if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) |
| return false; |
| if (e->dest == test_bb) |
| { |
| if (backward) |
| continue; |
| else |
| return false; |
| } |
| if (e->dest == bb) |
| return false; |
| if (*other_bb == NULL) |
| { |
| FOR_EACH_EDGE (e2, ei2, test_bb->succs) |
| if (!(e2->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE))) |
| return false; |
| else if (e->dest == e2->dest) |
| *other_bb = e->dest; |
| if (*other_bb == NULL) |
| return false; |
| } |
| if (e->dest == *other_bb) |
| other_edge_seen = true; |
| else if (backward) |
| return false; |
| } |
| if (*other_bb == NULL || !other_edge_seen) |
| return false; |
| } |
| else if (single_succ (bb) != *other_bb) |
| return false; |
| |
| /* Now check all PHIs of *OTHER_BB. */ |
| e = find_edge (bb, *other_bb); |
| e2 = find_edge (test_bb, *other_bb); |
| for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) |
| { |
| gimple phi = gsi_stmt (gsi); |
| /* If both BB and TEST_BB end with GIMPLE_COND, all PHI arguments |
| corresponding to BB and TEST_BB predecessor must be the same. */ |
| if (!operand_equal_p (gimple_phi_arg_def (phi, e->dest_idx), |
| gimple_phi_arg_def (phi, e2->dest_idx), 0)) |
| { |
| /* Otherwise, if one of the blocks doesn't end with GIMPLE_COND, |
| one of the PHIs should have the lhs of the last stmt in |
| that block as PHI arg and that PHI should have 0 or 1 |
| corresponding to it in all other range test basic blocks |
| considered. */ |
| if (!is_cond) |
| { |
| if (gimple_phi_arg_def (phi, e->dest_idx) |
| == gimple_assign_lhs (stmt) |
| && (integer_zerop (gimple_phi_arg_def (phi, e2->dest_idx)) |
| || integer_onep (gimple_phi_arg_def (phi, |
| e2->dest_idx)))) |
| continue; |
| } |
| else |
| { |
| gimple test_last = last_stmt (test_bb); |
| if (gimple_code (test_last) != GIMPLE_COND |
| && gimple_phi_arg_def (phi, e2->dest_idx) |
| == gimple_assign_lhs (test_last) |
| && (integer_zerop (gimple_phi_arg_def (phi, e->dest_idx)) |
| || integer_onep (gimple_phi_arg_def (phi, e->dest_idx)))) |
| continue; |
| } |
| |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| /* Return true if BB doesn't have side-effects that would disallow |
| range test optimization, all SSA_NAMEs set in the bb are consumed |
| in the bb, and there are no PHIs. */ |
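| |
| /* For instance (illustrative), a block consisting only of |
| _5 = a_1 < 10; _6 = b_2 > 3; _7 = _5 & _6; if (_7 != 0) ... |
| qualifies: every SSA_NAME defined here is used only here, nothing |
| can trap or have side-effects, and there are no PHI nodes. */ |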
| |
| static bool |
| no_side_effect_bb (basic_block bb) |
| { |
| gimple_stmt_iterator gsi; |
| gimple last; |
| |
| if (!gimple_seq_empty_p (phi_nodes (bb))) |
| return false; |
| last = last_stmt (bb); |
| for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
| { |
| gimple stmt = gsi_stmt (gsi); |
| tree lhs; |
| imm_use_iterator imm_iter; |
| use_operand_p use_p; |
| |
| if (is_gimple_debug (stmt)) |
| continue; |
| if (gimple_has_side_effects (stmt)) |
| return false; |
| if (stmt == last) |
| return true; |
| if (!is_gimple_assign (stmt)) |
| return false; |
| lhs = gimple_assign_lhs (stmt); |
| if (TREE_CODE (lhs) != SSA_NAME) |
| return false; |
| if (gimple_assign_rhs_could_trap_p (stmt)) |
| return false; |
| FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) |
| { |
| gimple use_stmt = USE_STMT (use_p); |
| if (is_gimple_debug (use_stmt)) |
| continue; |
| if (gimple_bb (use_stmt) != bb) |
| return false; |
| } |
| } |
| return false; |
| } |
| |
| /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, |
| return true and fill in *OPS recursively. */ |
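| |
| /* E.g. (illustrative), for VAR = _9 with |
| _7 = _1 | _2; _8 = _3 | _4; _9 = _7 | _8; |
| and CODE = BIT_IOR_EXPR, OPS roughly ends up containing _1, _2, _3 |
| and _4, assuming _7 and _8 are reassociable in LOOP and each leaf |
| is not itself such a | statement and has a single use. */ |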
| |
| static bool |
| get_ops (tree var, enum tree_code code, vec<operand_entry_t> *ops, |
| struct loop *loop) |
| { |
| gimple stmt = SSA_NAME_DEF_STMT (var); |
| tree rhs[2]; |
| int i; |
| |
| if (!is_reassociable_op (stmt, code, loop)) |
| return false; |
| |
| rhs[0] = gimple_assign_rhs1 (stmt); |
| rhs[1] = gimple_assign_rhs2 (stmt); |
| gimple_set_visited (stmt, true); |
| for (i = 0; i < 2; i++) |
| if (TREE_CODE (rhs[i]) == SSA_NAME |
| && !get_ops (rhs[i], code, ops, loop) |
| && has_single_use (rhs[i])) |
| { |
| operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); |
| |
| oe->op = rhs[i]; |
| oe->rank = code; |
| oe->id = 0; |
| oe->count = 1; |
| ops->safe_push (oe); |
| } |
| return true; |
| } |
| |
| /* Inter-bb range test optimization. */ |
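| |
| /* For illustration only (made-up GIMPLE), a typical shape handled |
| here is |
| <bb 3>: if (x_1 == 2) goto <bb 6>; else goto <bb 4>; |
| <bb 4>: if (x_1 == 3) goto <bb 6>; else goto <bb 5>; |
| <bb 5>: _7 = x_1 == 4; _8 = (int) _7; |
| <bb 6>: # _9 = PHI <1(3), 1(4), _8(5)> |
| The two GIMPLE_CONDs and the final cast are gathered into an ops |
| vector and passed to optimize_range_tests with ERROR_MARK as the |
| opcode, which may merge the three tests on x_1 into a single |
| range check such as (unsigned) x_1 - 2 <= 2. */ |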
| |
| static void |
| maybe_optimize_range_tests (gimple stmt) |
| { |
| basic_block first_bb = gimple_bb (stmt); |
| basic_block last_bb = first_bb; |
| basic_block other_bb = NULL; |
| basic_block bb; |
| edge_iterator ei; |
| edge e; |
| vec<operand_entry_t> ops = vNULL; |
| |
| /* Consider only basic blocks that end with GIMPLE_COND or |
| a cast statement satisfying final_range_test_p. All |
| but the last bb in the first_bb .. last_bb range |
| should end with GIMPLE_COND. */ |
| if (gimple_code (stmt) == GIMPLE_COND) |
| { |
| if (EDGE_COUNT (first_bb->succs) != 2) |
| return; |
| } |
| else if (final_range_test_p (stmt)) |
| other_bb = single_succ (first_bb); |
| else |
| return; |
| |
| if (stmt_could_throw_p (stmt)) |
| return; |
| |
| /* As the relative ordering of post-dominator sons isn't fixed, |
| maybe_optimize_range_tests can be called first on any |
| bb in the range we want to optimize. So, start by searching |
| backwards, as long as first_bb can be set to a predecessor. */ |
| while (single_pred_p (first_bb)) |
| { |
| basic_block pred_bb = single_pred (first_bb); |
| if (!suitable_cond_bb (pred_bb, first_bb, &other_bb, true)) |
| break; |
| if (!no_side_effect_bb (first_bb)) |
| break; |
| first_bb = pred_bb; |
| } |
| /* If first_bb is last_bb, other_bb hasn't been computed yet. |
| Before starting forward search in last_bb successors, find |
| out the other_bb. */ |
| if (first_bb == last_bb) |
| { |
| other_bb = NULL; |
| /* As a non-GIMPLE_COND last stmt always terminates the range, |
| the forward search could not discover anything either, so just give up. */ |
| if (gimple_code (stmt) != GIMPLE_COND) |
| return; |
| /* Look at both successors. Either a successor ends with a GIMPLE_COND |
| and satisfies suitable_cond_bb, or it ends with a cast satisfying |
| final_range_test_p and other_bb is that cast bb's single successor. */ |
| FOR_EACH_EDGE (e, ei, first_bb->succs) |
| if (!(e->flags & (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)) |
| || e->dest == first_bb) |
| return; |
| else if (single_pred_p (e->dest)) |
| { |
| stmt = last_stmt (e->dest); |
| if (stmt |
| && gimple_code (stmt) == GIMPLE_COND |
| && EDGE_COUNT (e->dest->succs) == 2) |
| { |
| if (suitable_cond_bb (first_bb, e->dest, &other_bb, true)) |
| break; |
| else |
| other_bb = NULL; |
| } |
| else if (stmt |
| && final_range_test_p (stmt) |
| && find_edge (first_bb, single_succ (e->dest))) |
| { |
| other_bb = single_succ (e->dest); |
| if (other_bb == first_bb) |
| other_bb = NULL; |
| } |
| } |
| if (other_bb == NULL) |
| return; |
| } |
| /* Now do the forward search, moving last_bb to successor bbs |
| that aren't other_bb. */ |
| while (EDGE_COUNT (last_bb->succs) == 2) |
| { |
| FOR_EACH_EDGE (e, ei, last_bb->succs) |
| if (e->dest != other_bb) |
| break; |
| if (e == NULL) |
| break; |
| if (!single_pred_p (e->dest)) |
| break; |
| if (!suitable_cond_bb (e->dest, last_bb, &other_bb, false)) |
| break; |
| if (!no_side_effect_bb (e->dest)) |
| break; |
| last_bb = e->dest; |
| } |
| if (first_bb == last_bb) |
| return; |
| /* Here basic blocks first_bb through last_bb's predecessor |
| end with GIMPLE_COND, all of them have one of their edges to |
| other_bb and another to another block in the range; |
| all blocks except first_bb don't have side-effects, and |
| last_bb ends with either a GIMPLE_COND or a cast satisfying |
| final_range_test_p. */ |
| for (bb = last_bb; ; bb = single_pred (bb)) |
| { |
| enum tree_code code; |
| tree lhs, rhs; |
| |
| e = find_edge (bb, other_bb); |
| stmt = last_stmt (bb); |
| gimple_set_visited (stmt, true); |
| if (gimple_code (stmt) != GIMPLE_COND) |
| { |
| use_operand_p use_p; |
| gimple phi; |
| edge e2; |
| unsigned int d; |
| |
| lhs = gimple_assign_lhs (stmt); |
| rhs = gimple_assign_rhs1 (stmt); |
| gcc_assert (bb == last_bb); |
| |
| /* stmt is |
| _123 = (int) _234; |
| |
| followed by: |
| <bb M>: |
| # _345 = PHI <_123(N), 1(...), 1(...)> |
| |
| or 0 instead of 1. If it is 0, the _234 |
| range test is anded together with all the |
| other range tests; if it is 1, it is ored with |
| them. */ |
| single_imm_use (lhs, &use_p, &phi); |
| gcc_assert (gimple_code (phi) == GIMPLE_PHI); |
| e2 = find_edge (first_bb, other_bb); |
| d = e2->dest_idx; |
| gcc_assert (gimple_phi_arg_def (phi, e->dest_idx) == lhs); |
| if (integer_zerop (gimple_phi_arg_def (phi, d))) |
| code = BIT_AND_EXPR; |
| else |
| { |
| gcc_checking_assert (integer_onep (gimple_phi_arg_def (phi, d))); |
| code = BIT_IOR_EXPR; |
| } |
| |
| /* If _234 SSA_NAME_DEF_STMT is |
| _234 = _567 | _789; |
| (or &, corresponding to 1/0 in the phi arguments), |
| push into ops the individual range test arguments |
| of the bitwise or resp. and, recursively. */ |
| if (!get_ops (rhs, code, &ops, |
| loop_containing_stmt (stmt)) |
| && has_single_use (rhs)) |
| { |
| /* Otherwise, push the _234 range test itself. */ |
| operand_entry_t oe |
| = (operand_entry_t) pool_alloc (operand_entry_pool); |
| |
| oe->op = rhs; |
| oe->rank = code; |
| oe->id = 0; |
| oe->count = 1; |
| ops.safe_push (oe); |
| } |
| continue; |
| } |
| /* Otherwise stmt is GIMPLE_COND. */ |
| code = gimple_cond_code (stmt); |
| lhs = gimple_cond_lhs (stmt); |
| rhs = gimple_cond_rhs (stmt); |
| if (TREE_CODE (lhs) == SSA_NAME |
| && INTEGRAL_TYPE_P (TREE_TYPE (lhs)) |
| && ((code != EQ_EXPR && code != NE_EXPR) |
| || rhs != boolean_false_node |
| /* Either push into ops the individual bitwise |
| or resp. and operands, depending on which |
| edge is other_bb. */ |
| || !get_ops (lhs, (((e->flags & EDGE_TRUE_VALUE) == 0) |
| ^ (code == EQ_EXPR)) |
| ? BIT_AND_EXPR : BIT_IOR_EXPR, &ops, |
| loop_containing_stmt (stmt)))) |
| { |
| /* Or push the GIMPLE_COND stmt itself. */ |
| operand_entry_t oe |
| = (operand_entry_t) pool_alloc (operand_entry_pool); |
| |
| oe->op = NULL; |
| oe->rank = (e->flags & EDGE_TRUE_VALUE) |
| ? BIT_IOR_EXPR : BIT_AND_EXPR; |
| /* oe->op = NULL signals that there is no SSA_NAME |
| for the range test, and oe->id instead is the |
| basic block number, at whose end the GIMPLE_COND |
| is. */ |
| oe->id = bb->index; |
| oe->count = 1; |
| ops.safe_push (oe); |
| } |
| if (bb == first_bb) |
| break; |
| } |
| if (ops.length () > 1) |
| optimize_range_tests (ERROR_MARK, &ops); |
| ops.release (); |
| } |
| |
| /* Return true if OPERAND is defined by a PHI node which uses the LHS |
| of STMT in its operands. This is also known as a "destructive |
| update" operation. */ |
| |
| static bool |
| is_phi_for_stmt (gimple stmt, tree operand) |
| { |
| gimple def_stmt; |
| tree lhs; |
| use_operand_p arg_p; |
| ssa_op_iter i; |
| |
| if (TREE_CODE (operand) != SSA_NAME) |
| return false; |
| |
| lhs = gimple_assign_lhs (stmt); |
| |
| def_stmt = SSA_NAME_DEF_STMT (operand); |
| if (gimple_code (def_stmt) != GIMPLE_PHI) |
| return false; |
| |
| FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE) |
| if (lhs == USE_FROM_PTR (arg_p)) |
| return true; |
| return false; |
| } |
| |
| /* Remove def stmt of VAR if VAR has zero uses and recurse |
| on rhs1 operand if so. */ |
| |
| static void |
| remove_visited_stmt_chain (tree var) |
| { |
| gimple stmt; |
| gimple_stmt_iterator gsi; |
| |
| while (1) |
| { |
| if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) |
| return; |
| stmt = SSA_NAME_DEF_STMT (var); |
| if (is_gimple_assign (stmt) && gimple_visited_p (stmt)) |
| { |
| var = gimple_assign_rhs1 (stmt); |
| gsi = gsi_for_stmt (stmt); |
| gsi_remove (&gsi, true); |
| release_defs (stmt); |
| } |
| else |
| return; |
| } |
| } |
| |
| /* This function checks three consecutive operands in |
| the passed operand vector OPS starting from OPINDEX and |
| swaps two operands if it is profitable for the binary operation |
| consuming the OPINDEX + 1 and OPINDEX + 2 operands. |
| |
| We pair ops with the same rank if possible. |
| |
| The alternative we try is to see if STMT is a destructive |
| update style statement, which is like: |
| b = phi (a, ...) |
| a = c + b; |
| In that case, we want to use the destructive update form to |
| expose the possible vectorizer sum reduction opportunity. |
| In that case, the third operand will be the phi node. This |
| check is not performed if STMT is null. |
| |
| We could, of course, try to be better as noted above, and do a |
| lot of work to try to find these opportunities in >3 operand |
| cases, but it is unlikely to be worth it. */ |
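| |
| /* For example (illustrative), if the three entries have ranks |
| a(3), b(3), c(1), the first branch below swaps a and c, leaving |
| c, b, a, so that the two equal-rank operands b and a are the pair |
| consumed together by one binary statement. */ |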
| |
| static void |
| swap_ops_for_binary_stmt (vec<operand_entry_t> ops, |
| unsigned int opindex, gimple stmt) |
| { |
| operand_entry_t oe1, oe2, oe3; |
| |
| oe1 = ops[opindex]; |
| oe2 = ops[opindex + 1]; |
| oe3 = ops[opindex + 2]; |
| |
| if ((oe1->rank == oe2->rank |
| && oe2->rank != oe3->rank) |
| || (stmt && is_phi_for_stmt (stmt, oe3->op) |
| && !is_phi_for_stmt (stmt, oe1->op) |
| && !is_phi_for_stmt (stmt, oe2->op))) |
| { |
| struct operand_entry temp = *oe3; |
| oe3->op = oe1->op; |
| oe3->rank = oe1->rank; |
| oe1->op = temp.op; |
| oe1->rank= temp.rank; |
| } |
| else if ((oe1->rank == oe3->rank |
| && oe2->rank != oe3->rank) |
| || (stmt && is_phi_for_stmt (stmt, oe2->op) |
| && !is_phi_for_stmt (stmt, oe1->op) |
| && !is_phi_for_stmt (stmt, oe3->op))) |
| { |
| struct operand_entry temp = *oe2; |
| oe2->op = oe1->op; |
| oe2->rank = oe1->rank; |
| oe1->op = temp.op; |
| oe1->rank= temp.rank; |
| } |
| } |
| |
| /* Recursively rewrite our linearized statements so that the operators |
| match those in OPS[OPINDEX], putting the computation in rank |
| order. */ |
| |
| static void |
| rewrite_expr_tree (gimple stmt, unsigned int opindex, |
| vec<operand_entry_t> ops, bool moved) |
| { |
| tree rhs1 = gimple_assign_rhs1 (stmt); |
| tree rhs2 = gimple_assign_rhs2 (stmt); |
| operand_entry_t oe; |
| |
| /* If we have three operands left, then we want to make sure the ones |
| that get the double binary op are chosen wisely. */ |
| if (opindex + 3 == ops.length ()) |
| swap_ops_for_binary_stmt (ops, opindex, stmt); |
| |
| /* The final recursion case for this function is that you have |
| exactly two operands left. |
| If we had exactly one op in the entire list to start with, we |
| would have never called this function, and the tail recursion |
| rewrites them one at a time. */ |
| if (opindex + 2 == ops.length ()) |
| { |
| operand_entry_t oe1, oe2; |
| |
| oe1 = ops[opindex]; |
| oe2 = ops[opindex + 1]; |
| |
| if (rhs1 != oe1->op || rhs2 != oe2->op) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Transforming "); |
| print_gimple_stmt (dump_file, stmt, 0, 0); |
| } |
| |
| gimple_assign_set_rhs1 (stmt, oe1->op); |
| gimple_assign_set_rhs2 (stmt, oe2->op); |
| update_stmt (stmt); |
| if (rhs1 != oe1->op && rhs1 != oe2->op) |
| remove_visited_stmt_chain (rhs1); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, " into "); |
| print_gimple_stmt (dump_file, stmt, 0, 0); |
| } |
| } |
| return; |
| } |
| |
| /* If we hit here, we should have 3 or more ops left. */ |
| gcc_assert (opindex + 2 < ops.length ()); |
| |
| /* Rewrite the next operator. */ |
| oe = ops[opindex]; |
| |
| if (oe->op != rhs2) |
| { |
| if (!moved) |
| { |
| gimple_stmt_iterator gsinow, gsirhs1; |
| gimple stmt1 = stmt, stmt2; |
| unsigned int count; |
| |
| gsinow = gsi_for_stmt (stmt); |
| count = ops.length () - opindex - 2; |
| while (count-- != 0) |
| { |
| stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1)); |
| gsirhs1 = gsi_for_stmt (stmt2); |
| gsi_move_before (&gsirhs1, &gsinow); |
| gsi_prev (&gsinow); |
| stmt1 = stmt2; |
| } |
| moved = true; |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Transforming "); |
| print_gimple_stmt (dump_file, stmt, 0, 0); |
| } |
| |
| gimple_assign_set_rhs2 (stmt, oe->op); |
| update_stmt (stmt); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, " into "); |
| print_gimple_stmt (dump_file, stmt, 0, 0); |
| } |
| } |
| /* Recurse on the LHS of the binary operator, which is guaranteed to |
| be the non-leaf side. */ |
| rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved); |
| } |
| |
| /* Find out how many cycles we need to compute a statement chain. |
| OPS_NUM holds the number of statements in the chain. CPU_WIDTH is the |
| maximum number of independent statements we may execute per cycle. */ |
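| |
| /* A worked example (illustrative): for ops_num = 16 and cpu_width = 4, |
| res = 16 / 8 = 2 cycles reduce the count from 16 to 8 (by 4 per |
| cycle), then rest = 8 is halved each cycle, 8 -> 4 -> 2 -> 1, which |
| is exact_log2 (8) = 3 more cycles, so the function returns 5. */ |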
| |
| static int |
| get_required_cycles (int ops_num, int cpu_width) |
| { |
| int res; |
| int elog; |
| unsigned int rest; |
| |
| /* While we have more than 2 * cpu_width operands |
| we may reduce the number of operands by cpu_width |
| per cycle. */ |
| res = ops_num / (2 * cpu_width); |
| |
| /* The remaining operand count may be halved each cycle |
| until we have only one operand. */ |
| rest = (unsigned)(ops_num - res * cpu_width); |
| elog = exact_log2 (rest); |
| if (elog >= 0) |
| res += elog; |
| else |
| res += floor_log2 (rest) + 1; |
| |
| return res; |
| } |
| |
| /* Returns an optimal number of registers to use for the computation of |
| the given statements. */ |
| |
| static int |
| get_reassociation_width (int ops_num, enum tree_code opc, |
| enum machine_mode mode) |
| { |
| int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); |
| int width; |
| int width_min; |
| int cycles_best; |
| |
| if (param_width > 0) |
| width = param_width; |
| else |
| width = targetm.sched.reassociation_width (opc, mode); |
| |
| if (width == 1) |
| return width; |
| |
| /* Get the minimal time required for sequence computation. */ |
| cycles_best = get_required_cycles (ops_num, width); |
| |
| /* Check if we may use less width and still compute the sequence in |
| the same time. It will allow us to reduce register usage. |
| get_required_cycles is monotonically increasing with lower width |
| so we can perform a binary search for the minimal width that still |
| results in the optimal cycle count. */ |
| width_min = 1; |
| while (width > width_min) |
| { |
| int width_mid = (width + width_min) / 2; |
| |
| if (get_required_cycles (ops_num, width_mid) == cycles_best) |
| width = width_mid; |
| else if (width_min < width_mid) |
| width_min = width_mid; |
| else |
| break; |
| } |
| |
| return width; |
| } |
| |
| /* Rewrite our linearized statements so that the operators |
| match those in OPS, putting the computation in rank |
| order and trying to allow operations to be executed in |
| parallel. */ |
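| |
| /* For instance (illustrative), with WIDTH 2 and OPS {a, b, c, d} the |
| chain of three statements is rewritten into roughly |
| _t1 = c + d; _t2 = a + b; lhs = _t1 + _t2; |
| so the two independent additions can be issued in the same cycle. */ |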
| |
| static void |
| rewrite_expr_tree_parallel (gimple stmt, int width, |
| vec<operand_entry_t> ops) |
| { |
| enum tree_code opcode = gimple_assign_rhs_code (stmt); |
| int op_num = ops.length (); |
| int stmt_num = op_num - 1; |
| gimple *stmts = XALLOCAVEC (gimple, stmt_num); |
| int op_index = op_num - 1; |
| int stmt_index = 0; |
| int ready_stmts_end = 0; |
| int i = 0; |
| tree last_rhs1 = gimple_assign_rhs1 (stmt); |
| |
| /* We start expression rewriting from the top statements. |
| So, in this loop we create a full list of statements |
| we will work with. */ |
| stmts[stmt_num - 1] = stmt; |
| for (i = stmt_num - 2; i >= 0; i--) |
| stmts[i] = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmts[i+1])); |
| |
| for (i = 0; i < stmt_num; i++) |
| { |
| tree op1, op2; |
| |
| /* Determine whether we should use results of |
| already handled statements or not. */ |
| if (ready_stmts_end == 0 |
| && (i - stmt_index >= width || op_index < 1)) |
| ready_stmts_end = i; |
| |
| /* Now we choose operands for the next statement. A nonzero |
| value in ready_stmts_end means here that we should use |
| the results of already generated statements as new operands. */ |
| if (ready_stmts_end > 0) |
| { |
| op1 = gimple_assign_lhs (stmts[stmt_index++]); |
| if (ready_stmts_end > stmt_index) |
| op2 = gimple_assign_lhs (stmts[stmt_index++]); |
| else if (op_index >= 0) |
| op2 = ops[op_index--]->op; |
| else |
| { |
| gcc_assert (stmt_index < i); |
| op2 = gimple_assign_lhs (stmts[stmt_index++]); |
| } |
| |
| if (stmt_index >= ready_stmts_end) |
| ready_stmts_end = 0; |
| } |
| else |
| { |
| if (op_index > 1) |
| swap_ops_for_binary_stmt (ops, op_index - 2, NULL); |
| op2 = ops[op_index--]->op; |
| op1 = ops[op_index--]->op; |
| } |
| |
| /* If we emit the last statement then we should put |
| operands into the last statement. It will also |
| break the loop. */ |
| if (op_index < 0 && stmt_index == i) |
| i = stmt_num - 1; |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Transforming "); |
| print_gimple_stmt (dump_file, stmts[i], 0, 0); |
| } |
| |
| /* We keep the original statement only for the last one. All |
| others are recreated. */ |
| if (i == stmt_num - 1) |
| { |
| gimple_assign_set_rhs1 (stmts[i], op1); |
| gimple_assign_set_rhs2 (stmts[i], op2); |
| update_stmt (stmts[i]); |
| } |
| else |
| stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, " into "); |
| print_gimple_stmt (dump_file, stmts[i], 0, 0); |
| } |
| } |
| |
| remove_visited_stmt_chain (last_rhs1); |
| } |
| |
| /* Transform STMT, which is really (A + B) + (C + D), into the left |
| linear form, ((A+B)+C)+D. |
| Recurse on D if necessary. */ |
| |
| static void |
| linearize_expr (gimple stmt) |
| { |
| gimple_stmt_iterator gsinow, gsirhs; |
| gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); |
| gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); |
| enum tree_code rhscode = gimple_assign_rhs_code (stmt); |
| gimple newbinrhs = NULL; |
| struct loop *loop = loop_containing_stmt (stmt); |
| |
| gcc_assert (is_reassociable_op (binlhs, rhscode, loop) |
| && is_reassociable_op (binrhs, rhscode, loop)); |
| |
| gsinow = gsi_for_stmt (stmt); |
| gsirhs = gsi_for_stmt (binrhs); |
| gsi_move_before (&gsirhs, &gsinow); |
| |
| gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs)); |
| gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs)); |
| gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs)); |
| |
| if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME) |
| newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "Linearized: "); |
| print_gimple_stmt (dump_file, stmt, 0, 0); |
| } |
| |
| reassociate_stats.linearized++; |
| update_stmt (binrhs); |
| update_stmt (binlhs); |
| update_stmt (stmt); |
| |
| gimple_set_visited (stmt, true); |
| gimple_set_visited (binlhs, true); |
| gimple_set_visited (binrhs, true); |
| |
| /* Tail recurse on the new rhs if it still needs reassociation. */ |
| if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop)) |
| /* ??? This should probably be linearize_expr (newbinrhs) but I don't |
| want to change the algorithm while converting to tuples. */ |
| linearize_expr (stmt); |
| } |
| |
| /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return |
| it. Otherwise, return NULL. */ |
| |
| static gimple |
| get_single_immediate_use (tree lhs) |
| { |
| use_operand_p immuse; |
|