blob: ecfa569259cdbd3fcd1f414b4947db8e9eacb701 [file] [log] [blame]
/* Induction variable optimizations.
Copyright (C) 2003-2019 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/* This pass tries to find the optimal set of induction variables for the loop.
It optimizes just the basic linear induction variables (although adding
support for other types should not be too hard). It includes the
optimizations commonly known as strength reduction, induction variable
coalescing and induction variable elimination. It does it in the
following steps:
1) The interesting uses of induction variables are found. This includes
-- uses of induction variables in non-linear expressions
-- addresses of arrays
-- comparisons of induction variables
Note the interesting uses are categorized and handled in group.
Generally, address type uses are grouped together if their iv bases
are different in constant offset.
2) Candidates for the induction variables are found. This includes
-- old induction variables
-- the variables defined by expressions derived from the "interesting
groups/uses" above
3) The optimal (w.r. to a cost function) set of variables is chosen. The
cost function assigns a cost to sets of induction variables and consists
of three parts:
-- The group/use costs. Each of the interesting groups/uses chooses
the best induction variable in the set and adds its cost to the sum.
The cost reflects the time spent on modifying the induction variables
value to be usable for the given purpose (adding base and offset for
arrays, etc.).
-- The variable costs. Each of the variables has a cost assigned that
reflects the costs associated with incrementing the value of the
variable. The original variables are somewhat preferred.
-- The set cost. Depending on the size of the set, extra cost may be
added to reflect register pressure.
All the costs are defined in a machine-specific way, using the target
hooks and machine descriptions to determine them.
4) The trees are transformed to use the new variables, the dead code is
removed.
All of this is done loop by loop. Doing it globally is theoretically
possible, it might give a better performance and it might enable us
to decide costs more precisely, but getting all the interactions right
would be complicated. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
/* For lang_hooks.types.type_for_mode. */
#include "langhooks.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
cost of different addressing modes. This should be moved to a TBD
interface between the GIMPLE and RTL worlds. */
/* The infinite cost. */
#define INFTY 10000000
/* Returns the expected number of loop iterations for LOOP.
The average trip count is computed from profile data if it
exists. */
static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
if (niter == -1)
{
niter = likely_max_stmt_executions_int (loop);
if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
}
return niter;
}
struct iv_use;
/* Representation of the induction variable. */
struct iv
{
tree base; /* Initial value of the iv. */
tree base_object; /* A memory object to that the induction variable points. */
tree step; /* Step of the iv (constant only). */
tree ssa_name; /* The ssa name with the value. */
struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */
bool biv_p; /* Is it a biv? */
bool no_overflow; /* True if the iv doesn't overflow. */
bool have_address_use;/* For biv, indicate if it's used in any address
type use. */
};
/* Per-ssa version information (induction variable descriptions, etc.). */
struct version_info
{
tree name; /* The ssa name. */
struct iv *iv; /* Induction variable description. */
bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
an expression that is not an induction variable. */
bool preserve_biv; /* For the original biv, whether to preserve it. */
unsigned inv_id; /* Id of an invariant. */
};
/* Types of uses. */
enum use_type
{
USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
USE_REF_ADDRESS, /* Use is an address for an explicit memory
reference. */
USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
cases where the expansion of the function
will turn the argument into a normal address. */
USE_COMPARE /* Use is a compare. */
};
/* Cost of a computation. */
struct comp_cost
{
comp_cost (): cost (0), complexity (0), scratch (0)
{}
comp_cost (int cost, unsigned complexity, int scratch = 0)
: cost (cost), complexity (complexity), scratch (scratch)
{}
/* Returns true if COST is infinite. */
bool infinite_cost_p ();
/* Adds costs COST1 and COST2. */
friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
/* Adds COST to the comp_cost. */
comp_cost operator+= (comp_cost cost);
/* Adds constant C to this comp_cost. */
comp_cost operator+= (HOST_WIDE_INT c);
/* Subtracts constant C to this comp_cost. */
comp_cost operator-= (HOST_WIDE_INT c);
/* Divide the comp_cost by constant C. */
comp_cost operator/= (HOST_WIDE_INT c);
/* Multiply the comp_cost by constant C. */
comp_cost operator*= (HOST_WIDE_INT c);
/* Subtracts costs COST1 and COST2. */
friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
/* Subtracts COST from this comp_cost. */
comp_cost operator-= (comp_cost cost);
/* Returns true if COST1 is smaller than COST2. */
friend bool operator< (comp_cost cost1, comp_cost cost2);
/* Returns true if COST1 and COST2 are equal. */
friend bool operator== (comp_cost cost1, comp_cost cost2);
/* Returns true if COST1 is smaller or equal than COST2. */
friend bool operator<= (comp_cost cost1, comp_cost cost2);
int cost; /* The runtime cost. */
unsigned complexity; /* The estimate of the complexity of the code for
the computation (in no concrete units --
complexity field should be larger for more
complex expressions and addressing modes). */
int scratch; /* Scratch used during cost computation. */
};
static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
bool
comp_cost::infinite_cost_p ()
{
return cost == INFTY;
}
comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
return infinite_cost;
cost1.cost += cost2.cost;
cost1.complexity += cost2.complexity;
return cost1;
}
comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
if (cost1.infinite_cost_p ())
return infinite_cost;
gcc_assert (!cost2.infinite_cost_p ());
cost1.cost -= cost2.cost;
cost1.complexity -= cost2.complexity;
return cost1;
}
comp_cost
comp_cost::operator+= (comp_cost cost)
{
*this = *this + cost;
return *this;
}
comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
if (infinite_cost_p ())
return *this;
this->cost += c;
return *this;
}
comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
if (infinite_cost_p ())
return *this;
this->cost -= c;
return *this;
}
comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
if (infinite_cost_p ())
return *this;
this->cost /= c;
return *this;
}
comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
if (infinite_cost_p ())
return *this;
this->cost *= c;
return *this;
}
comp_cost
comp_cost::operator-= (comp_cost cost)
{
*this = *this - cost;
return *this;
}
bool
operator< (comp_cost cost1, comp_cost cost2)
{
if (cost1.cost == cost2.cost)
return cost1.complexity < cost2.complexity;
return cost1.cost < cost2.cost;
}
bool
operator== (comp_cost cost1, comp_cost cost2)
{
return cost1.cost == cost2.cost
&& cost1.complexity == cost2.complexity;
}
bool
operator<= (comp_cost cost1, comp_cost cost2)
{
return cost1 < cost2 || cost1 == cost2;
}
struct iv_inv_expr_ent;
/* The candidate - cost pair. */
struct cost_pair
{
struct iv_cand *cand; /* The candidate. */
comp_cost cost; /* The cost. */
enum tree_code comp; /* For iv elimination, the comparison. */
bitmap inv_vars; /* The list of invariant ssa_vars that have to be
preserved when representing iv_use with iv_cand. */
bitmap inv_exprs; /* The list of newly created invariant expressions
when representing iv_use with iv_cand. */
tree value; /* For final value elimination, the expression for
the final value of the iv. For iv elimination,
the new bound to compare with. */
};
/* Use. */
struct iv_use
{
unsigned id; /* The id of the use. */
unsigned group_id; /* The group id the use belongs to. */
enum use_type type; /* Type of the use. */
tree mem_type; /* The memory type to use when testing whether an
address is legitimate, and what the address's
cost is. */
struct iv *iv; /* The induction variable it is based on. */
gimple *stmt; /* Statement in that it occurs. */
tree *op_p; /* The place where it occurs. */
tree addr_base; /* Base address with const offset stripped. */
poly_uint64_pod addr_offset;
/* Const offset stripped from base address. */
};
/* Group of uses. */
struct iv_group
{
/* The id of the group. */
unsigned id;
/* Uses of the group are of the same type. */
enum use_type type;
/* The set of "related" IV candidates, plus the important ones. */
bitmap related_cands;
/* Number of IV candidates in the cost_map. */
unsigned n_map_members;
/* The costs wrto the iv candidates. */
struct cost_pair *cost_map;
/* The selected candidate for the group. */
struct iv_cand *selected;
/* Uses in the group. */
vec<struct iv_use *> vuses;
};
/* The position where the iv is computed. */
enum iv_position
{
IP_NORMAL, /* At the end, just before the exit condition. */
IP_END, /* At the end of the latch block. */
IP_BEFORE_USE, /* Immediately before a specific use. */
IP_AFTER_USE, /* Immediately after a specific use. */
IP_ORIGINAL /* The original biv. */
};
/* The induction variable candidate. */
struct iv_cand
{
unsigned id; /* The number of the candidate. */
bool important; /* Whether this is an "important" candidate, i.e. such
that it should be considered by all uses. */
ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
gimple *incremented_at;/* For original biv, the statement where it is
incremented. */
tree var_before; /* The variable used for it before increment. */
tree var_after; /* The variable used for it after increment. */
struct iv *iv; /* The value of the candidate. NULL for
"pseudocandidate" used to indicate the possibility
to replace the final value of an iv by direct
computation of the value. */
unsigned cost; /* Cost of the candidate. */
unsigned cost_step; /* Cost of the candidate's increment operation. */
struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
where it is incremented. */
bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
iv_cand. */
bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
hanlde it as a new invariant expression which will
be hoisted out of loop. */
struct iv *orig_iv; /* The original iv if this cand is added from biv with
smaller type. */
};
/* Hashtable entry for common candidate derived from iv uses. */
struct iv_common_cand
{
tree base;
tree step;
/* IV uses from which this common candidate is derived. */
auto_vec<struct iv_use *> uses;
hashval_t hash;
};
/* Hashtable helpers. */
struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
static inline hashval_t hash (const iv_common_cand *);
static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};
/* Hash function for possible common candidates. */
inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
return ccand->hash;
}
/* Hash table equality function for common candidates. */
inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
const iv_common_cand *ccand2)
{
return (ccand1->hash == ccand2->hash
&& operand_equal_p (ccand1->base, ccand2->base, 0)
&& operand_equal_p (ccand1->step, ccand2->step, 0)
&& (TYPE_PRECISION (TREE_TYPE (ccand1->base))
== TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}
/* Loop invariant expression hashtable entry. */
struct iv_inv_expr_ent
{
/* Tree expression of the entry. */
tree expr;
/* Unique indentifier. */
int id;
/* Hash value. */
hashval_t hash;
};
/* Sort iv_inv_expr_ent pair A and B by id field. */
static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
unsigned id1 = (*e1)->id;
unsigned id2 = (*e2)->id;
if (id1 < id2)
return -1;
else if (id1 > id2)
return 1;
else
return 0;
}
/* Hashtable helpers. */
struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
static inline hashval_t hash (const iv_inv_expr_ent *);
static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};
/* Return true if uses of type TYPE represent some form of address. */
inline bool
address_p (use_type type)
{
return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
}
/* Hash function for loop invariant expressions. */
inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
return expr->hash;
}
/* Hash table equality function for expressions. */
inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
const iv_inv_expr_ent *expr2)
{
return expr1->hash == expr2->hash
&& operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
/* The currently optimized loop. */
struct loop *current_loop;
location_t loop_loc;
/* Numbers of iterations for all exits of the current loop. */
hash_map<edge, tree_niter_desc *> *niters;
/* Number of registers used in it. */
unsigned regs_used;
/* The size of version_info array allocated. */
unsigned version_info_size;
/* The array of information for the ssa names. */
struct version_info *version_info;
/* The hashtable of loop invariant expressions created
by ivopt. */
hash_table<iv_inv_expr_hasher> *inv_expr_tab;
/* The bitmap of indices in version_info whose value was changed. */
bitmap relevant;
/* The uses of induction variables. */
vec<iv_group *> vgroups;
/* The candidates. */
vec<iv_cand *> vcands;
/* A bitmap of important candidates. */
bitmap important_candidates;
/* Cache used by tree_to_aff_combination_expand. */
hash_map<tree, name_expansion *> *name_expansion_cache;
/* The hashtable of common candidates derived from iv uses. */
hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
/* The common candidates. */
vec<iv_common_cand *> iv_common_cands;
/* Hash map recording base object information of tree exp. */
hash_map<tree, tree> *base_object_map;
/* The maximum invariant variable id. */
unsigned max_inv_var_id;
/* The maximum invariant expression id. */
unsigned max_inv_expr_id;
/* Number of no_overflow BIVs which are not used in memory address. */
unsigned bivs_not_used_in_addr;
/* Obstack for iv structure. */
struct obstack iv_obstack;
/* Whether to consider just related and important candidates when replacing a
use. */
bool consider_all_candidates;
/* Are we optimizing for speed? */
bool speed;
/* Whether the loop body includes any function calls. */
bool body_includes_call;
/* Whether the loop body can only be exited via single exit. */
bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses. */
struct iv_ca
{
/* The number of uses covered by the assignment. */
unsigned upto;
/* Number of uses that cannot be expressed by the candidates in the set. */
unsigned bad_groups;
/* Candidate assigned to a use, together with the related costs. */
struct cost_pair **cand_for_group;
/* Number of times each candidate is used. */
unsigned *n_cand_uses;
/* The candidates used. */
bitmap cands;
/* The number of candidates in the set. */
unsigned n_cands;
/* The number of invariants needed, including both invariant variants and
invariant expressions. */
unsigned n_invs;
/* Total cost of expressing uses. */
comp_cost cand_use_cost;
/* Total cost of candidates. */
unsigned cand_cost;
/* Number of times each invariant variable is used. */
unsigned *n_inv_var_uses;
/* Number of times each invariant expression is used. */
unsigned *n_inv_expr_uses;
/* Total cost of the assignment. */
comp_cost cost;
};
/* Difference of two iv candidate assignments. */
struct iv_ca_delta
{
/* Changed group. */
struct iv_group *group;
/* An old assignment (for rollback purposes). */
struct cost_pair *old_cp;
/* A new assignment. */
struct cost_pair *new_cp;
/* Next change in the list. */
struct iv_ca_delta *next;
};
/* Bound on number of candidates below that all candidates are considered. */
#define CONSIDER_ALL_CANDIDATES_BOUND \
((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
/* If there are more iv occurrences, we just give up (it is quite unlikely that
optimizing such a loop would help, and it would take ages). */
#define MAX_CONSIDERED_GROUPS \
((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
/* If there are at most this number of ivs in the set, try removing unnecessary
ivs from the set always. */
#define ALWAYS_PRUNE_CAND_SET_BOUND \
((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for that the decl_rtl field must be reset is stored
here. */
static vec<tree> decl_rtl_to_reset;
static comp_cost force_expr_to_var_cost (tree, bool);
/* The single loop exit if it dominates the latch, NULL otherwise. */
edge
single_dom_exit (struct loop *loop)
{
edge exit = single_exit (loop);
if (!exit)
return NULL;
if (!just_once_each_iteration_p (loop, exit->src))
return NULL;
return exit;
}
/* Dumps information about the induction variable IV to FILE. Don't dump
variable's name if DUMP_NAME is FALSE. The information is dumped with
preceding spaces indicated by INDENT_LEVEL. */
void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
const char *p;
const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
if (indent_level > 4)
indent_level = 4;
p = spaces + 8 - (indent_level << 1);
fprintf (file, "%sIV struct:\n", p);
if (iv->ssa_name && dump_name)
{
fprintf (file, "%s SSA_NAME:\t", p);
print_generic_expr (file, iv->ssa_name, TDF_SLIM);
fprintf (file, "\n");
}
fprintf (file, "%s Type:\t", p);
print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
fprintf (file, "\n");
fprintf (file, "%s Base:\t", p);
print_generic_expr (file, iv->base, TDF_SLIM);
fprintf (file, "\n");
fprintf (file, "%s Step:\t", p);
print_generic_expr (file, iv->step, TDF_SLIM);
fprintf (file, "\n");
if (iv->base_object)
{
fprintf (file, "%s Object:\t", p);
print_generic_expr (file, iv->base_object, TDF_SLIM);
fprintf (file, "\n");
}
fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
p, iv->no_overflow ? "No-overflow" : "Overflow");
}
/* Dumps information about the USE to FILE. */
void
dump_use (FILE *file, struct iv_use *use)
{
fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
fprintf (file, " At stmt:\t");
print_gimple_stmt (file, use->stmt, 0);
fprintf (file, " At pos:\t");
if (use->op_p)
print_generic_expr (file, *use->op_p, TDF_SLIM);
fprintf (file, "\n");
dump_iv (file, use->iv, false, 2);
}
/* Dumps information about the uses to FILE. */
void
dump_groups (FILE *file, struct ivopts_data *data)
{
unsigned i, j;
struct iv_group *group;
for (i = 0; i < data->vgroups.length (); i++)
{
group = data->vgroups[i];
fprintf (file, "Group %d:\n", group->id);
if (group->type == USE_NONLINEAR_EXPR)
fprintf (file, " Type:\tGENERIC\n");
else if (group->type == USE_REF_ADDRESS)
fprintf (file, " Type:\tREFERENCE ADDRESS\n");
else if (group->type == USE_PTR_ADDRESS)
fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
else
{
gcc_assert (group->type == USE_COMPARE);
fprintf (file, " Type:\tCOMPARE\n");
}
for (j = 0; j < group->vuses.length (); j++)
dump_use (file, group->vuses[j]);
}
}
/* Dumps information about induction variable candidate CAND to FILE. */
void
dump_cand (FILE *file, struct iv_cand *cand)
{
struct iv *iv = cand->iv;
fprintf (file, "Candidate %d:\n", cand->id);
if (cand->inv_vars)
{
fprintf (file, " Depend on inv.vars: ");
dump_bitmap (file, cand->inv_vars);
}
if (cand->inv_exprs)
{
fprintf (file, " Depend on inv.exprs: ");
dump_bitmap (file, cand->inv_exprs);
}
if (cand->var_before)
{
fprintf (file, " Var befor: ");
print_generic_expr (file, cand->var_before, TDF_SLIM);
fprintf (file, "\n");
}
if (cand->var_after)
{
fprintf (file, " Var after: ");
print_generic_expr (file, cand->var_after, TDF_SLIM);
fprintf (file, "\n");
}
switch (cand->pos)
{
case IP_NORMAL:
fprintf (file, " Incr POS: before exit test\n");
break;
case IP_BEFORE_USE:
fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
break;
case IP_AFTER_USE:
fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
break;
case IP_END:
fprintf (file, " Incr POS: at end\n");
break;
case IP_ORIGINAL:
fprintf (file, " Incr POS: orig biv\n");
break;
}
dump_iv (file, iv, false, 1);
}
/* Returns the info for ssa version VER. */
static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
return data->version_info + ver;
}
/* Returns the info for ssa name NAME. */
static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
emitted in LOOP. */
static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
gcc_assert (bb);
if (sbb == loop->latch)
return true;
if (sbb != bb)
return false;
return stmt == last_stmt (bb);
}
/* Returns true if STMT if after the place where the original induction
variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
if the positions are identical. */
static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
basic_block cand_bb = gimple_bb (cand->incremented_at);
basic_block stmt_bb = gimple_bb (stmt);
if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
return false;
if (stmt_bb != cand_bb)
return true;
if (true_if_equal
&& gimple_uid (stmt) == gimple_uid (cand->incremented_at))
return true;
return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT if after the place where the induction variable
CAND is incremented in LOOP. */
static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
switch (cand->pos)
{
case IP_END:
return false;
case IP_NORMAL:
return stmt_after_ip_normal_pos (loop, stmt);
case IP_ORIGINAL:
case IP_AFTER_USE:
return stmt_after_inc_pos (cand, stmt, false);
case IP_BEFORE_USE:
return stmt_after_inc_pos (cand, stmt, true);
default:
gcc_unreachable ();
}
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
static bool
abnormal_ssa_name_p (tree exp)
{
if (!exp)
return false;
if (TREE_CODE (exp) != SSA_NAME)
return false;
return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
abnormal phi node. Callback for for_each_index. */
static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
void *data ATTRIBUTE_UNUSED)
{
if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
{
if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
return false;
if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
return false;
}
return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
abnormal phi node. */
bool
contains_abnormal_ssa_name_p (tree expr)
{
enum tree_code code;
enum tree_code_class codeclass;
if (!expr)
return false;
code = TREE_CODE (expr);
codeclass = TREE_CODE_CLASS (code);
if (code == CALL_EXPR)
{
tree arg;
call_expr_arg_iterator iter;
FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
if (contains_abnormal_ssa_name_p (arg))
return true;
return false;
}
if (code == SSA_NAME)
return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
if (code == INTEGER_CST
|| is_gimple_min_invariant (expr))
return false;
if (code == ADDR_EXPR)
return !for_each_index (&TREE_OPERAND (expr, 0),
idx_contains_abnormal_ssa_name_p,
NULL);
if (code == COND_EXPR)
return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
|| contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
|| contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
switch (codeclass)
{
case tcc_binary:
case tcc_comparison:
if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
return true;
/* Fallthru. */
case tcc_unary:
if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
return true;
break;
default:
gcc_unreachable ();
}
return false;
}
/* Returns the structure describing number of iterations determined from
EXIT of DATA->current_loop, or NULL if something goes wrong. */
static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
struct tree_niter_desc *desc;
tree_niter_desc **slot;
if (!data->niters)
{
data->niters = new hash_map<edge, tree_niter_desc *>;
slot = NULL;
}
else
slot = data->niters->get (exit);
if (!slot)
{
/* Try to determine number of iterations. We cannot safely work with ssa
names that appear in phi nodes on abnormal edges, so that we do not
create overlapping life ranges for them (PR 27283). */
desc = XNEW (struct tree_niter_desc);
if (!number_of_iterations_exit (data->current_loop,
exit, desc, true)
|| contains_abnormal_ssa_name_p (desc->niter))
{
XDELETE (desc);
desc = NULL;
}
data->niters->put (exit, desc);
}
else
desc = *slot;
return desc;
}
/* Returns the structure describing number of iterations determined from
single dominating exit of DATA->current_loop, or NULL if something
goes wrong. */
static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
edge exit = single_dom_exit (data->current_loop);
if (!exit)
return NULL;
return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
in DATA. */
static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
data->version_info_size = 2 * num_ssa_names;
data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
data->relevant = BITMAP_ALLOC (NULL);
data->important_candidates = BITMAP_ALLOC (NULL);
data->max_inv_var_id = 0;
data->max_inv_expr_id = 0;
data->niters = NULL;
data->vgroups.create (20);
data->vcands.create (20);
data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
data->name_expansion_cache = NULL;
data->base_object_map = NULL;
data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
data->iv_common_cands.create (20);
decl_rtl_to_reset.create (20);
gcc_obstack_init (&data->iv_obstack);
}
/* walk_tree callback for determine_base_object. */
static tree
determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
{
tree_code code = TREE_CODE (*tp);
tree obj = NULL_TREE;
if (code == ADDR_EXPR)
{
tree base = get_base_address (TREE_OPERAND (*tp, 0));
if (!base)
obj = *tp;
else if (TREE_CODE (base) != MEM_REF)
obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
}
else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
obj = fold_convert (ptr_type_node, *tp);
if (!obj)
{
if (!EXPR_P (*tp))
*walk_subtrees = 0;
return NULL_TREE;
}
/* Record special node for multiple base objects and stop. */
if (*static_cast<tree *> (wdata))
{
*static_cast<tree *> (wdata) = integer_zero_node;
return integer_zero_node;
}
/* Record the base object and continue looking. */
*static_cast<tree *> (wdata) = obj;
return NULL_TREE;
}
/* Returns a memory object to that EXPR points with caching. Return NULL if we
are able to determine that it does not point to any such object; specially
return integer_zero_node if EXPR contains multiple base objects. */
static tree
determine_base_object (struct ivopts_data *data, tree expr)
{
tree *slot, obj = NULL_TREE;
if (data->base_object_map)
{
if ((slot = data->base_object_map->get(expr)) != NULL)
return *slot;
}
else
data->base_object_map = new hash_map<tree, tree>;
(void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
data->base_object_map->put (expr, obj);
return obj;
}
/* Return true if address expression with non-DECL_P operand appears
in EXPR. */
static bool
contain_complex_addr_expr (tree expr)
{
bool res = false;
STRIP_NOPS (expr);
switch (TREE_CODE (expr))
{
case POINTER_PLUS_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
break;
case ADDR_EXPR:
return (!DECL_P (TREE_OPERAND (expr, 0)));
default:
return false;
}
return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
bool no_overflow = false)
{
tree expr = base;
struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
sizeof (struct iv));
gcc_assert (step != NULL_TREE);
/* Lower address expression in base except ones with DECL_P as operand.
By doing this:
1) More accurate cost can be computed for address expressions;
2) Duplicate candidates won't be created for bases in different
forms, like &a[0] and &a. */
STRIP_NOPS (expr);
if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
|| contain_complex_addr_expr (expr))
{
aff_tree comb;
tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
}
iv->base = base;
iv->base_object = determine_base_object (data, base);
iv->step = step;
iv->biv_p = false;
iv->nonlin_use = NULL;
iv->ssa_name = NULL_TREE;
if (!no_overflow
&& !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
base, step))
no_overflow = true;
iv->no_overflow = no_overflow;
iv->have_address_use = false;
return iv;
}
/* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
doesn't overflow. */
static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
bool no_overflow)
{
struct version_info *info = name_info (data, iv);
gcc_assert (!info->iv);
bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
info->iv = alloc_iv (data, base, step, no_overflow);
info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR. */
static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
basic_block bb;
tree type = TREE_TYPE (var);
if (!POINTER_TYPE_P (type)
&& !INTEGRAL_TYPE_P (type))
return NULL;
if (!name_info (data, var)->iv)
{
bb = gimple_bb (SSA_NAME_DEF_STMT (var));
if (!bb
|| !flow_bb_inside_loop_p (data->current_loop, bb))
set_iv (data, var, var, build_int_cst (type, 0), true);
}
return name_info (data, var)->iv;
}
/* Return the first non-invariant ssa var found in EXPR. */
static tree
extract_single_var_from_expr (tree expr)
{
int i, n;
tree tmp;
enum tree_code code;
if (!expr || is_gimple_min_invariant (expr))
return NULL;
code = TREE_CODE (expr);
if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
{
n = TREE_OPERAND_LENGTH (expr);
for (i = 0; i < n; i++)
{
tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
if (tmp)
return tmp;
}
}
return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs. */
static bool
find_bivs (struct ivopts_data *data)
{
gphi *phi;
affine_iv iv;
tree step, type, base, stop;
bool found = false;
struct loop *loop = data->current_loop;
gphi_iterator psi;
for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
{
phi = psi.phi ();
if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
continue;
if (virtual_operand_p (PHI_RESULT (phi)))
continue;
if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
continue;
if (integer_zerop (iv.step))
continue;
step = iv.step;
base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
/* Stop expanding iv base at the first ssa var referred by iv step.
Ideally we should stop at any ssa var, because that's expensive
and unusual to happen, we just do it on the first one.
See PR64705 for the rationale. */
stop = extract_single_var_from_expr (step);
base = expand_simple_operations (base, stop);
if (contains_abnormal_ssa_name_p (base)
|| contains_abnormal_ssa_name_p (step))
continue;
type = TREE_TYPE (PHI_RESULT (phi));
base = fold_convert (type, base);
if (step)
{
if (POINTER_TYPE_P (type))
step = convert_to_ptrofftype (step);
else
step = fold_convert (type, step);
}
set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
found = true;
}
return found;
}
/* Marks basic ivs. */
static void
mark_bivs (struct ivopts_data *data)
{
gphi *phi;
gimple *def;
tree var;
struct iv *iv, *incr_iv;
struct loop *loop = data->current_loop;
basic_block incr_bb;
gphi_iterator psi;
data->bivs_not_used_in_addr = 0;
for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
{
phi = psi.phi ();
iv = get_iv (data, PHI_RESULT (phi));
if (!iv)
continue;
var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
def = SSA_NAME_DEF_STMT (var);
/* Don't mark iv peeled from other one as biv. */
if (def
&& gimple_code (def) == GIMPLE_PHI
&& gimple_bb (def) == loop->header)
continue;
incr_iv = get_iv (data, var);
if (!incr_iv)
continue;
/* If the increment is in the subloop, ignore it. */
incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
if (incr_bb->loop_father != data->current_loop
|| (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
continue;
iv->biv_p = true;
incr_iv->biv_p = true;
if (iv->no_overflow)
data->bivs_not_used_in_addr++;
if (incr_iv->no_overflow)
data->bivs_not_used_in_addr++;
}
}
/* Checks whether STMT defines a linear induction variable and stores its
parameters to IV. */
static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
tree lhs, stop;
struct loop *loop = data->current_loop;
iv->base = NULL_TREE;
iv->step = NULL_TREE;
if (gimple_code (stmt) != GIMPLE_ASSIGN)
return false;
lhs = gimple_assign_lhs (stmt);
if (TREE_CODE (lhs) != SSA_NAME)
return false;
if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
return false;
/* Stop expanding iv base at the first ssa var referred by iv step.
Ideally we should stop at any ssa var, because that's expensive
and unusual to happen, we just do it on the first one.
See PR64705 for the rationale. */
stop = extract_single_var_from_expr (iv->step);
iv->base = expand_simple_operations (iv->base, stop);
if (contains_abnormal_ssa_name_p (iv->base)
|| contains_abnormal_ssa_name_p (iv->step))
return false;
/* If STMT could throw, then do not consider STMT as defining a GIV.
While this will suppress optimizations, we cannot safely delete this
GIV and associated statements, even if it appears it is not used. */
if (stmt_could_throw_p (cfun, stmt))
return false;
return true;
}
/* Finds general ivs in statement STMT. */
static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
affine_iv iv;
if (!find_givs_in_stmt_scev (data, stmt, &iv))
return;
set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}
/* Finds general ivs in basic block BB. */
static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
gimple_stmt_iterator bsi;
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs. */
static void
find_givs (struct ivopts_data *data)
{
struct loop *loop = data->current_loop;
basic_block *body = get_loop_body_in_dom_order (loop);
unsigned i;
for (i = 0; i < loop->num_nodes; i++)
find_givs_in_bb (data, body[i]);
free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
variable and if so, its initial value and step. */
static bool
find_induction_variables (struct ivopts_data *data)
{
unsigned i;
bitmap_iterator bi;
if (!find_bivs (data))
return false;
find_givs (data);
mark_bivs (data);
if (dump_file && (dump_flags & TDF_DETAILS))
{
struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
if (niter)
{
fprintf (dump_file, " number of iterations ");
print_generic_expr (dump_file, niter->niter, TDF_SLIM);
if (!integer_zerop (niter->may_be_zero))
{
fprintf (dump_file, "; zero if ");
print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
}
fprintf (dump_file, "\n");
};
fprintf (dump_file, "\n<Induction Vars>:\n");
EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
{
struct version_info *info = ver_info (data, i);
if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
}
}
return true;
}
/* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
is the const offset stripped from IV base and MEM_TYPE is the type
of the memory being addressed. For uses of other types, ADDR_BASE
and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
static struct iv_use *
record_use (struct iv_group *group, tree *use_p, struct iv *iv,
gimple *stmt, enum use_type type, tree mem_type,
tree addr_base, poly_uint64 addr_offset)
{
struct iv_use *use = XCNEW (struct iv_use);
use->id = group->vuses.length ();
use->group_id = group->id;
use->type = type;
use->mem_type = mem_type;
use->iv = iv;
use->stmt = stmt;
use->op_p = use_p;
use->addr_base = addr_base;
use->addr_offset = addr_offset;
group->vuses.safe_push (use);
return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
NONLINEAR_USE is true if the invariant is used in a way we do not
handle specially. */
static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
basic_block bb;
struct version_info *info;
if (TREE_CODE (op) != SSA_NAME
|| virtual_operand_p (op))
return;
bb = gimple_bb (SSA_NAME_DEF_STMT (op));
if (bb
&& flow_bb_inside_loop_p (data->current_loop, bb))
return;
info = name_info (data, op);
info->name = op;
info->has_nonlin_use |= nonlinear_use;
if (!info->inv_id)
info->inv_id = ++data->max_inv_var_id;
bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Record a group of TYPE. */
static struct iv_group *
record_group (struct ivopts_data *data, enum use_type type)
{
struct iv_group *group = XCNEW (struct iv_group);
group->id = data->vgroups.length ();
group->type = type;
group->related_cands = BITMAP_ALLOC (NULL);
group->vuses.create (1);
data->vgroups.safe_push (group);
return group;
}
/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
New group will be created if there is no existing group for the use.
MEM_TYPE is the type of memory being addressed, or NULL if this
isn't an address reference. */
static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
struct iv *iv, gimple *stmt, enum use_type type,
tree mem_type)
{
tree addr_base = NULL;
struct iv_group *group = NULL;
poly_uint64 addr_offset = 0;
/* Record non address type use in a new group. */
if (address_p (type))
{
unsigned int i;
addr_base = strip_offset (iv->base, &addr_offset);
for (i = 0; i < data->vgroups.length (); i++)
{
struct iv_use *use;
group = data->vgroups[i];
use = group->vuses[0];
if (!address_p (use->type))
continue;
/* Check if it has the same stripped base and step. */
if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
&& operand_equal_p (iv->step, use->iv->step, 0)
&& operand_equal_p (addr_base, use->addr_base, 0))
break;
}
if (i == data->vgroups.length ())
group = NULL;
}
if (!group)
group = record_group (data, type);
return record_use (group, use_p, iv, stmt, type, mem_type,
addr_base, addr_offset);
}
/* Checks whether the use OP is interesting and if so, records it. */
static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
struct iv *iv;
gimple *stmt;
struct iv_use *use;
if (TREE_CODE (op) != SSA_NAME)
return NULL;
iv = get_iv (data, op);
if (!iv)
return NULL;
if (iv->nonlin_use)
{
gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
return iv->nonlin_use;
}
if (integer_zerop (iv->step))
{
record_invariant (data, op, true);
return NULL;
}
stmt = SSA_NAME_DEF_STMT (op);
gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
iv->nonlin_use = use;
return use;
}
/* Indicate how compare type iv_use can be handled. */
enum comp_iv_rewrite
{
COMP_IV_NA,
/* We may rewrite compare type iv_use by expressing value of the iv_use. */
COMP_IV_EXPR,
/* We may rewrite compare type iv_uses on both sides of comparison by
expressing value of each iv_use. */
COMP_IV_EXPR_2,
/* We may rewrite compare type iv_use by expressing value of the iv_use
or by eliminating it with other iv_cand. */
COMP_IV_ELIM
};
/* Given a condition in statement STMT, checks whether it is a compare
of an induction variable and an invariant. If this is the case,
CONTROL_VAR is set to location of the iv, BOUND to the location of
the invariant, IV_VAR and IV_BOUND are set to the corresponding
induction variable descriptions, and true is returned. If this is not
the case, CONTROL_VAR and BOUND are set to the arguments of the
condition and false is returned. */
static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
tree **control_var, tree **bound,
struct iv **iv_var, struct iv **iv_bound)
{
/* The objects returned when COND has constant operands. */
static struct iv const_iv;
static tree zero;
tree *op0 = &zero, *op1 = &zero;
struct iv *iv0 = &const_iv, *iv1 = &const_iv;
enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
if (gimple_code (stmt) == GIMPLE_COND)
{
gcond *cond_stmt = as_a <gcond *> (stmt);
op0 = gimple_cond_lhs_ptr (cond_stmt);
op1 = gimple_cond_rhs_ptr (cond_stmt);
}
else
{
op0 = gimple_assign_rhs1_ptr (stmt);
op1 = gimple_assign_rhs2_ptr (stmt);
}
zero = integer_zero_node;
const_iv.step = integer_zero_node;
if (TREE_CODE (*op0) == SSA_NAME)
iv0 = get_iv (data, *op0);
if (TREE_CODE (*op1) == SSA_NAME)
iv1 = get_iv (data, *op1);
/* If both sides of comparison are IVs. We can express ivs on both end. */
if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
{
rewrite_type = COMP_IV_EXPR_2;
goto end;
}
/* If none side of comparison is IV. */
if ((!iv0 || integer_zerop (iv0->step))
&& (!iv1 || integer_zerop (iv1->step)))
goto end;
/* Control variable may be on the other side. */
if (!iv0 || integer_zerop (iv0->step))
{
std::swap (op0, op1);
std::swap (iv0, iv1);
}
/* If one side is IV and the other side isn't loop invariant. */
if (!iv1)
rewrite_type = COMP_IV_EXPR;
/* If one side is IV and the other side is loop invariant. */
else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
rewrite_type = COMP_IV_ELIM;
end:
if (control_var)
*control_var = op0;
if (iv_var)
*iv_var = iv0;
if (bound)
*bound = op1;
if (iv_bound)
*iv_bound = iv1;
return rewrite_type;
}
/* Checks whether the condition in STMT is interesting and if so,
records it. */
static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
tree *var_p, *bound_p;
struct iv *var_iv, *bound_iv;
enum comp_iv_rewrite ret;
ret = extract_cond_operands (data, stmt,
&var_p, &bound_p, &var_iv, &bound_iv);
if (ret == COMP_IV_NA)
{
find_interesting_uses_op (data, *var_p);
find_interesting_uses_op (data, *bound_p);
return;
}
record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
/* Record compare type iv_use for iv on the other side of comparison. */
if (ret == COMP_IV_EXPR_2)
record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
}
/* Returns the outermost loop EXPR is obviously invariant in
relative to the loop LOOP, i.e. if all its operands are defined
outside of the returned loop. Returns NULL if EXPR is not
even obviously invariant in LOOP. */
struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
basic_block def_bb;
unsigned i, len;
if (is_gimple_min_invariant (expr))
return current_loops->tree_root;
if (TREE_CODE (expr) == SSA_NAME)
{
def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
if (def_bb)
{
if (flow_bb_inside_loop_p (loop, def_bb))
return NULL;
return superloop_at_depth (loop,
loop_depth (def_bb->loop_father) + 1);
}
return current_loops->tree_root;
}
if (!EXPR_P (expr))
return NULL;
unsigned maxdepth = 0;
len = TREE_OPERAND_LENGTH (expr);
for (i = 0; i < len; i++)
{
struct loop *ivloop;
if (!TREE_OPERAND (expr, i))
continue;
ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
if (!ivloop)
return NULL;
maxdepth = MAX (maxdepth, loop_depth (ivloop));
}
return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
i.e. if all its operands are defined outside of the LOOP. LOOP
should not be the function body. */
bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
basic_block def_bb;
unsigned i, len;
gcc_assert (loop_depth (loop) > 0);
if (is_gimple_min_invariant (expr))
return true;
if (TREE_CODE (expr) == SSA_NAME)
{
def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
if (def_bb
&& flow_bb_inside_loop_p (loop, def_bb))
return false;
return true;
}
if (!EXPR_P (expr))
return false;
len = TREE_OPERAND_LENGTH (expr);
for (i = 0; i < len; i++)
if (TREE_OPERAND (expr, i)
&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
return false;
return true;
}
/* Given expression EXPR which computes inductive values with respect
to loop recorded in DATA, this function returns biv from which EXPR
is derived by tracing definition chains of ssa variables in EXPR. */
static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
struct iv *iv;
unsigned i, n;
tree e2, e1;
enum tree_code code;
gimple *stmt;
if (expr == NULL_TREE)
return NULL;
if (is_gimple_min_invariant (expr))
return NULL;
code = TREE_CODE (expr);
if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
{
n = TREE_OPERAND_LENGTH (expr);
for (i = 0; i < n; i++)
{
iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
if (iv)
return iv;
}
}
/* Stop if it's not ssa name. */
if (code != SSA_NAME)
return NULL;
iv = get_iv (data, expr);
if (!iv || integer_zerop (iv->step))
return NULL;
else if (iv->biv_p)
return iv;
stmt = SSA_NAME_DEF_STMT (expr);
if (gphi *phi = dyn_cast <gphi *> (stmt))
{
ssa_op_iter iter;
use_operand_p use_p;
basic_block phi_bb = gimple_bb (phi);
/* Skip loop header PHI that doesn't define biv. */
if (phi_bb->loop_father == data->current_loop)
return NULL;
if (virtual_operand_p (gimple_phi_result (phi)))
return NULL;
FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
{
tree use = USE_FROM_PTR (use_p);
iv = find_deriving_biv_for_expr (data, use);
if (iv)
return iv;
}
return NULL;
}
if (gimple_code (stmt) != GIMPLE_ASSIGN)
return NULL;
e1 = gimple_assign_rhs1 (stmt);
code = gimple_assign_rhs_code (stmt);
if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
return find_deriving_biv_for_expr (data, e1);
switch (code)
{
case MULT_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
case POINTER_PLUS_EXPR:
/* Increments, decrements and multiplications by a constant
are simple. */
e2 = gimple_assign_rhs2 (stmt);
iv = find_deriving_biv_for_expr (data, e2);
if (iv)
return iv;
gcc_fallthrough ();
CASE_CONVERT:
/* Casts are simple. */
return find_deriving_biv_for_expr (data, e1);
default:
break;
}
return NULL;
}
/* Record BIV, its predecessor and successor that they are used in
address type uses. */
static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
unsigned i;
tree type, base_1, base_2;
bitmap_iterator bi;
if (!biv || !biv->biv_p || integer_zerop (biv->step)
|| biv->have_address_use || !biv->no_overflow)
return;
type = TREE_TYPE (biv->base);
if (!INTEGRAL_TYPE_P (type))
return;
biv->have_address_use = true;
data->bivs_not_used_in_addr--;
base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
{
struct iv *iv = ver_info (data, i)->iv;
if (!iv || !iv->biv_p || integer_zerop (iv->step)
|| iv->have_address_use || !iv->no_overflow)
continue;
if (type != TREE_TYPE (iv->base)
|| !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
continue;
if (!operand_equal_p (biv->step, iv->step, 0))
continue;
base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
if (operand_equal_p (base_1, iv->base, 0)
|| operand_equal_p (base_2, biv->base, 0))
{
iv->have_address_use = true;
data->bivs_not_used_in_addr--;
}
}
}
/* Cumulates the steps of indices into DATA and replaces their values with the
initial ones. Returns false when the value of the index cannot be determined.
Callback for for_each_index. */
struct ifs_ivopts_data
{
struct ivopts_data *ivopts_data;
gimple *stmt;
tree step;
};
static bool
idx_find_step (tree base, tree *idx, void *data)
{
struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
struct iv *iv;
bool use_overflow_semantics = false;
tree step, iv_base, iv_step, lbound, off;
struct loop *loop = dta->ivopts_data->current_loop;
/* If base is a component ref, require that the offset of the reference
be invariant. */
if (TREE_CODE (base) == COMPONENT_REF)
{
off = component_ref_field_offset (base);
return expr_invariant_in_loop_p (loop, off);
}
/* If base is array, first check whether we will be able to move the
reference out of the loop (in order to take its address in strength
reduction). In order for this to work we need both lower bound
and step to be loop invariants. */
if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
{
/* Moreover, for a range, the size needs to be invariant as well. */
if (TREE_CODE (base) == ARRAY_RANGE_REF
&& !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
return false;
step = array_ref_element_size (base);
lbound = array_ref_low_bound (base);
if (!expr_invariant_in_loop_p (loop, step)
|| !expr_invariant_in_loop_p (loop, lbound))
return false;
}
if (TREE_CODE (*idx) != SSA_NAME)
return true;
iv = get_iv (dta->ivopts_data, *idx);
if (!iv)
return false;
/* XXX We produce for a base of *D42 with iv->base being &x[0]
*&x[0], which is not folded and does not trigger the
ARRAY_REF path below. */
*idx = iv->base;
if (integer_zerop (iv->step))
return true;
if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
{
step = array_ref_element_size (base);
/* We only handle addresses whose step is an integer constant. */
if (TREE_CODE (step) != INTEGER_CST)
return false;
}
else
/* The step for pointer arithmetics already is 1 byte. */
step = size_one_node;
iv_base = iv->base;
iv_step = iv->step;
if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
use_overflow_semantics = true;
if (!convert_affine_scev (dta->ivopts_data->current_loop,
sizetype, &iv_base, &iv_step, dta->stmt,
use_overflow_semantics))
{
/* The index might wrap. */
return false;
}
step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
if (dta->ivopts_data->bivs_not_used_in_addr)
{
if (!iv->biv_p)
iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
record_biv_for_address_use (dta->ivopts_data, iv);
}
return true;
}
/* Records use in index IDX. Callback for for_each_index. Ivopts data
object is passed to it in DATA. */
static bool
idx_record_use (tree base, tree *idx,
void *vdata)
{
struct ivopts_data *data = (struct ivopts_data *) vdata;
find_interesting_uses_op (data, *idx);
if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
{
find_interesting_uses_op (data, array_ref_element_size (base));
find_interesting_uses_op (data, array_ref_low_bound (base));
}
return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
store cst to MUL and return true. Otherwise return false.
The returned value is always sign-extended, regardless of the
signedness of TOP and BOT. */
static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
tree mby;
enum tree_code code;
unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
widest_int res, p0, p1;
STRIP_NOPS (top);
STRIP_NOPS (bot);
if (operand_equal_p (top, bot, 0))
{
*mul = 1;
return true;
}
code = TREE_CODE (top);
switch (code)
{
case MULT_EXPR:
mby = TREE_OPERAND (top, 1);
if (TREE_CODE (mby) != INTEGER_CST)
return false;
if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
return false;
*mul = wi::sext (res * wi::to_widest (mby), precision);
return true;
case PLUS_EXPR:
case MINUS_EXPR:
if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
|| !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
return false;
if (code == MINUS_EXPR)
p1 = -p1;
*mul = wi::sext (p0 + p1, precision);
return true;
case INTEGER_CST:
if (TREE_CODE (bot) != INTEGER_CST)
return false;
p0 = widest_int::from (wi::to_wide (top), SIGNED);
p1 = widest_int::from (wi::to_wide (bot), SIGNED);
if (p1 == 0)
return false;
*mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
return res == 0;
default:
if (POLY_INT_CST_P (top)
&& POLY_INT_CST_P (bot)
&& constant_multiple_p (wi::to_poly_widest (top),
wi::to_poly_widest (bot), mul))
return true;
return false;
}
}
/* Return true if memory reference REF with step STEP may be unaligned. */
static bool
may_be_unaligned_p (tree ref, tree step)
{
/* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
thus they are not misaligned. */
if (TREE_CODE (ref) == TARGET_MEM_REF)
return false;
unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
unsigned HOST_WIDE_INT bitpos;
unsigned int ref_align;
get_object_alignment_1 (ref, &ref_align, &bitpos);
if (ref_align < align
|| (bitpos % align) != 0
|| (bitpos % BITS_PER_UNIT) != 0)
return true;
unsigned int trailing_zeros = tree_ctz (step);
if (trailing_zeros < HOST_BITS_PER_INT
&& (1U << trailing_zeros) * BITS_PER_UNIT < align)
return true;
return false;
}
/* Return true if EXPR may be non-addressable. */
bool
may_be_nonaddressable_p (tree expr)
{
switch (TREE_CODE (expr))
{
case VAR_DECL:
/* Check if it's a register variable. */
return DECL_HARD_REGISTER (expr);
case TARGET_MEM_REF:
/* TARGET_MEM_REFs are translated directly to valid MEMs on the
target, thus they are always addressable. */
return false;
case MEM_REF:
/* Likewise for MEM_REFs, modulo the storage order. */
return REF_REVERSE_STORAGE_ORDER (expr);
case BIT_FIELD_REF:
if (REF_REVERSE_STORAGE_ORDER (expr))
return true;
return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
case COMPONENT_REF:
if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
return true;
return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
|| may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
case ARRAY_REF:
case ARRAY_RANGE_REF:
if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
return true;
return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
case VIEW_CONVERT_EXPR:
/* This kind of view-conversions may wrap non-addressable objects
and make them look addressable. After some processing the
non-addressability may be uncovered again, causing ADDR_EXPRs
of inappropriate objects to be built. */
if (is_gimple_reg (TREE_OPERAND (expr, 0))
|| !is_gimple_addressable (TREE_OPERAND (expr, 0)))
return true;
return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
CASE_CONVERT:
return true;
default:
break;
}
return false;
}
/* Finds addresses in *OP_P inside STMT. */
static void
find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
tree *op_p)
{
tree base = *op_p, step = size_zero_node;
struct iv *civ;
struct ifs_ivopts_data ifs_ivopts_data;
/* Do not play with volatile memory references. A bit too conservative,
perhaps, but safe. */
if (gimple_has_volatile_ops (stmt))
goto fail;
/* Ignore bitfields for now. Not really something terribly complicated
to handle. TODO. */
if (TREE_CODE (base) == BIT_FIELD_REF)
goto fail;
base = unshare_expr (base);
if (TREE_CODE (base) == TARGET_MEM_REF)
{
tree type = build_pointer_type (TREE_TYPE (base));
tree astep;
if (TMR_BASE (base)
&& TREE_CODE (TMR_BASE (base)) == SSA_NAME)
{
civ = get_iv (data, TMR_BASE (base));
if (!civ)
goto fail;
TMR_BASE (base) = civ->base;
step = civ->step;
}
if (TMR_INDEX2 (base)
&& TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
{
civ = get_iv (data, TMR_INDEX2 (base));
if (!civ)
goto fail;
TMR_INDEX2 (base) = civ->base;
step = civ->step;
}
if (TMR_INDEX (base)
&& TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
{
civ = get_iv (data, TMR_INDEX (base));
if (!civ)
goto fail;
TMR_INDEX (base) = civ->base;
astep = civ->step;
if (astep)
{
if (TMR_STEP (base))
astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
step = fold_build2 (PLUS_EXPR, type, step, astep);
}
}
if (integer_zerop (step))
goto fail;
base = tree_mem_ref_addr (type, base);
}
else
{
ifs_ivopts_data.ivopts_data = data;
ifs_ivopts_data.stmt = stmt;
ifs_ivopts_data.step = size_zero_node;
if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
|| integer_zerop (ifs_ivopts_data.step))
goto fail;
step = ifs_ivopts_data.step;
/* Check that the base expression is addressable. This needs
to be done after substituting bases of IVs into it. */
if (may_be_nonaddressable_p (base))
goto fail;
/* Moreover, on strict alignment platforms, check that it is
sufficiently aligned. */
if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
goto fail;
base = build_fold_addr_expr (base);
/* Substituting bases of IVs into the base expression might
have caused folding opportunities. */
if (TREE_CODE (base) == ADDR_EXPR)
{
tree *ref = &TREE_OPERAND (base, 0);
while (handled_component_p (*ref))
ref = &TREE_OPERAND (*ref, 0);
if (TREE_CODE (*ref) == MEM_REF)
{
tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
TREE_OPERAND (*ref, 0),
TREE_OPERAND (*ref, 1));
if (tem)
*ref = tem;
}
}
}
civ = alloc_iv (data, base, step);
/* Fail if base object of this memory reference is unknown. */
if (civ->base_object == NULL_TREE)
goto fail;
record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
return;
fail:
for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT. */
static void
find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
{
ssa_op_iter iter;
use_operand_p use_p;
tree op;
FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
{
op = USE_FROM_PTR (use_p);
record_invariant (data, op, false);
}
}
/* CALL calls an internal function. If operand *OP_P will become an
address when the call is expanded, return the type of the memory
being addressed, otherwise return null. */
static tree
get_mem_type_for_internal_fn (gcall *call, tree *op_p)
{
switch (gimple_call_internal_fn (call))
{
case IFN_MASK_LOAD:
if (op_p == gimple_call_arg_ptr (call, 0))
return TREE_TYPE (gimple_call_lhs (call));
return NULL_TREE;
case IFN_MASK_STORE:
if (op_p == gimple_call_arg_ptr (call, 0))
return TREE_TYPE (gimple_call_arg (call, 3));
return NULL_TREE;
default:
return NULL_TREE;
}
}
/* IV is a (non-address) iv that describes operand *OP_P of STMT.
Return true if the operand will become an address when STMT
is expanded and record the associated address use if so. */
static bool
find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
struct iv *iv)
{
/* Fail if base object of this memory reference is unknown. */
if (iv->base_object == NULL_TREE)
return false;
tree mem_type = NULL_TREE;
if (gcall *call = dyn_cast <gcall *> (stmt))
if (gimple_call_internal_p (call))
mem_type = get_mem_type_for_internal_fn (call, op_p);
if (mem_type)
{
iv = alloc_iv (data, iv->base, iv->step);
record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
return true;
}
return false;
}
/* Finds interesting uses of induction variables in the statement STMT. */
static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
{
struct iv *iv;
tree op, *lhs, *rhs;
ssa_op_iter iter;
use_operand_p use_p;
enum tree_code code;
find_invariants_stmt (data, stmt);
if (gimple_code (stmt) == GIMPLE_COND)
{
find_interesting_uses_cond (data, stmt);
return;
}
if (is_gimple_assign (stmt))
{
lhs = gimple_assign_lhs_ptr (stmt);
rhs = gimple_assign_rhs1_ptr (stmt);
if (TREE_CODE (*lhs) == SSA_NAME)
{
/* If the statement defines an induction variable, the uses are not
interesting by themselves. */
iv = get_iv (data, *lhs);
if (iv && !integer_zerop (iv->step))
return;
}
code = gimple_assign_rhs_code (stmt);
if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
&& (REFERENCE_CLASS_P (*rhs)
|| is_gimple_val (*rhs)))
{
if (REFERENCE_CLASS_P (*rhs))
find_interesting_uses_address (data, stmt, rhs);
else
find_interesting_uses_op (data, *rhs);
if (REFERENCE_CLASS_P (*lhs))
find_interesting_uses_address (data, stmt, lhs);
return;
}
else if (TREE_CODE_CLASS (code) == tcc_comparison)
{
find_interesting_uses_cond (data, stmt);
return;
}
/* TODO -- we should also handle address uses of type
memory = call (whatever);
and
call (memory). */
}
if (gimple_code (stmt) == GIMPLE_PHI
&& gimple_bb (stmt) == data->current_loop->header)
{
iv = get_iv (data, PHI_RESULT (stmt));
if (iv && !integer_zerop (iv->step))
return;
}
FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
{
op = USE_FROM_PTR (use_p);
if (TREE_CODE (op) != SSA_NAME)
continue;
iv = get_iv (data, op);
if (!iv)
continue;
if (!find_address_like_use (data, stmt, use_p->use, iv))
find_interesting_uses_op (data, op);
}
}
/* Finds interesting uses of induction variables outside of loops
on loop exit edge EXIT. */
static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
gphi *phi;
gphi_iterator psi;
tree def;
for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
{
phi = psi.phi ();
def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
if (!virtual_operand_p (def))
find_interesting_uses_op (data, def);
}
}
/* Return TRUE if OFFSET is within the range of [base + offset] addressing
mode for memory reference represented by USE. */
static GTY (()) vec<rtx, va_gc> *addr_list;
static bool
addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
{
rtx reg, addr;
unsigned list_index;
addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
if (list_index >= vec_safe_length (addr_list))
vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
addr = (*addr_list)[list_index];
if (!addr)
{
addr_mode = targetm.addr_space.address_mode (as);
reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
(*addr_list)[list_index] = addr;
}
else
addr_mode = GET_MODE (addr);
XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
return (memory_address_addr_space_p (mem_mode, addr, as));
}
/* Comparison function to sort group in ascending order of addr_offset. */
static int
group_compare_offset (const void *a, const void *b)
{
const struct iv_use *const *u1 = (const struct iv_use *const *) a;
const struct iv_use *const *u2 = (const struct iv_use *const *) b;
return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
}
/* Check if small groups should be split. Return true if no group
contains more than two uses with distinct addr_offsets. Return
false otherwise. We want to split such groups because:
1) Small groups don't have much benefit and may interfer with
general candidate selection.
2) Size for problem with only small groups is usually small and
general algorithm can handle it well.
TODO -- Above claim may not hold when we want to merge memory
accesses with conseuctive addresses. */
static bool
split_small_address_groups_p (struct ivopts_data *data)
{
unsigned int i, j, distinct = 1;
struct iv_use *pre;
struct iv_group *group;
for (i = 0; i < data->vgroups.length (); i++)
{
group = data->vgroups[i];
if (group->vuses.length () == 1)
continue;
gcc_assert (address_p (group->type));
if (group->vuses.length () == 2)
{
if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
group->vuses[1]->addr_offset) > 0)
std::swap (group->vuses[0], group->vuses[1]);
}
else
group->vuses.qsort (group_compare_offset);
if (distinct > 2)
continue;
distinct = 1;
for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
{
if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
{
pre = group->vuses[j];
distinct++;
}
if (distinct > 2)
break;
}
}
return (distinct <= 2);
}
/* For each group of address type uses, this function further groups
these uses according to the maximum offset supported by target's
[base + offset] addressing mode. */
static void
split_address_groups (struct ivopts_data *data)
{
unsigned int i, j;
/* Always split group. */
bool split_p = split_small_address_groups_p (data);
for (i = 0; i < data->vgroups.length (); i++)
{
struct iv_group *new_group = NULL;
struct iv_group *group = data->vgroups[i];
struct iv_use *use = group->vuses[0];
use->id = 0;
use->group_id = group->id;
if (group->vuses.length () == 1)
continue;
gcc_assert (address_p (use->type));
for (j = 1; j < group->vuses.length ();)
{
struct iv_use *next = group->vuses[j];
poly_int64 offset = next->addr_offset - use->addr_offset;
/* Split group if aksed to, or the offset against the first
use can't fit in offset part of addressing mode. IV uses
having the same offset are still kept in one group. */
if (maybe_ne (offset, 0)
&& (split_p || !addr_offset_valid_p (use, offset)))
{
if (!new_group)
new_group = record_group (data, group->type);
group->vuses.ordered_remove (j);
new_group->vuses.safe_push (next);
continue;
}
next->id = j;
next->group_id = group->id;
j++;
}
}
}
/* Finds uses of the induction variables that are interesting. */
static void
find_interesting_uses (struct ivopts_data *data)
{
basic_block bb;
gimple_stmt_iterator bsi;
basic_block *body = get_loop_body (data->current_loop);
unsigned i;
edge e;
for (i = 0; i < data->current_loop->num_nodes; i++)
{
edge_iterator ei;
bb = body[i];
FOR_EACH_EDGE (e, ei, bb->succs)
if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
&& !flow_bb_inside_loop_p (data->current_loop, e->dest))
find_interesting_uses_outside (data, e);
for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
find_interesting_uses_stmt (data, gsi_stmt (bsi));
for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
if (!is_gimple_debug (gsi_stmt (bsi)))
find_interesting_uses_stmt (data, gsi_stmt (bsi));
}
free (body);
split_address_groups (data);
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "\n<IV Groups>:\n");
dump_groups (dump_file, data);
fprintf (dump_file, "\n");
}
}
/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
is true, assume we are inside an address. If TOP_COMPREF is true, assume
we are at the top-level of the processed address. */
static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
poly_int64 *offset)
{
tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
enum tree_code code;
tree type, orig_type = TREE_TYPE (expr);
poly_int64 off0, off1;
HOST_WIDE_INT st;
tree orig_expr = expr;
STRIP_NOPS (expr);
type = TREE_TYPE (expr);
code = TREE_CODE (expr);
*offset = 0;
switch (code)
{
case POINTER_PLUS_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
op0 = TREE_OPERAND (expr, 0);
op1 = TREE_OPERAND (expr, 1);
op0 = strip_offset_1 (op0, false, false, &off0);
op1 = strip_offset_1 (op1, false, false, &off1);
*offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
if (op0 == TREE_OPERAND (expr, 0)
&& op1 == TREE_OPERAND (expr, 1))
return orig_expr;
if (integer_zerop (op1))
expr = op0;
else if (integer_zerop (op0))
{
if (code == MINUS_EXPR)
expr = fold_build1 (NEGATE_EXPR, type, op1);
else
expr = op1;
}
else
expr = fold_build2 (code, type, op0, op1);
return fold_convert (orig_type, expr);
case MULT_EXPR:
op1 = TREE_OPERAND (expr, 1);
if (!cst_and_fits_in_hwi (op1))
return orig_expr;
op0 = TREE_OPERAND (expr, 0);
op0 = strip_offset_1 (op0, false, false, &off0);
if (op0 == TREE_OPERAND (expr, 0))
return orig_expr;
*offset = off0 * int_cst_value (op1);
if (integer_zerop (op0))
expr = op0;
else
expr = fold_build2 (MULT_EXPR, type, op0, op1);
return fold_convert (orig_type, expr);
case ARRAY_REF:
case ARRAY_RANGE_REF:
if (!inside_addr)
return orig_expr;
step = array_ref_element_size (expr);
if (!cst_and_fits_in_hwi (step))
break;
st = int_cst_value (step);
op1 = TREE_OPERAND (expr, 1);
op1 = strip_offset_1 (op1, false, false, &off1);
*offset = off1 * st;
if (top_compref
&& integer_zerop (op1))
{
/* Strip the component reference completely. */
op0 = TREE_OPERAND (expr, 0);
op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
*offset += off0;
return op0;
}
break;
case COMPONENT_REF:
{
tree field;
if (!inside_addr)
return orig_expr;
tmp = component_ref_field_offset (expr);
field = TREE_OPERAND (expr, 1);
if (top_compref
&& cst_and_fits_in_hwi (tmp)
&& cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
{
HOST_WIDE_INT boffset, abs_off;
/* Strip the component reference completely. */
op0 = TREE_OPERAND (expr, 0);
op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
if (boffset < 0)
abs_off = -abs_off;
*offset = off0 + int_cst_value (tmp) + abs_off;
return op0;
}
}
break;
case ADDR_EXPR:
op0 = TREE_OPERAND (expr, 0);
op0 = strip_offset_1 (op0, true, true, &off0);
*offset += off0;
if (op0 == TREE_OPERAND (expr, 0))
return orig_expr;
expr = build_fold_addr_expr (op0);
return fold_convert (orig_type, expr);
case MEM_REF:
/* ??? Offset operand? */
inside_addr = false;
break;
default:
if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
return build_int_cst (orig_type, 0);
return orig_expr;
}
/* Default handling of expressions for that we want to recurse into
the first operand. */
op0 = TREE_OPERAND (expr, 0);
op0 = strip_offset_1 (op0, inside_addr, false, &off0);
*offset += off0;
if (op0 == TREE_OPERAND (expr, 0)
&& (!op1 || op1 == TREE_OPERAND (expr, 1)))
return orig_expr;
expr = copy_node (expr);
TREE_OPERAND (expr, 0) = op0;
if (op1)
TREE_OPERAND (expr, 1) = op1;
/* Inside address, we might strip the top level component references,
thus changing type of the expression. Handling of ADDR_EXPR
will fix that. */
expr = fold_convert (orig_type, expr);
return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET. */
tree
strip_offset (tree expr, poly_uint64_pod *offset)
{
poly_int64 off;
tree core = strip_offset_1 (expr, false, false, &off);
*offset = off;
return core;
}
/* Returns variant of TYPE that can be used as base for different uses.
We return unsigned type with the same precision, which avoids problems
with overflows. */
static tree
generic_type_for (tree type)
{
if (POINTER_TYPE_P (type))
return unsigned_type_for (type);
if (TYPE_UNSIGNED (type))
return type;
return unsigned_type_for (type);
}
/* Private data for walk_tree. */
struct walk_tree_data
{
bitmap *inv_vars;
struct ivopts_data *idata;
};
/* Callback function for walk_tree, it records invariants and symbol
reference in *EXPR_P. DATA is the structure storing result info. */
static tree
find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
tree op = *expr_p;
struct version_info *info;
struct walk_tree_data *wdata = (struct walk_tree_data*) data;
if (TREE_CODE (op) != SSA_NAME)
return NULL_TREE;
info = name_info (wdata->idata, op);
/* Because we expand simple operations when finding IVs, loop invariant
variable that isn't referred by the original loop could be used now.
Record such invariant variables here. */
if (!info->iv)
{
struct ivopts_data *idata = wdata->idata;
basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
{
set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
record_invariant (idata, op, false);
}
}
if (!info->inv_id || info->has_nonlin_use)
return NULL_TREE;
if (!*wdata->inv_vars)
*wdata->inv_vars = BITMAP_ALLOC (NULL);
bitmap_set_bit (*wdata->inv_vars, info->inv_id);
return NULL_TREE;
}
/* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
store it. */
static inline void
find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
{
struct walk_tree_data wdata;
if (!inv_vars)
return;
wdata.idata = data;
wdata.inv_vars = inv_vars;
walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
}
/* Get entry from invariant expr hash table for INV_EXPR. New entry
will be recorded if it doesn't exist yet. Given below two exprs:
inv_expr + cst1, inv_expr + cst2
It's hard to make decision whether constant part should be stripped
or not. We choose to not strip based on below facts:
1) We need to count ADD cost for constant part if it's stripped,
which isn't always trivial where this functions is called.
2) Stripping constant away may be conflict with following loop
invariant hoisting pass.
3) Not stripping constant away results in more invariant exprs,
which usually leads to decision preferring lower reg pressure. */
static iv_inv_expr_ent *
get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
{
STRIP_NOPS (inv_expr);
if (poly_int_tree_p (inv_expr)
|| TREE_CODE (inv_expr) == SSA_NAME)
return NULL;
/* Don't strip constant part away as we used to. */
/* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
struct iv_inv_expr_ent ent;
ent.expr = inv_expr;
ent.hash = iterative_hash_expr (inv_expr, 0);
struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
if (!*slot)
{
*slot = XNEW (struct iv_inv_expr_ent);
(*slot)->expr = inv_expr;
(*slot)->hash = ent.hash;
(*slot)->id = ++data->max_inv_expr_id;
}
return *slot;
}
/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
position to POS. If USE is not NULL, the candidate is set as related to
it. If both BASE and STEP are NULL, we add a pseudocandidate for the
replacement of the final value of the iv by a direct computation. */
static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
tree base, tree step, bool important, enum iv_position pos,
struct iv_use *use, gimple *incremented_at,
struct iv *orig_iv = NULL)
{
unsigned i;
struct iv_cand *cand = NULL;
tree type, orig_type;
gcc_assert (base && step);
/* -fkeep-gc-roots-live means that we have to keep a real pointer
live, but the ivopts code may replace a real pointer with one
pointing before or after the memory block that is then adjusted
into the memory block during the loop. FIXME: It would likely be
better to actually force the pointer live and still use ivopts;
for example, it would be enough to write the pointer into memory
and keep it there until after the loop. */
if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
return NULL;
/* For non-original variables, make sure their values are computed in a type
that does not invoke undefined behavior on overflows (since in general,
we cannot prove that these induction variables are non-wrapping). */
if (pos != IP_ORIGINAL)
{
orig_type = TREE_TYPE (base);
type = generic_type_for (orig_type);
if (type != orig_type)
{
base = fold_convert (type, base);
step = fold_convert (type, step);
}
}
for (i = 0; i < data->vcands.length (); i++)
{
cand = data->vcands[i];
if (cand->pos != pos)
continue;
if (cand->incremented_at != incremented_at
|| ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
&& cand->ainc_use != use))
continue;
if (operand_equal_p (base, cand->iv->base, 0)
&& operand_equal_p (step, cand->iv->step, 0)
&& (TYPE_PRECISION (TREE_TYPE (base))
== TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
break;
}
if (i == data->vcands.length ())
{
cand = XCNEW (struct iv_cand);
cand->id = i;
cand->iv = alloc_iv (data, base, step);
cand->pos = pos;
if (pos != IP_ORIGINAL)
{
cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
cand->var_after = cand->var_before;
}
cand->important = important;
cand->incremented_at = incremented_at;
data->vcands.safe_push (cand);
if (!poly_int_tree_p (step))
{
find_inv_vars (data, &step, &cand->inv_vars);
iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
/* Share bitmap between inv_vars and inv_exprs for cand. */
if (inv_expr != NULL)
{
cand->inv_exprs = cand->inv_vars;
cand->inv_vars = NULL;
if (cand->inv_exprs)
bitmap_clear (cand->inv_exprs);
else
cand->inv_exprs = BITMAP_ALLOC (NULL);
bitmap_set_bit (cand->inv_exprs, inv_expr->id);
}
}
if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
cand->ainc_use = use;
else
cand->ainc_use = NULL;
cand->orig_iv = orig_iv;
if (dump_file && (dump_flags & TDF_DETAILS))
dump_cand (dump_file, cand);
}
cand->important |= important;
/* Relate candidate to the group for which it is added. */
if (use)
bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
is allowed.
The purpose is to avoid splitting latch edge with a biv increment, thus
creating a jump, possibly confusing other optimization passes and leaving
less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
available (so we do not have a better alternative), or if the latch edge
is already nonempty. */
static bool
allow_ip_end_pos_p (struct loop *loop)
{
if (!ip_normal_pos (loop))
return true;
if (!empty_block_p (ip_end_pos (loop)))
return true;
return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.