| /* Scalar evolution detector. |
| Copyright (C) 2003-2021 Free Software Foundation, Inc. |
| Contributed by Sebastian Pop <s.pop@laposte.net> |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* |
| Description: |
| |
| This pass analyzes the evolution of scalar variables in loop |
| structures. The algorithm is based on the SSA representation, |
| and on the loop hierarchy tree. This algorithm is not based on |
| the notion of versions of a variable, as it was the case for the |
| previous implementations of the scalar evolution algorithm, but |
| it assumes that each defined name is unique. |
| |
| The notation used in this file is called "chains of recurrences", |
| and has been proposed by Eugene Zima, Robert Van Engelen, and |
| others for describing induction variables in programs. For example |
| "b -> {0, +, 2}_1" means that the scalar variable "b" is equal to 0 |
| when entering in the loop_1 and has a step 2 in this loop, in other |
| words "for (b = 0; b < N; b+=2);". Note that the coefficients of |
| this chain of recurrence (or chrec [shrek]) can contain the name of |
| other variables, in which case they are called parametric chrecs. |
| For example, "b -> {a, +, 2}_1" means that the initial value of "b" |
| is the value of "a". In most of the cases these parametric chrecs |
| are fully instantiated before their use because symbolic names can |
| hide some difficult cases such as self-references described later |
| (see the Fibonacci example). |
| |
| A short sketch of the algorithm is: |
| |
| Given a scalar variable to be analyzed, follow the SSA edge to |
| its definition: |
| |
| - When the definition is a GIMPLE_ASSIGN: if the right hand side |
| (RHS) of the definition cannot be statically analyzed, the answer |
| of the analyzer is: "don't know". |
| Otherwise, for all the variables that are not yet analyzed in the |
| RHS, try to determine their evolution, and finally try to |
| evaluate the operation of the RHS that gives the evolution |
| function of the analyzed variable. |
| |
| - When the definition is a condition-phi-node: determine the |
| evolution function for all the branches of the phi node, and |
| finally merge these evolutions (see chrec_merge). |
| |
| - When the definition is a loop-phi-node: determine its initial |
| condition, that is the SSA edge defined in an outer loop, and |
| keep it symbolic. Then determine the SSA edges that are defined |
| in the body of the loop. Follow the inner edges until ending on |
| another loop-phi-node of the same analyzed loop. If the reached |
| loop-phi-node is not the starting loop-phi-node, then we keep |
| this definition under a symbolic form. If the reached |
| loop-phi-node is the same as the starting one, then we compute a |
| symbolic stride on the return path. The result is then the |
| symbolic chrec {initial_condition, +, symbolic_stride}_loop. |
| |
| Examples: |
| |
| Example 1: Illustration of the basic algorithm. |
| |
| | a = 3 |
| | loop_1 |
| | b = phi (a, c) |
| | c = b + 1 |
| | if (c > 10) exit_loop |
| | endloop |
| |
| Suppose that we want to know the number of iterations of the |
| loop_1. The exit_loop is controlled by a COND_EXPR (c > 10). We |
| ask the scalar evolution analyzer two questions: what's the |
| scalar evolution (scev) of "c", and what's the scev of "10". For |
| "10" the answer is "10" since it is a scalar constant. For the |
| scalar variable "c", it follows the SSA edge to its definition, |
| "c = b + 1", and then asks again what's the scev of "b". |
| Following the SSA edge, we end on a loop-phi-node "b = phi (a, |
| c)", where the initial condition is "a", and the inner loop edge |
| is "c". The initial condition is kept under a symbolic form (it |
| may be the case that the copy constant propagation has done its |
| work and we end with the constant "3" as one of the edges of the |
| loop-phi-node). The update edge is followed to the end of the |
| loop, and until reaching again the starting loop-phi-node: b -> c |
| -> b. At this point we have drawn a path from "b" to "b" from |
| which we compute the stride in the loop: in this example it is |
| "+1". The resulting scev for "b" is "b -> {a, +, 1}_1". Now |
| that the scev for "b" is known, it is possible to compute the |
| scev for "c", that is "c -> {a + 1, +, 1}_1". In order to |
| determine the number of iterations in the loop_1, we have to |
| instantiate_parameters (loop_1, {a + 1, +, 1}_1), that gives after some |
| more analysis the scev {4, +, 1}_1, or in other words, this is |
| the function "f (x) = x + 4", where x is the iteration count of |
| the loop_1. Now we have to solve the inequality "x + 4 > 10", |
| and take the smallest iteration number for which the loop is |
| exited: x = 7. This loop runs from x = 0 to x = 7, and in total |
| there are 8 iterations. In terms of loop normalization, we have |
| created a variable that is implicitly defined, "x" or just "_1", |
| and all the other analyzed scalars of the loop are defined in |
| function of this variable: |
| |
| a -> 3 |
| b -> {3, +, 1}_1 |
| c -> {4, +, 1}_1 |
| |
| or in terms of a C program: |
| |
| | a = 3 |
| | for (x = 0; x <= 7; x++) |
| | { |
| | b = x + 3 |
| | c = x + 4 |
| | } |
| |
| Example 2a: Illustration of the algorithm on nested loops. |
| |
| | loop_1 |
| | a = phi (1, b) |
| | c = a + 2 |
| | loop_2 10 times |
| | b = phi (c, d) |
| | d = b + 3 |
| | endloop |
| | endloop |
| |
| For analyzing the scalar evolution of "a", the algorithm follows |
| the SSA edge into the loop's body: "a -> b". "b" is an inner |
| loop-phi-node, and its analysis as in Example 1, gives: |
| |
| b -> {c, +, 3}_2 |
| d -> {c + 3, +, 3}_2 |
| |
| Following the SSA edge for the initial condition, we end on "c = a |
| + 2", and then on the starting loop-phi-node "a". From this point, |
| the loop stride is computed: back on "c = a + 2" we get a "+2" in |
| the loop_1, then on the loop-phi-node "b" we compute the overall |
| effect of the inner loop that is "b = c + 30", and we get a "+30" |
| in the loop_1. That means that the overall stride in loop_1 is |
| equal to "+32", and the result is: |
| |
| a -> {1, +, 32}_1 |
| c -> {3, +, 32}_1 |
| |
| Example 2b: Multivariate chains of recurrences. |
| |
| | loop_1 |
| | k = phi (0, k + 1) |
| | loop_2 4 times |
| | j = phi (0, j + 1) |
| | loop_3 4 times |
| | i = phi (0, i + 1) |
| | A[j + k] = ... |
| | endloop |
| | endloop |
| | endloop |
| |
| Analyzing the access function of array A with |
| instantiate_parameters (loop_1, "j + k"), we obtain the |
| instantiation and the analysis of the scalar variables "j" and "k" |
| in loop_1. This leads to the scalar evolution {4, +, 1}_1: the end |
| value of loop_2 for "j" is 4, and the evolution of "k" in loop_1 is |
| {0, +, 1}_1. To obtain the evolution function in loop_3 and |
| instantiate the scalar variables up to loop_1, one has to use: |
| instantiate_scev (block_before_loop (loop_1), loop_3, "j + k"). |
| The result of this call is {{0, +, 1}_1, +, 1}_2. |
| |
| Example 3: Higher degree polynomials. |
| |
| | loop_1 |
| | a = phi (2, b) |
| | c = phi (5, d) |
| | b = a + 1 |
| | d = c + a |
| | endloop |
| |
| a -> {2, +, 1}_1 |
| b -> {3, +, 1}_1 |
| c -> {5, +, a}_1 |
| d -> {5 + a, +, a}_1 |
| |
| instantiate_parameters (loop_1, {5, +, a}_1) -> {5, +, 2, +, 1}_1 |
| instantiate_parameters (loop_1, {5 + a, +, a}_1) -> {7, +, 3, +, 1}_1 |
| |
| Example 4: Lucas, Fibonacci, or mixers in general. |
| |
| | loop_1 |
| | a = phi (1, b) |
| | c = phi (3, d) |
| | b = c |
| | d = c + a |
| | endloop |
| |
| a -> (1, c)_1 |
| c -> {3, +, a}_1 |
| |
| The syntax "(1, c)_1" stands for a PEELED_CHREC that has the |
| following semantics: during the first iteration of the loop_1, the |
| variable contains the value 1, and then it contains the value "c". |
| Note that this syntax is close to the syntax of the loop-phi-node: |
| "a -> (1, c)_1" vs. "a = phi (1, c)". |
| |
| The symbolic chrec representation contains all the semantics of the |
| original code. What is more difficult is to use this information. |
| |
| Example 5: Flip-flops, or exchangers. |
| |
| | loop_1 |
| | a = phi (1, b) |
| | c = phi (3, d) |
| | b = c |
| | d = a |
| | endloop |
| |
| a -> (1, c)_1 |
| c -> (3, a)_1 |
| |
| Based on these symbolic chrecs, it is possible to refine this |
| information into the more precise PERIODIC_CHRECs: |
| |
| a -> |1, 3|_1 |
| c -> |3, 1|_1 |
| |
| This transformation is not yet implemented. |
| |
| Further readings: |
| |
| You can find a more detailed description of the algorithm in: |
| http://icps.u-strasbg.fr/~pop/DEA_03_Pop.pdf |
| http://icps.u-strasbg.fr/~pop/DEA_03_Pop.ps.gz. But note that |
| this is a preliminary report and some of the details of the |
| algorithm have changed. I'm working on a research report that |
| updates the description of the algorithms to reflect the design |
| choices used in this implementation. |
| |
| A set of slides show a high level overview of the algorithm and run |
| an example through the scalar evolution analyzer: |
| http://cri.ensmp.fr/~pop/gcc/mar04/slides.pdf |
| |
| The slides that I have presented at the GCC Summit'04 are available |
| at: http://cri.ensmp.fr/~pop/gcc/20040604/gccsummit-lno-spop.pdf |
| */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "optabs-query.h" |
| #include "tree.h" |
| #include "gimple.h" |
| #include "ssa.h" |
| #include "gimple-pretty-print.h" |
| #include "fold-const.h" |
| #include "gimplify.h" |
| #include "gimple-iterator.h" |
| #include "gimplify-me.h" |
| #include "tree-cfg.h" |
| #include "tree-ssa-loop-ivopts.h" |
| #include "tree-ssa-loop-manip.h" |
| #include "tree-ssa-loop-niter.h" |
| #include "tree-ssa-loop.h" |
| #include "tree-ssa.h" |
| #include "cfgloop.h" |
| #include "tree-chrec.h" |
| #include "tree-affine.h" |
| #include "tree-scalar-evolution.h" |
| #include "dumpfile.h" |
| #include "tree-ssa-propagate.h" |
| #include "gimple-fold.h" |
| #include "tree-into-ssa.h" |
| #include "builtins.h" |
| #include "case-cfn-macros.h" |
| |
| static tree analyze_scalar_evolution_1 (class loop *, tree); |
| static tree analyze_scalar_evolution_for_address_of (class loop *loop, |
| tree var); |
| |
| /* The cached information about an SSA name with version NAME_VERSION, |
| claiming that below basic block with index INSTANTIATED_BELOW, the |
| value of the SSA name can be expressed as CHREC. */ |
| |
| struct GTY((for_user)) scev_info_str { |
| unsigned int name_version; |
| int instantiated_below; |
| tree chrec; |
| }; |
| |
| /* Counters for the scev database. */ |
| static unsigned nb_set_scev = 0; |
| static unsigned nb_get_scev = 0; |
| |
| struct scev_info_hasher : ggc_ptr_hash<scev_info_str> |
| { |
| static hashval_t hash (scev_info_str *i); |
| static bool equal (const scev_info_str *a, const scev_info_str *b); |
| }; |
| |
| static GTY (()) hash_table<scev_info_hasher> *scalar_evolution_info; |
| |
| |
| /* Constructs a new SCEV_INFO_STR structure for VAR and INSTANTIATED_BELOW. */ |
| |
| static inline struct scev_info_str * |
| new_scev_info_str (basic_block instantiated_below, tree var) |
| { |
| struct scev_info_str *res; |
| |
| res = ggc_alloc<scev_info_str> (); |
| res->name_version = SSA_NAME_VERSION (var); |
| res->chrec = chrec_not_analyzed_yet; |
| res->instantiated_below = instantiated_below->index; |
| |
| return res; |
| } |
| |
| /* Computes a hash function for database element ELT. */ |
| |
| hashval_t |
| scev_info_hasher::hash (scev_info_str *elt) |
| { |
| return elt->name_version ^ elt->instantiated_below; |
| } |
| |
| /* Compares database elements E1 and E2. */ |
| |
| bool |
| scev_info_hasher::equal (const scev_info_str *elt1, const scev_info_str *elt2) |
| { |
| return (elt1->name_version == elt2->name_version |
| && elt1->instantiated_below == elt2->instantiated_below); |
| } |
| |
| /* Get the scalar evolution of VAR for INSTANTIATED_BELOW basic block. |
| A first query on VAR returns chrec_not_analyzed_yet. */ |
| |
| static tree * |
| find_var_scev_info (basic_block instantiated_below, tree var) |
| { |
| struct scev_info_str *res; |
| struct scev_info_str tmp; |
| |
| tmp.name_version = SSA_NAME_VERSION (var); |
| tmp.instantiated_below = instantiated_below->index; |
| scev_info_str **slot = scalar_evolution_info->find_slot (&tmp, INSERT); |
| |
| if (!*slot) |
| *slot = new_scev_info_str (instantiated_below, var); |
| res = *slot; |
| |
| return &res->chrec; |
| } |
| |
| |
| /* Hashtable helpers for a temporary hash-table used when |
| analyzing a scalar evolution, instantiating a CHREC or |
| resolving mixers. */ |
| |
| class instantiate_cache_type |
| { |
| public: |
| htab_t map; |
| vec<scev_info_str> entries; |
| |
| instantiate_cache_type () : map (NULL), entries (vNULL) {} |
| ~instantiate_cache_type (); |
| tree get (unsigned slot) { return entries[slot].chrec; } |
| void set (unsigned slot, tree chrec) { entries[slot].chrec = chrec; } |
| }; |
| |
| instantiate_cache_type::~instantiate_cache_type () |
| { |
| if (map != NULL) |
| { |
| htab_delete (map); |
| entries.release (); |
| } |
| } |
| |
| /* Cache to avoid infinite recursion when instantiating an SSA name. |
| Live during the outermost analyze_scalar_evolution, instantiate_scev |
| or resolve_mixers call. */ |
| static instantiate_cache_type *global_cache; |
| |
| |
| /* Return true when PHI is a loop-phi-node. */ |
| |
| static bool |
| loop_phi_node_p (gimple *phi) |
| { |
| /* The implementation of this function is based on the following |
| property: "all the loop-phi-nodes of a loop are contained in the |
| loop's header basic block". */ |
| |
| return loop_containing_stmt (phi)->header == gimple_bb (phi); |
| } |
| |
| /* Compute the scalar evolution for EVOLUTION_FN after crossing LOOP. |
| In general, in the case of multivariate evolutions we want to get |
| the evolution in different loops. LOOP specifies the level for |
| which to get the evolution. |
| |
| Example: |
| |
| | for (j = 0; j < 100; j++) |
| | { |
| | for (k = 0; k < 100; k++) |
| | { |
| | i = k + j; - Here the value of i is a function of j, k. |
| | } |
| | ... = i - Here the value of i is a function of j. |
| | } |
| | ... = i - Here the value of i is a scalar. |
| |
| Example: |
| |
| | i_0 = ... |
| | loop_1 10 times |
| | i_1 = phi (i_0, i_2) |
| | i_2 = i_1 + 2 |
| | endloop |
| |
| This loop has the same effect as: |
| LOOP_1 has the same effect as: |
| |
| | i_1 = i_0 + 20 |
| |
| The overall effect of the loop, "i_0 + 20" in the previous example, |
| is obtained by passing in the parameters: LOOP = 1, |
| EVOLUTION_FN = {i_0, +, 2}_1. |
| */ |
| |
| tree |
| compute_overall_effect_of_inner_loop (class loop *loop, tree evolution_fn) |
| { |
| bool val = false; |
| |
| if (evolution_fn == chrec_dont_know) |
| return chrec_dont_know; |
| |
| else if (TREE_CODE (evolution_fn) == POLYNOMIAL_CHREC) |
| { |
| class loop *inner_loop = get_chrec_loop (evolution_fn); |
| |
| if (inner_loop == loop |
| || flow_loop_nested_p (loop, inner_loop)) |
| { |
| tree nb_iter = number_of_latch_executions (inner_loop); |
| |
| if (nb_iter == chrec_dont_know) |
| return chrec_dont_know; |
| else |
| { |
| tree res; |
| |
| /* evolution_fn is the evolution function in LOOP. Get |
| its value in the nb_iter-th iteration. */ |
| res = chrec_apply (inner_loop->num, evolution_fn, nb_iter); |
| |
| if (chrec_contains_symbols_defined_in_loop (res, loop->num)) |
| res = instantiate_parameters (loop, res); |
| |
| /* Continue the computation until ending on a parent of LOOP. */ |
| return compute_overall_effect_of_inner_loop (loop, res); |
| } |
| } |
| else |
| return evolution_fn; |
| } |
| |
| /* If the evolution function is an invariant, there is nothing to do. */ |
| else if (no_evolution_in_loop_p (evolution_fn, loop->num, &val) && val) |
| return evolution_fn; |
| |
| else |
| return chrec_dont_know; |
| } |
| |
| /* Associate CHREC to SCALAR. */ |
| |
| static void |
| set_scalar_evolution (basic_block instantiated_below, tree scalar, tree chrec) |
| { |
| tree *scalar_info; |
| |
| if (TREE_CODE (scalar) != SSA_NAME) |
| return; |
| |
| scalar_info = find_var_scev_info (instantiated_below, scalar); |
| |
| if (dump_file) |
| { |
| if (dump_flags & TDF_SCEV) |
| { |
| fprintf (dump_file, "(set_scalar_evolution \n"); |
| fprintf (dump_file, " instantiated_below = %d \n", |
| instantiated_below->index); |
| fprintf (dump_file, " (scalar = "); |
| print_generic_expr (dump_file, scalar); |
| fprintf (dump_file, ")\n (scalar_evolution = "); |
| print_generic_expr (dump_file, chrec); |
| fprintf (dump_file, "))\n"); |
| } |
| if (dump_flags & TDF_STATS) |
| nb_set_scev++; |
| } |
| |
| *scalar_info = chrec; |
| } |
| |
| /* Retrieve the chrec associated to SCALAR instantiated below |
| INSTANTIATED_BELOW block. */ |
| |
| static tree |
| get_scalar_evolution (basic_block instantiated_below, tree scalar) |
| { |
| tree res; |
| |
| if (dump_file) |
| { |
| if (dump_flags & TDF_SCEV) |
| { |
| fprintf (dump_file, "(get_scalar_evolution \n"); |
| fprintf (dump_file, " (scalar = "); |
| print_generic_expr (dump_file, scalar); |
| fprintf (dump_file, ")\n"); |
| } |
| if (dump_flags & TDF_STATS) |
| nb_get_scev++; |
| } |
| |
| if (VECTOR_TYPE_P (TREE_TYPE (scalar)) |
| || TREE_CODE (TREE_TYPE (scalar)) == COMPLEX_TYPE) |
| /* For chrec_dont_know we keep the symbolic form. */ |
| res = scalar; |
| else |
| switch (TREE_CODE (scalar)) |
| { |
| case SSA_NAME: |
| if (SSA_NAME_IS_DEFAULT_DEF (scalar)) |
| res = scalar; |
| else |
| res = *find_var_scev_info (instantiated_below, scalar); |
| break; |
| |
| case REAL_CST: |
| case FIXED_CST: |
| case INTEGER_CST: |
| res = scalar; |
| break; |
| |
| default: |
| res = chrec_not_analyzed_yet; |
| break; |
| } |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, " (scalar_evolution = "); |
| print_generic_expr (dump_file, res); |
| fprintf (dump_file, "))\n"); |
| } |
| |
| return res; |
| } |
| |
| /* Helper function for add_to_evolution. Returns the evolution |
| function for an assignment of the form "a = b + c", where "a" and |
| "b" are on the strongly connected component. CHREC_BEFORE is the |
| information that we already have collected up to this point. |
| TO_ADD is the evolution of "c". |
| |
| When CHREC_BEFORE has an evolution part in LOOP_NB, add to this |
| evolution the expression TO_ADD, otherwise construct an evolution |
| part for this loop. */ |
| |
| static tree |
| add_to_evolution_1 (unsigned loop_nb, tree chrec_before, tree to_add, |
| gimple *at_stmt) |
| { |
| tree type, left, right; |
| class loop *loop = get_loop (cfun, loop_nb), *chloop; |
| |
| switch (TREE_CODE (chrec_before)) |
| { |
| case POLYNOMIAL_CHREC: |
| chloop = get_chrec_loop (chrec_before); |
| if (chloop == loop |
| || flow_loop_nested_p (chloop, loop)) |
| { |
| unsigned var; |
| |
| type = chrec_type (chrec_before); |
| |
| /* When there is no evolution part in this loop, build it. */ |
| if (chloop != loop) |
| { |
| var = loop_nb; |
| left = chrec_before; |
| right = SCALAR_FLOAT_TYPE_P (type) |
| ? build_real (type, dconst0) |
| : build_int_cst (type, 0); |
| } |
| else |
| { |
| var = CHREC_VARIABLE (chrec_before); |
| left = CHREC_LEFT (chrec_before); |
| right = CHREC_RIGHT (chrec_before); |
| } |
| |
| to_add = chrec_convert (type, to_add, at_stmt); |
| right = chrec_convert_rhs (type, right, at_stmt); |
| right = chrec_fold_plus (chrec_type (right), right, to_add); |
| return build_polynomial_chrec (var, left, right); |
| } |
| else |
| { |
| gcc_assert (flow_loop_nested_p (loop, chloop)); |
| |
| /* Search the evolution in LOOP_NB. */ |
| left = add_to_evolution_1 (loop_nb, CHREC_LEFT (chrec_before), |
| to_add, at_stmt); |
| right = CHREC_RIGHT (chrec_before); |
| right = chrec_convert_rhs (chrec_type (left), right, at_stmt); |
| return build_polynomial_chrec (CHREC_VARIABLE (chrec_before), |
| left, right); |
| } |
| |
| default: |
| /* These nodes do not depend on a loop. */ |
| if (chrec_before == chrec_dont_know) |
| return chrec_dont_know; |
| |
| left = chrec_before; |
| right = chrec_convert_rhs (chrec_type (left), to_add, at_stmt); |
| return build_polynomial_chrec (loop_nb, left, right); |
| } |
| } |
| |
| /* Add TO_ADD to the evolution part of CHREC_BEFORE in the dimension |
| of LOOP_NB. |
| |
| Description (provided for completeness, for those who read code in |
| a plane, and for my poor 62 bytes brain that would have forgotten |
| all this in the next two or three months): |
| |
| The algorithm of translation of programs from the SSA representation |
| into the chrecs syntax is based on a pattern matching. After having |
| reconstructed the overall tree expression for a loop, there are only |
| two cases that can arise: |
| |
| 1. a = loop-phi (init, a + expr) |
| 2. a = loop-phi (init, expr) |
| |
| where EXPR is either a scalar constant with respect to the analyzed |
| loop (this is a degree 0 polynomial), or an expression containing |
| other loop-phi definitions (these are higher degree polynomials). |
| |
| Examples: |
| |
| 1. |
| | init = ... |
| | loop_1 |
| | a = phi (init, a + 5) |
| | endloop |
| |
| 2. |
| | inita = ... |
| | initb = ... |
| | loop_1 |
| | a = phi (inita, 2 * b + 3) |
| | b = phi (initb, b + 1) |
| | endloop |
| |
| For the first case, the semantics of the SSA representation is: |
| |
| | a (x) = init + \sum_{j = 0}^{x - 1} expr (j) |
| |
| that is, there is a loop index "x" that determines the scalar value |
| of the variable during the loop execution. During the first |
| iteration, the value is that of the initial condition INIT, while |
| during the subsequent iterations, it is the sum of the initial |
| condition with the sum of all the values of EXPR from the initial |
| iteration to the before last considered iteration. |
| |
| For the second case, the semantics of the SSA program is: |
| |
| | a (x) = init, if x = 0; |
| | expr (x - 1), otherwise. |
| |
| The second case corresponds to the PEELED_CHREC, whose syntax is |
| close to the syntax of a loop-phi-node: |
| |
| | phi (init, expr) vs. (init, expr)_x |
| |
| The proof of the translation algorithm for the first case is a |
| proof by structural induction based on the degree of EXPR. |
| |
| Degree 0: |
| When EXPR is a constant with respect to the analyzed loop, or in |
| other words when EXPR is a polynomial of degree 0, the evolution of |
| the variable A in the loop is an affine function with an initial |
| condition INIT, and a step EXPR. In order to show this, we start |
| from the semantics of the SSA representation: |
| |
| f (x) = init + \sum_{j = 0}^{x - 1} expr (j) |
| |
| and since "expr (j)" is a constant with respect to "j", |
| |
| f (x) = init + x * expr |
| |
| Finally, based on the semantics of the pure sum chrecs, by |
| identification we get the corresponding chrecs syntax: |
| |
| f (x) = init * \binom{x}{0} + expr * \binom{x}{1} |
| f (x) -> {init, +, expr}_x |
| |
| Higher degree: |
| Suppose that EXPR is a polynomial of degree N with respect to the |
| analyzed loop_x for which we have already determined that it is |
| written under the chrecs syntax: |
| |
| | expr (x) -> {b_0, +, b_1, +, ..., +, b_{n-1}} (x) |
| |
| We start from the semantics of the SSA program: |
| |
| | f (x) = init + \sum_{j = 0}^{x - 1} expr (j) |
| | |
| | f (x) = init + \sum_{j = 0}^{x - 1} |
| | (b_0 * \binom{j}{0} + ... + b_{n-1} * \binom{j}{n-1}) |
| | |
| | f (x) = init + \sum_{j = 0}^{x - 1} |
| | \sum_{k = 0}^{n - 1} (b_k * \binom{j}{k}) |
| | |
| | f (x) = init + \sum_{k = 0}^{n - 1} |
| | (b_k * \sum_{j = 0}^{x - 1} \binom{j}{k}) |
| | |
| | f (x) = init + \sum_{k = 0}^{n - 1} |
| | (b_k * \binom{x}{k + 1}) |
| | |
| | f (x) = init + b_0 * \binom{x}{1} + ... |
| | + b_{n-1} * \binom{x}{n} |
| | |
| | f (x) = init * \binom{x}{0} + b_0 * \binom{x}{1} + ... |
| | + b_{n-1} * \binom{x}{n} |
| | |
| |
| And finally from the definition of the chrecs syntax, we identify: |
| | f (x) -> {init, +, b_0, +, ..., +, b_{n-1}}_x |
| |
| This shows the mechanism that stands behind the add_to_evolution |
| function. An important point is that the use of symbolic |
| parameters avoids the need of an analysis schedule. |
| |
| Example: |
| |
| | inita = ... |
| | initb = ... |
| | loop_1 |
| | a = phi (inita, a + 2 + b) |
| | b = phi (initb, b + 1) |
| | endloop |
| |
| When analyzing "a", the algorithm keeps "b" symbolically: |
| |
| | a -> {inita, +, 2 + b}_1 |
| |
| Then, after instantiation, the analyzer ends on the evolution: |
| |
| | a -> {inita, +, 2 + initb, +, 1}_1 |
| |
| */ |
| |
| static tree |
| add_to_evolution (unsigned loop_nb, tree chrec_before, enum tree_code code, |
| tree to_add, gimple *at_stmt) |
| { |
| tree type = chrec_type (to_add); |
| tree res = NULL_TREE; |
| |
| if (to_add == NULL_TREE) |
| return chrec_before; |
| |
| /* TO_ADD is either a scalar, or a parameter. TO_ADD is not |
| instantiated at this point. */ |
| if (TREE_CODE (to_add) == POLYNOMIAL_CHREC) |
| /* This should not happen. */ |
| return chrec_dont_know; |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, "(add_to_evolution \n"); |
| fprintf (dump_file, " (loop_nb = %d)\n", loop_nb); |
| fprintf (dump_file, " (chrec_before = "); |
| print_generic_expr (dump_file, chrec_before); |
| fprintf (dump_file, ")\n (to_add = "); |
| print_generic_expr (dump_file, to_add); |
| fprintf (dump_file, ")\n"); |
| } |
| |
| if (code == MINUS_EXPR) |
| to_add = chrec_fold_multiply (type, to_add, SCALAR_FLOAT_TYPE_P (type) |
| ? build_real (type, dconstm1) |
| : build_int_cst_type (type, -1)); |
| |
| res = add_to_evolution_1 (loop_nb, chrec_before, to_add, at_stmt); |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, " (res = "); |
| print_generic_expr (dump_file, res); |
| fprintf (dump_file, "))\n"); |
| } |
| |
| return res; |
| } |
| |
| |
| |
| /* This section selects the loops that will be good candidates for the |
| scalar evolution analysis. For the moment, greedily select all the |
| loop nests we could analyze. */ |
| |
| /* For a loop with a single exit edge, return the COND_EXPR that |
| guards the exit edge. If the expression is too difficult to |
| analyze, then give up. */ |
| |
| gcond * |
| get_loop_exit_condition (const class loop *loop) |
| { |
| gcond *res = NULL; |
| edge exit_edge = single_exit (loop); |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| fprintf (dump_file, "(get_loop_exit_condition \n "); |
| |
| if (exit_edge) |
| { |
| gimple *stmt; |
| |
| stmt = last_stmt (exit_edge->src); |
| if (gcond *cond_stmt = safe_dyn_cast <gcond *> (stmt)) |
| res = cond_stmt; |
| } |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| print_gimple_stmt (dump_file, res, 0); |
| fprintf (dump_file, ")\n"); |
| } |
| |
| return res; |
| } |
| |
| |
| /* Depth first search algorithm. */ |
| |
| enum t_bool { |
| t_false, |
| t_true, |
| t_dont_know |
| }; |
| |
| |
| static t_bool follow_ssa_edge_expr (class loop *loop, gimple *, tree, gphi *, |
| tree *, int); |
| |
| /* Follow the ssa edge into the binary expression RHS0 CODE RHS1. |
| Return true if the strongly connected component has been found. */ |
| |
| static t_bool |
| follow_ssa_edge_binary (class loop *loop, gimple *at_stmt, |
| tree type, tree rhs0, enum tree_code code, tree rhs1, |
| gphi *halting_phi, tree *evolution_of_loop, |
| int limit) |
| { |
| t_bool res = t_false; |
| tree evol; |
| |
| switch (code) |
| { |
| case POINTER_PLUS_EXPR: |
| case PLUS_EXPR: |
| if (TREE_CODE (rhs0) == SSA_NAME) |
| { |
| if (TREE_CODE (rhs1) == SSA_NAME) |
| { |
| /* Match an assignment under the form: |
| "a = b + c". */ |
| |
| /* We want only assignments of form "name + name" contribute to |
| LIMIT, as the other cases do not necessarily contribute to |
| the complexity of the expression. */ |
| limit++; |
| |
| evol = *evolution_of_loop; |
| evol = add_to_evolution |
| (loop->num, |
| chrec_convert (type, evol, at_stmt), |
| code, rhs1, at_stmt); |
| res = follow_ssa_edge_expr |
| (loop, at_stmt, rhs0, halting_phi, &evol, limit); |
| if (res == t_true) |
| *evolution_of_loop = evol; |
| else if (res == t_false) |
| { |
| *evolution_of_loop = add_to_evolution |
| (loop->num, |
| chrec_convert (type, *evolution_of_loop, at_stmt), |
| code, rhs0, at_stmt); |
| res = follow_ssa_edge_expr |
| (loop, at_stmt, rhs1, halting_phi, |
| evolution_of_loop, limit); |
| } |
| } |
| |
| else |
| gcc_unreachable (); /* Handled in caller. */ |
| } |
| |
| else if (TREE_CODE (rhs1) == SSA_NAME) |
| { |
| /* Match an assignment under the form: |
| "a = ... + c". */ |
| *evolution_of_loop = add_to_evolution |
| (loop->num, chrec_convert (type, *evolution_of_loop, |
| at_stmt), |
| code, rhs0, at_stmt); |
| res = follow_ssa_edge_expr |
| (loop, at_stmt, rhs1, halting_phi, |
| evolution_of_loop, limit); |
| } |
| |
| else |
| /* Otherwise, match an assignment under the form: |
| "a = ... + ...". */ |
| /* And there is nothing to do. */ |
| res = t_false; |
| break; |
| |
| case MINUS_EXPR: |
| /* This case is under the form "opnd0 = rhs0 - rhs1". */ |
| if (TREE_CODE (rhs0) == SSA_NAME) |
| gcc_unreachable (); /* Handled in caller. */ |
| else |
| /* Otherwise, match an assignment under the form: |
| "a = ... - ...". */ |
| /* And there is nothing to do. */ |
| res = t_false; |
| break; |
| |
| default: |
| res = t_false; |
| } |
| |
| return res; |
| } |
| |
| /* Checks whether the I-th argument of a PHI comes from a backedge. */ |
| |
| static bool |
| backedge_phi_arg_p (gphi *phi, int i) |
| { |
| const_edge e = gimple_phi_arg_edge (phi, i); |
| |
| /* We would in fact like to test EDGE_DFS_BACK here, but we do not care |
| about updating it anywhere, and this should work as well most of the |
| time. */ |
| if (e->flags & EDGE_IRREDUCIBLE_LOOP) |
| return true; |
| |
| return false; |
| } |
| |
| /* Helper function for one branch of the condition-phi-node. Return |
| true if the strongly connected component has been found following |
| this path. */ |
| |
| static inline t_bool |
| follow_ssa_edge_in_condition_phi_branch (int i, |
| class loop *loop, |
| gphi *condition_phi, |
| gphi *halting_phi, |
| tree *evolution_of_branch, |
| tree init_cond, int limit) |
| { |
| tree branch = PHI_ARG_DEF (condition_phi, i); |
| *evolution_of_branch = chrec_dont_know; |
| |
| /* Do not follow back edges (they must belong to an irreducible loop, which |
| we really do not want to worry about). */ |
| if (backedge_phi_arg_p (condition_phi, i)) |
| return t_false; |
| |
| if (TREE_CODE (branch) == SSA_NAME) |
| { |
| *evolution_of_branch = init_cond; |
| return follow_ssa_edge_expr (loop, condition_phi, branch, halting_phi, |
| evolution_of_branch, limit); |
| } |
| |
| /* This case occurs when one of the condition branches sets |
| the variable to a constant: i.e. a phi-node like |
| "a_2 = PHI <a_7(5), 2(6)>;". |
| |
| FIXME: This case have to be refined correctly: |
| in some cases it is possible to say something better than |
| chrec_dont_know, for example using a wrap-around notation. */ |
| return t_false; |
| } |
| |
| /* This function merges the branches of a condition-phi-node in a |
| loop. */ |
| |
| static t_bool |
| follow_ssa_edge_in_condition_phi (class loop *loop, |
| gphi *condition_phi, |
| gphi *halting_phi, |
| tree *evolution_of_loop, int limit) |
| { |
| int i, n; |
| tree init = *evolution_of_loop; |
| tree evolution_of_branch; |
| t_bool res = follow_ssa_edge_in_condition_phi_branch (0, loop, condition_phi, |
| halting_phi, |
| &evolution_of_branch, |
| init, limit); |
| if (res == t_false || res == t_dont_know) |
| return res; |
| |
| *evolution_of_loop = evolution_of_branch; |
| |
| n = gimple_phi_num_args (condition_phi); |
| for (i = 1; i < n; i++) |
| { |
| /* Quickly give up when the evolution of one of the branches is |
| not known. */ |
| if (*evolution_of_loop == chrec_dont_know) |
| return t_true; |
| |
| /* Increase the limit by the PHI argument number to avoid exponential |
| time and memory complexity. */ |
| res = follow_ssa_edge_in_condition_phi_branch (i, loop, condition_phi, |
| halting_phi, |
| &evolution_of_branch, |
| init, limit + i); |
| if (res == t_false || res == t_dont_know) |
| return res; |
| |
| *evolution_of_loop = chrec_merge (*evolution_of_loop, |
| evolution_of_branch); |
| } |
| |
| return t_true; |
| } |
| |
| /* Follow an SSA edge in an inner loop. It computes the overall |
| effect of the loop, and following the symbolic initial conditions, |
| it follows the edges in the parent loop. The inner loop is |
| considered as a single statement. */ |
| |
| static t_bool |
| follow_ssa_edge_inner_loop_phi (class loop *outer_loop, |
| gphi *loop_phi_node, |
| gphi *halting_phi, |
| tree *evolution_of_loop, int limit) |
| { |
| class loop *loop = loop_containing_stmt (loop_phi_node); |
| tree ev = analyze_scalar_evolution (loop, PHI_RESULT (loop_phi_node)); |
| |
| /* Sometimes, the inner loop is too difficult to analyze, and the |
| result of the analysis is a symbolic parameter. */ |
| if (ev == PHI_RESULT (loop_phi_node)) |
| { |
| t_bool res = t_false; |
| int i, n = gimple_phi_num_args (loop_phi_node); |
| |
| for (i = 0; i < n; i++) |
| { |
| tree arg = PHI_ARG_DEF (loop_phi_node, i); |
| basic_block bb; |
| |
| /* Follow the edges that exit the inner loop. */ |
| bb = gimple_phi_arg_edge (loop_phi_node, i)->src; |
| if (!flow_bb_inside_loop_p (loop, bb)) |
| res = follow_ssa_edge_expr (outer_loop, loop_phi_node, |
| arg, halting_phi, |
| evolution_of_loop, limit); |
| if (res == t_true) |
| break; |
| } |
| |
| /* If the path crosses this loop-phi, give up. */ |
| if (res == t_true) |
| *evolution_of_loop = chrec_dont_know; |
| |
| return res; |
| } |
| |
| /* Otherwise, compute the overall effect of the inner loop. */ |
| ev = compute_overall_effect_of_inner_loop (loop, ev); |
| return follow_ssa_edge_expr (outer_loop, loop_phi_node, ev, halting_phi, |
| evolution_of_loop, limit); |
| } |
| |
| /* Follow the ssa edge into the expression EXPR. |
| Return true if the strongly connected component has been found. */ |
| |
| static t_bool |
| follow_ssa_edge_expr (class loop *loop, gimple *at_stmt, tree expr, |
| gphi *halting_phi, tree *evolution_of_loop, |
| int limit) |
| { |
| enum tree_code code; |
| tree type, rhs0, rhs1 = NULL_TREE; |
| |
| /* The EXPR is one of the following cases: |
| - an SSA_NAME, |
| - an INTEGER_CST, |
| - a PLUS_EXPR, |
| - a POINTER_PLUS_EXPR, |
| - a MINUS_EXPR, |
| - an ASSERT_EXPR, |
| - other cases are not yet handled. */ |
| |
| /* For SSA_NAME look at the definition statement, handling |
| PHI nodes and otherwise expand appropriately for the expression |
| handling below. */ |
| tail_recurse: |
| if (TREE_CODE (expr) == SSA_NAME) |
| { |
| gimple *def = SSA_NAME_DEF_STMT (expr); |
| |
| if (gimple_nop_p (def)) |
| return t_false; |
| |
| /* Give up if the path is longer than the MAX that we allow. */ |
| if (limit > param_scev_max_expr_complexity) |
| { |
| *evolution_of_loop = chrec_dont_know; |
| return t_dont_know; |
| } |
| |
| if (gphi *phi = dyn_cast <gphi *>(def)) |
| { |
| if (!loop_phi_node_p (phi)) |
| /* DEF is a condition-phi-node. Follow the branches, and |
| record their evolutions. Finally, merge the collected |
| information and set the approximation to the main |
| variable. */ |
| return follow_ssa_edge_in_condition_phi |
| (loop, phi, halting_phi, evolution_of_loop, limit); |
| |
| /* When the analyzed phi is the halting_phi, the |
| depth-first search is over: we have found a path from |
| the halting_phi to itself in the loop. */ |
| if (phi == halting_phi) |
| return t_true; |
| |
| /* Otherwise, the evolution of the HALTING_PHI depends |
| on the evolution of another loop-phi-node, i.e. the |
| evolution function is a higher degree polynomial. */ |
| class loop *def_loop = loop_containing_stmt (def); |
| if (def_loop == loop) |
| return t_false; |
| |
| /* Inner loop. */ |
| if (flow_loop_nested_p (loop, def_loop)) |
| return follow_ssa_edge_inner_loop_phi |
| (loop, phi, halting_phi, evolution_of_loop, |
| limit + 1); |
| |
| /* Outer loop. */ |
| return t_false; |
| } |
| |
| /* At this level of abstraction, the program is just a set |
| of GIMPLE_ASSIGNs and PHI_NODEs. In principle there is no |
| other def to be handled. */ |
| if (!is_gimple_assign (def)) |
| return t_false; |
| |
| code = gimple_assign_rhs_code (def); |
| switch (get_gimple_rhs_class (code)) |
| { |
| case GIMPLE_BINARY_RHS: |
| rhs0 = gimple_assign_rhs1 (def); |
| rhs1 = gimple_assign_rhs2 (def); |
| break; |
| case GIMPLE_UNARY_RHS: |
| case GIMPLE_SINGLE_RHS: |
| rhs0 = gimple_assign_rhs1 (def); |
| break; |
| default: |
| return t_false; |
| } |
| type = TREE_TYPE (gimple_assign_lhs (def)); |
| at_stmt = def; |
| } |
| else |
| { |
| code = TREE_CODE (expr); |
| type = TREE_TYPE (expr); |
| switch (code) |
| { |
| CASE_CONVERT: |
| rhs0 = TREE_OPERAND (expr, 0); |
| break; |
| case POINTER_PLUS_EXPR: |
| case PLUS_EXPR: |
| case MINUS_EXPR: |
| rhs0 = TREE_OPERAND (expr, 0); |
| rhs1 = TREE_OPERAND (expr, 1); |
| break; |
| default: |
| rhs0 = expr; |
| } |
| } |
| |
| switch (code) |
| { |
| CASE_CONVERT: |
| { |
| /* This assignment is under the form "a_1 = (cast) rhs. */ |
| t_bool res = follow_ssa_edge_expr (loop, at_stmt, rhs0, halting_phi, |
| evolution_of_loop, limit); |
| *evolution_of_loop = chrec_convert (type, *evolution_of_loop, at_stmt); |
| return res; |
| } |
| |
| case INTEGER_CST: |
| /* This assignment is under the form "a_1 = 7". */ |
| return t_false; |
| |
| case ADDR_EXPR: |
| { |
| /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */ |
| if (TREE_CODE (TREE_OPERAND (rhs0, 0)) != MEM_REF) |
| return t_false; |
| tree mem = TREE_OPERAND (rhs0, 0); |
| rhs0 = TREE_OPERAND (mem, 0); |
| rhs1 = TREE_OPERAND (mem, 1); |
| code = POINTER_PLUS_EXPR; |
| } |
| /* Fallthru. */ |
| case POINTER_PLUS_EXPR: |
| case PLUS_EXPR: |
| case MINUS_EXPR: |
| /* This case is under the form "rhs0 +- rhs1". */ |
| STRIP_USELESS_TYPE_CONVERSION (rhs0); |
| STRIP_USELESS_TYPE_CONVERSION (rhs1); |
| if (TREE_CODE (rhs0) == SSA_NAME |
| && (TREE_CODE (rhs1) != SSA_NAME || code == MINUS_EXPR)) |
| { |
| /* Match an assignment under the form: |
| "a = b +- ...". |
| Use tail-recursion for the simple case. */ |
| *evolution_of_loop = add_to_evolution |
| (loop->num, chrec_convert (type, *evolution_of_loop, |
| at_stmt), |
| code, rhs1, at_stmt); |
| expr = rhs0; |
| goto tail_recurse; |
| } |
| /* Else search for the SCC in both rhs0 and rhs1. */ |
| return follow_ssa_edge_binary (loop, at_stmt, type, rhs0, code, rhs1, |
| halting_phi, evolution_of_loop, limit); |
| |
| case ASSERT_EXPR: |
| /* This assignment is of the form: "a_1 = ASSERT_EXPR <a_2, ...>" |
| It must be handled as a copy assignment of the form a_1 = a_2. */ |
| expr = ASSERT_EXPR_VAR (rhs0); |
| goto tail_recurse; |
| |
| default: |
| return t_false; |
| } |
| } |
| |
| |
| /* Simplify PEELED_CHREC represented by (init_cond, arg) in LOOP. |
| Handle below case and return the corresponding POLYNOMIAL_CHREC: |
| |
| # i_17 = PHI <i_13(5), 0(3)> |
| # _20 = PHI <_5(5), start_4(D)(3)> |
| ... |
| i_13 = i_17 + 1; |
| _5 = start_4(D) + i_13; |
| |
| Though variable _20 appears as a PEELED_CHREC in the form of |
| (start_4, _5)_LOOP, it's a POLYNOMIAL_CHREC like {start_4, 1}_LOOP. |
| |
| See PR41488. */ |
| |
| static tree |
| simplify_peeled_chrec (class loop *loop, tree arg, tree init_cond) |
| { |
| aff_tree aff1, aff2; |
| tree ev, left, right, type, step_val; |
| hash_map<tree, name_expansion *> *peeled_chrec_map = NULL; |
| |
| ev = instantiate_parameters (loop, analyze_scalar_evolution (loop, arg)); |
| if (ev == NULL_TREE || TREE_CODE (ev) != POLYNOMIAL_CHREC) |
| return chrec_dont_know; |
| |
| left = CHREC_LEFT (ev); |
| right = CHREC_RIGHT (ev); |
| type = TREE_TYPE (left); |
| step_val = chrec_fold_plus (type, init_cond, right); |
| |
| /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP |
| if "left" equals to "init + right". */ |
| if (operand_equal_p (left, step_val, 0)) |
| { |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n"); |
| |
| return build_polynomial_chrec (loop->num, init_cond, right); |
| } |
| |
| /* The affine code only deals with pointer and integer types. */ |
| if (!POINTER_TYPE_P (type) |
| && !INTEGRAL_TYPE_P (type)) |
| return chrec_dont_know; |
| |
| /* Try harder to check if they are equal. */ |
| tree_to_aff_combination_expand (left, type, &aff1, &peeled_chrec_map); |
| tree_to_aff_combination_expand (step_val, type, &aff2, &peeled_chrec_map); |
| free_affine_expand_cache (&peeled_chrec_map); |
| aff_combination_scale (&aff2, -1); |
| aff_combination_add (&aff1, &aff2); |
| |
| /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP |
| if "left" equals to "init + right". */ |
| if (aff_combination_zero_p (&aff1)) |
| { |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n"); |
| |
| return build_polynomial_chrec (loop->num, init_cond, right); |
| } |
| return chrec_dont_know; |
| } |
| |
| /* Given a LOOP_PHI_NODE, this function determines the evolution |
| function from LOOP_PHI_NODE to LOOP_PHI_NODE in the loop. */ |
| |
| static tree |
| analyze_evolution_in_loop (gphi *loop_phi_node, |
| tree init_cond) |
| { |
| int i, n = gimple_phi_num_args (loop_phi_node); |
| tree evolution_function = chrec_not_analyzed_yet; |
| class loop *loop = loop_containing_stmt (loop_phi_node); |
| basic_block bb; |
| static bool simplify_peeled_chrec_p = true; |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, "(analyze_evolution_in_loop \n"); |
| fprintf (dump_file, " (loop_phi_node = "); |
| print_gimple_stmt (dump_file, loop_phi_node, 0); |
| fprintf (dump_file, ")\n"); |
| } |
| |
| for (i = 0; i < n; i++) |
| { |
| tree arg = PHI_ARG_DEF (loop_phi_node, i); |
| tree ev_fn; |
| t_bool res; |
| |
| /* Select the edges that enter the loop body. */ |
| bb = gimple_phi_arg_edge (loop_phi_node, i)->src; |
| if (!flow_bb_inside_loop_p (loop, bb)) |
| continue; |
| |
| if (TREE_CODE (arg) == SSA_NAME) |
| { |
| bool val = false; |
| |
| /* Pass in the initial condition to the follow edge function. */ |
| ev_fn = init_cond; |
| res = follow_ssa_edge_expr (loop, loop_phi_node, arg, |
| loop_phi_node, &ev_fn, 0); |
| |
| /* If ev_fn has no evolution in the inner loop, and the |
| init_cond is not equal to ev_fn, then we have an |
| ambiguity between two possible values, as we cannot know |
| the number of iterations at this point. */ |
| if (TREE_CODE (ev_fn) != POLYNOMIAL_CHREC |
| && no_evolution_in_loop_p (ev_fn, loop->num, &val) && val |
| && !operand_equal_p (init_cond, ev_fn, 0)) |
| ev_fn = chrec_dont_know; |
| } |
| else |
| res = t_false; |
| |
| /* When it is impossible to go back on the same |
| loop_phi_node by following the ssa edges, the |
| evolution is represented by a peeled chrec, i.e. the |
| first iteration, EV_FN has the value INIT_COND, then |
| all the other iterations it has the value of ARG. |
| For the moment, PEELED_CHREC nodes are not built. */ |
| if (res != t_true) |
| { |
| ev_fn = chrec_dont_know; |
| /* Try to recognize POLYNOMIAL_CHREC which appears in |
| the form of PEELED_CHREC, but guard the process with |
| a bool variable to keep the analyzer from infinite |
| recurrence for real PEELED_RECs. */ |
| if (simplify_peeled_chrec_p && TREE_CODE (arg) == SSA_NAME) |
| { |
| simplify_peeled_chrec_p = false; |
| ev_fn = simplify_peeled_chrec (loop, arg, init_cond); |
| simplify_peeled_chrec_p = true; |
| } |
| } |
| |
| /* When there are multiple back edges of the loop (which in fact never |
| happens currently, but nevertheless), merge their evolutions. */ |
| evolution_function = chrec_merge (evolution_function, ev_fn); |
| |
| if (evolution_function == chrec_dont_know) |
| break; |
| } |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, " (evolution_function = "); |
| print_generic_expr (dump_file, evolution_function); |
| fprintf (dump_file, "))\n"); |
| } |
| |
| return evolution_function; |
| } |
| |
| /* Looks to see if VAR is a copy of a constant (via straightforward assignments |
| or degenerate phi's). If so, returns the constant; else, returns VAR. */ |
| |
| static tree |
| follow_copies_to_constant (tree var) |
| { |
| tree res = var; |
| while (TREE_CODE (res) == SSA_NAME |
| /* We face not updated SSA form in multiple places and this walk |
| may end up in sibling loops so we have to guard it. */ |
| && !name_registered_for_update_p (res)) |
| { |
| gimple *def = SSA_NAME_DEF_STMT (res); |
| if (gphi *phi = dyn_cast <gphi *> (def)) |
| { |
| if (tree rhs = degenerate_phi_result (phi)) |
| res = rhs; |
| else |
| break; |
| } |
| else if (gimple_assign_single_p (def)) |
| /* Will exit loop if not an SSA_NAME. */ |
| res = gimple_assign_rhs1 (def); |
| else |
| break; |
| } |
| if (CONSTANT_CLASS_P (res)) |
| return res; |
| return var; |
| } |
| |
| /* Given a loop-phi-node, return the initial conditions of the |
| variable on entry of the loop. When the CCP has propagated |
| constants into the loop-phi-node, the initial condition is |
| instantiated, otherwise the initial condition is kept symbolic. |
| This analyzer does not analyze the evolution outside the current |
| loop, and leaves this task to the on-demand tree reconstructor. */ |
| |
| static tree |
| analyze_initial_condition (gphi *loop_phi_node) |
| { |
| int i, n; |
| tree init_cond = chrec_not_analyzed_yet; |
| class loop *loop = loop_containing_stmt (loop_phi_node); |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, "(analyze_initial_condition \n"); |
| fprintf (dump_file, " (loop_phi_node = \n"); |
| print_gimple_stmt (dump_file, loop_phi_node, 0); |
| fprintf (dump_file, ")\n"); |
| } |
| |
| n = gimple_phi_num_args (loop_phi_node); |
| for (i = 0; i < n; i++) |
| { |
| tree branch = PHI_ARG_DEF (loop_phi_node, i); |
| basic_block bb = gimple_phi_arg_edge (loop_phi_node, i)->src; |
| |
| /* When the branch is oriented to the loop's body, it does |
| not contribute to the initial condition. */ |
| if (flow_bb_inside_loop_p (loop, bb)) |
| continue; |
| |
| if (init_cond == chrec_not_analyzed_yet) |
| { |
| init_cond = branch; |
| continue; |
| } |
| |
| if (TREE_CODE (branch) == SSA_NAME) |
| { |
| init_cond = chrec_dont_know; |
| break; |
| } |
| |
| init_cond = chrec_merge (init_cond, branch); |
| } |
| |
| /* Ooops -- a loop without an entry??? */ |
| if (init_cond == chrec_not_analyzed_yet) |
| init_cond = chrec_dont_know; |
| |
| /* We may not have fully constant propagated IL. Handle degenerate PHIs here |
| to not miss important early loop unrollings. */ |
| init_cond = follow_copies_to_constant (init_cond); |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, " (init_cond = "); |
| print_generic_expr (dump_file, init_cond); |
| fprintf (dump_file, "))\n"); |
| } |
| |
| return init_cond; |
| } |
| |
| /* Analyze the scalar evolution for LOOP_PHI_NODE. */ |
| |
| static tree |
| interpret_loop_phi (class loop *loop, gphi *loop_phi_node) |
| { |
| tree res; |
| class loop *phi_loop = loop_containing_stmt (loop_phi_node); |
| tree init_cond; |
| |
| gcc_assert (phi_loop == loop); |
| |
| /* Otherwise really interpret the loop phi. */ |
| init_cond = analyze_initial_condition (loop_phi_node); |
| res = analyze_evolution_in_loop (loop_phi_node, init_cond); |
| |
| /* Verify we maintained the correct initial condition throughout |
| possible conversions in the SSA chain. */ |
| if (res != chrec_dont_know) |
| { |
| tree new_init = res; |
| if (CONVERT_EXPR_P (res) |
| && TREE_CODE (TREE_OPERAND (res, 0)) == POLYNOMIAL_CHREC) |
| new_init = fold_convert (TREE_TYPE (res), |
| CHREC_LEFT (TREE_OPERAND (res, 0))); |
| else if (TREE_CODE (res) == POLYNOMIAL_CHREC) |
| new_init = CHREC_LEFT (res); |
| STRIP_USELESS_TYPE_CONVERSION (new_init); |
| if (TREE_CODE (new_init) == POLYNOMIAL_CHREC |
| || !operand_equal_p (init_cond, new_init, 0)) |
| return chrec_dont_know; |
| } |
| |
| return res; |
| } |
| |
| /* This function merges the branches of a condition-phi-node, |
| contained in the outermost loop, and whose arguments are already |
| analyzed. */ |
| |
| static tree |
| interpret_condition_phi (class loop *loop, gphi *condition_phi) |
| { |
| int i, n = gimple_phi_num_args (condition_phi); |
| tree res = chrec_not_analyzed_yet; |
| |
| for (i = 0; i < n; i++) |
| { |
| tree branch_chrec; |
| |
| if (backedge_phi_arg_p (condition_phi, i)) |
| { |
| res = chrec_dont_know; |
| break; |
| } |
| |
| branch_chrec = analyze_scalar_evolution |
| (loop, PHI_ARG_DEF (condition_phi, i)); |
| |
| res = chrec_merge (res, branch_chrec); |
| if (res == chrec_dont_know) |
| break; |
| } |
| |
| return res; |
| } |
| |
| /* Interpret the operation RHS1 OP RHS2. If we didn't |
| analyze this node before, follow the definitions until ending |
| either on an analyzed GIMPLE_ASSIGN, or on a loop-phi-node. On the |
| return path, this function propagates evolutions (ala constant copy |
| propagation). OPND1 is not a GIMPLE expression because we could |
| analyze the effect of an inner loop: see interpret_loop_phi. */ |
| |
| static tree |
| interpret_rhs_expr (class loop *loop, gimple *at_stmt, |
| tree type, tree rhs1, enum tree_code code, tree rhs2) |
| { |
| tree res, chrec1, chrec2, ctype; |
| gimple *def; |
| |
| if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS) |
| { |
| if (is_gimple_min_invariant (rhs1)) |
| return chrec_convert (type, rhs1, at_stmt); |
| |
| if (code == SSA_NAME) |
| return chrec_convert (type, analyze_scalar_evolution (loop, rhs1), |
| at_stmt); |
| |
| if (code == ASSERT_EXPR) |
| { |
| rhs1 = ASSERT_EXPR_VAR (rhs1); |
| return chrec_convert (type, analyze_scalar_evolution (loop, rhs1), |
| at_stmt); |
| } |
| } |
| |
| switch (code) |
| { |
| case ADDR_EXPR: |
| if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF |
| || handled_component_p (TREE_OPERAND (rhs1, 0))) |
| { |
| machine_mode mode; |
| poly_int64 bitsize, bitpos; |
| int unsignedp, reversep; |
| int volatilep = 0; |
| tree base, offset; |
| tree chrec3; |
| tree unitpos; |
| |
| base = get_inner_reference (TREE_OPERAND (rhs1, 0), |
| &bitsize, &bitpos, &offset, &mode, |
| &unsignedp, &reversep, &volatilep); |
| |
| if (TREE_CODE (base) == MEM_REF) |
| { |
| rhs2 = TREE_OPERAND (base, 1); |
| rhs1 = TREE_OPERAND (base, 0); |
| |
| chrec1 = analyze_scalar_evolution (loop, rhs1); |
| chrec2 = analyze_scalar_evolution (loop, rhs2); |
| chrec1 = chrec_convert (type, chrec1, at_stmt); |
| chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt); |
| chrec1 = instantiate_parameters (loop, chrec1); |
| chrec2 = instantiate_parameters (loop, chrec2); |
| res = chrec_fold_plus (type, chrec1, chrec2); |
| } |
| else |
| { |
| chrec1 = analyze_scalar_evolution_for_address_of (loop, base); |
| chrec1 = chrec_convert (type, chrec1, at_stmt); |
| res = chrec1; |
| } |
| |
| if (offset != NULL_TREE) |
| { |
| chrec2 = analyze_scalar_evolution (loop, offset); |
| chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt); |
| chrec2 = instantiate_parameters (loop, chrec2); |
| res = chrec_fold_plus (type, res, chrec2); |
| } |
| |
| if (maybe_ne (bitpos, 0)) |
| { |
| unitpos = size_int (exact_div (bitpos, BITS_PER_UNIT)); |
| chrec3 = analyze_scalar_evolution (loop, unitpos); |
| chrec3 = chrec_convert (TREE_TYPE (unitpos), chrec3, at_stmt); |
| chrec3 = instantiate_parameters (loop, chrec3); |
| res = chrec_fold_plus (type, res, chrec3); |
| } |
| } |
| else |
| res = chrec_dont_know; |
| break; |
| |
| case POINTER_PLUS_EXPR: |
| chrec1 = analyze_scalar_evolution (loop, rhs1); |
| chrec2 = analyze_scalar_evolution (loop, rhs2); |
| chrec1 = chrec_convert (type, chrec1, at_stmt); |
| chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt); |
| chrec1 = instantiate_parameters (loop, chrec1); |
| chrec2 = instantiate_parameters (loop, chrec2); |
| res = chrec_fold_plus (type, chrec1, chrec2); |
| break; |
| |
| case PLUS_EXPR: |
| chrec1 = analyze_scalar_evolution (loop, rhs1); |
| chrec2 = analyze_scalar_evolution (loop, rhs2); |
| ctype = type; |
| /* When the stmt is conditionally executed re-write the CHREC |
| into a form that has well-defined behavior on overflow. */ |
| if (at_stmt |
| && INTEGRAL_TYPE_P (type) |
| && ! TYPE_OVERFLOW_WRAPS (type) |
| && ! dominated_by_p (CDI_DOMINATORS, loop->latch, |
| gimple_bb (at_stmt))) |
| ctype = unsigned_type_for (type); |
| chrec1 = chrec_convert (ctype, chrec1, at_stmt); |
| chrec2 = chrec_convert (ctype, chrec2, at_stmt); |
| chrec1 = instantiate_parameters (loop, chrec1); |
| chrec2 = instantiate_parameters (loop, chrec2); |
| res = chrec_fold_plus (ctype, chrec1, chrec2); |
| if (type != ctype) |
| res = chrec_convert (type, res, at_stmt); |
| break; |
| |
| case MINUS_EXPR: |
| chrec1 = analyze_scalar_evolution (loop, rhs1); |
| chrec2 = analyze_scalar_evolution (loop, rhs2); |
| ctype = type; |
| /* When the stmt is conditionally executed re-write the CHREC |
| into a form that has well-defined behavior on overflow. */ |
| if (at_stmt |
| && INTEGRAL_TYPE_P (type) |
| && ! TYPE_OVERFLOW_WRAPS (type) |
| && ! dominated_by_p (CDI_DOMINATORS, |
| loop->latch, gimple_bb (at_stmt))) |
| ctype = unsigned_type_for (type); |
| chrec1 = chrec_convert (ctype, chrec1, at_stmt); |
| chrec2 = chrec_convert (ctype, chrec2, at_stmt); |
| chrec1 = instantiate_parameters (loop, chrec1); |
| chrec2 = instantiate_parameters (loop, chrec2); |
| res = chrec_fold_minus (ctype, chrec1, chrec2); |
| if (type != ctype) |
| res = chrec_convert (type, res, at_stmt); |
| break; |
| |
| case NEGATE_EXPR: |
| chrec1 = analyze_scalar_evolution (loop, rhs1); |
| ctype = type; |
| /* When the stmt is conditionally executed re-write the CHREC |
| into a form that has well-defined behavior on overflow. */ |
| if (at_stmt |
| && INTEGRAL_TYPE_P (type) |
| && ! TYPE_OVERFLOW_WRAPS (type) |
| && ! dominated_by_p (CDI_DOMINATORS, |
| loop->latch, gimple_bb (at_stmt))) |
| ctype = unsigned_type_for (type); |
| chrec1 = chrec_convert (ctype, chrec1, at_stmt); |
| /* TYPE may be integer, real or complex, so use fold_convert. */ |
| chrec1 = instantiate_parameters (loop, chrec1); |
| res = chrec_fold_multiply (ctype, chrec1, |
| fold_convert (ctype, integer_minus_one_node)); |
| if (type != ctype) |
| res = chrec_convert (type, res, at_stmt); |
| break; |
| |
| case BIT_NOT_EXPR: |
| /* Handle ~X as -1 - X. */ |
| chrec1 = analyze_scalar_evolution (loop, rhs1); |
| chrec1 = chrec_convert (type, chrec1, at_stmt); |
| chrec1 = instantiate_parameters (loop, chrec1); |
| res = chrec_fold_minus (type, |
| fold_convert (type, integer_minus_one_node), |
| chrec1); |
| break; |
| |
| case MULT_EXPR: |
| chrec1 = analyze_scalar_evolution (loop, rhs1); |
| chrec2 = analyze_scalar_evolution (loop, rhs2); |
| ctype = type; |
| /* When the stmt is conditionally executed re-write the CHREC |
| into a form that has well-defined behavior on overflow. */ |
| if (at_stmt |
| && INTEGRAL_TYPE_P (type) |
| && ! TYPE_OVERFLOW_WRAPS (type) |
| && ! dominated_by_p (CDI_DOMINATORS, |
| loop->latch, gimple_bb (at_stmt))) |
| ctype = unsigned_type_for (type); |
| chrec1 = chrec_convert (ctype, chrec1, at_stmt); |
| chrec2 = chrec_convert (ctype, chrec2, at_stmt); |
| chrec1 = instantiate_parameters (loop, chrec1); |
| chrec2 = instantiate_parameters (loop, chrec2); |
| res = chrec_fold_multiply (ctype, chrec1, chrec2); |
| if (type != ctype) |
| res = chrec_convert (type, res, at_stmt); |
| break; |
| |
| case LSHIFT_EXPR: |
| { |
| /* Handle A<<B as A * (1<<B). */ |
| tree uns = unsigned_type_for (type); |
| chrec1 = analyze_scalar_evolution (loop, rhs1); |
| chrec2 = analyze_scalar_evolution (loop, rhs2); |
| chrec1 = chrec_convert (uns, chrec1, at_stmt); |
| chrec1 = instantiate_parameters (loop, chrec1); |
| chrec2 = instantiate_parameters (loop, chrec2); |
| |
| tree one = build_int_cst (uns, 1); |
| chrec2 = fold_build2 (LSHIFT_EXPR, uns, one, chrec2); |
| res = chrec_fold_multiply (uns, chrec1, chrec2); |
| res = chrec_convert (type, res, at_stmt); |
| } |
| break; |
| |
| CASE_CONVERT: |
| /* In case we have a truncation of a widened operation that in |
| the truncated type has undefined overflow behavior analyze |
| the operation done in an unsigned type of the same precision |
| as the final truncation. We cannot derive a scalar evolution |
| for the widened operation but for the truncated result. */ |
| if (TREE_CODE (type) == INTEGER_TYPE |
| && TREE_CODE (TREE_TYPE (rhs1)) == INTEGER_TYPE |
| && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (rhs1)) |
| && TYPE_OVERFLOW_UNDEFINED (type) |
| && TREE_CODE (rhs1) == SSA_NAME |
| && (def = SSA_NAME_DEF_STMT (rhs1)) |
| && is_gimple_assign (def) |
| && TREE_CODE_CLASS (gimple_assign_rhs_code (def)) == tcc_binary |
| && TREE_CODE (gimple_assign_rhs2 (def)) == INTEGER_CST) |
| { |
| tree utype = unsigned_type_for (type); |
| chrec1 = interpret_rhs_expr (loop, at_stmt, utype, |
| gimple_assign_rhs1 (def), |
| gimple_assign_rhs_code (def), |
| gimple_assign_rhs2 (def)); |
| } |
| else |
| chrec1 = analyze_scalar_evolution (loop, rhs1); |
| res = chrec_convert (type, chrec1, at_stmt, true, rhs1); |
| break; |
| |
| case BIT_AND_EXPR: |
| /* Given int variable A, handle A&0xffff as (int)(unsigned short)A. |
| If A is SCEV and its value is in the range of representable set |
| of type unsigned short, the result expression is a (no-overflow) |
| SCEV. */ |
| res = chrec_dont_know; |
| if (tree_fits_uhwi_p (rhs2)) |
| { |
| int precision; |
| unsigned HOST_WIDE_INT val = tree_to_uhwi (rhs2); |
| |
| val ++; |
| /* Skip if value of rhs2 wraps in unsigned HOST_WIDE_INT or |
| it's not the maximum value of a smaller type than rhs1. */ |
| if (val != 0 |
| && (precision = exact_log2 (val)) > 0 |
| && (unsigned) precision < TYPE_PRECISION (TREE_TYPE (rhs1))) |
| { |
| tree utype = build_nonstandard_integer_type (precision, 1); |
| |
| if (TYPE_PRECISION (utype) < TYPE_PRECISION (TREE_TYPE (rhs1))) |
| { |
| chrec1 = analyze_scalar_evolution (loop, rhs1); |
| chrec1 = chrec_convert (utype, chrec1, at_stmt); |
| res = chrec_convert (TREE_TYPE (rhs1), chrec1, at_stmt); |
| } |
| } |
| } |
| break; |
| |
| default: |
| res = chrec_dont_know; |
| break; |
| } |
| |
| return res; |
| } |
| |
| /* Interpret the expression EXPR. */ |
| |
| static tree |
| interpret_expr (class loop *loop, gimple *at_stmt, tree expr) |
| { |
| enum tree_code code; |
| tree type = TREE_TYPE (expr), op0, op1; |
| |
| if (automatically_generated_chrec_p (expr)) |
| return expr; |
| |
| if (TREE_CODE (expr) == POLYNOMIAL_CHREC |
| || TREE_CODE (expr) == CALL_EXPR |
| || get_gimple_rhs_class (TREE_CODE (expr)) == GIMPLE_TERNARY_RHS) |
| return chrec_dont_know; |
| |
| extract_ops_from_tree (expr, &code, &op0, &op1); |
| |
| return interpret_rhs_expr (loop, at_stmt, type, |
| op0, code, op1); |
| } |
| |
| /* Interpret the rhs of the assignment STMT. */ |
| |
| static tree |
| interpret_gimple_assign (class loop *loop, gimple *stmt) |
| { |
| tree type = TREE_TYPE (gimple_assign_lhs (stmt)); |
| enum tree_code code = gimple_assign_rhs_code (stmt); |
| |
| return interpret_rhs_expr (loop, stmt, type, |
| gimple_assign_rhs1 (stmt), code, |
| gimple_assign_rhs2 (stmt)); |
| } |
| |
| |
| |
| /* This section contains all the entry points: |
| - number_of_iterations_in_loop, |
| - analyze_scalar_evolution, |
| - instantiate_parameters. |
| */ |
| |
| /* Helper recursive function. */ |
| |
| static tree |
| analyze_scalar_evolution_1 (class loop *loop, tree var) |
| { |
| gimple *def; |
| basic_block bb; |
| class loop *def_loop; |
| tree res; |
| |
| if (TREE_CODE (var) != SSA_NAME) |
| return interpret_expr (loop, NULL, var); |
| |
| def = SSA_NAME_DEF_STMT (var); |
| bb = gimple_bb (def); |
| def_loop = bb->loop_father; |
| |
| if (!flow_bb_inside_loop_p (loop, bb)) |
| { |
| /* Keep symbolic form, but look through obvious copies for constants. */ |
| res = follow_copies_to_constant (var); |
| goto set_and_end; |
| } |
| |
| if (loop != def_loop) |
| { |
| res = analyze_scalar_evolution_1 (def_loop, var); |
| class loop *loop_to_skip = superloop_at_depth (def_loop, |
| loop_depth (loop) + 1); |
| res = compute_overall_effect_of_inner_loop (loop_to_skip, res); |
| if (chrec_contains_symbols_defined_in_loop (res, loop->num)) |
| res = analyze_scalar_evolution_1 (loop, res); |
| goto set_and_end; |
| } |
| |
| switch (gimple_code (def)) |
| { |
| case GIMPLE_ASSIGN: |
| res = interpret_gimple_assign (loop, def); |
| break; |
| |
| case GIMPLE_PHI: |
| if (loop_phi_node_p (def)) |
| res = interpret_loop_phi (loop, as_a <gphi *> (def)); |
| else |
| res = interpret_condition_phi (loop, as_a <gphi *> (def)); |
| break; |
| |
| default: |
| res = chrec_dont_know; |
| break; |
| } |
| |
| set_and_end: |
| |
| /* Keep the symbolic form. */ |
| if (res == chrec_dont_know) |
| res = var; |
| |
| if (loop == def_loop) |
| set_scalar_evolution (block_before_loop (loop), var, res); |
| |
| return res; |
| } |
| |
| /* Analyzes and returns the scalar evolution of the ssa_name VAR in |
| LOOP. LOOP is the loop in which the variable is used. |
| |
| Example of use: having a pointer VAR to a SSA_NAME node, STMT a |
| pointer to the statement that uses this variable, in order to |
| determine the evolution function of the variable, use the following |
| calls: |
| |
| loop_p loop = loop_containing_stmt (stmt); |
| tree chrec_with_symbols = analyze_scalar_evolution (loop, var); |
| tree chrec_instantiated = instantiate_parameters (loop, chrec_with_symbols); |
| */ |
| |
| tree |
| analyze_scalar_evolution (class loop *loop, tree var) |
| { |
| tree res; |
| |
| /* ??? Fix callers. */ |
| if (! loop) |
| return var; |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, "(analyze_scalar_evolution \n"); |
| fprintf (dump_file, " (loop_nb = %d)\n", loop->num); |
| fprintf (dump_file, " (scalar = "); |
| print_generic_expr (dump_file, var); |
| fprintf (dump_file, ")\n"); |
| } |
| |
| res = get_scalar_evolution (block_before_loop (loop), var); |
| if (res == chrec_not_analyzed_yet) |
| { |
| /* We'll recurse into instantiate_scev, avoid tearing down the |
| instantiate cache repeatedly and keep it live from here. */ |
| bool destr = false; |
| if (!global_cache) |
| { |
| global_cache = new instantiate_cache_type; |
| destr = true; |
| } |
| res = analyze_scalar_evolution_1 (loop, var); |
| if (destr) |
| { |
| delete global_cache; |
| global_cache = NULL; |
| } |
| } |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| fprintf (dump_file, ")\n"); |
| |
| return res; |
| } |
| |
| /* Analyzes and returns the scalar evolution of VAR address in LOOP. */ |
| |
| static tree |
| analyze_scalar_evolution_for_address_of (class loop *loop, tree var) |
| { |
| return analyze_scalar_evolution (loop, build_fold_addr_expr (var)); |
| } |
| |
| /* Analyze scalar evolution of use of VERSION in USE_LOOP with respect to |
| WRTO_LOOP (which should be a superloop of USE_LOOP) |
| |
| FOLDED_CASTS is set to true if resolve_mixers used |
| chrec_convert_aggressive (TODO -- not really, we are way too conservative |
| at the moment in order to keep things simple). |
| |
| To illustrate the meaning of USE_LOOP and WRTO_LOOP, consider the following |
| example: |
| |
| for (i = 0; i < 100; i++) -- loop 1 |
| { |
| for (j = 0; j < 100; j++) -- loop 2 |
| { |
| k1 = i; |
| k2 = j; |
| |
| use2 (k1, k2); |
| |
| for (t = 0; t < 100; t++) -- loop 3 |
| use3 (k1, k2); |
| |
| } |
| use1 (k1, k2); |
| } |
| |
| Both k1 and k2 are invariants in loop3, thus |
| analyze_scalar_evolution_in_loop (loop3, loop3, k1) = k1 |
| analyze_scalar_evolution_in_loop (loop3, loop3, k2) = k2 |
| |
| As they are invariant, it does not matter whether we consider their |
| usage in loop 3 or loop 2, hence |
| analyze_scalar_evolution_in_loop (loop2, loop3, k1) = |
| analyze_scalar_evolution_in_loop (loop2, loop2, k1) = i |
| analyze_scalar_evolution_in_loop (loop2, loop3, k2) = |
| analyze_scalar_evolution_in_loop (loop2, loop2, k2) = [0,+,1]_2 |
| |
| Similarly for their evolutions with respect to loop 1. The values of K2 |
| in the use in loop 2 vary independently on loop 1, thus we cannot express |
| the evolution with respect to loop 1: |
| analyze_scalar_evolution_in_loop (loop1, loop3, k1) = |
| analyze_scalar_evolution_in_loop (loop1, loop2, k1) = [0,+,1]_1 |
| analyze_scalar_evolution_in_loop (loop1, loop3, k2) = |
| analyze_scalar_evolution_in_loop (loop1, loop2, k2) = dont_know |
| |
| The value of k2 in the use in loop 1 is known, though: |
| analyze_scalar_evolution_in_loop (loop1, loop1, k1) = [0,+,1]_1 |
| analyze_scalar_evolution_in_loop (loop1, loop1, k2) = 100 |
| */ |
| |
| static tree |
| analyze_scalar_evolution_in_loop (class loop *wrto_loop, class loop *use_loop, |
| tree version, bool *folded_casts) |
| { |
| bool val = false; |
| tree ev = version, tmp; |
| |
| /* We cannot just do |
| |
| tmp = analyze_scalar_evolution (use_loop, version); |
| ev = resolve_mixers (wrto_loop, tmp, folded_casts); |
| |
| as resolve_mixers would query the scalar evolution with respect to |
| wrto_loop. For example, in the situation described in the function |
| comment, suppose that wrto_loop = loop1, use_loop = loop3 and |
| version = k2. Then |
| |
| analyze_scalar_evolution (use_loop, version) = k2 |
| |
| and resolve_mixers (loop1, k2, folded_casts) finds that the value of |
| k2 in loop 1 is 100, which is a wrong result, since we are interested |
| in the value in loop 3. |
| |
| Instead, we need to proceed from use_loop to wrto_loop loop by loop, |
| each time checking that there is no evolution in the inner loop. */ |
| |
| if (folded_casts) |
| *folded_casts = false; |
| while (1) |
| { |
| tmp = analyze_scalar_evolution (use_loop, ev); |
| ev = resolve_mixers (use_loop, tmp, folded_casts); |
| |
| if (use_loop == wrto_loop) |
| return ev; |
| |
| /* If the value of the use changes in the inner loop, we cannot express |
| its value in the outer loop (we might try to return interval chrec, |
| but we do not have a user for it anyway) */ |
| if (!no_evolution_in_loop_p (ev, use_loop->num, &val) |
| || !val) |
| return chrec_dont_know; |
| |
| use_loop = loop_outer (use_loop); |
| } |
| } |
| |
| |
| /* Computes a hash function for database element ELT. */ |
| |
| static inline hashval_t |
| hash_idx_scev_info (const void *elt_) |
| { |
| unsigned idx = ((size_t) elt_) - 2; |
| return scev_info_hasher::hash (&global_cache->entries[idx]); |
| } |
| |
| /* Compares database elements E1 and E2. */ |
| |
| static inline int |
| eq_idx_scev_info (const void *e1, const void *e2) |
| { |
| unsigned idx1 = ((size_t) e1) - 2; |
| return scev_info_hasher::equal (&global_cache->entries[idx1], |
| (const scev_info_str *) e2); |
| } |
| |
| /* Returns from CACHE the slot number of the cached chrec for NAME. */ |
| |
| static unsigned |
| get_instantiated_value_entry (instantiate_cache_type &cache, |
| tree name, edge instantiate_below) |
| { |
| if (!cache.map) |
| { |
| cache.map = htab_create (10, hash_idx_scev_info, eq_idx_scev_info, NULL); |
| cache.entries.create (10); |
| } |
| |
| scev_info_str e; |
| e.name_version = SSA_NAME_VERSION (name); |
| e.instantiated_below = instantiate_below->dest->index; |
| void **slot = htab_find_slot_with_hash (cache.map, &e, |
| scev_info_hasher::hash (&e), INSERT); |
| if (!*slot) |
| { |
| e.chrec = chrec_not_analyzed_yet; |
| *slot = (void *)(size_t)(cache.entries.length () + 2); |
| cache.entries.safe_push (e); |
| } |
| |
| return ((size_t)*slot) - 2; |
| } |
| |
| |
| /* Return the closed_loop_phi node for VAR. If there is none, return |
| NULL_TREE. */ |
| |
| static tree |
| loop_closed_phi_def (tree var) |
| { |
| class loop *loop; |
| edge exit; |
| gphi *phi; |
| gphi_iterator psi; |
| |
| if (var == NULL_TREE |
| || TREE_CODE (var) != SSA_NAME) |
| return NULL_TREE; |
| |
| loop = loop_containing_stmt (SSA_NAME_DEF_STMT (var)); |
| exit = single_exit (loop); |
| if (!exit) |
| return NULL_TREE; |
| |
| for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi)) |
| { |
| phi = psi.phi (); |
| if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == var) |
| return PHI_RESULT (phi); |
| } |
| |
| return NULL_TREE; |
| } |
| |
| static tree instantiate_scev_r (edge, class loop *, class loop *, |
| tree, bool *, int); |
| |
| /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
| and EVOLUTION_LOOP, that were left under a symbolic form. |
| |
| CHREC is an SSA_NAME to be instantiated. |
| |
| CACHE is the cache of already instantiated values. |
| |
| Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
| conversions that may wrap in signed/pointer type are folded, as long |
| as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
| then we don't do such fold. |
| |
| SIZE_EXPR is used for computing the size of the expression to be |
| instantiated, and to stop if it exceeds some limit. */ |
| |
| static tree |
| instantiate_scev_name (edge instantiate_below, |
| class loop *evolution_loop, class loop *inner_loop, |
| tree chrec, |
| bool *fold_conversions, |
| int size_expr) |
| { |
| tree res; |
| class loop *def_loop; |
| basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (chrec)); |
| |
| /* A parameter, nothing to do. */ |
| if (!def_bb |
| || !dominated_by_p (CDI_DOMINATORS, def_bb, instantiate_below->dest)) |
| return chrec; |
| |
| /* We cache the value of instantiated variable to avoid exponential |
| time complexity due to reevaluations. We also store the convenient |
| value in the cache in order to prevent infinite recursion -- we do |
| not want to instantiate the SSA_NAME if it is in a mixer |
| structure. This is used for avoiding the instantiation of |
| recursively defined functions, such as: |
| |
| | a_2 -> {0, +, 1, +, a_2}_1 */ |
| |
| unsigned si = get_instantiated_value_entry (*global_cache, |
| chrec, instantiate_below); |
| if (global_cache->get (si) != chrec_not_analyzed_yet) |
| return global_cache->get (si); |
| |
| /* On recursion return chrec_dont_know. */ |
| global_cache->set (si, chrec_dont_know); |
| |
| def_loop = find_common_loop (evolution_loop, def_bb->loop_father); |
| |
| if (! dominated_by_p (CDI_DOMINATORS, |
| def_loop->header, instantiate_below->dest)) |
| { |
| gimple *def = SSA_NAME_DEF_STMT (chrec); |
| if (gassign *ass = dyn_cast <gassign *> (def)) |
| { |
| switch (gimple_assign_rhs_class (ass)) |
| { |
| case GIMPLE_UNARY_RHS: |
| { |
| tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, |
| inner_loop, gimple_assign_rhs1 (ass), |
| fold_conversions, size_expr); |
| if (op0 == chrec_dont_know) |
| return chrec_dont_know; |
| res = fold_build1 (gimple_assign_rhs_code (ass), |
| TREE_TYPE (chrec), op0); |
| break; |
| } |
| case GIMPLE_BINARY_RHS: |
| { |
| tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, |
| inner_loop, gimple_assign_rhs1 (ass), |
| fold_conversions, size_expr); |
| if (op0 == chrec_dont_know) |
| return chrec_dont_know; |
| tree op1 = instantiate_scev_r (instantiate_below, evolution_loop, |
| inner_loop, gimple_assign_rhs2 (ass), |
| fold_conversions, size_expr); |
| if (op1 == chrec_dont_know) |
| return chrec_dont_know; |
| res = fold_build2 (gimple_assign_rhs_code (ass), |
| TREE_TYPE (chrec), op0, op1); |
| break; |
| } |
| default: |
| res = chrec_dont_know; |
| } |
| } |
| else |
| res = chrec_dont_know; |
| global_cache->set (si, res); |
| return res; |
| } |
| |
| /* If the analysis yields a parametric chrec, instantiate the |
| result again. */ |
| res = analyze_scalar_evolution (def_loop, chrec); |
| |
| /* Don't instantiate default definitions. */ |
| if (TREE_CODE (res) == SSA_NAME |
| && SSA_NAME_IS_DEFAULT_DEF (res)) |
| ; |
| |
| /* Don't instantiate loop-closed-ssa phi nodes. */ |
| else if (TREE_CODE (res) == SSA_NAME |
| && loop_depth (loop_containing_stmt (SSA_NAME_DEF_STMT (res))) |
| > loop_depth (def_loop)) |
| { |
| if (res == chrec) |
| res = loop_closed_phi_def (chrec); |
| else |
| res = chrec; |
| |
| /* When there is no loop_closed_phi_def, it means that the |
| variable is not used after the loop: try to still compute the |
| value of the variable when exiting the loop. */ |
| if (res == NULL_TREE) |
| { |
| loop_p loop = loop_containing_stmt (SSA_NAME_DEF_STMT (chrec)); |
| res = analyze_scalar_evolution (loop, chrec); |
| res = compute_overall_effect_of_inner_loop (loop, res); |
| res = instantiate_scev_r (instantiate_below, evolution_loop, |
| inner_loop, res, |
| fold_conversions, size_expr); |
| } |
| else if (dominated_by_p (CDI_DOMINATORS, |
| gimple_bb (SSA_NAME_DEF_STMT (res)), |
| instantiate_below->dest)) |
| res = chrec_dont_know; |
| } |
| |
| else if (res != chrec_dont_know) |
| { |
| if (inner_loop |
| && def_bb->loop_father != inner_loop |
| && !flow_loop_nested_p (def_bb->loop_father, inner_loop)) |
| /* ??? We could try to compute the overall effect of the loop here. */ |
| res = chrec_dont_know; |
| else |
| res = instantiate_scev_r (instantiate_below, evolution_loop, |
| inner_loop, res, |
| fold_conversions, size_expr); |
| } |
| |
| /* Store the correct value to the cache. */ |
| global_cache->set (si, res); |
| return res; |
| } |
| |
| /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
| and EVOLUTION_LOOP, that were left under a symbolic form. |
| |
| CHREC is a polynomial chain of recurrence to be instantiated. |
| |
| CACHE is the cache of already instantiated values. |
| |
| Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
| conversions that may wrap in signed/pointer type are folded, as long |
| as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
| then we don't do such fold. |
| |
| SIZE_EXPR is used for computing the size of the expression to be |
| instantiated, and to stop if it exceeds some limit. */ |
| |
| static tree |
| instantiate_scev_poly (edge instantiate_below, |
| class loop *evolution_loop, class loop *, |
| tree chrec, bool *fold_conversions, int size_expr) |
| { |
| tree op1; |
| tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, |
| get_chrec_loop (chrec), |
| CHREC_LEFT (chrec), fold_conversions, |
| size_expr); |
| if (op0 == chrec_dont_know) |
| return chrec_dont_know; |
| |
| op1 = instantiate_scev_r (instantiate_below, evolution_loop, |
| get_chrec_loop (chrec), |
| CHREC_RIGHT (chrec), fold_conversions, |
| size_expr); |
| if (op1 == chrec_dont_know) |
| return chrec_dont_know; |
| |
| if (CHREC_LEFT (chrec) != op0 |
| || CHREC_RIGHT (chrec) != op1) |
| { |
| op1 = chrec_convert_rhs (chrec_type (op0), op1, NULL); |
| chrec = build_polynomial_chrec (CHREC_VARIABLE (chrec), op0, op1); |
| } |
| |
| return chrec; |
| } |
| |
| /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
| and EVOLUTION_LOOP, that were left under a symbolic form. |
| |
| "C0 CODE C1" is a binary expression of type TYPE to be instantiated. |
| |
| CACHE is the cache of already instantiated values. |
| |
| Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
| conversions that may wrap in signed/pointer type are folded, as long |
| as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
| then we don't do such fold. |
| |
| SIZE_EXPR is used for computing the size of the expression to be |
| instantiated, and to stop if it exceeds some limit. */ |
| |
| static tree |
| instantiate_scev_binary (edge instantiate_below, |
| class loop *evolution_loop, class loop *inner_loop, |
| tree chrec, enum tree_code code, |
| tree type, tree c0, tree c1, |
| bool *fold_conversions, int size_expr) |
| { |
| tree op1; |
| tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop, |
| c0, fold_conversions, size_expr); |
| if (op0 == chrec_dont_know) |
| return chrec_dont_know; |
| |
| /* While we eventually compute the same op1 if c0 == c1 the process |
| of doing this is expensive so the following short-cut prevents |
| exponential compile-time behavior. */ |
| if (c0 != c1) |
| { |
| op1 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop, |
| c1, fold_conversions, size_expr); |
| if (op1 == chrec_dont_know) |
| return chrec_dont_know; |
| } |
| else |
| op1 = op0; |
| |
| if (c0 != op0 |
| || c1 != op1) |
| { |
| op0 = chrec_convert (type, op0, NULL); |
| op1 = chrec_convert_rhs (type, op1, NULL); |
| |
| switch (code) |
| { |
| case POINTER_PLUS_EXPR: |
| case PLUS_EXPR: |
| return chrec_fold_plus (type, op0, op1); |
| |
| case MINUS_EXPR: |
| return chrec_fold_minus (type, op0, op1); |
| |
| case MULT_EXPR: |
| return chrec_fold_multiply (type, op0, op1); |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| return chrec ? chrec : fold_build2 (code, type, c0, c1); |
| } |
| |
| /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
| and EVOLUTION_LOOP, that were left under a symbolic form. |
| |
| "CHREC" that stands for a convert expression "(TYPE) OP" is to be |
| instantiated. |
| |
| CACHE is the cache of already instantiated values. |
| |
| Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
| conversions that may wrap in signed/pointer type are folded, as long |
| as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
| then we don't do such fold. |
| |
| SIZE_EXPR is used for computing the size of the expression to be |
| instantiated, and to stop if it exceeds some limit. */ |
| |
| static tree |
| instantiate_scev_convert (edge instantiate_below, |
| class loop *evolution_loop, class loop *inner_loop, |
| tree chrec, tree type, tree op, |
| bool *fold_conversions, int size_expr) |
| { |
| tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, |
| inner_loop, op, |
| fold_conversions, size_expr); |
| |
| if (op0 == chrec_dont_know) |
| return chrec_dont_know; |
| |
| if (fold_conversions) |
| { |
| tree tmp = chrec_convert_aggressive (type, op0, fold_conversions); |
| if (tmp) |
| return tmp; |
| |
| /* If we used chrec_convert_aggressive, we can no longer assume that |
| signed chrecs do not overflow, as chrec_convert does, so avoid |
| calling it in that case. */ |
| if (*fold_conversions) |
| { |
| if (chrec && op0 == op) |
| return chrec; |
| |
| return fold_convert (type, op0); |
| } |
| } |
| |
| return chrec_convert (type, op0, NULL); |
| } |
| |
| /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
| and EVOLUTION_LOOP, that were left under a symbolic form. |
| |
| CHREC is a BIT_NOT_EXPR or a NEGATE_EXPR expression to be instantiated. |
| Handle ~X as -1 - X. |
| Handle -X as -1 * X. |
| |
| CACHE is the cache of already instantiated values. |
| |
| Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
| conversions that may wrap in signed/pointer type are folded, as long |
| as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
| then we don't do such fold. |
| |
| SIZE_EXPR is used for computing the size of the expression to be |
| instantiated, and to stop if it exceeds some limit. */ |
| |
| static tree |
| instantiate_scev_not (edge instantiate_below, |
| class loop *evolution_loop, class loop *inner_loop, |
| tree chrec, |
| enum tree_code code, tree type, tree op, |
| bool *fold_conversions, int size_expr) |
| { |
| tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, |
| inner_loop, op, |
| fold_conversions, size_expr); |
| |
| if (op0 == chrec_dont_know) |
| return chrec_dont_know; |
| |
| if (op != op0) |
| { |
| op0 = chrec_convert (type, op0, NULL); |
| |
| switch (code) |
| { |
| case BIT_NOT_EXPR: |
| return chrec_fold_minus |
| (type, fold_convert (type, integer_minus_one_node), op0); |
| |
| case NEGATE_EXPR: |
| return chrec_fold_multiply |
| (type, fold_convert (type, integer_minus_one_node), op0); |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| return chrec ? chrec : fold_build1 (code, type, op0); |
| } |
| |
| /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW |
| and EVOLUTION_LOOP, that were left under a symbolic form. |
| |
| CHREC is the scalar evolution to instantiate. |
| |
| CACHE is the cache of already instantiated values. |
| |
| Variable pointed by FOLD_CONVERSIONS is set to TRUE when the |
| conversions that may wrap in signed/pointer type are folded, as long |
| as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL |
| then we don't do such fold. |
| |
| SIZE_EXPR is used for computing the size of the expression to be |
| instantiated, and to stop if it exceeds some limit. */ |
| |
| static tree |
| instantiate_scev_r (edge instantiate_below, |
| class loop *evolution_loop, class loop *inner_loop, |
| tree chrec, |
| bool *fold_conversions, int size_expr) |
| { |
| /* Give up if the expression is larger than the MAX that we allow. */ |
| if (size_expr++ > param_scev_max_expr_size) |
| return chrec_dont_know; |
| |
| if (chrec == NULL_TREE |
| || automatically_generated_chrec_p (chrec) |
| || is_gimple_min_invariant (chrec)) |
| return chrec; |
| |
| switch (TREE_CODE (chrec)) |
| { |
| case SSA_NAME: |
| return instantiate_scev_name (instantiate_below, evolution_loop, |
| inner_loop, chrec, |
| fold_conversions, size_expr); |
| |
| case POLYNOMIAL_CHREC: |
| return instantiate_scev_poly (instantiate_below, evolution_loop, |
| inner_loop, chrec, |
| fold_conversions, size_expr); |
| |
| case POINTER_PLUS_EXPR: |
| case PLUS_EXPR: |
| case MINUS_EXPR: |
| case MULT_EXPR: |
| return instantiate_scev_binary (instantiate_below, evolution_loop, |
| inner_loop, chrec, |
| TREE_CODE (chrec), chrec_type (chrec), |
| TREE_OPERAND (chrec, 0), |
| TREE_OPERAND (chrec, 1), |
| fold_conversions, size_expr); |
| |
| CASE_CONVERT: |
| return instantiate_scev_convert (instantiate_below, evolution_loop, |
| inner_loop, chrec, |
| TREE_TYPE (chrec), TREE_OPERAND (chrec, 0), |
| fold_conversions, size_expr); |
| |
| case NEGATE_EXPR: |
| case BIT_NOT_EXPR: |
| return instantiate_scev_not (instantiate_below, evolution_loop, |
| inner_loop, chrec, |
| TREE_CODE (chrec), TREE_TYPE (chrec), |
| TREE_OPERAND (chrec, 0), |
| fold_conversions, size_expr); |
| |
| case ADDR_EXPR: |
| if (is_gimple_min_invariant (chrec)) |
| return chrec; |
| /* Fallthru. */ |
| case SCEV_NOT_KNOWN: |
| return chrec_dont_know; |
| |
| case SCEV_KNOWN: |
| return chrec_known; |
| |
| default: |
| if (CONSTANT_CLASS_P (chrec)) |
| return chrec; |
| return chrec_dont_know; |
| } |
| } |
| |
| /* Analyze all the parameters of the chrec that were left under a |
| symbolic form. INSTANTIATE_BELOW is the basic block that stops the |
| recursive instantiation of parameters: a parameter is a variable |
| that is defined in a basic block that dominates INSTANTIATE_BELOW or |
| a function parameter. */ |
| |
| tree |
| instantiate_scev (edge instantiate_below, class loop *evolution_loop, |
| tree chrec) |
| { |
| tree res; |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, "(instantiate_scev \n"); |
| fprintf (dump_file, " (instantiate_below = %d -> %d)\n", |
| instantiate_below->src->index, instantiate_below->dest->index); |
| if (evolution_loop) |
| fprintf (dump_file, " (evolution_loop = %d)\n", evolution_loop->num); |
| fprintf (dump_file, " (chrec = "); |
| print_generic_expr (dump_file, chrec); |
| fprintf (dump_file, ")\n"); |
| } |
| |
| bool destr = false; |
| if (!global_cache) |
| { |
| global_cache = new instantiate_cache_type; |
| destr = true; |
| } |
| |
| res = instantiate_scev_r (instantiate_below, evolution_loop, |
| NULL, chrec, NULL, 0); |
| |
| if (destr) |
| { |
| delete global_cache; |
| global_cache = NULL; |
| } |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, " (res = "); |
| print_generic_expr (dump_file, res); |
| fprintf (dump_file, "))\n"); |
| } |
| |
| return res; |
| } |
| |
| /* Similar to instantiate_parameters, but does not introduce the |
| evolutions in outer loops for LOOP invariants in CHREC, and does not |
| care about causing overflows, as long as they do not affect value |
| of an expression. */ |
| |
| tree |
| resolve_mixers (class loop *loop, tree chrec, bool *folded_casts) |
| { |
| bool destr = false; |
| bool fold_conversions = false; |
| if (!global_cache) |
| { |
| global_cache = new instantiate_cache_type; |
| destr = true; |
| } |
| |
| tree ret = instantiate_scev_r (loop_preheader_edge (loop), loop, NULL, |
| chrec, &fold_conversions, 0); |
| |
| if (folded_casts && !*folded_casts) |
| *folded_casts = fold_conversions; |
| |
| if (destr) |
| { |
| delete global_cache; |
| global_cache = NULL; |
| } |
| |
| return ret; |
| } |
| |
| /* Entry point for the analysis of the number of iterations pass. |
| This function tries to safely approximate the number of iterations |
| the loop will run. When this property is not decidable at compile |
| time, the result is chrec_dont_know. Otherwise the result is a |
| scalar or a symbolic parameter. When the number of iterations may |
| be equal to zero and the property cannot be determined at compile |
| time, the result is a COND_EXPR that represents in a symbolic form |
| the conditions under which the number of iterations is not zero. |
| |
| Example of analysis: suppose that the loop has an exit condition: |
| |
| "if (b > 49) goto end_loop;" |
| |
| and that in a previous analysis we have determined that the |
| variable 'b' has an evolution function: |
| |
| "EF = {23, +, 5}_2". |
| |
| When we evaluate the function at the point 5, i.e. the value of the |
| variable 'b' after 5 iterations in the loop, we have EF (5) = 48, |
| and EF (6) = 53. In this case the value of 'b' on exit is '53' and |
| the loop body has been executed 6 times. */ |
| |
| tree |
| number_of_latch_executions (class loop *loop) |
| { |
| edge exit; |
| class tree_niter_desc niter_desc; |
| tree may_be_zero; |
| tree res; |
| |
| /* Determine whether the number of iterations in loop has already |
| been computed. */ |
| res = loop->nb_iterations; |
| if (res) |
| return res; |
| |
| may_be_zero = NULL_TREE; |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| fprintf (dump_file, "(number_of_iterations_in_loop = \n"); |
| |
| res = chrec_dont_know; |
| exit = single_exit (loop); |
| |
| if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false)) |
| { |
| may_be_zero = niter_desc.may_be_zero; |
| res = niter_desc.niter; |
| } |
| |
| if (res == chrec_dont_know |
| || !may_be_zero |
| || integer_zerop (may_be_zero)) |
| ; |
| else if (integer_nonzerop (may_be_zero)) |
| res = build_int_cst (TREE_TYPE (res), 0); |
| |
| else if (COMPARISON_CLASS_P (may_be_zero)) |
| res = fold_build3 (COND_EXPR, TREE_TYPE (res), may_be_zero, |
| build_int_cst (TREE_TYPE (res), 0), res); |
| else |
| res = chrec_dont_know; |
| |
| if (dump_file && (dump_flags & TDF_SCEV)) |
| { |
| fprintf (dump_file, " (set_nb_iterations_in_loop = "); |
| print_generic_expr (dump_file, res); |
| fprintf (dump_file, "))\n"); |
| } |
| |
| loop->nb_iterations = res; |
| return res; |
| } |
| |
| |
| /* Counters for the stats. */ |
| |
| struct chrec_stats |
| { |
| unsigned nb_chrecs; |
| unsigned nb_affine; |
| unsigned nb_affine_multivar; |
| unsigned nb_higher_poly; |
| unsigned nb_chrec_dont_know; |
| unsigned nb_undetermined; |
| }; |
| |
| /* Reset the counters. */ |
| |
| static inline void |
| reset_chrecs_counters (struct chrec_stats *stats) |
| { |
| stats->nb_chrecs = 0; |
| stats->nb_affine = 0; |
| stats->nb_affine_multivar = 0; |
| stats->nb_higher_poly = 0; |
| stats->nb_chrec_dont_know = 0; |
| stats->nb_undetermined = 0; |
| } |
| |
| /* Dump the contents of a CHREC_STATS structure. */ |
| |
| static void |
| dump_chrecs_stats (FILE *file, struct chrec_stats *stats) |
| { |
| fprintf (file, "\n(\n"); |
| fprintf (file, "-----------------------------------------\n"); |
| fprintf (file, "%d\taffine univariate chrecs\n", stats->nb_affine); |
| fprintf (file, "%d\taffine multivariate chrecs\n", stats->nb_affine_multivar); |
| fprintf (file, "%d\tdegree greater than 2 polynomials\n", |
| stats->nb_higher_poly); |
| fprintf (file, "%d\tchrec_dont_know chrecs\n", stats->nb_chrec_dont_know); |
| fprintf (file, "-----------------------------------------\n"); |
| fprintf (file, "%d\ttotal chrecs\n", stats->nb_chrecs); |
| fprintf (file, "%d\twith undetermined coefficients\n", |
| stats->nb_undetermined); |
| fprintf (file, "-----------------------------------------\n"); |
| fprintf (file, "%d\tchrecs in the scev database\n", |
| (int) scalar_evolution_info->elements ()); |
| fprintf (file, "%d\tsets in the scev database\n", nb_set_scev); |
| fprintf (file, "%d\tgets in the scev database\n", nb_get_scev); |
| fprintf (file, "-----------------------------------------\n"); |
| fprintf (file, ")\n\n"); |
| } |
| |
| /* Gather statistics about CHREC. */ |
| |
| static void |
| gather_chrec_stats (tree chrec, struct chrec_stats *stats) |
| { |
| if (dump_file && (dump_flags & TDF_STATS)) |
| { |
| fprintf (dump_file, "(classify_chrec "); |
| print_generic_expr (dump_file, chrec); |
| fprintf (dump_file, "\n"); |
| } |
| |
| stats->nb_chrecs++; |
| |
| if (chrec == NULL_TREE) |
| { |
| stats->nb_undetermined++; |
| return; |
| } |
| |
| switch (TREE_CODE (chrec)) |
| { |
| case POLYNOMIAL_CHREC: |
| if (evolution_function_is_affine_p (chrec)) |
| { |
| if (dump_file && (dump_flags & TDF_STATS)) |
| fprintf (dump_file, " affine_univariate\n"); |
| stats->nb_affine++; |
| } |
| else if (evolution_function_is_affine_multivariate_p (chrec, 0)) |
| { |
| if (dump_file && (dump_flags & TDF_STATS)) |
| fprintf (dump_file, " affine_multivariate\n"); |
| stats->nb_affine_multivar++; |
| } |
| else |
| { |
| if (dump_file && (dump_flags & TDF_STATS)) |
| fprintf (dump_file, " higher_degree_polynomial\n"); |
| stats->nb_higher_poly++; |
| } |
| |
| break; |
| |
| default: |
| break; |
| } |
| |
| if (chrec_contains_undetermined (chrec)) |
| { |
| if (dump_file && (dump_flags & TDF_STATS)) |
| fprintf (dump_file, " undetermined\n"); |
| stats->nb_undetermined++; |
| } |
| |
| if (dump_file && (dump_flags & TDF_STATS)) |
| fprintf (dump_file, ")\n"); |
| } |
| |
| /* Classify the chrecs of the whole database. */ |
| |
| void |
| gather_stats_on_scev_database (void) |
| { |
| struct chrec_stats stats; |
| |
| if (!dump_file) |
| return; |
| |
| reset_chrecs_counters (&stats); |
| |
| hash_table<scev_info_hasher>::iterator iter; |
| scev_info_str *elt; |
| FOR_EACH_HASH_TABLE_ELEMENT (*scalar_evolution_info, elt, scev_info_str *, |
| iter) |
| gather_chrec_stats (elt->chrec, &stats); |
| |
| dump_chrecs_stats (dump_file, &stats); |
| } |
| |
| |
| /* Initialize the analysis of scalar evolutions for LOOPS. */ |
| |
| void |
| scev_initialize (void) |
| { |
| class loop *loop; |
| |
| gcc_assert (! scev_initialized_p ()); |
| |
| scalar_evolution_info = hash_table<scev_info_hasher>::create_ggc (100); |
| |
| FOR_EACH_LOOP (loop, 0) |
| { |
| loop->nb_iterations = NULL_TREE; |
| } |
| } |
| |
| /* Return true if SCEV is initialized. */ |
| |
| bool |
| scev_initialized_p (void) |
| { |
| return scalar_evolution_info != NULL; |
| } |
| |
| /* Cleans up the information cached by the scalar evolutions analysis |
| in the hash table. */ |
| |
| void |
| scev_reset_htab (void) |
| { |
| if (!scalar_evolution_info) |
| return; |
| |
| scalar_evolution_info->empty (); |
| } |
| |
| /* Cleans up the information cached by the scalar evolutions analysis |
| in the hash table and in the loop->nb_iterations. */ |
| |
| void |
| scev_reset (void) |
| { |
| class loop *loop; |
| |
| scev_reset_htab (); |
| |
| FOR_EACH_LOOP (loop, 0) |
| { |
| loop->nb_iterations = NULL_TREE; |
| } |
| } |
| |
| /* Return true if the IV calculation in TYPE can overflow based on the knowledge |
| of the upper bound on the number of iterations of LOOP, the BASE and STEP |
| of IV. |
| |
| We do not use information whether TYPE can overflow so it is safe to |
| use this test even for derived IVs not computed every iteration or |
| hypotetical IVs to be inserted into code. */ |
| |
| bool |
| iv_can_overflow_p (class loop *loop, tree type, tree base, tree step) |
| { |
| widest_int nit; |
| wide_int base_min, base_max, step_min, step_max, type_min, type_max; |
| signop sgn = TYPE_SIGN (type); |
| |
| if (integer_zerop (step)) |
| return false; |
| |
| if (!INTEGRAL_TYPE_P (TREE_TYPE (base)) |
| || get_range_info (base, &base_min, &base_max) != VR_RANGE) |
| return true; |
| |
| if (!INTEGRAL_TYPE_P (TREE_TYPE (step)) |
| || get_range_info (step, &step_min, &step_max) != VR_RANGE) |
| return true; |
| |
| if (!get_max_loop_iterations (loop, &nit)) |
| return true; |
| |
| type_min = wi::min_value (type); |
| type_max = wi::max_value (type); |
| |
| /* Just sanity check that we don't see values out of the range of the type. |
| In this case the arithmetics bellow would overflow. */ |
| gcc_checking_assert (wi::ge_p (base_min, type_min, sgn) |
| && wi::le_p (base_max, type_max, sgn)); |
| |
| /* Account the possible increment in the last ieration. */ |
| wi::overflow_type overflow = wi::OVF_NONE; |
| nit = wi::add (nit, 1, SIGNED, &overflow); |
| if (overflow) |
| return true; |
| |
| /* NIT is typeless and can exceed the precision of the type. In this case |
| overflow is always possible, because we know STEP is non-zero. */ |
| if (wi::min_precision (nit, UNSIGNED) > TYPE_PRECISION (type)) |
| return true; |
| wide_int nit2 = wide_int::from (nit, TYPE_PRECISION (type), UNSIGNED); |
| |
| /* If step can be positive, check that nit*step <= type_max-base. |
| This can be done by unsigned arithmetic and we only need to watch overflow |
| in the multiplication. The right hand side can always be represented in |
| the type. */ |
| if (sgn == UNSIGNED || !wi::neg_p (step_max)) |
| { |
| wi::overflow_type overflow = wi::OVF_NONE; |
| if (wi::gtu_p (wi::mul (step_max, nit2, UNSIGNED, &overflow), |
| type_max - base_max) |
| || overflow) |
| return true; |
| } |
| /* If step can be negative, check that nit*(-step) <= base_min-type_min. */ |
| if (sgn == SIGNED && wi::neg_p (step_min)) |
| { |
| wi::overflow_type overflow, overflow2; |
| overflow = overflow2 = wi::OVF_NONE; |
| if (wi::gtu_p (wi::mul (wi::neg (step_min, &overflow2), |
| nit2, UNSIGNED, &overflow), |
| base_min - type_min) |
| || overflow || overflow2) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Given EV with form of "(type) {inner_base, inner_step}_loop", this |
| function tries to derive condition under which it can be simplified |
| into "{(type)inner_base, (type)inner_step}_loop". The condition is |
| the maximum number that inner iv can iterate. */ |
| |
| static tree |
| derive_simple_iv_with_niters (tree ev, tree *niters) |
| { |
| if (!CONVERT_EXPR_P (ev)) |
| return ev; |
| |
| tree inner_ev = TREE_OPERAND (ev, 0); |
| if (TREE_CODE (inner_ev) != POLYNOMIAL_CHREC) |
| return ev; |
| |
| tree init = CHREC_LEFT (inner_ev); |
| tree step = CHREC_RIGHT (inner_ev); |
| if (TREE_CODE (init) != INTEGER_CST |
| || TREE_CODE (step) != INTEGER_CST || integer_zerop (step)) |
| return ev; |
| |
| tree type = TREE_TYPE (ev); |
| tree inner_type = TREE_TYPE (inner_ev); |
| if (TYPE_PRECISION (inner_type) >= TYPE_PRECISION (type)) |
| return ev; |
| |
| /* Type conversion in "(type) {inner_base, inner_step}_loop" can be |
| folded only if inner iv won't overflow. We compute the maximum |
| number the inner iv can iterate before overflowing and return the |
| simplified affine iv. */ |
| tree delta; |
| init = fold_convert (type, init); |
| step = fold_convert (type, step); |
| ev = build_polynomial_chrec (CHREC_VARIABLE (inner_ev), init, step); |
| if (tree_int_cst_sign_bit (step)) |
| { |
| tree bound = lower_bound_in_type (inner_type, inner_type); |
| delta = fold_build2 (MINUS_EXPR, type, init, fold_convert (type, bound)); |
| step = fold_build1 (NEGATE_EXPR, type, step); |
| } |
| else |
| { |
| tree bound = upper_bound_in_type (inner_type, inner_type); |
| delta = fold_build2 (MINUS_EXPR, type, fold_convert (type, bound), init); |
| } |
| *niters = fold_build2 (FLOOR_DIV_EXPR, type, delta, step); |
| return ev; |
| } |
| |
| /* Checks whether use of OP in USE_LOOP behaves as a simple affine iv with |
| respect to WRTO_LOOP and returns its base and step in IV if possible |
| (see analyze_scalar_evolution_in_loop for more details on USE_LOOP |
| and WRTO_LOOP). If ALLOW_NONCONSTANT_STEP is true, we want step to be |
| invariant in LOOP. Otherwise we require it to be an integer constant. |
| |
| IV->no_overflow is set to true if we are sure the iv cannot overflow (e.g. |
| because it is computed in signed arithmetics). Consequently, adding an |
| induction variable |
| |
| for (i = IV->base; ; i += IV->step) |
| |
| is only safe if IV->no_overflow is false, or TYPE_OVERFLOW_UNDEFINED is |
| false for the type of the induction variable, or you can prove that i does |
| not wrap by some other argument. Otherwise, this might introduce undefined |
| behavior, and |
| |
| i = iv->base; |
| for (; ; i = (type) ((unsigned type) i + (unsigned type) iv->step)) |
| |
| must be used instead. |
| |
| When IV_NITERS is not NULL, this function also checks case in which OP |
| is a conversion of an inner simple iv of below form: |
| |
| (outer_type){inner_base, inner_step}_loop. |
| |
| If type of inner iv has smaller precision than outer_type, it can't be |
| folded into {(outer_type)inner_base, (outer_type)inner_step}_loop because |
| the inner iv could overflow/wrap. In this case, we derive a condition |
| under which the inner iv won't overflow/wrap and do the simplification. |
| The derived condition normally is the maximum number the inner iv can |
| iterate, and will be stored in IV_NITERS. This is useful in loop niter |
| analysis, to derive break conditions when a loop must terminate, when is |
| infinite. */ |
| |
| bool |
| simple_iv_with_niters (class loop *wrto_loop, class loop *use_loop, |
| tree op, affine_iv *iv, tree *iv_niters, |
| bool allow_nonconstant_step) |
| { |
| enum tree_code code; |
| tree type, ev, base, e; |
| wide_int extreme; |
| bool folded_casts; |
| |
| iv->base = NULL_TREE; |
| iv->step = NULL_TREE; |
| iv->no_overflow = false; |
| |
| type = TREE_TYPE (op); |
| if (!POINTER_TYPE_P (type) |
| && !INTEGRAL_TYPE_P (type)) |
| return false; |
| |
| ev = analyze_scalar_evolution_in_loop (wrto_loop, use_loop, op, |
| &folded_casts); |
| if (chrec_contains_undetermined (ev) |
| || chrec_contains_symbols_defined_in_loop (ev, wrto_loop->num)) |
| return false; |
| |
| if (tree_does_not_contain_chrecs (ev)) |
| { |
| iv->base = ev; |
| iv->step = build_int_cst (TREE_TYPE (ev), 0); |
| iv->no_overflow = true; |
| return true; |
| } |
| |
| /* If we can derive valid scalar evolution with assumptions. */ |
| if (iv_niters && TREE_CODE (ev) != POLYNOMIAL_CHREC) |
| ev = derive_simple_iv_with_niters (ev, iv_niters); |
| |
| if (TREE_CODE (ev) != POLYNOMIAL_CHREC) |
| return false; |
| |
| if (CHREC_VARIABLE (ev) != (unsigned) wrto_loop->num) |
| return false; |
| |
| iv->step = CHREC_RIGHT (ev); |
| if ((!allow_nonconstant_step && TREE_CODE (iv->step) != INTEGER_CST) |
| || tree_contains_chrecs (iv->step, NULL)) |
| return false; |
| |
| iv->base = CHREC_LEFT (ev); |
| if (tree_contains_chrecs (iv->base, NULL)) |
| return false; |
| |
| iv->no_overflow = !folded_casts && nowrap_type_p (type); |
| |
| if (!iv->no_overflow |
| && !iv_can_overflow_p (wrto_loop, type, iv->base, iv->step)) |
| iv->no_overflow = true; |
| |
| /* Try to simplify iv base: |
| |
| (signed T) ((unsigned T)base + step) ;; TREE_TYPE (base) == signed T |
| == (signed T)(unsigned T)base + step |
| == base + step |
| |
| If we can prove operation (base + step) doesn't overflow or underflow. |
| Specifically, we try to prove below conditions are satisfied: |
| |
| base <= UPPER_BOUND (type) - step ;;step > 0 |
| base >= LOWER_BOUND (type) - step ;;step < 0 |
| |
| This is done by proving the reverse conditions are false using loop's |
| initial conditions. |
| |
| The is necessary to make loop niter, or iv overflow analysis easier |
| for below example: |
| |
| int foo (int *a, signed char s, signed char l) |
| { |
| signed char i; |
| for (i = s; i < l; i++) |
| a[i] = 0; |
| return 0; |
| } |
| |
| Note variable I is firstly converted to type unsigned char, incremented, |
| then converted back to type signed char. */ |
| |
| if (wrto_loop->num != use_loop->num) |
| return true; |
| |
| if (!CONVERT_EXPR_P (iv->base) || TREE_CODE (iv->step) != INTEGER_CST) |
| return true; |
| |
| type = TREE_TYPE (iv->base); |
| e = TREE_OPERAND (iv->base, 0); |
| if (TREE_CODE (e) != PLUS_EXPR |
| || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST |
| || !tree_int_cst_equal (iv->step, |
| fold_convert (type, TREE_OPERAND (e, 1)))) |
| return true; |
| e = TREE_OPERAND (e, 0); |
| if (!CONVERT_EXPR_P (e)) |
| return true; |
| base = TREE_OPERAND (e, 0); |
| if (!useless_type_conversion_p (type, TREE_TYPE (base))) |
| return true; |
| |
| if (tree_int_cst_sign_bit (iv->step)) |
| { |
| code = LT_EXPR; |
| extreme = wi::min_value (type); |
| } |
| else |
| { |
| code = GT_EXPR; |
| extreme = wi::max_value (type); |
| } |
| wi::overflow_type overflow = wi::OVF_NONE; |
| extreme = wi::sub (extreme, wi::to_wide (iv->step), |
| TYPE_SIGN (type), &overflow); |
| if (overflow) |
| return true; |
| e = fold_build2 (code, boolean_type_node, base, |
| wide_int_to_tree (type, extreme)); |
| e = simplify_using_initial_conditions (use_loop, e); |
| if (!integer_zerop (e)) |
| return true; |
| |
| if (POINTER_TYPE_P (TREE_TYPE (base))) |
| code = POINTER_PLUS_EXPR; |
| else |
| code = PLUS_EXPR; |
| |
| iv->base = fold_build2 (code, TREE_TYPE (base), base, iv->step); |
| return true; |
| } |
| |
| /* Like simple_iv_with_niters, but return TRUE when OP behaves as a simple |
| affine iv unconditionally. */ |
| |
| bool |
| simple_iv (class loop *wrto_loop, class loop *use_loop, tree op, |
| affine_iv *iv, bool allow_nonconstant_step) |
| { |
| return simple_iv_with_niters (wrto_loop, use_loop, op, iv, |
| NULL, allow_nonconstant_step); |
| } |
| |
| /* Finalize the scalar evolution analysis. */ |
| |
| void |
| scev_finalize (void) |
| { |
| if (!scalar_evolution_info) |
| return; |
| scalar_evolution_info->empty (); |
| scalar_evolution_info = NULL; |
| free_numbers_of_iterations_estimates (cfun); |
| } |
| |
| /* Returns true if the expression EXPR is considered to be too expensive |
| for scev_const_prop. */ |
| |
| static bool |
| expression_expensive_p (tree expr, hash_map<tree, uint64_t> &cache, |
| uint64_t &cost) |
| { |
| enum tree_code code; |
| |
| if (is_gimple_val (expr)) |
| return false; |
| |
| code = TREE_CODE (expr); |
| if (code == TRUNC_DIV_EXPR |
| || code == CEIL_DIV_EXPR |
| || code == FLOOR_DIV_EXPR |
| || code == ROUND_DIV_EXPR |
| || code == TRUNC_MOD_EXPR |
| || code == CEIL_MOD_EXPR |
| || code == FLOOR_MOD_EXPR |
| || code == ROUND_MOD_EXPR |
| || code == EXACT_DIV_EXPR) |
| { |
| /* Division by power of two is usually cheap, so we allow it. |
| Forbid anything else. */ |
| if (!integer_pow2p (TREE_OPERAND (expr, 1))) |
| return true; |
| } |
| |
| bool visited_p; |
| uint64_t &local_cost = cache.get_or_insert (expr, &visited_p); |
| if (visited_p) |
| { |
| uint64_t tem = cost + loca
|