| /* Function summary pass. |
| Copyright (C) 2003-2020 Free Software Foundation, Inc. |
| Contributed by Jan Hubicka |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* Analysis of function bodies used by inter-procedural passes |
| |
| We estimate for each function |
| - function body size and size after specializing into given context |
| - average function execution time in a given context |
| - function frame size |
| For each call |
| - call statement size, time and how often the parameters change |
| |
| ipa_fn_summary data structures store above information locally (i.e. |
| parameters of the function itself) and globally (i.e. parameters of |
| the function created by applying all the inline decisions already |
| present in the callgraph). |
| |
| We provide access to the ipa_fn_summary data structure and |
| basic logic updating the parameters when inlining is performed. |
| |
| The summaries are context sensitive. Context means |
| 1) partial assignment of known constant values of operands |
| 2) whether function is inlined into the call or not. |
| It is easy to add more variants. To represent function size and time |
| that depends on context (i.e. it is known to be optimized away when |
| context is known either by inlining or from IP-CP and cloning), |
| we use predicates. |
| |
| estimate_edge_size_and_time can be used to query |
| function size/time in the given context. ipa_merge_fn_summary_after_inlining merges |
| properties of caller and callee after inlining. |
| |
| Finally pass_inline_parameters is exported. This is used to drive |
| computation of function parameters used by the early inliner. IPA |
| inlined performs analysis via its analyze_function method. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "tree.h" |
| #include "gimple.h" |
| #include "alloc-pool.h" |
| #include "tree-pass.h" |
| #include "ssa.h" |
| #include "tree-streamer.h" |
| #include "cgraph.h" |
| #include "diagnostic.h" |
| #include "fold-const.h" |
| #include "print-tree.h" |
| #include "tree-inline.h" |
| #include "gimple-pretty-print.h" |
| #include "cfganal.h" |
| #include "gimple-iterator.h" |
| #include "tree-cfg.h" |
| #include "tree-ssa-loop-niter.h" |
| #include "tree-ssa-loop.h" |
| #include "symbol-summary.h" |
| #include "ipa-prop.h" |
| #include "ipa-fnsummary.h" |
| #include "cfgloop.h" |
| #include "tree-scalar-evolution.h" |
| #include "ipa-utils.h" |
| #include "cfgexpand.h" |
| #include "gimplify.h" |
| #include "stringpool.h" |
| #include "attribs.h" |
| #include "tree-into-ssa.h" |
| |
| /* Summaries. */ |
| fast_function_summary <ipa_fn_summary *, va_gc> *ipa_fn_summaries; |
| fast_function_summary <ipa_size_summary *, va_heap> *ipa_size_summaries; |
| fast_call_summary <ipa_call_summary *, va_heap> *ipa_call_summaries; |
| |
| /* Edge predicates goes here. */ |
| static object_allocator<predicate> edge_predicate_pool ("edge predicates"); |
| |
| |
| /* Dump IPA hints. */ |
| void |
| ipa_dump_hints (FILE *f, ipa_hints hints) |
| { |
| if (!hints) |
| return; |
| fprintf (f, "IPA hints:"); |
| if (hints & INLINE_HINT_indirect_call) |
| { |
| hints &= ~INLINE_HINT_indirect_call; |
| fprintf (f, " indirect_call"); |
| } |
| if (hints & INLINE_HINT_loop_iterations) |
| { |
| hints &= ~INLINE_HINT_loop_iterations; |
| fprintf (f, " loop_iterations"); |
| } |
| if (hints & INLINE_HINT_loop_stride) |
| { |
| hints &= ~INLINE_HINT_loop_stride; |
| fprintf (f, " loop_stride"); |
| } |
| if (hints & INLINE_HINT_same_scc) |
| { |
| hints &= ~INLINE_HINT_same_scc; |
| fprintf (f, " same_scc"); |
| } |
| if (hints & INLINE_HINT_in_scc) |
| { |
| hints &= ~INLINE_HINT_in_scc; |
| fprintf (f, " in_scc"); |
| } |
| if (hints & INLINE_HINT_cross_module) |
| { |
| hints &= ~INLINE_HINT_cross_module; |
| fprintf (f, " cross_module"); |
| } |
| if (hints & INLINE_HINT_declared_inline) |
| { |
| hints &= ~INLINE_HINT_declared_inline; |
| fprintf (f, " declared_inline"); |
| } |
| if (hints & INLINE_HINT_known_hot) |
| { |
| hints &= ~INLINE_HINT_known_hot; |
| fprintf (f, " known_hot"); |
| } |
| gcc_assert (!hints); |
| } |
| |
| |
| /* Record SIZE and TIME to SUMMARY. |
| The accounted code will be executed when EXEC_PRED is true. |
| When NONCONST_PRED is false the code will evaluate to constant and |
| will get optimized out in specialized clones of the function. |
| If CALL is true account to call_size_time_table rather than |
| size_time_table. */ |
| |
| void |
| ipa_fn_summary::account_size_time (int size, sreal time, |
| const predicate &exec_pred, |
| const predicate &nonconst_pred_in, |
| bool call) |
| { |
| size_time_entry *e; |
| bool found = false; |
| int i; |
| predicate nonconst_pred; |
| vec<size_time_entry, va_gc> *table = call |
| ? call_size_time_table : size_time_table; |
| |
| if (exec_pred == false) |
| return; |
| |
| nonconst_pred = nonconst_pred_in & exec_pred; |
| |
| if (nonconst_pred == false) |
| return; |
| |
| /* We need to create initial empty unconditional clause, but otherwise |
| we don't need to account empty times and sizes. */ |
| if (!size && time == 0 && table) |
| return; |
| |
| /* Only for calls we are unaccounting what we previously recorded. */ |
| gcc_checking_assert (time >= 0 || call); |
| |
| for (i = 0; vec_safe_iterate (table, i, &e); i++) |
| if (e->exec_predicate == exec_pred |
| && e->nonconst_predicate == nonconst_pred) |
| { |
| found = true; |
| break; |
| } |
| if (i == max_size_time_table_size) |
| { |
| i = 0; |
| found = true; |
| e = &(*table)[0]; |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, |
| "\t\tReached limit on number of entries, " |
| "ignoring the predicate."); |
| } |
| if (dump_file && (dump_flags & TDF_DETAILS) && (time != 0 || size)) |
| { |
| fprintf (dump_file, |
| "\t\tAccounting size:%3.2f, time:%3.2f on %spredicate exec:", |
| ((double) size) / ipa_fn_summary::size_scale, |
| (time.to_double ()), found ? "" : "new "); |
| exec_pred.dump (dump_file, conds, 0); |
| if (exec_pred != nonconst_pred) |
| { |
| fprintf (dump_file, " nonconst:"); |
| nonconst_pred.dump (dump_file, conds); |
| } |
| else |
| fprintf (dump_file, "\n"); |
| } |
| if (!found) |
| { |
| class size_time_entry new_entry; |
| new_entry.size = size; |
| new_entry.time = time; |
| new_entry.exec_predicate = exec_pred; |
| new_entry.nonconst_predicate = nonconst_pred; |
| if (call) |
| vec_safe_push (call_size_time_table, new_entry); |
| else |
| vec_safe_push (size_time_table, new_entry); |
| } |
| else |
| { |
| e->size += size; |
| e->time += time; |
| /* FIXME: PR bootstrap/92653 gcc_checking_assert (e->time >= -1); */ |
| /* Tolerate small roundoff issues. */ |
| if (e->time < 0) |
| e->time = 0; |
| } |
| } |
| |
| /* We proved E to be unreachable, redirect it to __builtin_unreachable. */ |
| |
| static struct cgraph_edge * |
| redirect_to_unreachable (struct cgraph_edge *e) |
| { |
| struct cgraph_node *callee = !e->inline_failed ? e->callee : NULL; |
| struct cgraph_node *target = cgraph_node::get_create |
| (builtin_decl_implicit (BUILT_IN_UNREACHABLE)); |
| |
| if (e->speculative) |
| e = cgraph_edge::resolve_speculation (e, target->decl); |
| else if (!e->callee) |
| e = cgraph_edge::make_direct (e, target); |
| else |
| e->redirect_callee (target); |
| class ipa_call_summary *es = ipa_call_summaries->get (e); |
| e->inline_failed = CIF_UNREACHABLE; |
| e->count = profile_count::zero (); |
| es->call_stmt_size = 0; |
| es->call_stmt_time = 0; |
| if (callee) |
| callee->remove_symbol_and_inline_clones (); |
| return e; |
| } |
| |
| /* Set predicate for edge E. */ |
| |
| static void |
| edge_set_predicate (struct cgraph_edge *e, predicate *predicate) |
| { |
| /* If the edge is determined to be never executed, redirect it |
| to BUILTIN_UNREACHABLE to make it clear to IPA passes the call will |
| be optimized out. */ |
| if (predicate && *predicate == false |
| /* When handling speculative edges, we need to do the redirection |
| just once. Do it always on the direct edge, so we do not |
| attempt to resolve speculation while duplicating the edge. */ |
| && (!e->speculative || e->callee)) |
| e = redirect_to_unreachable (e); |
| |
| class ipa_call_summary *es = ipa_call_summaries->get (e); |
| if (predicate && *predicate != true) |
| { |
| if (!es->predicate) |
| es->predicate = edge_predicate_pool.allocate (); |
| *es->predicate = *predicate; |
| } |
| else |
| { |
| if (es->predicate) |
| edge_predicate_pool.remove (es->predicate); |
| es->predicate = NULL; |
| } |
| } |
| |
| /* Set predicate for hint *P. */ |
| |
| static void |
| set_hint_predicate (predicate **p, predicate new_predicate) |
| { |
| if (new_predicate == false || new_predicate == true) |
| { |
| if (*p) |
| edge_predicate_pool.remove (*p); |
| *p = NULL; |
| } |
| else |
| { |
| if (!*p) |
| *p = edge_predicate_pool.allocate (); |
| **p = new_predicate; |
| } |
| } |
| |
| |
| /* Compute what conditions may or may not hold given information about |
| parameters. RET_CLAUSE returns truths that may hold in a specialized copy, |
| while RET_NONSPEC_CLAUSE returns truths that may hold in an nonspecialized |
| copy when called in a given context. It is a bitmask of conditions. Bit |
| 0 means that condition is known to be false, while bit 1 means that condition |
| may or may not be true. These differs - for example NOT_INLINED condition |
| is always false in the second and also builtin_constant_p tests cannot use |
| the fact that parameter is indeed a constant. |
| |
| KNOWN_VALS is partial mapping of parameters of NODE to constant values. |
| KNOWN_AGGS is a vector of aggregate known offset/value set for each |
| parameter. Return clause of possible truths. When INLINE_P is true, assume |
| that we are inlining. |
| |
| ERROR_MARK means compile time invariant. */ |
| |
| static void |
| evaluate_conditions_for_known_args (struct cgraph_node *node, |
| bool inline_p, |
| vec<tree> known_vals, |
| vec<value_range> known_value_ranges, |
| vec<ipa_agg_value_set> known_aggs, |
| clause_t *ret_clause, |
| clause_t *ret_nonspec_clause) |
| { |
| clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition; |
| clause_t nonspec_clause = 1 << predicate::not_inlined_condition; |
| class ipa_fn_summary *info = ipa_fn_summaries->get (node); |
| int i; |
| struct condition *c; |
| |
| for (i = 0; vec_safe_iterate (info->conds, i, &c); i++) |
| { |
| tree val = NULL; |
| tree res; |
| int j; |
| struct expr_eval_op *op; |
| |
| /* We allow call stmt to have fewer arguments than the callee function |
| (especially for K&R style programs). So bound check here (we assume |
| known_aggs vector, if non-NULL, has the same length as |
| known_vals). */ |
| gcc_checking_assert (!known_aggs.length () || !known_vals.length () |
| || (known_vals.length () == known_aggs.length ())); |
| |
| if (c->agg_contents) |
| { |
| struct ipa_agg_value_set *agg; |
| |
| if (c->code == predicate::changed |
| && !c->by_ref |
| && c->operand_num < (int)known_vals.length () |
| && (known_vals[c->operand_num] == error_mark_node)) |
| continue; |
| |
| if (c->operand_num < (int)known_aggs.length ()) |
| { |
| agg = &known_aggs[c->operand_num]; |
| val = ipa_find_agg_cst_for_param (agg, |
| c->operand_num |
| < (int) known_vals.length () |
| ? known_vals[c->operand_num] |
| : NULL, |
| c->offset, c->by_ref); |
| } |
| else |
| val = NULL_TREE; |
| } |
| else if (c->operand_num < (int) known_vals.length ()) |
| { |
| val = known_vals[c->operand_num]; |
| if (val == error_mark_node && c->code != predicate::changed) |
| val = NULL_TREE; |
| } |
| |
| if (!val |
| && (c->code == predicate::changed |
| || c->code == predicate::is_not_constant)) |
| { |
| clause |= 1 << (i + predicate::first_dynamic_condition); |
| nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); |
| continue; |
| } |
| if (c->code == predicate::changed) |
| { |
| nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); |
| continue; |
| } |
| |
| if (c->code == predicate::is_not_constant) |
| { |
| nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); |
| continue; |
| } |
| |
| if (val && TYPE_SIZE (c->type) == TYPE_SIZE (TREE_TYPE (val))) |
| { |
| if (c->type != TREE_TYPE (val)) |
| val = fold_unary (VIEW_CONVERT_EXPR, c->type, val); |
| for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) |
| { |
| if (!val) |
| break; |
| if (!op->val[0]) |
| val = fold_unary (op->code, op->type, val); |
| else if (!op->val[1]) |
| val = fold_binary (op->code, op->type, |
| op->index ? op->val[0] : val, |
| op->index ? val : op->val[0]); |
| else if (op->index == 0) |
| val = fold_ternary (op->code, op->type, |
| val, op->val[0], op->val[1]); |
| else if (op->index == 1) |
| val = fold_ternary (op->code, op->type, |
| op->val[0], val, op->val[1]); |
| else if (op->index == 2) |
| val = fold_ternary (op->code, op->type, |
| op->val[0], op->val[1], val); |
| else |
| val = NULL_TREE; |
| } |
| |
| res = val |
| ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val) |
| : NULL; |
| |
| if (res && integer_zerop (res)) |
| continue; |
| if (res && integer_onep (res)) |
| { |
| clause |= 1 << (i + predicate::first_dynamic_condition); |
| nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); |
| continue; |
| } |
| } |
| if (c->operand_num < (int) known_value_ranges.length () |
| && !c->agg_contents |
| && !known_value_ranges[c->operand_num].undefined_p () |
| && !known_value_ranges[c->operand_num].varying_p () |
| && TYPE_SIZE (c->type) |
| == TYPE_SIZE (known_value_ranges[c->operand_num].type ()) |
| && (!val || TREE_CODE (val) != INTEGER_CST)) |
| { |
| value_range vr = known_value_ranges[c->operand_num]; |
| if (!useless_type_conversion_p (c->type, vr.type ())) |
| { |
| value_range res; |
| range_fold_unary_expr (&res, NOP_EXPR, |
| c->type, &vr, vr.type ()); |
| vr = res; |
| } |
| tree type = c->type; |
| |
| for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) |
| { |
| if (vr.varying_p () || vr.undefined_p ()) |
| break; |
| |
| value_range res; |
| if (!op->val[0]) |
| range_fold_unary_expr (&res, op->code, op->type, &vr, type); |
| else if (!op->val[1]) |
| { |
| value_range op0 (op->val[0], op->val[0]); |
| range_fold_binary_expr (&res, op->code, op->type, |
| op->index ? &op0 : &vr, |
| op->index ? &vr : &op0); |
| } |
| else |
| gcc_unreachable (); |
| type = op->type; |
| vr = res; |
| } |
| if (!vr.varying_p () && !vr.undefined_p ()) |
| { |
| value_range res; |
| value_range val_vr (c->val, c->val); |
| range_fold_binary_expr (&res, c->code, boolean_type_node, |
| &vr, |
| &val_vr); |
| if (res.zero_p ()) |
| continue; |
| } |
| } |
| |
| clause |= 1 << (i + predicate::first_dynamic_condition); |
| nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); |
| } |
| *ret_clause = clause; |
| if (ret_nonspec_clause) |
| *ret_nonspec_clause = nonspec_clause; |
| } |
| |
| /* Return true if VRP will be exectued on the function. |
| We do not want to anticipate optimizations that will not happen. |
| |
| FIXME: This can be confused with -fdisable and debug counters and thus |
| it should not be used for correctness (only to make heuristics work). |
| This means that inliner should do its own optimizations of expressions |
| that it predicts to be constant so wrong code can not be triggered by |
| builtin_constant_p. */ |
| |
| static bool |
| vrp_will_run_p (struct cgraph_node *node) |
| { |
| return (opt_for_fn (node->decl, optimize) |
| && !opt_for_fn (node->decl, optimize_debug) |
| && opt_for_fn (node->decl, flag_tree_vrp)); |
| } |
| |
| /* Similarly about FRE. */ |
| |
| static bool |
| fre_will_run_p (struct cgraph_node *node) |
| { |
| return (opt_for_fn (node->decl, optimize) |
| && !opt_for_fn (node->decl, optimize_debug) |
| && opt_for_fn (node->decl, flag_tree_fre)); |
| } |
| |
| /* Work out what conditions might be true at invocation of E. |
| Compute costs for inlined edge if INLINE_P is true. |
| |
| Return in CLAUSE_PTR the evaluated conditions and in NONSPEC_CLAUSE_PTR |
| (if non-NULL) conditions evaluated for nonspecialized clone called |
| in a given context. |
| |
| KNOWN_VALS_PTR and KNOWN_AGGS_PTR must be non-NULL and will be filled by |
| known constant and aggregate values of parameters. |
| |
| KNOWN_CONTEXT_PTR, if non-NULL, will be filled by polymorphic call contexts |
| of parameter used by a polymorphic call. */ |
| |
| void |
| evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, |
| clause_t *clause_ptr, |
| clause_t *nonspec_clause_ptr, |
| vec<tree> *known_vals_ptr, |
| vec<ipa_polymorphic_call_context> |
| *known_contexts_ptr, |
| vec<ipa_agg_value_set> *known_aggs_ptr) |
| { |
| struct cgraph_node *callee = e->callee->ultimate_alias_target (); |
| class ipa_fn_summary *info = ipa_fn_summaries->get (callee); |
| auto_vec<value_range, 32> known_value_ranges; |
| class ipa_edge_args *args; |
| |
| if (clause_ptr) |
| *clause_ptr = inline_p ? 0 : 1 << predicate::not_inlined_condition; |
| |
| if (ipa_node_params_sum |
| && !e->call_stmt_cannot_inline_p |
| && (info->conds || known_contexts_ptr) |
| && (args = IPA_EDGE_REF (e)) != NULL) |
| { |
| struct cgraph_node *caller; |
| class ipa_node_params *caller_parms_info, *callee_pi = NULL; |
| class ipa_call_summary *es = ipa_call_summaries->get (e); |
| int i, count = ipa_get_cs_argument_count (args); |
| |
| if (count) |
| { |
| if (e->caller->inlined_to) |
| caller = e->caller->inlined_to; |
| else |
| caller = e->caller; |
| caller_parms_info = IPA_NODE_REF (caller); |
| callee_pi = IPA_NODE_REF (callee); |
| |
| /* Watch for thunks. */ |
| if (callee_pi) |
| /* Watch for variadic functions. */ |
| count = MIN (count, ipa_get_param_count (callee_pi)); |
| } |
| |
| if (callee_pi) |
| for (i = 0; i < count; i++) |
| { |
| struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); |
| |
| if (ipa_is_param_used_by_indirect_call (callee_pi, i) |
| || ipa_is_param_used_by_ipa_predicates (callee_pi, i)) |
| { |
| /* Determine if we know constant value of the parameter. */ |
| tree cst = ipa_value_from_jfunc (caller_parms_info, jf, |
| ipa_get_type (callee_pi, i)); |
| |
| if (!cst && e->call_stmt |
| && i < (int)gimple_call_num_args (e->call_stmt)) |
| { |
| cst = gimple_call_arg (e->call_stmt, i); |
| if (!is_gimple_min_invariant (cst)) |
| cst = NULL; |
| } |
| if (cst) |
| { |
| gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO); |
| if (!known_vals_ptr->length ()) |
| vec_safe_grow_cleared (known_vals_ptr, count); |
| (*known_vals_ptr)[i] = cst; |
| } |
| else if (inline_p && !es->param[i].change_prob) |
| { |
| if (!known_vals_ptr->length ()) |
| vec_safe_grow_cleared (known_vals_ptr, count); |
| (*known_vals_ptr)[i] = error_mark_node; |
| } |
| |
| /* If we failed to get simple constant, try value range. */ |
| if ((!cst || TREE_CODE (cst) != INTEGER_CST) |
| && vrp_will_run_p (caller) |
| && ipa_is_param_used_by_ipa_predicates (callee_pi, i)) |
| { |
| value_range vr |
| = ipa_value_range_from_jfunc (caller_parms_info, e, jf, |
| ipa_get_type (callee_pi, |
| i)); |
| if (!vr.undefined_p () && !vr.varying_p ()) |
| { |
| if (!known_value_ranges.length ()) |
| known_value_ranges.safe_grow_cleared (count); |
| known_value_ranges[i] = vr; |
| } |
| } |
| |
| /* Determine known aggregate values. */ |
| if (fre_will_run_p (caller)) |
| { |
| ipa_agg_value_set agg |
| = ipa_agg_value_set_from_jfunc (caller_parms_info, |
| caller, &jf->agg); |
| if (agg.items.length ()) |
| { |
| if (!known_aggs_ptr->length ()) |
| vec_safe_grow_cleared (known_aggs_ptr, count); |
| (*known_aggs_ptr)[i] = agg; |
| } |
| } |
| } |
| |
| /* For calls used in polymorphic calls we further determine |
| polymorphic call context. */ |
| if (known_contexts_ptr |
| && ipa_is_param_used_by_polymorphic_call (callee_pi, i)) |
| { |
| ipa_polymorphic_call_context |
| ctx = ipa_context_from_jfunc (caller_parms_info, e, i, jf); |
| if (!ctx.useless_p ()) |
| { |
| if (!known_contexts_ptr->length ()) |
| known_contexts_ptr->safe_grow_cleared (count); |
| (*known_contexts_ptr)[i] |
| = ipa_context_from_jfunc (caller_parms_info, e, i, jf); |
| } |
| } |
| } |
| else |
| gcc_assert (!count || callee->thunk.thunk_p); |
| } |
| else if (e->call_stmt && !e->call_stmt_cannot_inline_p && info->conds) |
| { |
| int i, count = (int)gimple_call_num_args (e->call_stmt); |
| |
| for (i = 0; i < count; i++) |
| { |
| tree cst = gimple_call_arg (e->call_stmt, i); |
| if (!is_gimple_min_invariant (cst)) |
| cst = NULL; |
| if (cst) |
| { |
| if (!known_vals_ptr->length ()) |
| vec_safe_grow_cleared (known_vals_ptr, count); |
| (*known_vals_ptr)[i] = cst; |
| } |
| } |
| } |
| |
| evaluate_conditions_for_known_args (callee, inline_p, |
| *known_vals_ptr, |
| known_value_ranges, |
| *known_aggs_ptr, |
| clause_ptr, |
| nonspec_clause_ptr); |
| } |
| |
| |
| /* Allocate the function summary. */ |
| |
| static void |
| ipa_fn_summary_alloc (void) |
| { |
| gcc_checking_assert (!ipa_fn_summaries); |
| ipa_size_summaries = new ipa_size_summary_t (symtab); |
| ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab); |
| ipa_call_summaries = new ipa_call_summary_t (symtab); |
| } |
| |
| ipa_call_summary::~ipa_call_summary () |
| { |
| if (predicate) |
| edge_predicate_pool.remove (predicate); |
| |
| param.release (); |
| } |
| |
| ipa_fn_summary::~ipa_fn_summary () |
| { |
| if (loop_iterations) |
| edge_predicate_pool.remove (loop_iterations); |
| if (loop_stride) |
| edge_predicate_pool.remove (loop_stride); |
| vec_free (conds); |
| vec_free (size_time_table); |
| vec_free (call_size_time_table); |
| } |
| |
| void |
| ipa_fn_summary_t::remove_callees (cgraph_node *node) |
| { |
| cgraph_edge *e; |
| for (e = node->callees; e; e = e->next_callee) |
| ipa_call_summaries->remove (e); |
| for (e = node->indirect_calls; e; e = e->next_callee) |
| ipa_call_summaries->remove (e); |
| } |
| |
| /* Same as remap_predicate_after_duplication but handle hint predicate *P. |
| Additionally care about allocating new memory slot for updated predicate |
| and set it to NULL when it becomes true or false (and thus uninteresting). |
| */ |
| |
| static void |
| remap_hint_predicate_after_duplication (predicate **p, |
| clause_t possible_truths) |
| { |
| predicate new_predicate; |
| |
| if (!*p) |
| return; |
| |
| new_predicate = (*p)->remap_after_duplication (possible_truths); |
| /* We do not want to free previous predicate; it is used by node origin. */ |
| *p = NULL; |
| set_hint_predicate (p, new_predicate); |
| } |
| |
| |
| /* Hook that is called by cgraph.c when a node is duplicated. */ |
| void |
| ipa_fn_summary_t::duplicate (cgraph_node *src, |
| cgraph_node *dst, |
| ipa_fn_summary *, |
| ipa_fn_summary *info) |
| { |
| new (info) ipa_fn_summary (*ipa_fn_summaries->get (src)); |
| /* TODO: as an optimization, we may avoid copying conditions |
| that are known to be false or true. */ |
| info->conds = vec_safe_copy (info->conds); |
| |
| /* When there are any replacements in the function body, see if we can figure |
| out that something was optimized out. */ |
| if (ipa_node_params_sum && dst->clone.tree_map) |
| { |
| vec<size_time_entry, va_gc> *entry = info->size_time_table; |
| /* Use SRC parm info since it may not be copied yet. */ |
| class ipa_node_params *parms_info = IPA_NODE_REF (src); |
| vec<tree> known_vals = vNULL; |
| int count = ipa_get_param_count (parms_info); |
| int i, j; |
| clause_t possible_truths; |
| predicate true_pred = true; |
| size_time_entry *e; |
| int optimized_out_size = 0; |
| bool inlined_to_p = false; |
| struct cgraph_edge *edge, *next; |
| |
| info->size_time_table = 0; |
| known_vals.safe_grow_cleared (count); |
| for (i = 0; i < count; i++) |
| { |
| struct ipa_replace_map *r; |
| |
| for (j = 0; vec_safe_iterate (dst->clone.tree_map, j, &r); j++) |
| { |
| if (r->parm_num == i) |
| { |
| known_vals[i] = r->new_tree; |
| break; |
| } |
| } |
| } |
| evaluate_conditions_for_known_args (dst, false, |
| known_vals, |
| vNULL, |
| vNULL, |
| &possible_truths, |
| /* We are going to specialize, |
| so ignore nonspec truths. */ |
| NULL); |
| known_vals.release (); |
| |
| info->account_size_time (0, 0, true_pred, true_pred); |
| |
| /* Remap size_time vectors. |
| Simplify the predicate by pruning out alternatives that are known |
| to be false. |
| TODO: as on optimization, we can also eliminate conditions known |
| to be true. */ |
| for (i = 0; vec_safe_iterate (entry, i, &e); i++) |
| { |
| predicate new_exec_pred; |
| predicate new_nonconst_pred; |
| new_exec_pred = e->exec_predicate.remap_after_duplication |
| (possible_truths); |
| new_nonconst_pred = e->nonconst_predicate.remap_after_duplication |
| (possible_truths); |
| if (new_exec_pred == false || new_nonconst_pred == false) |
| optimized_out_size += e->size; |
| else |
| info->account_size_time (e->size, e->time, new_exec_pred, |
| new_nonconst_pred); |
| } |
| |
| /* Remap edge predicates with the same simplification as above. |
| Also copy constantness arrays. */ |
| for (edge = dst->callees; edge; edge = next) |
| { |
| predicate new_predicate; |
| class ipa_call_summary *es = ipa_call_summaries->get (edge); |
| next = edge->next_callee; |
| |
| if (!edge->inline_failed) |
| inlined_to_p = true; |
| if (!es->predicate) |
| continue; |
| new_predicate = es->predicate->remap_after_duplication |
| (possible_truths); |
| if (new_predicate == false && *es->predicate != false) |
| optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale; |
| edge_set_predicate (edge, &new_predicate); |
| } |
| |
| /* Remap indirect edge predicates with the same simplification as above. |
| Also copy constantness arrays. */ |
| for (edge = dst->indirect_calls; edge; edge = next) |
| { |
| predicate new_predicate; |
| class ipa_call_summary *es = ipa_call_summaries->get (edge); |
| next = edge->next_callee; |
| |
| gcc_checking_assert (edge->inline_failed); |
| if (!es->predicate) |
| continue; |
| new_predicate = es->predicate->remap_after_duplication |
| (possible_truths); |
| if (new_predicate == false && *es->predicate != false) |
| optimized_out_size += es->call_stmt_size * ipa_fn_summary::size_scale; |
| edge_set_predicate (edge, &new_predicate); |
| } |
| remap_hint_predicate_after_duplication (&info->loop_iterations, |
| possible_truths); |
| remap_hint_predicate_after_duplication (&info->loop_stride, |
| possible_truths); |
| |
| /* If inliner or someone after inliner will ever start producing |
| non-trivial clones, we will get trouble with lack of information |
| about updating self sizes, because size vectors already contains |
| sizes of the callees. */ |
| gcc_assert (!inlined_to_p || !optimized_out_size); |
| } |
| else |
| { |
| info->size_time_table = vec_safe_copy (info->size_time_table); |
| if (info->loop_iterations) |
| { |
| predicate p = *info->loop_iterations; |
| info->loop_iterations = NULL; |
| set_hint_predicate (&info->loop_iterations, p); |
| } |
| if (info->loop_stride) |
| { |
| predicate p = *info->loop_stride; |
| info->loop_stride = NULL; |
| set_hint_predicate (&info->loop_stride, p); |
| } |
| } |
| if (!dst->inlined_to) |
| ipa_update_overall_fn_summary (dst); |
| } |
| |
| |
| /* Hook that is called by cgraph.c when a node is duplicated. */ |
| |
| void |
| ipa_call_summary_t::duplicate (struct cgraph_edge *src, |
| struct cgraph_edge *dst, |
| class ipa_call_summary *srcinfo, |
| class ipa_call_summary *info) |
| { |
| new (info) ipa_call_summary (*srcinfo); |
| info->predicate = NULL; |
| edge_set_predicate (dst, srcinfo->predicate); |
| info->param = srcinfo->param.copy (); |
| if (!dst->indirect_unknown_callee && src->indirect_unknown_callee) |
| { |
| info->call_stmt_size -= (eni_size_weights.indirect_call_cost |
| - eni_size_weights.call_cost); |
| info->call_stmt_time -= (eni_time_weights.indirect_call_cost |
| - eni_time_weights.call_cost); |
| } |
| } |
| |
| /* Dump edge summaries associated to NODE and recursively to all clones. |
| Indent by INDENT. */ |
| |
| static void |
| dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node, |
| class ipa_fn_summary *info) |
| { |
| struct cgraph_edge *edge; |
| for (edge = node->callees; edge; edge = edge->next_callee) |
| { |
| class ipa_call_summary *es = ipa_call_summaries->get (edge); |
| struct cgraph_node *callee = edge->callee->ultimate_alias_target (); |
| int i; |
| |
| fprintf (f, |
| "%*s%s %s\n%*s freq:%4.2f", |
| indent, "", callee->dump_name (), |
| !edge->inline_failed |
| ? "inlined" : cgraph_inline_failed_string (edge-> inline_failed), |
| indent, "", edge->sreal_frequency ().to_double ()); |
| |
| if (cross_module_call_p (edge)) |
| fprintf (f, " cross module"); |
| |
| if (es) |
| fprintf (f, " loop depth:%2i size:%2i time: %2i", |
| es->loop_depth, es->call_stmt_size, es->call_stmt_time); |
| |
| ipa_fn_summary *s = ipa_fn_summaries->get (callee); |
| ipa_size_summary *ss = ipa_size_summaries->get (callee); |
| if (s != NULL) |
| fprintf (f, " callee size:%2i stack:%2i", |
| (int) (ss->size / ipa_fn_summary::size_scale), |
| (int) s->estimated_stack_size); |
| |
| if (es && es->predicate) |
| { |
| fprintf (f, " predicate: "); |
| es->predicate->dump (f, info->conds); |
| } |
| else |
| fprintf (f, "\n"); |
| if (es && es->param.exists ()) |
| for (i = 0; i < (int) es->param.length (); i++) |
| { |
| int prob = es->param[i].change_prob; |
| |
| if (!prob) |
| fprintf (f, "%*s op%i is compile time invariant\n", |
| indent + 2, "", i); |
| else if (prob != REG_BR_PROB_BASE) |
| fprintf (f, "%*s op%i change %f%% of time\n", indent + 2, "", i, |
| prob * 100.0 / REG_BR_PROB_BASE); |
| } |
| if (!edge->inline_failed) |
| { |
| ipa_size_summary *ss = ipa_size_summaries->get (callee); |
| fprintf (f, "%*sStack frame offset %i, callee self size %i\n", |
| indent + 2, "", |
| (int) ipa_get_stack_frame_offset (callee), |
| (int) ss->estimated_self_stack_size); |
| dump_ipa_call_summary (f, indent + 2, callee, info); |
| } |
| } |
| for (edge = node->indirect_calls; edge; edge = edge->next_callee) |
| { |
| class ipa_call_summary *es = ipa_call_summaries->get (edge); |
| fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i" |
| " time: %2i", |
| indent, "", |
| es->loop_depth, |
| edge->sreal_frequency ().to_double (), es->call_stmt_size, |
| es->call_stmt_time); |
| if (es->predicate) |
| { |
| fprintf (f, "predicate: "); |
| es->predicate->dump (f, info->conds); |
| } |
| else |
| fprintf (f, "\n"); |
| } |
| } |
| |
| |
| void |
| ipa_dump_fn_summary (FILE *f, struct cgraph_node *node) |
| { |
| if (node->definition) |
| { |
| class ipa_fn_summary *s = ipa_fn_summaries->get (node); |
| class ipa_size_summary *ss = ipa_size_summaries->get (node); |
| if (s != NULL) |
| { |
| size_time_entry *e; |
| int i; |
| fprintf (f, "IPA function summary for %s", node->dump_name ()); |
| if (DECL_DISREGARD_INLINE_LIMITS (node->decl)) |
| fprintf (f, " always_inline"); |
| if (s->inlinable) |
| fprintf (f, " inlinable"); |
| if (s->fp_expressions) |
| fprintf (f, " fp_expression"); |
| fprintf (f, "\n global time: %f\n", s->time.to_double ()); |
| fprintf (f, " self size: %i\n", ss->self_size); |
| fprintf (f, " global size: %i\n", ss->size); |
| fprintf (f, " min size: %i\n", s->min_size); |
| fprintf (f, " self stack: %i\n", |
| (int) ss->estimated_self_stack_size); |
| fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size); |
| if (s->growth) |
| fprintf (f, " estimated growth:%i\n", (int) s->growth); |
| if (s->scc_no) |
| fprintf (f, " In SCC: %i\n", (int) s->scc_no); |
| for (i = 0; vec_safe_iterate (s->size_time_table, i, &e); i++) |
| { |
| fprintf (f, " size:%f, time:%f", |
| (double) e->size / ipa_fn_summary::size_scale, |
| e->time.to_double ()); |
| if (e->exec_predicate != true) |
| { |
| fprintf (f, ", executed if:"); |
| e->exec_predicate.dump (f, s->conds, 0); |
| } |
| if (e->exec_predicate != e->nonconst_predicate) |
| { |
| fprintf (f, ", nonconst if:"); |
| e->nonconst_predicate.dump (f, s->conds, 0); |
| } |
| fprintf (f, "\n"); |
| } |
| if (s->loop_iterations) |
| { |
| fprintf (f, " loop iterations:"); |
| s->loop_iterations->dump (f, s->conds); |
| } |
| if (s->loop_stride) |
| { |
| fprintf (f, " loop stride:"); |
| s->loop_stride->dump (f, s->conds); |
| } |
| fprintf (f, " calls:\n"); |
| dump_ipa_call_summary (f, 4, node, s); |
| fprintf (f, "\n"); |
| } |
| else |
| fprintf (f, "IPA summary for %s is missing.\n", node->dump_name ()); |
| } |
| } |
| |
| DEBUG_FUNCTION void |
| ipa_debug_fn_summary (struct cgraph_node *node) |
| { |
| ipa_dump_fn_summary (stderr, node); |
| } |
| |
| void |
| ipa_dump_fn_summaries (FILE *f) |
| { |
| struct cgraph_node *node; |
| |
| FOR_EACH_DEFINED_FUNCTION (node) |
| if (!node->inlined_to) |
| ipa_dump_fn_summary (f, node); |
| } |
| |
| /* Callback of walk_aliased_vdefs. Flags that it has been invoked to the |
| boolean variable pointed to by DATA. */ |
| |
| static bool |
| mark_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef ATTRIBUTE_UNUSED, |
| void *data) |
| { |
| bool *b = (bool *) data; |
| *b = true; |
| return true; |
| } |
| |
| /* If OP refers to value of function parameter, return the corresponding |
| parameter. If non-NULL, the size of the memory load (or the SSA_NAME of the |
| PARM_DECL) will be stored to *SIZE_P in that case too. */ |
| |
| static tree |
| unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op, |
| poly_int64 *size_p) |
| { |
| /* SSA_NAME referring to parm default def? */ |
| if (TREE_CODE (op) == SSA_NAME |
| && SSA_NAME_IS_DEFAULT_DEF (op) |
| && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL) |
| { |
| if (size_p) |
| *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op))); |
| return SSA_NAME_VAR (op); |
| } |
| /* Non-SSA parm reference? */ |
| if (TREE_CODE (op) == PARM_DECL) |
| { |
| bool modified = false; |
| |
| ao_ref refd; |
| ao_ref_init (&refd, op); |
| int walked = walk_aliased_vdefs (&refd, gimple_vuse (stmt), |
| mark_modified, &modified, NULL, NULL, |
| fbi->aa_walk_budget + 1); |
| if (walked < 0) |
| { |
| fbi->aa_walk_budget = 0; |
| return NULL_TREE; |
| } |
| if (!modified) |
| { |
| if (size_p) |
| *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op))); |
| return op; |
| } |
| } |
| return NULL_TREE; |
| } |
| |
| /* If OP refers to value of function parameter, return the corresponding |
| parameter. Also traverse chains of SSA register assignments. If non-NULL, |
| the size of the memory load (or the SSA_NAME of the PARM_DECL) will be |
| stored to *SIZE_P in that case too. */ |
| |
| static tree |
| unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op, |
| poly_int64 *size_p) |
| { |
| tree res = unmodified_parm_1 (fbi, stmt, op, size_p); |
| if (res) |
| return res; |
| |
| if (TREE_CODE (op) == SSA_NAME |
| && !SSA_NAME_IS_DEFAULT_DEF (op) |
| && gimple_assign_single_p (SSA_NAME_DEF_STMT (op))) |
| return unmodified_parm (fbi, SSA_NAME_DEF_STMT (op), |
| gimple_assign_rhs1 (SSA_NAME_DEF_STMT (op)), |
| size_p); |
| return NULL_TREE; |
| } |
| |
| /* If OP refers to a value of a function parameter or value loaded from an |
| aggregate passed to a parameter (either by value or reference), return TRUE |
| and store the number of the parameter to *INDEX_P, the access size into |
| *SIZE_P, and information whether and how it has been loaded from an |
| aggregate into *AGGPOS. INFO describes the function parameters, STMT is the |
| statement in which OP is used or loaded. */ |
| |
| static bool |
| unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi, |
| gimple *stmt, tree op, int *index_p, |
| poly_int64 *size_p, |
| struct agg_position_info *aggpos) |
| { |
| tree res = unmodified_parm_1 (fbi, stmt, op, size_p); |
| |
| gcc_checking_assert (aggpos); |
| if (res) |
| { |
| *index_p = ipa_get_param_decl_index (fbi->info, res); |
| if (*index_p < 0) |
| return false; |
| aggpos->agg_contents = false; |
| aggpos->by_ref = false; |
| return true; |
| } |
| |
| if (TREE_CODE (op) == SSA_NAME) |
| { |
| if (SSA_NAME_IS_DEFAULT_DEF (op) |
| || !gimple_assign_single_p (SSA_NAME_DEF_STMT (op))) |
| return false; |
| stmt = SSA_NAME_DEF_STMT (op); |
| op = gimple_assign_rhs1 (stmt); |
| if (!REFERENCE_CLASS_P (op)) |
| return unmodified_parm_or_parm_agg_item (fbi, stmt, op, index_p, size_p, |
| aggpos); |
| } |
| |
| aggpos->agg_contents = true; |
| return ipa_load_from_parm_agg (fbi, fbi->info->descriptors, |
| stmt, op, index_p, &aggpos->offset, |
| size_p, &aggpos->by_ref); |
| } |
| |
| /* See if statement might disappear after inlining. |
| 0 - means not eliminated |
| 1 - half of statements goes away |
| 2 - for sure it is eliminated. |
| We are not terribly sophisticated, basically looking for simple abstraction |
| penalty wrappers. */ |
| |
| static int |
| eliminated_by_inlining_prob (ipa_func_body_info *fbi, gimple *stmt) |
| { |
| enum gimple_code code = gimple_code (stmt); |
| enum tree_code rhs_code; |
| |
| if (!optimize) |
| return 0; |
| |
| switch (code) |
| { |
| case GIMPLE_RETURN: |
| return 2; |
| case GIMPLE_ASSIGN: |
| if (gimple_num_ops (stmt) != 2) |
| return 0; |
| |
| rhs_code = gimple_assign_rhs_code (stmt); |
| |
| /* Casts of parameters, loads from parameters passed by reference |
| and stores to return value or parameters are often free after |
| inlining due to SRA and further combining. |
| Assume that half of statements goes away. */ |
| if (CONVERT_EXPR_CODE_P (rhs_code) |
| || rhs_code == VIEW_CONVERT_EXPR |
| || rhs_code == ADDR_EXPR |
| || gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) |
| { |
| tree rhs = gimple_assign_rhs1 (stmt); |
| tree lhs = gimple_assign_lhs (stmt); |
| tree inner_rhs = get_base_address (rhs); |
| tree inner_lhs = get_base_address (lhs); |
| bool rhs_free = false; |
| bool lhs_free = false; |
| |
| if (!inner_rhs) |
| inner_rhs = rhs; |
| if (!inner_lhs) |
| inner_lhs = lhs; |
| |
| /* Reads of parameter are expected to be free. */ |
| if (unmodified_parm (fbi, stmt, inner_rhs, NULL)) |
| rhs_free = true; |
| /* Match expressions of form &this->field. Those will most likely |
| combine with something upstream after inlining. */ |
| else if (TREE_CODE (inner_rhs) == ADDR_EXPR) |
| { |
| tree op = get_base_address (TREE_OPERAND (inner_rhs, 0)); |
| if (TREE_CODE (op) == PARM_DECL) |
| rhs_free = true; |
| else if (TREE_CODE (op) == MEM_REF |
| && unmodified_parm (fbi, stmt, TREE_OPERAND (op, 0), |
| NULL)) |
| rhs_free = true; |
| } |
| |
| /* When parameter is not SSA register because its address is taken |
| and it is just copied into one, the statement will be completely |
| free after inlining (we will copy propagate backward). */ |
| if (rhs_free && is_gimple_reg (lhs)) |
| return 2; |
| |
| /* Reads of parameters passed by reference |
| expected to be free (i.e. optimized out after inlining). */ |
| if (TREE_CODE (inner_rhs) == MEM_REF |
| && unmodified_parm (fbi, stmt, TREE_OPERAND (inner_rhs, 0), NULL)) |
| rhs_free = true; |
| |
| /* Copying parameter passed by reference into gimple register is |
| probably also going to copy propagate, but we can't be quite |
| sure. */ |
| if (rhs_free && is_gimple_reg (lhs)) |
| lhs_free = true; |
| |
| /* Writes to parameters, parameters passed by value and return value |
| (either directly or passed via invisible reference) are free. |
| |
| TODO: We ought to handle testcase like |
| struct a {int a,b;}; |
| struct a |
| returnstruct (void) |
| { |
| struct a a ={1,2}; |
| return a; |
| } |
| |
| This translate into: |
| |
| returnstruct () |
| { |
| int a$b; |
| int a$a; |
| struct a a; |
| struct a D.2739; |
| |
| <bb 2>: |
| D.2739.a = 1; |
| D.2739.b = 2; |
| return D.2739; |
| |
| } |
| For that we either need to copy ipa-split logic detecting writes |
| to return value. */ |
| if (TREE_CODE (inner_lhs) == PARM_DECL |
| || TREE_CODE (inner_lhs) == RESULT_DECL |
| || (TREE_CODE (inner_lhs) == MEM_REF |
| && (unmodified_parm (fbi, stmt, TREE_OPERAND (inner_lhs, 0), |
| NULL) |
| || (TREE_CODE (TREE_OPERAND (inner_lhs, 0)) == SSA_NAME |
| && SSA_NAME_VAR (TREE_OPERAND (inner_lhs, 0)) |
| && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND |
| (inner_lhs, |
| 0))) == RESULT_DECL)))) |
| lhs_free = true; |
| if (lhs_free |
| && (is_gimple_reg (rhs) || is_gimple_min_invariant (rhs))) |
| rhs_free = true; |
| if (lhs_free && rhs_free) |
| return 1; |
| } |
| return 0; |
| default: |
| return 0; |
| } |
| } |
| |
| /* Analyze EXPR if it represents a series of simple operations performed on |
| a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and |
| AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item. |
| Type of the parameter or load from an aggregate via the parameter is |
| stored in *TYPE_P. Operations on the parameter are recorded to |
| PARAM_OPS_P if it is not NULL. */ |
| |
| static bool |
| decompose_param_expr (struct ipa_func_body_info *fbi, |
| gimple *stmt, tree expr, |
| int *index_p, tree *type_p, |
| struct agg_position_info *aggpos, |
| expr_eval_ops *param_ops_p = NULL) |
| { |
| int op_limit = opt_for_fn (fbi->node->decl, param_ipa_max_param_expr_ops); |
| int op_count = 0; |
| |
| if (param_ops_p) |
| *param_ops_p = NULL; |
| |
| while (true) |
| { |
| expr_eval_op eval_op; |
| unsigned rhs_count; |
| unsigned cst_count = 0; |
| |
| if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL, |
| aggpos)) |
| { |
| tree type = TREE_TYPE (expr); |
| |
| if (aggpos->agg_contents) |
| { |
| /* Stop if containing bit-field. */ |
| if (TREE_CODE (expr) == BIT_FIELD_REF |
| || contains_bitfld_component_ref_p (expr)) |
| break; |
| } |
| |
| *type_p = type; |
| return true; |
| } |
| |
| if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr)) |
| break; |
| |
| if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr))) |
| break; |
| |
| switch (gimple_assign_rhs_class (stmt)) |
| { |
| case GIMPLE_SINGLE_RHS: |
| expr = gimple_assign_rhs1 (stmt); |
| continue; |
| |
| case GIMPLE_UNARY_RHS: |
| rhs_count = 1; |
| break; |
| |
| case GIMPLE_BINARY_RHS: |
| rhs_count = 2; |
| break; |
| |
| case GIMPLE_TERNARY_RHS: |
| rhs_count = 3; |
| break; |
| |
| default: |
| goto fail; |
| } |
| |
| /* Stop if expression is too complex. */ |
| if (op_count++ == op_limit) |
| break; |
| |
| if (param_ops_p) |
| { |
| eval_op.code = gimple_assign_rhs_code (stmt); |
| eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt)); |
| eval_op.val[0] = NULL_TREE; |
| eval_op.val[1] = NULL_TREE; |
| } |
| |
| expr = NULL_TREE; |
| for (unsigned i = 0; i < rhs_count; i++) |
| { |
| tree op = gimple_op (stmt, i + 1); |
| |
| gcc_assert (op && !TYPE_P (op)); |
| if (is_gimple_ip_invariant (op)) |
| { |
| if (++cst_count == rhs_count) |
| goto fail; |
| |
| eval_op.val[cst_count - 1] = op; |
| } |
| else if (!expr) |
| { |
| /* Found a non-constant operand, and record its index in rhs |
| operands. */ |
| eval_op.index = i; |
| expr = op; |
| } |
| else |
| { |
| /* Found more than one non-constant operands. */ |
| goto fail; |
| } |
| } |
| |
| if (param_ops_p) |
| vec_safe_insert (*param_ops_p, 0, eval_op); |
| } |
| |
| /* Failed to decompose, free resource and return. */ |
| fail: |
| if (param_ops_p) |
| vec_free (*param_ops_p); |
| |
| return false; |
| } |
| |
| /* If BB ends by a conditional we can turn into predicates, attach corresponding |
| predicates to the CFG edges. */ |
| |
| static void |
| set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi, |
| class ipa_fn_summary *summary, |
| class ipa_node_params *params_summary, |
| basic_block bb) |
| { |
| gimple *last; |
| tree op, op2; |
| int index; |
| struct agg_position_info aggpos; |
| enum tree_code code, inverted_code; |
| edge e; |
| edge_iterator ei; |
| gimple *set_stmt; |
| tree param_type; |
| expr_eval_ops param_ops; |
| |
| last = last_stmt (bb); |
| if (!last || gimple_code (last) != GIMPLE_COND) |
| return; |
| if (!is_gimple_ip_invariant (gimple_cond_rhs (last))) |
| return; |
| op = gimple_cond_lhs (last); |
| |
| if (decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos, |
| ¶m_ops)) |
| { |
| code = gimple_cond_code (last); |
| inverted_code = invert_tree_comparison (code, HONOR_NANS (op)); |
| |
| FOR_EACH_EDGE (e, ei, bb->succs) |
| { |
| enum tree_code this_code = (e->flags & EDGE_TRUE_VALUE |
| ? code : inverted_code); |
| /* invert_tree_comparison will return ERROR_MARK on FP |
| comparisons that are not EQ/NE instead of returning proper |
| unordered one. Be sure it is not confused with NON_CONSTANT. |
| |
| And if the edge's target is the final block of diamond CFG graph |
| of this conditional statement, we do not need to compute |
| predicate for the edge because the final block's predicate must |
| be at least as that of the first block of the statement. */ |
| if (this_code != ERROR_MARK |
| && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) |
| { |
| predicate p |
| = add_condition (summary, params_summary, index, |
| param_type, &aggpos, |
| this_code, gimple_cond_rhs (last), param_ops); |
| e->aux = edge_predicate_pool.allocate (); |
| *(predicate *) e->aux = p; |
| } |
| } |
| vec_free (param_ops); |
| } |
| |
| if (TREE_CODE (op) != SSA_NAME) |
| return; |
| /* Special case |
| if (builtin_constant_p (op)) |
| constant_code |
| else |
| nonconstant_code. |
| Here we can predicate nonconstant_code. We can't |
| really handle constant_code since we have no predicate |
| for this and also the constant code is not known to be |
| optimized away when inliner doesn't see operand is constant. |
| Other optimizers might think otherwise. */ |
| if (gimple_cond_code (last) != NE_EXPR |
| || !integer_zerop (gimple_cond_rhs (last))) |
| return; |
| set_stmt = SSA_NAME_DEF_STMT (op); |
| if (!gimple_call_builtin_p (set_stmt, BUILT_IN_CONSTANT_P) |
| || gimple_call_num_args (set_stmt) != 1) |
| return; |
| op2 = gimple_call_arg (set_stmt, 0); |
| if (!decompose_param_expr (fbi, set_stmt, op2, &index, ¶m_type, &aggpos)) |
| return; |
| FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE) |
| { |
| predicate p = add_condition (summary, params_summary, index, |
| param_type, &aggpos, |
| predicate::is_not_constant, NULL_TREE); |
| e->aux = edge_predicate_pool.allocate (); |
| *(predicate *) e->aux = p; |
| } |
| } |
| |
| |
| /* If BB ends by a switch we can turn into predicates, attach corresponding |
| predicates to the CFG edges. */ |
| |
| static void |
| set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi, |
| class ipa_fn_summary *summary, |
| class ipa_node_params *params_summary, |
| basic_block bb) |
| { |
| gimple *lastg; |
| tree op; |
| int index; |
| struct agg_position_info aggpos; |
| edge e; |
| edge_iterator ei; |
| size_t n; |
| size_t case_idx; |
| tree param_type; |
| expr_eval_ops param_ops; |
| |
| lastg = last_stmt (bb); |
| if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH) |
| return; |
| gswitch *last = as_a <gswitch *> (lastg); |
| op = gimple_switch_index (last); |
| if (!decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos, |
| ¶m_ops)) |
| return; |
| |
| auto_vec<std::pair<tree, tree> > ranges; |
| tree type = TREE_TYPE (op); |
| int bound_limit = opt_for_fn (fbi->node->decl, |
| param_ipa_max_switch_predicate_bounds); |
| int bound_count = 0; |
| wide_int vr_wmin, vr_wmax; |
| value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax); |
| |
| FOR_EACH_EDGE (e, ei, bb->succs) |
| { |
| e->aux = edge_predicate_pool.allocate (); |
| *(predicate *) e->aux = false; |
| } |
| |
| e = gimple_switch_edge (cfun, last, 0); |
| /* Set BOUND_COUNT to maximum count to bypass computing predicate for |
| default case if its target basic block is in convergence point of all |
| switch cases, which can be determined by checking whether it |
| post-dominates the switch statement. */ |
| if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) |
| bound_count = INT_MAX; |
| |
| n = gimple_switch_num_labels (last); |
| for (case_idx = 1; case_idx < n; ++case_idx) |
| { |
| tree cl = gimple_switch_label (last, case_idx); |
| tree min = CASE_LOW (cl); |
| tree max = CASE_HIGH (cl); |
| predicate p; |
| |
| e = gimple_switch_edge (cfun, last, case_idx); |
| |
| /* The case value might not have same type as switch expression, |
| extend the value based on the expression type. */ |
| if (TREE_TYPE (min) != type) |
| min = wide_int_to_tree (type, wi::to_wide (min)); |
| |
| if (!max) |
| max = min; |
| else if (TREE_TYPE (max) != type) |
| max = wide_int_to_tree (type, wi::to_wide (max)); |
| |
| /* The case's target basic block is in convergence point of all switch |
| cases, its predicate should be at least as that of the switch |
| statement. */ |
| if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) |
| p = true; |
| else if (min == max) |
| p = add_condition (summary, params_summary, index, param_type, |
| &aggpos, EQ_EXPR, min, param_ops); |
| else |
| { |
| predicate p1, p2; |
| p1 = add_condition (summary, params_summary, index, param_type, |
| &aggpos, GE_EXPR, min, param_ops); |
| p2 = add_condition (summary, params_summary,index, param_type, |
| &aggpos, LE_EXPR, max, param_ops); |
| p = p1 & p2; |
| } |
| *(class predicate *) e->aux |
| = p.or_with (summary->conds, *(class predicate *) e->aux); |
| |
| /* If there are too many disjoint case ranges, predicate for default |
| case might become too complicated. So add a limit here. */ |
| if (bound_count > bound_limit) |
| continue; |
| |
| bool new_range = true; |
| |
| if (!ranges.is_empty ()) |
| { |
| wide_int curr_wmin = wi::to_wide (min); |
| wide_int last_wmax = wi::to_wide (ranges.last ().second); |
| |
| /* Merge case ranges if they are continuous. */ |
| if (curr_wmin == last_wmax + 1) |
| new_range = false; |
| else if (vr_type == VR_ANTI_RANGE) |
| { |
| /* If two disjoint case ranges can be connected by anti-range |
| of switch index, combine them to one range. */ |
| if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type))) |
| vr_type = VR_UNDEFINED; |
| else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type))) |
| new_range = false; |
| } |
| } |
| |
| /* Create/extend a case range. And we count endpoints of range set, |
| this number nearly equals to number of conditions that we will create |
| for predicate of default case. */ |
| if (new_range) |
| { |
| bound_count += (min == max) ? 1 : 2; |
| ranges.safe_push (std::make_pair (min, max)); |
| } |
| else |
| { |
| bound_count += (ranges.last ().first == ranges.last ().second); |
| ranges.last ().second = max; |
| } |
| } |
| |
| e = gimple_switch_edge (cfun, last, 0); |
| if (bound_count > bound_limit) |
| { |
| *(class predicate *) e->aux = true; |
| vec_free (param_ops); |
| return; |
| } |
| |
| predicate p_seg = true; |
| predicate p_all = false; |
| |
| if (vr_type != VR_RANGE) |
| { |
| vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type)); |
| vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type)); |
| } |
| |
| /* Construct predicate to represent default range set that is negation of |
| all case ranges. Case range is classified as containing single/non-single |
| values. Suppose a piece of case ranges in the following. |
| |
| [D1...D2] [S1] ... [Sn] [D3...D4] |
| |
| To represent default case's range sets between two non-single value |
| case ranges (From D2 to D3), we construct predicate as: |
| |
| D2 < x < D3 && x != S1 && ... && x != Sn |
| */ |
| for (size_t i = 0; i < ranges.length (); i++) |
| { |
| tree min = ranges[i].first; |
| tree max = ranges[i].second; |
| |
| if (min == max) |
| p_seg &= add_condition (summary, params_summary, index, |
| param_type, &aggpos, NE_EXPR, |
| min, param_ops); |
| else |
| { |
| /* Do not create sub-predicate for range that is beyond low bound |
| of switch index. */ |
| if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type))) |
| { |
| p_seg &= add_condition (summary, params_summary, index, |
| param_type, &aggpos, |
| LT_EXPR, min, param_ops); |
| p_all = p_all.or_with (summary->conds, p_seg); |
| } |
| |
| /* Do not create sub-predicate for range that is beyond up bound |
| of switch index. */ |
| if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type))) |
| { |
| p_seg = false; |
| break; |
| } |
| |
| p_seg = add_condition (summary, params_summary, index, |
| param_type, &aggpos, GT_EXPR, |
| max, param_ops); |
| } |
| } |
| |
| p_all = p_all.or_with (summary->conds, p_seg); |
| *(class predicate *) e->aux |
| = p_all.or_with (summary->conds, *(class predicate *) e->aux); |
| |
| vec_free (param_ops); |
| } |
| |
| |
| /* For each BB in NODE attach to its AUX pointer predicate under |
| which it is executable. */ |
| |
| static void |
| compute_bb_predicates (struct ipa_func_body_info *fbi, |
| struct cgraph_node *node, |
| class ipa_fn_summary *summary, |
| class ipa_node_params *params_summary) |
| { |
| struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); |
| bool done = false; |
| basic_block bb; |
| |
| FOR_EACH_BB_FN (bb, my_function) |
| { |
| set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb); |
| set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb); |
| } |
| |
| /* Entry block is always executable. */ |
| ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux |
| = edge_predicate_pool.allocate (); |
| *(predicate *) ENTRY_BLOCK_PTR_FOR_FN (my_function)->aux = true; |
| |
| /* A simple dataflow propagation of predicates forward in the CFG. |
| TODO: work in reverse postorder. */ |
| while (!done) |
| { |
| done = true; |
| FOR_EACH_BB_FN (bb, my_function) |
| { |
| predicate p = false; |
| edge e; |
| edge_iterator ei; |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| { |
| if (e->src->aux) |
| { |
| predicate this_bb_predicate |
| = *(predicate *) e->src->aux; |
| if (e->aux) |
| this_bb_predicate &= (*(class predicate *) e->aux); |
| p = p.or_with (summary->conds, this_bb_predicate); |
| if (p == true) |
| break; |
| } |
| } |
| if (p != false) |
| { |
| basic_block pdom_bb; |
| |
| if (!bb->aux) |
| { |
| done = false; |
| bb->aux = edge_predicate_pool.allocate (); |
| *((predicate *) bb->aux) = p; |
| } |
| else if (p != *(predicate *) bb->aux) |
| { |
| /* This OR operation is needed to ensure monotonous data flow |
| in the case we hit the limit on number of clauses and the |
| and/or operations above give approximate answers. */ |
| p = p.or_with (summary->conds, *(predicate *)bb->aux); |
| if (p != *(predicate *) bb->aux) |
| { |
| done = false; |
| *((predicate *) bb->aux) = p; |
| } |
| } |
| |
| /* For switch/if statement, we can OR-combine predicates of all |
| its cases/branches to get predicate for basic block in their |
| convergence point, but sometimes this will generate very |
| complicated predicate. Actually, we can get simplified |
| predicate in another way by using the fact that predicate |
| for a basic block must also hold true for its post dominators. |
| To be specific, basic block in convergence point of |
| conditional statement should include predicate of the |
| statement. */ |
| pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb); |
| if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb) |
| ; |
| else if (!pdom_bb->aux) |
| { |
| done = false; |
| pdom_bb->aux = edge_predicate_pool.allocate (); |
| *((predicate *) pdom_bb->aux) = p; |
| } |
| else if (p != *(predicate *) pdom_bb->aux) |
| { |
| p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux); |
| if (p != *(predicate *) pdom_bb->aux) |
| { |
| done = false; |
| *((predicate *) pdom_bb->aux) = p; |
| } |
| } |
| } |
| } |
| } |
| } |
| |
| |
| /* Return predicate specifying when the STMT might have result that is not |
| a compile time constant. */ |
| |
| static predicate |
| will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi, |
| class ipa_fn_summary *summary, |
| class ipa_node_params *params_summary, |
| tree expr, |
| vec<predicate> nonconstant_names) |
| { |
| tree parm; |
| int index; |
| |
| while (UNARY_CLASS_P (expr)) |
| expr = TREE_OPERAND (expr, 0); |
| |
| parm = unmodified_parm (fbi, NULL, expr, NULL); |
| if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) |
| return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL, |
| predicate::changed, NULL_TREE); |
| if (is_gimple_min_invariant (expr)) |
| return false; |
| if (TREE_CODE (expr) == SSA_NAME) |
| return nonconstant_names[SSA_NAME_VERSION (expr)]; |
| if (BINARY_CLASS_P (expr) || COMPARISON_CLASS_P (expr)) |
| { |
| predicate p1 |
| = will_be_nonconstant_expr_predicate (fbi, summary, |
| params_summary, |
| TREE_OPERAND (expr, 0), |
| nonconstant_names); |
| if (p1 == true) |
| return p1; |
| |
| predicate p2 |
| = will_be_nonconstant_expr_predicate (fbi, summary, |
| params_summary, |
| TREE_OPERAND (expr, 1), |
| nonconstant_names); |
| return p1.or_with (summary->conds, p2); |
| } |
| else if (TREE_CODE (expr) == COND_EXPR) |
| { |
| predicate p1 |
| = will_be_nonconstant_expr_predicate (fbi, summary, |
| params_summary, |
| TREE_OPERAND (expr, 0), |
| nonconstant_names); |
| if (p1 == true) |
| return p1; |
| |
| predicate p2 |
| = will_be_nonconstant_expr_predicate (fbi, summary, |
| params_summary, |
| TREE_OPERAND (expr, 1), |
| nonconstant_names); |
| if (p2 == true) |
| return p2; |
| p1 = p1.or_with (summary->conds, p2); |
| p2 = will_be_nonconstant_expr_predicate (fbi, summary, |
| params_summary, |
| TREE_OPERAND (expr, 2), |
| nonconstant_names); |
| return p2.or_with (summary->conds, p1); |
| } |
| else if (TREE_CODE (expr) == CALL_EXPR) |
| return true; |
| else |
| { |
| debug_tree (expr); |
| gcc_unreachable (); |
| } |
| return false; |
| } |
| |
| |
| /* Return predicate specifying when the STMT might have result that is not |
| a compile time constant. */ |
| |
| static predicate |
| will_be_nonconstant_predicate (struct ipa_func_body_info *fbi, |
| class ipa_fn_summary *summary, |
| class ipa_node_params *params_summary, |
| gimple *stmt, |
| vec<predicate> nonconstant_names) |
| { |
| predicate p = true; |
| ssa_op_iter iter; |
| tree use; |
| tree param_type = NULL_TREE; |
| predicate op_non_const; |
| bool is_load; |
| int base_index; |
| struct agg_position_info aggpos; |
| |
| /* What statements might be optimized away |
| when their arguments are constant. */ |
| if (gimple_code (stmt) != GIMPLE_ASSIGN |
| && gimple_code (stmt) != GIMPLE_COND |
| && gimple_code (stmt) != GIMPLE_SWITCH |
| && (gimple_code (stmt) != GIMPLE_CALL |
| || !(gimple_call_flags (stmt) & ECF_CONST))) |
| return p; |
| |
| /* Stores will stay anyway. */ |
| if (gimple_store_p (stmt)) |
| return p; |
| |
| is_load = gimple_assign_load_p (stmt); |
| |
| /* Loads can be optimized when the value is known. */ |
| if (is_load) |
| { |
| tree op = gimple_assign_rhs1 (stmt); |
| if (!decompose_param_expr (fbi, stmt, op, &base_index, ¶m_type, |
| &aggpos)) |
| return p; |
| } |
| else |
| base_index = -1; |
| |
| /* See if we understand all operands before we start |
| adding conditionals. */ |
| FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) |
| { |
| tree parm = unmodified_parm (fbi, stmt, use, NULL); |
| /* For arguments we can build a condition. */ |
| if (parm && ipa_get_param_decl_index (fbi->info, parm) >= 0) |
| continue; |
| if (TREE_CODE (use) != SSA_NAME) |
| return p; |
| /* If we know when operand is constant, |
| we still can say something useful. */ |
| if (nonconstant_names[SSA_NAME_VERSION (use)] != true) |
| continue; |
| return p; |
| } |
| |
| if (is_load) |
| op_non_const = |
| add_condition (summary, params_summary, |
| base_index, param_type, &aggpos, |
| predicate::changed, NULL_TREE); |
| else |
| op_non_const = false; |
| FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) |
| { |
| tree parm = unmodified_parm (fbi, stmt, use, NULL); |
| int index; |
| |
| if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) |
| { |
| if (index != base_index) |
| p = add_condition (summary, params_summary, index, |
| TREE_TYPE (parm), NULL, |
| predicate::changed, NULL_TREE); |
| else |
| continue; |
| } |
| else |
| p = nonconstant_names[SSA_NAME_VERSION (use)]; |
| op_non_const = p.or_with (summary->conds, op_non_const); |
| } |
| if ((gimple_code (stmt) == GIMPLE_ASSIGN || gimple_code (stmt) == GIMPLE_CALL) |
| && gimple_op (stmt, 0) |
| && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME) |
| nonconstant_names[SSA_NAME_VERSION (gimple_op (stmt, 0))] |
| = op_non_const; |
| return op_non_const; |
| } |
| |
| struct record_modified_bb_info |
| { |
| tree op; |
| bitmap bb_set; |
| gimple *stmt; |
| }; |
| |
| /* Value is initialized in INIT_BB and used in USE_BB. We want to compute |
| probability how often it changes between USE_BB. |
| INIT_BB->count/USE_BB->count is an estimate, but if INIT_BB |
| is in different loop nest, we can do better. |
| This is all just estimate. In theory we look for minimal cut separating |
| INIT_BB and USE_BB, but we only want to anticipate loop invariant motion |
| anyway. */ |
| |
| static basic_block |
| get_minimal_bb (basic_block init_bb, basic_block use_bb) |
| { |
| class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); |
| if (l && l->header->count < init_bb->count) |
| return l->header; |
| return init_bb; |
| } |
| |
| /* Callback of walk_aliased_vdefs. Records basic blocks where the value may be |
| set except for info->stmt. */ |
| |
| static bool |
| record_modified (ao_ref *ao ATTRIBUTE_UNUSED, tree vdef, void *data) |
| { |
| struct record_modified_bb_info *info = |
| (struct record_modified_bb_info *) data; |
| if (SSA_NAME_DEF_STMT (vdef) == info->stmt) |
| return false; |
| if (gimple_clobber_p (SSA_NAME_DEF_STMT (vdef))) |
| return false; |
| bitmap_set_bit (info->bb_set, |
| SSA_NAME_IS_DEFAULT_DEF (vdef) |
| ? ENTRY_BLOCK_PTR_FOR_FN (cfun)->index |
| : get_minimal_bb |
| (gimple_bb (SSA_NAME_DEF_STMT (vdef)), |
| gimple_bb (info->stmt))->index); |
| if (dump_file) |
| { |
| fprintf (dump_file, " Param "); |
| print_generic_expr (dump_file, info->op, TDF_SLIM); |
| fprintf (dump_file, " changed at bb %i, minimal: %i stmt: ", |
| gimple_bb (SSA_NAME_DEF_STMT (vdef))->index, |
| get_minimal_bb |
| (gimple_bb (SSA_NAME_DEF_STMT (vdef)), |
| gimple_bb (info->stmt))->index); |
| print_gimple_stmt (dump_file, SSA_NAME_DEF_STMT (vdef), 0); |
| } |
| return false; |
| } |
| |
| /* Return probability (based on REG_BR_PROB_BASE) that I-th parameter of STMT |
| will change since last invocation of STMT. |
| |
| Value 0 is reserved for compile time invariants. |
| For common parameters it is REG_BR_PROB_BASE. For loop invariants it |
| ought to be REG_BR_PROB_BASE / estimated_iters. */ |
| |
| static int |
| param_change_prob (ipa_func_body_info *fbi, gimple *stmt, int i) |
| { |
| tree op = gimple_call_arg (stmt, i); |
| basic_block bb = gimple_bb (stmt); |
| |
| if (TREE_CODE (op) == WITH_SIZE_EXPR) |
| op = TREE_OPERAND (op, 0); |
| |
| tree base = get_base_address (op); |
| |
| /* Global invariants never change. */ |
| if (is_gimple_min_invariant (base)) |
| return 0; |
| |
| /* We would have to do non-trivial analysis to really work out what |
| is the probability of value to change (i.e. when init statement |
| is in a sibling loop of the call). |
| |
| We do an conservative estimate: when call is executed N times more often |
| than the statement defining value, we take the frequency 1/N. */ |
| if (TREE_CODE (base) == SSA_NAME) |
| { |
| profile_count init_count; |
| |
| if (!bb->count.nonzero_p ()) |
| return REG_BR_PROB_BASE; |
| |
| if (SSA_NAME_IS_DEFAULT_DEF (base)) |
| init_count = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; |
| else |
| init_count = get_minimal_bb |
| (gimple_bb (SSA_NAME_DEF_STMT (base)), |
| gimple_bb (stmt))->count; |
| |
| if (init_count < bb->count) |
| return MAX ((init_count.to_sreal_scale (bb->count) |
| * REG_BR_PROB_BASE).to_int (), 1); |
| return REG_BR_PROB_BASE; |
| } |
| else |
| { |
| ao_ref refd; |
| profile_count max = ENTRY_BLOCK_PTR_FOR_FN (cfun)->count; |
| struct record_modified_bb_info info; |
| tree init = ctor_for_folding (base); |
| |
| if (init != error_mark_node) |
| return 0; |
| if (!bb->count.nonzero_p ()) |
| return REG_BR_PROB_BASE; |
| if (dump_file) |
| { |
| fprintf (dump_file, " Analyzing param change probability of "); |
| print_generic_expr (dump_file, op, TDF_SLIM); |
| fprintf (dump_file, "\n"); |
| } |
| ao_ref_init (&refd, op); |
| info.op = op; |
| info.stmt = stmt; |
| info.bb_set = BITMAP_ALLOC (NULL); |
| int walked |
| = walk_aliased_vdefs (&refd, gimple_vuse (stmt), record_modified, &info, |
| NULL, NULL, fbi->aa_walk_budget); |
| if (walked < 0 || bitmap_bit_p (info.bb_set, bb->index)) |
| { |
| if (dump_file) |
| { |
| if (walked < 0) |
| fprintf (dump_file, " Ran out of AA walking budget.\n"); |
| else |
| fprintf (dump_file, " Set in same BB as used.\n"); |
| } |
| BITMAP_FREE (info.bb_set); |
| return REG_BR_PROB_BASE; |
| } |
| |
| bitmap_iterator bi; |
| unsigned index; |
| /* Lookup the most frequent update of the value and believe that |
| it dominates all the other; precise analysis here is difficult. */ |
| EXECUTE_IF_SET_IN_BITMAP (info.bb_set, 0, index, bi) |
| max = max.max (BASIC_BLOCK_FOR_FN (cfun, index)->count); |
| if (dump_file) |
| { |
| fprintf (dump_file, " Set with count "); |
| max.dump (dump_file); |
| fprintf (dump_file, " and used with count "); |
| bb->count.dump (dump_file); |
| fprintf (dump_file, " freq %f\n", |
| max.to_sreal_scale (bb->count).to_double ()); |
| } |
| |
| BITMAP_FREE (info.bb_set); |
| if (max < bb->count) |
| return MAX ((max.to_sreal_scale (bb->count) |
| * REG_BR_PROB_BASE).to_int (), 1); |
| return REG_BR_PROB_BASE; |
| } |
| } |
| |
| /* Find whether a basic block BB is the final block of a (half) diamond CFG |
| sub-graph and if the predicate the condition depends on is known. If so, |
| return true and store the pointer the predicate in *P. */ |
| |
| static bool |
| phi_result_unknown_predicate (ipa_func_body_info *fbi, |
| ipa_fn_summary *summary, |
| class ipa_node_params *params_summary, |
| basic_block bb, |
| predicate *p, |
| vec<predicate> nonconstant_names) |
| { |
| edge e; |
| edge_iterator ei; |
| basic_block first_bb = NULL; |
| gimple *stmt; |
| |
| if (single_pred_p (bb)) |
| { |
| *p = false; |
| return true; |
| } |
| |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| { |
| if (single_succ_p (e->src)) |
| { |
| if (!single_pred_p (e->src)) |
| return false; |
| if (!first_bb) |
| first_bb = single_pred (e->src); |
| else if (single_pred (e->src) != first_bb) |
| return false; |
| } |
| else |
| { |
| if (!first_bb) |
| first_bb = e->src; |
| else if (e->src != first_bb) |
| return false; |
| } |
| } |
| |
| if (!first_bb) |
| return false; |
| |
| stmt = last_stmt (first_bb); |
| if (!stmt |
| || gimple_code (stmt) != GIMPLE_COND |
| || !is_gimple_ip_invariant (gimple_cond_rhs (stmt))) |
| return false; |
| |
| *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary, |
| gimple_cond_lhs (stmt), |
| nonconstant_names); |
| if (*p == true) |
| return false; |
| else |
| return true; |
| } |
| |
| /* Given a PHI statement in a function described by inline properties SUMMARY |
| and *P being the predicate describing whether the selected PHI argument is |
| known, store a predicate for the result of the PHI statement into |
| NONCONSTANT_NAMES, if possible. */ |
| |
| static void |
| predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi, |
| predicate *p, |
| vec<predicate> nonconstant_names) |
| { |
| unsigned i; |
| |
| for (i = 0; i < gimple_phi_num_args (phi); i++) |
| { |
| tree arg = gimple_phi_arg (phi, i)->def; |
| if (!is_gimple_min_invariant (arg)) |
| { |
| gcc_assert (TREE_CODE (arg) == SSA_NAME); |
| *p = p->or_with (summary->conds, |
| nonconstant_names[SSA_NAME_VERSION (arg)]); |
| if (*p == true) |
| return; |
| } |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "\t\tphi predicate: "); |
| p->dump (dump_file, summary->conds); |
| } |
| nonconstant_names[SSA_NAME_VERSION (gimple_phi_result (phi))] = *p; |
| } |
| |
| /* For a typical usage of __builtin_expect (a<b, 1), we |
| may introduce an extra relation stmt: |
| With the builtin, we have |
| t1 = a <= b; |
| t2 = (long int) t1; |
| t3 = __builtin_expect (t2, 1); |
| if (t3 != 0) |
| goto ... |
| Without the builtin, we have |
| if (a<=b) |
| goto... |
| This affects the size/time estimation and may have |
| an impact on the earlier inlining. |
| Here find this pattern and fix it up later. */ |
| |
| static gimple * |
| find_foldable_builtin_expect (basic_block bb) |
| { |
| gimple_stmt_iterator bsi; |
| |
| for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) |
| { |
| gimple *stmt = gsi_stmt (bsi); |
| if (gimple_call_builtin_p (stmt, BUILT_IN_EXPECT) |
| || gimple_call_builtin_p (stmt, BUILT_IN_EXPECT_WITH_PROBABILITY) |
| || gimple_call_internal_p (stmt, IFN_BUILTIN_EXPECT)) |
| { |
| tree var = gimple_call_lhs (stmt); |
| tree arg = gimple_call_arg (stmt, 0); |
| use_operand_p use_p; |
| gimple *use_stmt; |
| bool match = false; |
| bool done = false; |
| |
| if (!var || !arg) |
| continue; |
| gcc_assert (TREE_CODE (var) == SSA_NAME); |
| |
| while (TREE_CODE (arg) == SSA_NAME) |
| { |
| gimple *stmt_tmp = SSA_NAME_DEF_STMT (arg); |
| if (!is_gimple_assign (stmt_tmp)) |
| break; |
| switch (gimple_assign_rhs_code (stmt_tmp)) |
| { |
| case LT_EXPR: |
| case LE_EXPR: |
| case GT_EXPR: |
| case GE_EXPR: |
| case EQ_EXPR: |
| case NE_EXPR: |
| match = true; |
| done = true; |
| break; |
| CASE_CONVERT: |
| break; |
| default: |
| done = true; |
| break; |
| } |
| if (done) |
| break; |
| arg = gimple_assign_rhs1 (stmt_tmp); |
| } |
| |
| if (match && single_imm_use (var, &use_p, &use_stmt) |
| && gimple_code (use_stmt) == GIMPLE_COND) |
| return use_stmt; |
| } |
| } |
| return NULL; |
| } |
| |
| /* Return true when the basic blocks contains only clobbers followed by RESX. |
| Such BBs are kept around to make removal of dead stores possible with |
| presence of EH and will be optimized out by optimize_clobbers later in the |
| game. |
| |
| NEED_EH is used to recurse in case the clobber has non-EH predecessors |
| that can be clobber only, too.. When it is false, the RESX is not necessary |
| on the end of basic block. */ |
| |
| static bool |
| clobber_only_eh_bb_p (basic_block bb, bool need_eh = true) |
| { |
| gimple_stmt_iterator gsi = gsi_last_bb (bb); |
| edge_iterator ei; |
| edge e; |
| |
| if (need_eh) |
| { |
| if (gsi_end_p (gsi)) |
| return false; |
| if (gimple_code (gsi_stmt (gsi)) != GIMPLE_RESX) |
| return false; |
| gsi_prev (&gsi); |
| } |
| else if (!single_succ_p (bb)) |
| return false; |
| |
| for (; !gsi_end_p (gsi); gsi_prev (&gsi)) |
| { |
| gimple *stmt = gsi_stmt (gsi); |
| if (is_gimple_debug (stmt)) |
| continue; |
| if (gimple_clobber_p (stmt)) |
| continue; |
| if (gimple_code (stmt) == GIMPLE_LABEL) |
| break; |
| return false; |
| } |
| |
| /* See if all predecessors are either throws or clobber only BBs. */ |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| if (!(e->flags & EDGE_EH) |
| && !clobber_only_eh_bb_p (e->src, false)) |
| return false; |
| |
| return true; |
| } |
| |
| /* Return true if STMT compute a floating point expression that may be affected |
| by -ffast-math and similar flags. */ |
| |
| static bool |
| fp_expression_p (gimple *stmt) |
| { |
| ssa_op_iter i; |
| tree op; |
| |
| FOR_EACH_SSA_TREE_OPERAND (op, stmt, i, SSA_OP_DEF|SSA_OP_USE) |
| if (FLOAT_TYPE_P (TREE_TYPE (op))) |
| return true; |
| return false; |
| } |
| |
| /* Analyze function body for NODE. |
| EARLY indicates run from early optimization pipeline. */ |
| |
| static void |
| analyze_function_body (struct cgraph_node *node, bool early) |
| { |
| sreal time = opt_for_fn (node->decl, param_uninlined_function_time); |
| /* Estimate static overhead for function prologue/epilogue and alignment. */ |
| int size = opt_for_fn (node->decl, param_uninlined_function_insns); |
| /* Benefits are scaled by probability of elimination that is in range |
| <0,2>. */ |
| basic_block bb; |
| struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); |
| sreal freq; |
| class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); |
| class ipa_node_params *params_summary = early ? NULL : IPA_NODE_REF (node); |
| predicate bb_predicate; |
| struct ipa_func_body_info fbi; |
| vec<predicate> nonconstant_names = vNULL; |
| int nblocks, n; |
| int *order; |
| gimple *fix_builtin_expect_stmt; |
| |
| gcc_assert (my_function && my_function->cfg); |
| gcc_assert (cfun == my_function); |
| |
| memset(&fbi, 0, sizeof(fbi)); |
| vec_free (info->conds); |
| info->conds = NULL; |
| vec_free (info->size_time_table); |
| info->size_time_table = NULL; |
| |
| /* When optimizing and analyzing for IPA inliner, initialize loop optimizer |
| so we can produce proper inline hints. |
| |
| When optimizing and analyzing for early inliner, initialize node params |
| so we can produce correct BB predicates. */ |
| |
| if (opt_for_fn (node->decl, optimize)) |
| { |
| calculate_dominance_info (CDI_DOMINATORS); |
| calculate_dominance_info (CDI_POST_DOMINATORS); |
| if (!early) |
| loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); |
| else |
| { |
| ipa_check_create_node_params (); |
| ipa_initialize_node_params (node); |
| } |
| |
| if (ipa_node_params_sum) |
| { |
| fbi.node = node; |
| fbi.info = IPA_NODE_REF (node); |
| fbi.bb_infos = vNULL; |
| fbi.bb_infos.safe_grow_cleared (last_basic_block_for_fn (cfun)); |
| fbi.param_count = count_formal_params (node->decl); |
| fbi.aa_walk_budget = opt_for_fn (node->decl, param_ipa_max_aa_steps); |
| |
| nonconstant_names.safe_grow_cleared |
| (SSANAMES (my_function)->length ()); |
| } |
| } |
| |
| if (dump_file) |
| fprintf (dump_file, "\nAnalyzing function body size: %s\n", |
| node->dump_name ()); |
| |
| /* When we run into maximal number of entries, we assign everything to the |
| constant truth case. Be sure to have it in list. */ |
| bb_predicate = true; |
| info->account_size_time (0, 0, bb_predicate, bb_predicate); |
| |
| bb_predicate = predicate::not_inlined (); |
| info->account_size_time (opt_for_fn (node->decl, |
| param_uninlined_function_insns) |
| * ipa_fn_summary::size_scale, |
| opt_for_fn (node->decl, |
| param_uninlined_function_time), |
| bb_predicate, |
| bb_predicate); |
| |
| if (fbi.info) |
| compute_bb_predicates (&fbi, node, info, params_summary); |
| order = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); |
| nblocks = pre_and_rev_post_order_compute (NULL, order, false); |
| for (n = 0; n < nblocks; n++) |
| { |
| bb = BASIC_BLOCK_FOR_FN (cfun, order[n]); |
| freq = bb->count.to_sreal_scale (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count); |
| if (clobber_only_eh_bb_p (bb)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "\n Ignoring BB %i;" |
| " it will be optimized away by cleanup_clobbers\n", |
| bb->index); |
| continue; |
| } |
| |
| /* TODO: Obviously predicates can be propagated down across CFG. */ |
| if (fbi.info) |
| { |
| if (bb->aux) |
| bb_predicate = *(predicate *) bb->aux; |
| else |
| bb_predicate = false; |
| } |
| else |
| bb_predicate = true; |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "\n BB %i predicate:", bb->index); |
| bb_predicate.dump (dump_file, info->conds); |
| } |
| |
| if (fbi.info && nonconstant_names.exists ()) |
| { |
| predicate phi_predicate; |
| bool first_phi = true; |
| |
| for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); |
| gsi_next (&bsi)) |
| { |
| if (first_phi |
| && !phi_result_unknown_predicate (&fbi, info, |
| params_summary, |
| bb, |
| &phi_predicate, |
| nonconstant_names)) |
| break; |
| first_phi = false; |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, " "); |
| print_gimple_stmt (dump_file, gsi_stmt (bsi), 0); |
| } |
| predicate_for_phi_result (info, bsi.phi (), &phi_predicate, |
| nonconstant_names); |
| } |
| } |
| |
| fix_builtin_expect_stmt = find_foldable_builtin_expect (bb); |
| |
| for (gimple_stmt_iterator bsi = gsi_start_nondebug_bb (bb); |
| !gsi_end_p (bsi); gsi_next_nondebug (&bsi)) |
| { |
| gimple *stmt = gsi_stmt (bsi); |
| int this_size = estimate_num_insns (stmt, &eni_size_weights); |
| int this_time = estimate_num_insns (stmt, &eni_time_weights); |
| int prob; |
| predicate will_be_nonconstant; |
| |
| /* This relation stmt should be folded after we remove |
| __builtin_expect call. Adjust the cost here. */ |
| if (stmt == fix_builtin_expect_stmt) |
| { |
| this_size--; |
| this_time--; |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, " "); |
| print_gimple_stmt (dump_file, stmt, 0); |
| fprintf (dump_file, "\t\tfreq:%3.2f size:%3i time:%3i\n", |
| freq.to_double (), this_size, |
| this_time); |
| } |
| |
| if (is_gimple_call (stmt) |
| && !gimple_call_internal_p (stmt)) |
| { |
| struct cgraph_edge *edge = node->get_edge (stmt); |
| ipa_call_summary *es = ipa_call_summaries->get_create (edge); |
| |
| /* Special case: results of BUILT_IN_CONSTANT_P will be always |
| resolved as constant. We however don't want to optimize |
| out the cgraph edges. */ |
| if (nonconstant_names.exists () |
| && gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P) |
| && gimple_call_lhs (stmt) |
| && TREE_CODE (gimple_call_lhs (stmt)) == SSA_NAME) |
| { |
| predicate false_p = false; |
| nonconstant_names[SSA_NAME_VERSION (gimple_call_lhs (stmt))] |
| = false_p; |
| } |
| if (ipa_node_params_sum) |
| { |
| int count = gimple_call_num_args (stmt); |
| int i; |
| |
| if (count) |
| es->param.safe_grow_cleared (count); |
| for (i = 0; i < count; i++) |
| { |
| int prob = param_change_prob (&fbi, stmt, i); |
| gcc_assert (prob >= 0 && prob <= REG_BR_PROB_BASE); |
| es->param[i].change_prob = prob; |
| } |
| } |
| |
| es->call_stmt_size = this_size; |
| es->call_stmt_time = this_time; |
| es->loop_depth = bb_loop_depth (bb); |
| edge_set_predicate (edge, &bb_predicate); |
| if (edge->speculative) |
| { |
| cgraph_edge *indirect |
| = edge->speculative_call_indirect_edge (); |
| ipa_call_summary *es2 |
| = ipa_call_summaries->get_create (indirect); |
| ipa_call_summaries->duplicate (edge, indirect, |
| es, es2); |
| |
| /* Edge is the first direct call. |
| create and duplicate call summaries for multiple |
| speculative call targets. */ |
| for (cgraph_edge *direct |
| = edge->next_speculative_call_target (); |
| direct; |
| direct = direct->next_speculative_call_target ()) |
| { |
| ipa_call_summary *es3 |
| = ipa_call_summaries->get_create (direct); |
| ipa_call_summaries->duplicate (edge, direct, |
| es, es3); |
| } |
| } |
| } |
| |
| /* TODO: When conditional jump or switch is known to be constant, but |
| we did not translate it into the predicates, we really can account |
| just maximum of the possible paths. */ |
| if (fbi.info) |
| will_be_nonconstant |
| = will_be_nonconstant_predicate (&fbi, info, params_summary, |
| stmt, nonconstant_names); |
| else |
| will_be_nonconstant = true; |
| if (this_time || this_size) |
| { |
| sreal final_time = (sreal)this_time * freq; |
| |
| prob = eliminated_by_inlining_prob (&fbi, stmt); |
| if (prob == 1 && dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, |
| "\t\t50%% will be eliminated by inlining\n"); |
| if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "\t\tWill be eliminated by inlining\n"); |
| |
| class predicate p = bb_predicate & will_be_nonconstant; |
| |
| /* We can ignore statement when we proved it is never going |
| to happen, but we cannot do that for call statements |
| because edges are accounted specially. */ |
| |
| if (*(is_gimple_call (stmt) ? &bb_predicate : &p) != false) |
| { |
| time += final_time; |
| size += this_size; |
| } |
| |
| /* We account everything but the calls. Calls have their own |
| size/time info attached to cgraph edges. This is necessary |
| in order to make the cost disappear after inlining. */ |
| if (!is_gimple_call (stmt)) |
| { |
| if (prob) |
| { |
| predicate ip = bb_predicate & predicate::not_inlined (); |
| info->account_size_time (this_size * prob, |
| (final_time * prob) / 2, ip, |
| p); |
| } |
| if (prob != 2) |
| info->account_size_time (this_size * (2 - prob), |
| (final_time * (2 - prob) / 2), |
| bb_predicate, |
| p); |
| } |
| |
| if (!info->fp_expressions && fp_expression_p (stmt)) |
| { |
| info->fp_expressions = true; |
| if (dump_file) |
| fprintf (dump_file, " fp_expression set\n"); |
| } |
| } |
| |
| /* Account cost of address calculations in the statements. */ |
| for (unsigned int i = 0; i < gimple_num_ops (stmt); i++) |
| { |
| for (tree op = gimple_op (stmt, i); |
| op && handled_component_p (op); |
| op = TREE_OPERAND (op, 0)) |
| if ((TREE_CODE (op) == ARRAY_REF |
| || TREE_CODE (op) == ARRAY_RANGE_REF) |
| && TREE_CODE (TREE_OPERAND (op, 1)) == SSA_NAME) |
| { |
| predicate p = bb_predicate; |
| if (fbi.info) |
| p = p & will_be_nonconstant_expr_predicate |
| (&fbi, info, params_summary, |
| TREE_OPERAND (op, 1), |
| nonconstant_names); |
| if (p != false) |
| { |
| time += freq; |
| size += 1; |
| if (dump_file) |
| fprintf (dump_file, |
| "\t\tAccounting address calculation.\n"); |
| info->account_size_time (ipa_fn_summary::size_scale, |
| freq, |
| bb_predicate, |
| p); |
| } |
| } |
| } |
| |
| } |
| } |
| free (order); |
| |
| if (nonconstant_names.exists () && !early) |
| { |
| class loop *loop; |
| predicate loop_iterations = true; |
| predicate loop_stride = true; |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| flow_loops_dump (dump_file, NULL, 0); |
| scev_initialize (); |
| FOR_EACH_LOOP (loop, 0) |
| { |
| vec<edge> exits; |
| edge ex; |
| unsigned int j; |
| class tree_niter_desc niter_desc; |
| if (loop->header->aux) |
| bb_predicate = *(predicate *) loop->header->aux; |
| else |
| bb_predicate = false; |
| |
| exits = get_loop_exit_edges (loop); |
| FOR_EACH_VEC_ELT (exits, j, ex) |
| if (number_of_iterations_exit (loop, ex, &niter_desc, false) |
| && !is_gimple_min_invariant (niter_desc.niter)) |
| { |
| predicate will_be_nonconstant |
| = will_be_nonconstant_expr_predicate (&fbi, info, |
| params_summary, |
| niter_desc.niter, |
| nonconstant_names); |
| if (will_be_nonconstant != true) |
| will_be_nonconstant = bb_predicate & will_be_nonconstant; |
| if (will_be_nonconstant != true |
| && will_be_nonconstant != false) |
| /* This is slightly inprecise. We may want to represent each |
| loop with independent predicate. */ |
| loop_iterations &= will_be_nonconstant; |
| } |
| exits.release (); |
| } |
| |
| /* To avoid quadratic behavior we analyze stride predicates only |
| with respect to the containing loop. Thus we simply iterate |
| over all defs in the outermost loop body. */ |
| for (loop = loops_for_fn (cfun)->tree_root->inner; |
| loop != NULL; loop = loop->next) |
| { |
| basic_block *body = get_loop_body (loop); |
| for (unsigned i = 0; i < loop->num_nodes; i++) |
| { |
| gimple_stmt_iterator gsi; |
| if (body[i]->aux) |
| bb_predicate = *(predicate *) body[i]->aux; |
| else |
| bb_predicate = false; |
| for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); |
| gsi_next (&gsi)) |
| { |
| gimple *stmt = gsi_stmt (gsi); |
| |
| if (!is_gimple_assign (stmt)) |
| continue; |
| |
| tree def = gimple_assign_lhs (stmt); |
| if (TREE_CODE (def) != SSA_NAME) |
| continue; |
| |
| affine_iv iv; |
| if (!simple_iv (loop_containing_stmt (stmt), |
| loop_containing_stmt (stmt), |
| def, &iv, true) |
| || is_gimple_min_invariant (iv.step)) |
| continue; |
| |
| predicate will_be_nonconstant |
| = will_be_nonconstant_expr_predicate (&fbi, info, |
| params_summary, |
| iv.step, |
| nonconstant_names); |
| if (will_be_nonconstant != true) |
| will_be_nonconstant = bb_predicate & will_be_nonconstant; |
| if (will_be_nonconstant != true |
| && will_be_nonconstant != false) |
| /* This is slightly inprecise. We may want to represent |
| each loop with independent predicate. */ |
| loop_stride = loop_stride & will_be_nonconstant; |
| } |
| } |
| free (body); |
| } |
| ipa_fn_summary *s = ipa_fn_summaries->get (node); |
| set_hint_predicate (&s->loop_iterations, loop_iterations); |
| set_hint_predicate (&s->loop_stride, loop_stride); |
| scev_finalize (); |
| } |
| FOR_ALL_BB_FN (bb, my_function) |
| { |
| edge e; |
| edge_iterator ei; |
| |
| if (bb->aux) |
| edge_predicate_pool.remove ((predicate *)bb->aux); |
| bb->aux = NULL; |
| FOR_EACH_EDGE (e, ei, bb->succs) |
| { |
| if (e->aux) |
| edge_predicate_pool.remove ((predicate *) e->aux); |
| e->aux = NULL; |
| } |
| } |
| ipa_fn_summary *s = ipa_fn_summaries->get (node); |
| ipa_size_summary *ss = ipa_size_summaries->get (node); |
| s->time = time; |
| ss->self_size = size; |
| nonconstant_names.release (); |
| ipa_release_body_info (&fbi); |
| if (opt_for_fn (node->decl, optimize)) |
| { |
| if (!early) |
| loop_optimizer_finalize (); |
| else if (!ipa_edge_args_sum) |
| ipa_free_all_node_params (); |
| free_dominance_info (CDI_DOMINATORS); |
| free_dominance_info (CDI_POST_DOMINATORS); |
| } |
| if (dump_file) |
| { |
| fprintf (dump_file, "\n"); |
| ipa_dump_fn_summary (dump_file, node); |
| } |
| } |
| |
| |
| /* Compute function summary. |
| EARLY is true when we compute parameters during early opts. */ |
| |
| void |
| compute_fn_summary (struct cgraph_node *node, bool early) |
| { |
| HOST_WIDE_INT self_stack_size; |
| struct cgraph_edge *e; |
| |
| gcc_assert (!node->inlined_to); |
| |
| if (!ipa_fn_summaries) |
| ipa_fn_summary_alloc (); |
| |
| /* Create a new ipa_fn_summary. */ |
| ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node); |
| ipa_fn_summaries->remove (node); |
| class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); |
| class ipa_size_summary *size_info = ipa_size_summaries->get_create (node); |
| |
| /* Estimate the stack size for the function if we're optimizing. */ |
| self_stack_size = optimize && !node->thunk.thunk_p |
| ? estimated_stack_frame_size (node) : 0; |
| size_info->estimated_self_stack_size = self_stack_size; |
| info->estimated_stack_size = self_stack_size; |
| |
| if (node->thunk.thunk_p) |
| { |
| ipa_call_summary *es = ipa_call_summaries->get_create (node->callees); |
| predicate t = true; |
| |
| node->can_change_signature = false; |
| es->call_stmt_size = eni_size_weights.call_cost; |
| es->call_stmt_time = eni_time_weights.call_cost; |
| info->account_size_time (ipa_fn_summary::size_scale |
| * opt_for_fn (node->decl, |
| param_uninlined_function_thunk_insns), |
| opt_for_fn (node->decl, |
| param_uninlined_function_thunk_time), t, t); |
| t = predicate::not_inlined (); |
| info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t); |
| ipa_update_overall_fn_summary (node); |
| size_info->self_size = size_info->size; |
| if (stdarg_p (TREE_TYPE (node->decl))) |
| { |
| info->inlinable = false; |
| node->callees->inline_failed = CIF_VARIADIC_THUNK; |
| } |
| else |
| info->inlinable = true; |
| } |
| else |
| { |
| /* Even is_gimple_min_invariant rely on current_function_decl. */ |
| push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
| |
| /* During IPA profile merging we may be called w/o virtual SSA form |
| built. */ |
| update_ssa (TODO_update_ssa_only_virtuals); |
| |
| /* Can this function be inlined at all? */ |
| if (!opt_for_fn (node->decl, optimize) |
| && !lookup_attribute ("always_inline", |
| DECL_ATTRIBUTES (node->decl))) |
| info->inlinable = false; |
| else |
| info->inlinable = tree_inlinable_function_p (node->decl); |
| |
| /* Type attributes can use parameter indices to describe them. */ |
| if (TYPE_ATTRIBUTES (TREE_TYPE (node->decl)) |
| /* Likewise for #pragma omp declare simd functions or functions |
| with simd attribute. */ |
| || lookup_attribute ("omp declare simd", |
| DECL_ATTRIBUTES (node->decl))) |
| node->can_change_signature = false; |
| else |
| { |
| /* Otherwise, inlinable functions always can change signature. */ |
| if (info->inlinable) |
| node->can_change_signature = true; |
| else |
| { |
| /* Functions calling builtin_apply cannot change signature. */ |
| for (e = node->callees; e; e = e->next_callee) |
| { |
| tree cdecl = e->callee->decl; |
| if (fndecl_built_in_p (cdecl, BUILT_IN_APPLY_ARGS) |
| || fndecl_built_in_p (cdecl, BUILT_IN_VA_START)) |
| break; |
| } |
| node->can_change_signature = !e; |
| } |
| } |
| analyze_function_body (node, early); |
| pop_cfun (); |
| } |
| |
| /* Inlining characteristics are maintained by the cgraph_mark_inline. */ |
| size_info->size = size_info->self_size; |
| info->estimated_stack_size = size_info->estimated_self_stack_size; |
| |
| /* Code above should compute exactly the same result as |
| ipa_update_overall_fn_summary except for case when speculative |
| edges are present since these are accounted to size but not |
| self_size. Do not compare time since different order the roundoff |
| errors result in slight changes. */ |
| ipa_update_overall_fn_summary (node); |
| if (flag_checking) |
| { |
| for (e = node->indirect_calls; e; e = e->next_callee) |
| if (e->speculative) |
| break; |
| gcc_assert (e || size_info->size == size_info->self_size); |
| } |
| } |
| |
| |
| /* Compute parameters of functions used by inliner using |
| current_function_decl. */ |
| |
| static unsigned int |
| compute_fn_summary_for_current (void) |
| { |
| compute_fn_summary (cgraph_node::get (current_function_decl), true); |
| return 0; |
| } |
| |
| /* Estimate benefit devirtualizing indirect edge IE, provided KNOWN_VALS, |
| KNOWN_CONTEXTS and KNOWN_AGGS. */ |
| |
| static bool |
| estimate_edge_devirt_benefit (struct cgraph_edge *ie, |
| int *size, int *time, |
| vec<tree> known_vals, |
| vec<ipa_polymorphic_call_context> known_contexts, |
| vec<ipa_agg_value_set> known_aggs) |
| { |
| tree target; |
| struct cgraph_node *callee; |
| class ipa_fn_summary *isummary; |
| enum availability avail; |
| bool speculative; |
| |
| if (!known_vals.length () && !known_contexts.length ()) |
| return false; |
| if (!opt_for_fn (ie->caller->decl, flag_indirect_inlining)) |
| return false; |
| |
| target = ipa_get_indirect_edge_target (ie, known_vals, known_contexts, |
| known_aggs, &speculative); |
| if (!target || speculative) |
| return false; |
| |
| /* Account for difference in cost between indirect and direct calls. */ |
| *size -= (eni_size_weights.indirect_call_cost - eni_size_weights.call_cost); |
| *time -= (eni_time_weights.indirect_call_cost - eni_time_weights.call_cost); |
| gcc_checking_assert (*time >= 0); |
| gcc_checking_assert (*size >= 0); |
| |
| callee = cgraph_node::get (target); |
| if (!callee || !callee->definition) |
| return false; |
| callee = callee->function_symbol (&avail); |
| if (avail < AVAIL_AVAILABLE) |
| return false; |
| isummary = ipa_fn_summaries->get (callee); |
| if (isummary == NULL) |
| return false; |
| |
| return isummary->inlinable; |
| } |
| |
| /* Increase SIZE, MIN_SIZE (if non-NULL) and TIME for size and time needed to |
| handle edge E with probability PROB. |
| Set HINTS if edge may be devirtualized. |
| KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS describe context of the call |
| site. */ |
| |
| static inline void |
| estimate_edge_size_and_time (struct cgraph_edge *e, int *size, int *min_size, |
| sreal *time, |
| vec<tree> known_vals, |
| |