| /* Interprocedural constant propagation |
| Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
| Free Software Foundation, Inc. |
| Contributed by Razya Ladelsky <RAZYA@il.ibm.com> |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* Interprocedural constant propagation. The aim of interprocedural constant |
| propagation (IPCP) is to find which function's argument has the same |
| constant value in each invocation throughout the whole program. For example, |
| consider the following program: |
| |
| int g (int y) |
| { |
| printf ("value is %d",y); |
| } |
| |
| int f (int x) |
| { |
| g (x); |
| } |
| |
| int h (int y) |
| { |
| g (y); |
| } |
| |
| void main (void) |
| { |
| f (3); |
| h (3); |
| } |
| |
| |
| The IPCP algorithm will find that g's formal argument y is always called |
| with the value 3. |
| |
| The algorithm used is based on "Interprocedural Constant Propagation", by |
| David Callahan, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg |
| 152-161 |
| |
| The optimization is divided into three stages: |
| |
| First stage - intraprocedural analysis |
| ======================================= |
| This phase computes jump_function and modification flags. |
| |
| A jump function for a callsite represents the values passed as an actual |
| arguments of a given callsite. There are three types of values: |
| Pass through - the caller's formal parameter is passed as an actual argument. |
| Constant - a constant is passed as an actual argument. |
| Unknown - neither of the above. |
| |
| The jump function info, ipa_jump_func, is stored in ipa_edge_args |
| structure (defined in ipa_prop.h and pointed to by cgraph_node->aux) |
| modified_flags are defined in ipa_node_params structure |
| (defined in ipa_prop.h and pointed to by cgraph_edge->aux). |
| |
| -ipcp_generate_summary() is the first stage driver. |
| |
| Second stage - interprocedural analysis |
| ======================================== |
| This phase does the interprocedural constant propagation. |
| It computes lattices for all formal parameters in the program |
| and their value that may be: |
| TOP - unknown. |
| BOTTOM - non constant. |
| CONSTANT - constant value. |
| |
| Lattice describing a formal parameter p will have a constant value if all |
| callsites invoking this function have the same constant value passed to p. |
| |
| The lattices are stored in ipcp_lattice which is itself in ipa_node_params |
| structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux). |
| |
| -ipcp_iterate_stage() is the second stage driver. |
| |
| Third phase - transformation of function code |
| ============================================ |
| Propagates the constant-valued formals into the function. |
| For each function whose parameters are constants, we create its clone. |
| |
| Then we process the clone in two ways: |
| 1. We insert an assignment statement 'parameter = const' at the beginning |
| of the cloned function. |
| 2. For read-only parameters that do not live in memory, we replace all their |
| uses with the constant. |
| |
| We also need to modify some callsites to call the cloned functions instead |
| of the original ones. For a callsite passing an argument found to be a |
| constant by IPCP, there are two different cases to handle: |
| 1. A constant is passed as an argument. In this case the callsite in the |
| should be redirected to call the cloned callee. |
| 2. A parameter (of the caller) passed as an argument (pass through |
| argument). In such cases both the caller and the callee have clones and |
| only the callsite in the cloned caller is redirected to call to the |
| cloned callee. |
| |
| This update is done in two steps: First all cloned functions are created |
| during a traversal of the call graph, during which all callsites are |
| redirected to call the cloned function. Then the callsites are traversed |
| and many calls redirected back to fit the description above. |
| |
| -ipcp_insert_stage() is the third phase driver. |
| |
| |
| This pass also performs devirtualization - turns virtual calls into direct |
| ones if it can prove that all invocations of the function call the same |
| callee. This is achieved by building a list of all base types (actually, |
| their BINFOs) that individual parameters can have in an iterative matter |
| just like propagating scalar constants and then examining whether virtual |
| calls which take a parameter as their object fold to the same target for all |
| these types. If we cannot enumerate all types or there is a type which does |
| not have any BINFO associated with it, cannot_devirtualize of the associated |
| parameter descriptor is set which is an equivalent of BOTTOM lattice value |
| in standard IPA constant propagation. |
| */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tree.h" |
| #include "target.h" |
| #include "gimple.h" |
| #include "cgraph.h" |
| #include "ipa-prop.h" |
| #include "tree-flow.h" |
| #include "tree-pass.h" |
| #include "flags.h" |
| #include "timevar.h" |
| #include "diagnostic.h" |
| #include "tree-pretty-print.h" |
| #include "tree-dump.h" |
| #include "tree-inline.h" |
| #include "fibheap.h" |
| #include "params.h" |
| |
| /* Number of functions identified as candidates for cloning. When not cloning |
| we can simplify iterate stage not forcing it to go through the decision |
| on what is profitable and what not. */ |
| static int n_cloning_candidates; |
| |
| /* Maximal count found in program. */ |
| static gcov_type max_count; |
| |
| /* Cgraph nodes that has been completely replaced by cloning during iterate |
| * stage and will be removed after ipcp is finished. */ |
| static bitmap dead_nodes; |
| |
| static void ipcp_print_profile_data (FILE *); |
| static void ipcp_function_scale_print (FILE *); |
| |
| /* Get the original node field of ipa_node_params associated with node NODE. */ |
| static inline struct cgraph_node * |
| ipcp_get_orig_node (struct cgraph_node *node) |
| { |
| return IPA_NODE_REF (node)->ipcp_orig_node; |
| } |
| |
| /* Return true if NODE describes a cloned/versioned function. */ |
| static inline bool |
| ipcp_node_is_clone (struct cgraph_node *node) |
| { |
| return (ipcp_get_orig_node (node) != NULL); |
| } |
| |
| /* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE |
| as the ipcp_orig_node field in ipa_node_params. */ |
| static void |
| ipcp_init_cloned_node (struct cgraph_node *orig_node, |
| struct cgraph_node *new_node) |
| { |
| gcc_checking_assert (ipa_node_params_vector |
| && (VEC_length (ipa_node_params_t, |
| ipa_node_params_vector) |
| > (unsigned) cgraph_max_uid)); |
| gcc_checking_assert (IPA_NODE_REF (new_node)->params); |
| IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node; |
| } |
| |
| /* Return scale for NODE. */ |
| static inline gcov_type |
| ipcp_get_node_scale (struct cgraph_node *node) |
| { |
| return IPA_NODE_REF (node)->count_scale; |
| } |
| |
| /* Set COUNT as scale for NODE. */ |
| static inline void |
| ipcp_set_node_scale (struct cgraph_node *node, gcov_type count) |
| { |
| IPA_NODE_REF (node)->count_scale = count; |
| } |
| |
| /* Return whether LAT is a constant lattice. */ |
| static inline bool |
| ipcp_lat_is_const (struct ipcp_lattice *lat) |
| { |
| if (lat->type == IPA_CONST_VALUE) |
| return true; |
| else |
| return false; |
| } |
| |
| /* Return whether LAT is a constant lattice that ipa-cp can actually insert |
| into the code (i.e. constants excluding member pointers and pointers). */ |
| static inline bool |
| ipcp_lat_is_insertable (struct ipcp_lattice *lat) |
| { |
| return lat->type == IPA_CONST_VALUE; |
| } |
| |
| /* Return true if LAT1 and LAT2 are equal. */ |
| static inline bool |
| ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2) |
| { |
| gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2)); |
| if (lat1->type != lat2->type) |
| return false; |
| |
| if (TREE_CODE (lat1->constant) == ADDR_EXPR |
| && TREE_CODE (lat2->constant) == ADDR_EXPR |
| && TREE_CODE (TREE_OPERAND (lat1->constant, 0)) == CONST_DECL |
| && TREE_CODE (TREE_OPERAND (lat2->constant, 0)) == CONST_DECL) |
| return operand_equal_p (DECL_INITIAL (TREE_OPERAND (lat1->constant, 0)), |
| DECL_INITIAL (TREE_OPERAND (lat2->constant, 0)), 0); |
| else |
| return operand_equal_p (lat1->constant, lat2->constant, 0); |
| } |
| |
| /* Compute Meet arithmetics: |
| Meet (IPA_BOTTOM, x) = IPA_BOTTOM |
| Meet (IPA_TOP,x) = x |
| Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b. |
| MEET (const_a,const_b) = const_a, if const_a == const_b.*/ |
| static void |
| ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1, |
| struct ipcp_lattice *lat2) |
| { |
| if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM) |
| { |
| res->type = IPA_BOTTOM; |
| return; |
| } |
| if (lat1->type == IPA_TOP) |
| { |
| res->type = lat2->type; |
| res->constant = lat2->constant; |
| return; |
| } |
| if (lat2->type == IPA_TOP) |
| { |
| res->type = lat1->type; |
| res->constant = lat1->constant; |
| return; |
| } |
| if (!ipcp_lats_are_equal (lat1, lat2)) |
| { |
| res->type = IPA_BOTTOM; |
| return; |
| } |
| res->type = lat1->type; |
| res->constant = lat1->constant; |
| } |
| |
| /* Return the lattice corresponding to the Ith formal parameter of the function |
| described by INFO. */ |
| static inline struct ipcp_lattice * |
| ipcp_get_lattice (struct ipa_node_params *info, int i) |
| { |
| return &(info->params[i].ipcp_lattice); |
| } |
| |
| /* Given the jump function JFUNC, compute the lattice LAT that describes the |
| value coming down the callsite. INFO describes the caller node so that |
| pass-through jump functions can be evaluated. */ |
| static void |
| ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat, |
| struct ipa_jump_func *jfunc) |
| { |
| if (jfunc->type == IPA_JF_CONST) |
| { |
| lat->type = IPA_CONST_VALUE; |
| lat->constant = jfunc->value.constant; |
| } |
| else if (jfunc->type == IPA_JF_PASS_THROUGH) |
| { |
| struct ipcp_lattice *caller_lat; |
| tree cst; |
| |
| caller_lat = ipcp_get_lattice (info, jfunc->value.pass_through.formal_id); |
| lat->type = caller_lat->type; |
| if (caller_lat->type != IPA_CONST_VALUE) |
| return; |
| cst = caller_lat->constant; |
| |
| if (jfunc->value.pass_through.operation != NOP_EXPR) |
| { |
| tree restype; |
| if (TREE_CODE_CLASS (jfunc->value.pass_through.operation) |
| == tcc_comparison) |
| restype = boolean_type_node; |
| else |
| restype = TREE_TYPE (cst); |
| cst = fold_binary (jfunc->value.pass_through.operation, |
| restype, cst, jfunc->value.pass_through.operand); |
| } |
| if (!cst || !is_gimple_ip_invariant (cst)) |
| lat->type = IPA_BOTTOM; |
| lat->constant = cst; |
| } |
| else if (jfunc->type == IPA_JF_ANCESTOR) |
| { |
| struct ipcp_lattice *caller_lat; |
| tree t; |
| |
| caller_lat = ipcp_get_lattice (info, jfunc->value.ancestor.formal_id); |
| lat->type = caller_lat->type; |
| if (caller_lat->type != IPA_CONST_VALUE) |
| return; |
| if (TREE_CODE (caller_lat->constant) != ADDR_EXPR) |
| { |
| /* This can happen when the constant is a NULL pointer. */ |
| lat->type = IPA_BOTTOM; |
| return; |
| } |
| t = TREE_OPERAND (caller_lat->constant, 0); |
| t = build_ref_for_offset (EXPR_LOCATION (t), t, |
| jfunc->value.ancestor.offset, |
| jfunc->value.ancestor.type, NULL, false); |
| lat->constant = build_fold_addr_expr (t); |
| } |
| else |
| lat->type = IPA_BOTTOM; |
| } |
| |
| /* True when OLD_LAT and NEW_LAT values are not the same. */ |
| |
| static bool |
| ipcp_lattice_changed (struct ipcp_lattice *old_lat, |
| struct ipcp_lattice *new_lat) |
| { |
| if (old_lat->type == new_lat->type) |
| { |
| if (!ipcp_lat_is_const (old_lat)) |
| return false; |
| if (ipcp_lats_are_equal (old_lat, new_lat)) |
| return false; |
| } |
| return true; |
| } |
| |
| /* Print all ipcp_lattices of all functions to F. */ |
| static void |
| ipcp_print_all_lattices (FILE * f) |
| { |
| struct cgraph_node *node; |
| int i, count; |
| |
| fprintf (f, "\nLattice:\n"); |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| struct ipa_node_params *info; |
| |
| if (!node->analyzed) |
| continue; |
| info = IPA_NODE_REF (node); |
| fprintf (f, " Node: %s:\n", cgraph_node_name (node)); |
| count = ipa_get_param_count (info); |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| |
| fprintf (f, " param [%d]: ", i); |
| if (lat->type == IPA_CONST_VALUE) |
| { |
| tree cst = lat->constant; |
| fprintf (f, "type is CONST "); |
| print_generic_expr (f, cst, 0); |
| if (TREE_CODE (cst) == ADDR_EXPR |
| && TREE_CODE (TREE_OPERAND (cst, 0)) == CONST_DECL) |
| { |
| fprintf (f, " -> "); |
| print_generic_expr (f, DECL_INITIAL (TREE_OPERAND (cst, 0)), |
| 0); |
| } |
| } |
| else if (lat->type == IPA_TOP) |
| fprintf (f, "type is TOP"); |
| else |
| fprintf (f, "type is BOTTOM"); |
| if (ipa_param_cannot_devirtualize_p (info, i)) |
| fprintf (f, " - cannot_devirtualize set\n"); |
| else if (ipa_param_types_vec_empty (info, i)) |
| fprintf (f, " - type list empty\n"); |
| else |
| fprintf (f, "\n"); |
| } |
| } |
| } |
| |
| /* Return true if ipcp algorithms would allow cloning NODE. */ |
| |
| static bool |
| ipcp_versionable_function_p (struct cgraph_node *node) |
| { |
| struct cgraph_edge *edge; |
| |
| /* There are a number of generic reasons functions cannot be versioned. We |
| also cannot remove parameters if there are type attributes such as fnspec |
| present. */ |
| if (!node->local.versionable |
| || TYPE_ATTRIBUTES (TREE_TYPE (node->decl))) |
| return false; |
| |
| /* Removing arguments doesn't work if the function takes varargs |
| or use __builtin_apply_args. */ |
| for (edge = node->callees; edge; edge = edge->next_callee) |
| { |
| tree t = edge->callee->decl; |
| if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL |
| && (DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS |
| || DECL_FUNCTION_CODE (t) == BUILT_IN_VA_START)) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* Return true if this NODE is viable candidate for cloning. */ |
| static bool |
| ipcp_cloning_candidate_p (struct cgraph_node *node) |
| { |
| int n_calls = 0; |
| int n_hot_calls = 0; |
| gcov_type direct_call_sum = 0; |
| struct cgraph_edge *e; |
| |
| /* We never clone functions that are not visible from outside. |
| FIXME: in future we should clone such functions when they are called with |
| different constants, but current ipcp implementation is not good on this. |
| */ |
| if (cgraph_only_called_directly_p (node) || !node->analyzed) |
| return false; |
| |
| /* When function address is taken, we are pretty sure it will be called in hidden way. */ |
| if (node->address_taken) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; address is taken.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| |
| if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; body is overwritable.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| if (!ipcp_versionable_function_p (node)) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| for (e = node->callers; e; e = e->next_caller) |
| { |
| direct_call_sum += e->count; |
| n_calls ++; |
| if (cgraph_maybe_hot_edge_p (e)) |
| n_hot_calls ++; |
| } |
| |
| if (!n_calls) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| if (node->local.inline_summary.self_size < n_calls) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Considering %s for cloning; code would shrink.\n", |
| cgraph_node_name (node)); |
| return true; |
| } |
| |
| if (!flag_ipa_cp_clone) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| |
| if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl))) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| |
| /* When profile is available and function is hot, propagate into it even if |
| calls seems cold; constant propagation can improve function's speed |
| significantly. */ |
| if (max_count) |
| { |
| if (direct_call_sum > node->count * 90 / 100) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Considering %s for cloning; usually called directly.\n", |
| cgraph_node_name (node)); |
| return true; |
| } |
| } |
| if (!n_hot_calls) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| if (dump_file) |
| fprintf (dump_file, "Considering %s for cloning.\n", |
| cgraph_node_name (node)); |
| return true; |
| } |
| |
| /* Mark parameter with index I of function described by INFO as unsuitable for |
| devirtualization. Return true if it has already been marked so. */ |
| |
| static bool |
| ipa_set_param_cannot_devirtualize (struct ipa_node_params *info, int i) |
| { |
| bool ret = info->params[i].cannot_devirtualize; |
| info->params[i].cannot_devirtualize = true; |
| if (info->params[i].types) |
| VEC_free (tree, heap, info->params[i].types); |
| return ret; |
| } |
| |
| /* Initialize ipcp_lattices array. The lattices corresponding to supported |
| types (integers, real types and Fortran constants defined as const_decls) |
| are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */ |
| static void |
| ipcp_initialize_node_lattices (struct cgraph_node *node) |
| { |
| int i; |
| struct ipa_node_params *info = IPA_NODE_REF (node); |
| enum ipa_lattice_type type; |
| |
| if (ipa_is_called_with_var_arguments (info)) |
| type = IPA_BOTTOM; |
| else if (node->local.local) |
| type = IPA_TOP; |
| /* When cloning is allowed, we can assume that externally visible functions |
| are not called. We will compensate this by cloning later. */ |
| else if (ipcp_cloning_candidate_p (node)) |
| type = IPA_TOP, n_cloning_candidates ++; |
| else |
| type = IPA_BOTTOM; |
| |
| for (i = 0; i < ipa_get_param_count (info) ; i++) |
| { |
| ipcp_get_lattice (info, i)->type = type; |
| if (type == IPA_BOTTOM) |
| ipa_set_param_cannot_devirtualize (info, i); |
| } |
| } |
| |
| /* Build a constant tree with type TREE_TYPE and value according to LAT. |
| Return the tree, or, if it is not possible to convert such value |
| to TREE_TYPE, NULL. */ |
| static tree |
| build_const_val (struct ipcp_lattice *lat, tree tree_type) |
| { |
| tree val; |
| |
| gcc_assert (ipcp_lat_is_const (lat)); |
| val = lat->constant; |
| |
| if (!useless_type_conversion_p (tree_type, TREE_TYPE (val))) |
| { |
| if (fold_convertible_p (tree_type, val)) |
| return fold_build1 (NOP_EXPR, tree_type, val); |
| else if (TYPE_SIZE (tree_type) == TYPE_SIZE (TREE_TYPE (val))) |
| return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val); |
| else |
| return NULL; |
| } |
| return val; |
| } |
| |
| /* Compute the proper scale for NODE. It is the ratio between the number of |
| direct calls (represented on the incoming cgraph_edges) and sum of all |
| invocations of NODE (represented as count in cgraph_node). |
| |
| FIXME: This code is wrong. Since the callers can be also clones and |
| the clones are not scaled yet, the sums gets unrealistically high. |
| To properly compute the counts, we would need to do propagation across |
| callgraph (as external call to A might imply call to non-cloned B |
| if A's clone calls cloned B). */ |
| static void |
| ipcp_compute_node_scale (struct cgraph_node *node) |
| { |
| gcov_type sum; |
| struct cgraph_edge *cs; |
| |
| sum = 0; |
| /* Compute sum of all counts of callers. */ |
| for (cs = node->callers; cs != NULL; cs = cs->next_caller) |
| sum += cs->count; |
| /* Work around the unrealistically high sum problem. We just don't want |
| the non-cloned body to have negative or very low frequency. Since |
| majority of execution time will be spent in clones anyway, this should |
| give good enough profile. */ |
| if (sum > node->count * 9 / 10) |
| sum = node->count * 9 / 10; |
| if (node->count == 0) |
| ipcp_set_node_scale (node, 0); |
| else |
| ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count); |
| } |
| |
| /* Return true if there are some formal parameters whose value is IPA_TOP (in |
| the whole compilation unit). Change their values to IPA_BOTTOM, since they |
| most probably get their values from outside of this compilation unit. */ |
| static bool |
| ipcp_change_tops_to_bottom (void) |
| { |
| int i, count; |
| struct cgraph_node *node; |
| bool prop_again; |
| |
| prop_again = false; |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| struct ipa_node_params *info = IPA_NODE_REF (node); |
| count = ipa_get_param_count (info); |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| if (lat->type == IPA_TOP) |
| { |
| prop_again = true; |
| if (dump_file) |
| { |
| fprintf (dump_file, "Forcing param "); |
| print_generic_expr (dump_file, ipa_get_param (info, i), 0); |
| fprintf (dump_file, " of node %s to bottom.\n", |
| cgraph_node_name (node)); |
| } |
| lat->type = IPA_BOTTOM; |
| } |
| if (!ipa_param_cannot_devirtualize_p (info, i) |
| && ipa_param_types_vec_empty (info, i)) |
| { |
| prop_again = true; |
| ipa_set_param_cannot_devirtualize (info, i); |
| if (dump_file) |
| { |
| fprintf (dump_file, "Marking param "); |
| print_generic_expr (dump_file, ipa_get_param (info, i), 0); |
| fprintf (dump_file, " of node %s as unusable for " |
| "devirtualization.\n", |
| cgraph_node_name (node)); |
| } |
| } |
| } |
| } |
| return prop_again; |
| } |
| |
| /* Insert BINFO to the list of known types of parameter number I of the |
| function described by CALLEE_INFO. Return true iff the type information |
| associated with the callee parameter changed in any way. */ |
| |
| static bool |
| ipcp_add_param_type (struct ipa_node_params *callee_info, int i, tree binfo) |
| { |
| int j, count; |
| |
| if (ipa_param_cannot_devirtualize_p (callee_info, i)) |
| return false; |
| |
| if (callee_info->params[i].types) |
| { |
| count = VEC_length (tree, callee_info->params[i].types); |
| for (j = 0; j < count; j++) |
| if (VEC_index (tree, callee_info->params[i].types, j) == binfo) |
| return false; |
| } |
| |
| if (VEC_length (tree, callee_info->params[i].types) |
| == (unsigned) PARAM_VALUE (PARAM_DEVIRT_TYPE_LIST_SIZE)) |
| return !ipa_set_param_cannot_devirtualize (callee_info, i); |
| |
| VEC_safe_push (tree, heap, callee_info->params[i].types, binfo); |
| return true; |
| } |
| |
| /* Copy known types information for parameter number CALLEE_IDX of CALLEE_INFO |
| from a parameter of CALLER_INFO as described by JF. Return true iff the |
| type information changed in any way. JF must be a pass-through or an |
| ancestor jump function. */ |
| |
| static bool |
| ipcp_copy_types (struct ipa_node_params *caller_info, |
| struct ipa_node_params *callee_info, |
| int callee_idx, struct ipa_jump_func *jf) |
| { |
| int caller_idx, j, count; |
| bool res; |
| |
| if (ipa_param_cannot_devirtualize_p (callee_info, callee_idx)) |
| return false; |
| |
| if (jf->type == IPA_JF_PASS_THROUGH) |
| { |
| if (jf->value.pass_through.operation != NOP_EXPR) |
| { |
| ipa_set_param_cannot_devirtualize (callee_info, callee_idx); |
| return true; |
| } |
| caller_idx = jf->value.pass_through.formal_id; |
| } |
| else |
| caller_idx = jf->value.ancestor.formal_id; |
| |
| if (ipa_param_cannot_devirtualize_p (caller_info, caller_idx)) |
| { |
| ipa_set_param_cannot_devirtualize (callee_info, callee_idx); |
| return true; |
| } |
| |
| if (!caller_info->params[caller_idx].types) |
| return false; |
| |
| res = false; |
| count = VEC_length (tree, caller_info->params[caller_idx].types); |
| for (j = 0; j < count; j++) |
| { |
| tree binfo = VEC_index (tree, caller_info->params[caller_idx].types, j); |
| if (jf->type == IPA_JF_ANCESTOR) |
| { |
| binfo = get_binfo_at_offset (binfo, jf->value.ancestor.offset, |
| jf->value.ancestor.type); |
| if (!binfo) |
| { |
| ipa_set_param_cannot_devirtualize (callee_info, callee_idx); |
| return true; |
| } |
| } |
| res |= ipcp_add_param_type (callee_info, callee_idx, binfo); |
| } |
| return res; |
| } |
| |
| /* Propagate type information for parameter of CALLEE_INFO number I as |
| described by JF. CALLER_INFO describes the caller. Return true iff the |
| type information changed in any way. */ |
| |
| static bool |
| ipcp_propagate_types (struct ipa_node_params *caller_info, |
| struct ipa_node_params *callee_info, |
| struct ipa_jump_func *jf, int i) |
| { |
| switch (jf->type) |
| { |
| case IPA_JF_UNKNOWN: |
| case IPA_JF_CONST_MEMBER_PTR: |
| case IPA_JF_CONST: |
| break; |
| |
| case IPA_JF_KNOWN_TYPE: |
| return ipcp_add_param_type (callee_info, i, jf->value.base_binfo); |
| |
| case IPA_JF_PASS_THROUGH: |
| case IPA_JF_ANCESTOR: |
| return ipcp_copy_types (caller_info, callee_info, i, jf); |
| } |
| |
| /* If we reach this we cannot use this parameter for devirtualization. */ |
| return !ipa_set_param_cannot_devirtualize (callee_info, i); |
| } |
| |
| /* Interprocedural analysis. The algorithm propagates constants from the |
| caller's parameters to the callee's arguments. */ |
| static void |
| ipcp_propagate_stage (void) |
| { |
| int i; |
| struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL }; |
| struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL }; |
| struct ipcp_lattice *dest_lat; |
| struct cgraph_edge *cs; |
| struct ipa_jump_func *jump_func; |
| struct ipa_func_list *wl; |
| int count; |
| |
| ipa_check_create_node_params (); |
| ipa_check_create_edge_args (); |
| |
| /* Initialize worklist to contain all functions. */ |
| wl = ipa_init_func_list (); |
| while (wl) |
| { |
| struct cgraph_node *node = ipa_pop_func_from_list (&wl); |
| struct ipa_node_params *info = IPA_NODE_REF (node); |
| |
| for (cs = node->callees; cs; cs = cs->next_callee) |
| { |
| struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee); |
| struct ipa_edge_args *args = IPA_EDGE_REF (cs); |
| |
| if (ipa_is_called_with_var_arguments (callee_info) |
| || !cs->callee->analyzed |
| || ipa_is_called_with_var_arguments (callee_info)) |
| continue; |
| |
| count = ipa_get_cs_argument_count (args); |
| for (i = 0; i < count; i++) |
| { |
| jump_func = ipa_get_ith_jump_func (args, i); |
| ipcp_lattice_from_jfunc (info, &inc_lat, jump_func); |
| dest_lat = ipcp_get_lattice (callee_info, i); |
| ipa_lattice_meet (&new_lat, &inc_lat, dest_lat); |
| if (ipcp_lattice_changed (&new_lat, dest_lat)) |
| { |
| dest_lat->type = new_lat.type; |
| dest_lat->constant = new_lat.constant; |
| ipa_push_func_to_list (&wl, cs->callee); |
| } |
| |
| if (ipcp_propagate_types (info, callee_info, jump_func, i)) |
| ipa_push_func_to_list (&wl, cs->callee); |
| } |
| } |
| } |
| } |
| |
| /* Call the constant propagation algorithm and re-call it if necessary |
| (if there are undetermined values left). */ |
| static void |
| ipcp_iterate_stage (void) |
| { |
| struct cgraph_node *node; |
| n_cloning_candidates = 0; |
| |
| if (dump_file) |
| fprintf (dump_file, "\nIPA iterate stage:\n\n"); |
| |
| if (in_lto_p) |
| ipa_update_after_lto_read (); |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| ipcp_initialize_node_lattices (node); |
| ipcp_compute_node_scale (node); |
| } |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| ipcp_print_all_lattices (dump_file); |
| ipcp_function_scale_print (dump_file); |
| } |
| |
| ipcp_propagate_stage (); |
| if (ipcp_change_tops_to_bottom ()) |
| /* Some lattices have changed from IPA_TOP to IPA_BOTTOM. |
| This change should be propagated. */ |
| { |
| gcc_assert (n_cloning_candidates); |
| ipcp_propagate_stage (); |
| } |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nIPA lattices after propagation:\n"); |
| ipcp_print_all_lattices (dump_file); |
| if (dump_flags & TDF_DETAILS) |
| ipcp_print_profile_data (dump_file); |
| } |
| } |
| |
| /* Check conditions to forbid constant insertion to function described by |
| NODE. */ |
| static inline bool |
| ipcp_node_modifiable_p (struct cgraph_node *node) |
| { |
| /* Once we will be able to do in-place replacement, we can be more |
| lax here. */ |
| return ipcp_versionable_function_p (node); |
| } |
| |
| /* Print count scale data structures. */ |
| static void |
| ipcp_function_scale_print (FILE * f) |
| { |
| struct cgraph_node *node; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| if (!node->analyzed) |
| continue; |
| fprintf (f, "printing scale for %s: ", cgraph_node_name (node)); |
| fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC |
| " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node)); |
| } |
| } |
| |
| /* Print counts of all cgraph nodes. */ |
| static void |
| ipcp_print_func_profile_counts (FILE * f) |
| { |
| struct cgraph_node *node; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| fprintf (f, "function %s: ", cgraph_node_name (node)); |
| fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC |
| " \n", (HOST_WIDE_INT) node->count); |
| } |
| } |
| |
| /* Print counts of all cgraph edges. */ |
| static void |
| ipcp_print_call_profile_counts (FILE * f) |
| { |
| struct cgraph_node *node; |
| struct cgraph_edge *cs; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| for (cs = node->callees; cs; cs = cs->next_callee) |
| { |
| fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller), |
| cgraph_node_name (cs->callee)); |
| fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n", |
| (HOST_WIDE_INT) cs->count); |
| } |
| } |
| } |
| |
| /* Print profile info for all functions. */ |
| static void |
| ipcp_print_profile_data (FILE * f) |
| { |
| fprintf (f, "\nNODE COUNTS :\n"); |
| ipcp_print_func_profile_counts (f); |
| fprintf (f, "\nCS COUNTS stage:\n"); |
| ipcp_print_call_profile_counts (f); |
| } |
| |
| /* Build and initialize ipa_replace_map struct according to LAT. This struct is |
| processed by versioning, which operates according to the flags set. |
| PARM_TREE is the formal parameter found to be constant. LAT represents the |
| constant. */ |
| static struct ipa_replace_map * |
| ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat) |
| { |
| struct ipa_replace_map *replace_map; |
| tree const_val; |
| |
| const_val = build_const_val (lat, TREE_TYPE (parm_tree)); |
| if (const_val == NULL_TREE) |
| { |
| if (dump_file) |
| { |
| fprintf (dump_file, " const "); |
| print_generic_expr (dump_file, lat->constant, 0); |
| fprintf (dump_file, " can't be converted to param "); |
| print_generic_expr (dump_file, parm_tree, 0); |
| fprintf (dump_file, "\n"); |
| } |
| return NULL; |
| } |
| replace_map = ggc_alloc_ipa_replace_map (); |
| if (dump_file) |
| { |
| fprintf (dump_file, " replacing param "); |
| print_generic_expr (dump_file, parm_tree, 0); |
| fprintf (dump_file, " with const "); |
| print_generic_expr (dump_file, const_val, 0); |
| fprintf (dump_file, "\n"); |
| } |
| replace_map->old_tree = parm_tree; |
| replace_map->new_tree = const_val; |
| replace_map->replace_p = true; |
| replace_map->ref_p = false; |
| |
| return replace_map; |
| } |
| |
| /* Return true if this callsite should be redirected to the original callee |
| (instead of the cloned one). */ |
| static bool |
| ipcp_need_redirect_p (struct cgraph_edge *cs) |
| { |
| struct ipa_node_params *orig_callee_info; |
| int i, count; |
| struct cgraph_node *node = cs->callee, *orig; |
| |
| if (!n_cloning_candidates) |
| return false; |
| |
| if ((orig = ipcp_get_orig_node (node)) != NULL) |
| node = orig; |
| if (ipcp_get_orig_node (cs->caller)) |
| return false; |
| |
| orig_callee_info = IPA_NODE_REF (node); |
| count = ipa_get_param_count (orig_callee_info); |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i); |
| struct ipa_jump_func *jump_func; |
| |
| jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); |
| if ((ipcp_lat_is_const (lat) |
| && jump_func->type != IPA_JF_CONST) |
| || (!ipa_param_cannot_devirtualize_p (orig_callee_info, i) |
| && !ipa_param_types_vec_empty (orig_callee_info, i) |
| && jump_func->type != IPA_JF_CONST |
| && jump_func->type != IPA_JF_KNOWN_TYPE)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Fix the callsites and the call graph after function cloning was done. */ |
| static void |
| ipcp_update_callgraph (void) |
| { |
| struct cgraph_node *node; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| if (node->analyzed && ipcp_node_is_clone (node)) |
| { |
| bitmap args_to_skip = NULL; |
| struct cgraph_node *orig_node = ipcp_get_orig_node (node); |
| struct ipa_node_params *info = IPA_NODE_REF (orig_node); |
| int i, count = ipa_get_param_count (info); |
| struct cgraph_edge *cs, *next; |
| |
| if (node->local.can_change_signature) |
| { |
| args_to_skip = BITMAP_ALLOC (NULL); |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| |
| /* We can proactively remove obviously unused arguments. */ |
| if (!ipa_is_param_used (info, i)) |
| { |
| bitmap_set_bit (args_to_skip, i); |
| continue; |
| } |
| |
| if (lat->type == IPA_CONST_VALUE) |
| bitmap_set_bit (args_to_skip, i); |
| } |
| } |
| for (cs = node->callers; cs; cs = next) |
| { |
| next = cs->next_caller; |
| if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs)) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Redirecting edge %s/%i -> %s/%i " |
| "back to %s/%i.", |
| cgraph_node_name (cs->caller), cs->caller->uid, |
| cgraph_node_name (cs->callee), cs->callee->uid, |
| cgraph_node_name (orig_node), orig_node->uid); |
| cgraph_redirect_edge_callee (cs, orig_node); |
| } |
| } |
| } |
| } |
| |
| /* Update profiling info for versioned functions and the functions they were |
| versioned from. */ |
| static void |
| ipcp_update_profiling (void) |
| { |
| struct cgraph_node *node, *orig_node; |
| gcov_type scale, scale_complement; |
| struct cgraph_edge *cs; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| if (ipcp_node_is_clone (node)) |
| { |
| orig_node = ipcp_get_orig_node (node); |
| scale = ipcp_get_node_scale (orig_node); |
| node->count = orig_node->count * scale / REG_BR_PROB_BASE; |
| scale_complement = REG_BR_PROB_BASE - scale; |
| orig_node->count = |
| orig_node->count * scale_complement / REG_BR_PROB_BASE; |
| for (cs = node->callees; cs; cs = cs->next_callee) |
| cs->count = cs->count * scale / REG_BR_PROB_BASE; |
| for (cs = orig_node->callees; cs; cs = cs->next_callee) |
| cs->count = cs->count * scale_complement / REG_BR_PROB_BASE; |
| } |
| } |
| } |
| |
| /* If NODE was cloned, how much would program grow? */ |
| static long |
| ipcp_estimate_growth (struct cgraph_node *node) |
| { |
| struct cgraph_edge *cs; |
| int redirectable_node_callers = 0; |
| int removable_args = 0; |
| bool need_original |
| = !cgraph_will_be_removed_from_program_if_no_direct_calls (node); |
| struct ipa_node_params *info; |
| int i, count; |
| int growth; |
| |
| for (cs = node->callers; cs != NULL; cs = cs->next_caller) |
| if (cs->caller == node || !ipcp_need_redirect_p (cs)) |
| redirectable_node_callers++; |
| else |
| need_original = true; |
| |
| /* If we will be able to fully replace original node, we never increase |
| program size. */ |
| if (!need_original) |
| return 0; |
| |
| info = IPA_NODE_REF (node); |
| count = ipa_get_param_count (info); |
| if (node->local.can_change_signature) |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| |
| /* We can proactively remove obviously unused arguments. */ |
| if (!ipa_is_param_used (info, i)) |
| removable_args++; |
| |
| if (lat->type == IPA_CONST_VALUE) |
| removable_args++; |
| } |
| |
| /* We make just very simple estimate of savings for removal of operand from |
| call site. Precise cost is difficult to get, as our size metric counts |
| constants and moves as free. Generally we are looking for cases that |
| small function is called very many times. */ |
| growth = node->local.inline_summary.self_size |
| - removable_args * redirectable_node_callers; |
| if (growth < 0) |
| return 0; |
| return growth; |
| } |
| |
| |
| /* Estimate cost of cloning NODE. */ |
| static long |
| ipcp_estimate_cloning_cost (struct cgraph_node *node) |
| { |
| int freq_sum = 1; |
| gcov_type count_sum = 1; |
| struct cgraph_edge *e; |
| int cost; |
| |
| cost = ipcp_estimate_growth (node) * 1000; |
| if (!cost) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Versioning of %s will save code size\n", |
| cgraph_node_name (node)); |
| return 0; |
| } |
| |
| for (e = node->callers; e; e = e->next_caller) |
| if (!bitmap_bit_p (dead_nodes, e->caller->uid) |
| && !ipcp_need_redirect_p (e)) |
| { |
| count_sum += e->count; |
| freq_sum += e->frequency + 1; |
| } |
| |
| if (max_count) |
| cost /= count_sum * 1000 / max_count + 1; |
| else |
| cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1; |
| if (dump_file) |
| fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n", |
| cgraph_node_name (node), cost, node->local.inline_summary.self_size, |
| freq_sum); |
| return cost + 1; |
| } |
| |
| /* Walk indirect calls of NODE and if any polymorphic can be turned into a |
| direct one now, do so. */ |
| |
| static void |
| ipcp_process_devirtualization_opportunities (struct cgraph_node *node) |
| { |
| struct ipa_node_params *info = IPA_NODE_REF (node); |
| struct cgraph_edge *ie, *next_ie; |
| |
| for (ie = node->indirect_calls; ie; ie = next_ie) |
| { |
| int param_index, types_count, j; |
| HOST_WIDE_INT token; |
| tree target, delta; |
| |
| next_ie = ie->next_callee; |
| if (!ie->indirect_info->polymorphic) |
| continue; |
| param_index = ie->indirect_info->param_index; |
| if (param_index == -1 |
| || ipa_param_cannot_devirtualize_p (info, param_index) |
| || ipa_param_types_vec_empty (info, param_index)) |
| continue; |
| |
| token = ie->indirect_info->otr_token; |
| target = NULL_TREE; |
| types_count = VEC_length (tree, info->params[param_index].types); |
| for (j = 0; j < types_count; j++) |
| { |
| tree binfo = VEC_index (tree, info->params[param_index].types, j); |
| tree d; |
| tree t = gimple_get_virt_mehtod_for_binfo (token, binfo, &d, true); |
| |
| if (!t) |
| { |
| target = NULL_TREE; |
| break; |
| } |
| else if (!target) |
| { |
| target = t; |
| delta = d; |
| } |
| else if (target != t || !tree_int_cst_equal (delta, d)) |
| { |
| target = NULL_TREE; |
| break; |
| } |
| } |
| |
| if (target) |
| ipa_make_edge_direct_to_target (ie, target, delta); |
| } |
| } |
| |
| /* Return number of live constant parameters. */ |
| static int |
| ipcp_const_param_count (struct cgraph_node *node) |
| { |
| int const_param = 0; |
| struct ipa_node_params *info = IPA_NODE_REF (node); |
| int count = ipa_get_param_count (info); |
| int i; |
| |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| if ((ipcp_lat_is_insertable (lat) |
| /* Do not count obviously unused arguments. */ |
| && ipa_is_param_used (info, i)) |
| || (!ipa_param_cannot_devirtualize_p (info, i) |
| && !ipa_param_types_vec_empty (info, i))) |
| const_param++; |
| } |
| return const_param; |
| } |
| |
| /* Given that a formal parameter of NODE given by INDEX is known to be constant |
| CST, try to find any indirect edges that can be made direct and make them |
| so. Note that INDEX is the number the parameter at the time of analyzing |
| parameter uses and parameter removals should not be considered for it. (In |
| fact, the parameter itself has just been removed.) */ |
| |
| static void |
| ipcp_discover_new_direct_edges (struct cgraph_node *node, int index, tree cst) |
| { |
| struct cgraph_edge *ie, *next_ie; |
| |
| for (ie = node->indirect_calls; ie; ie = next_ie) |
| { |
| struct cgraph_indirect_call_info *ici = ie->indirect_info; |
| |
| next_ie = ie->next_callee; |
| if (ici->param_index != index |
| || ici->polymorphic) |
| continue; |
| |
| ipa_make_edge_direct_to_target (ie, cst, NULL_TREE); |
| } |
| } |
| |
| |
| /* Propagate the constant parameters found by ipcp_iterate_stage() |
| to the function's code. */ |
| static void |
| ipcp_insert_stage (void) |
| { |
| struct cgraph_node *node, *node1 = NULL; |
| int i; |
| VEC (cgraph_edge_p, heap) * redirect_callers; |
| VEC (ipa_replace_map_p,gc)* replace_trees; |
| int node_callers, count; |
| tree parm_tree; |
| struct ipa_replace_map *replace_param; |
| fibheap_t heap; |
| long overall_size = 0, new_size = 0; |
| long max_new_size; |
| |
| ipa_check_create_node_params (); |
| ipa_check_create_edge_args (); |
| if (dump_file) |
| fprintf (dump_file, "\nIPA insert stage:\n\n"); |
| |
| dead_nodes = BITMAP_ALLOC (NULL); |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| if (node->analyzed) |
| { |
| if (node->count > max_count) |
| max_count = node->count; |
| overall_size += node->local.inline_summary.self_size; |
| } |
| |
| max_new_size = overall_size; |
| if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS)) |
| max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS); |
| max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1; |
| |
| /* First collect all functions we proved to have constant arguments to |
| heap. */ |
| heap = fibheap_new (); |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| struct ipa_node_params *info; |
| /* Propagation of the constant is forbidden in certain conditions. */ |
| if (!node->analyzed || !ipcp_node_modifiable_p (node)) |
| continue; |
| info = IPA_NODE_REF (node); |
| if (ipa_is_called_with_var_arguments (info)) |
| continue; |
| if (ipcp_const_param_count (node)) |
| node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), |
| node); |
| } |
| |
| /* Now clone in priority order until code size growth limits are met or |
| heap is emptied. */ |
| while (!fibheap_empty (heap)) |
| { |
| struct ipa_node_params *info; |
| int growth = 0; |
| bitmap args_to_skip; |
| struct cgraph_edge *cs; |
| |
| node = (struct cgraph_node *)fibheap_extract_min (heap); |
| node->aux = NULL; |
| if (dump_file) |
| fprintf (dump_file, "considering function %s\n", |
| cgraph_node_name (node)); |
| |
| growth = ipcp_estimate_growth (node); |
| |
| if (new_size + growth > max_new_size) |
| break; |
| if (growth |
| && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl))) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not versioning, cold code would grow"); |
| continue; |
| } |
| |
| info = IPA_NODE_REF (node); |
| count = ipa_get_param_count (info); |
| |
| replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1); |
| |
| if (node->local.can_change_signature) |
| args_to_skip = BITMAP_GGC_ALLOC (); |
| else |
| args_to_skip = NULL; |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| parm_tree = ipa_get_param (info, i); |
| |
| /* We can proactively remove obviously unused arguments. */ |
| if (!ipa_is_param_used (info, i)) |
| { |
| if (args_to_skip) |
| bitmap_set_bit (args_to_skip, i); |
| continue; |
| } |
| |
| if (lat->type == IPA_CONST_VALUE) |
| { |
| replace_param = |
| ipcp_create_replace_map (parm_tree, lat); |
| if (replace_param == NULL) |
| break; |
| VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param); |
| if (args_to_skip) |
| bitmap_set_bit (args_to_skip, i); |
| } |
| } |
| if (i < count) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not versioning, some parameters couldn't be replaced"); |
| continue; |
| } |
| |
| new_size += growth; |
| |
| /* Look if original function becomes dead after cloning. */ |
| for (cs = node->callers; cs != NULL; cs = cs->next_caller) |
| if (cs->caller == node || ipcp_need_redirect_p (cs)) |
| break; |
| if (!cs && cgraph_will_be_removed_from_program_if_no_direct_calls (node)) |
| bitmap_set_bit (dead_nodes, node->uid); |
| |
| /* Compute how many callers node has. */ |
| node_callers = 0; |
| for (cs = node->callers; cs != NULL; cs = cs->next_caller) |
| node_callers++; |
| redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers); |
| for (cs = node->callers; cs != NULL; cs = cs->next_caller) |
| if (!cs->indirect_inlining_edge) |
| VEC_quick_push (cgraph_edge_p, redirect_callers, cs); |
| |
| /* Redirecting all the callers of the node to the |
| new versioned node. */ |
| node1 = |
| cgraph_create_virtual_clone (node, redirect_callers, replace_trees, |
| args_to_skip, "constprop"); |
| args_to_skip = NULL; |
| VEC_free (cgraph_edge_p, heap, redirect_callers); |
| replace_trees = NULL; |
| |
| if (node1 == NULL) |
| continue; |
| ipcp_process_devirtualization_opportunities (node1); |
| |
| if (dump_file) |
| fprintf (dump_file, "versioned function %s with growth %i, overall %i\n", |
| cgraph_node_name (node), (int)growth, (int)new_size); |
| ipcp_init_cloned_node (node, node1); |
| |
| info = IPA_NODE_REF (node); |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| if (lat->type == IPA_CONST_VALUE) |
| ipcp_discover_new_direct_edges (node1, i, lat->constant); |
| } |
| |
| if (dump_file) |
| dump_function_to_file (node1->decl, dump_file, dump_flags); |
| |
| for (cs = node->callees; cs; cs = cs->next_callee) |
| if (cs->callee->aux) |
| { |
| fibheap_delete_node (heap, (fibnode_t) cs->callee->aux); |
| cs->callee->aux = fibheap_insert (heap, |
| ipcp_estimate_cloning_cost (cs->callee), |
| cs->callee); |
| } |
| } |
| |
| while (!fibheap_empty (heap)) |
| { |
| if (dump_file) |
| fprintf (dump_file, "skipping function %s\n", |
| cgraph_node_name (node)); |
| node = (struct cgraph_node *) fibheap_extract_min (heap); |
| node->aux = NULL; |
| } |
| fibheap_delete (heap); |
| BITMAP_FREE (dead_nodes); |
| ipcp_update_callgraph (); |
| ipcp_update_profiling (); |
| } |
| |
| /* The IPCP driver. */ |
| static unsigned int |
| ipcp_driver (void) |
| { |
| cgraph_remove_unreachable_nodes (true,dump_file); |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nIPA structures before propagation:\n"); |
| if (dump_flags & TDF_DETAILS) |
| ipa_print_all_params (dump_file); |
| ipa_print_all_jump_functions (dump_file); |
| } |
| /* 2. Do the interprocedural propagation. */ |
| ipcp_iterate_stage (); |
| /* 3. Insert the constants found to the functions. */ |
| ipcp_insert_stage (); |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "\nProfiling info after insert stage:\n"); |
| ipcp_print_profile_data (dump_file); |
| } |
| /* Free all IPCP structures. */ |
| ipa_free_all_structures_after_ipa_cp (); |
| if (dump_file) |
| fprintf (dump_file, "\nIPA constant propagation end\n"); |
| return 0; |
| } |
| |
| /* Initialization and computation of IPCP data structures. This is the initial |
| intraprocedural analysis of functions, which gathers information to be |
| propagated later on. */ |
| |
| static void |
| ipcp_generate_summary (void) |
| { |
| struct cgraph_node *node; |
| |
| if (dump_file) |
| fprintf (dump_file, "\nIPA constant propagation start:\n"); |
| ipa_check_create_node_params (); |
| ipa_check_create_edge_args (); |
| ipa_register_cgraph_hooks (); |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| if (node->analyzed) |
| { |
| /* Unreachable nodes should have been eliminated before ipcp. */ |
| gcc_assert (node->needed || node->reachable); |
| |
| node->local.versionable = tree_versionable_function_p (node->decl); |
| ipa_analyze_node (node); |
| } |
| } |
| |
| /* Write ipcp summary for nodes in SET. */ |
| static void |
| ipcp_write_summary (cgraph_node_set set, |
| varpool_node_set vset ATTRIBUTE_UNUSED) |
| { |
| ipa_prop_write_jump_functions (set); |
| } |
| |
| /* Read ipcp summary. */ |
| static void |
| ipcp_read_summary (void) |
| { |
| ipa_prop_read_jump_functions (); |
| } |
| |
| /* Gate for IPCP optimization. */ |
| static bool |
| cgraph_gate_cp (void) |
| { |
| /* FIXME: We should remove the optimize check after we ensure we never run |
| IPA passes when not optimizing. */ |
| return flag_ipa_cp && optimize; |
| } |
| |
| struct ipa_opt_pass_d pass_ipa_cp = |
| { |
| { |
| IPA_PASS, |
| "cp", /* name */ |
| cgraph_gate_cp, /* gate */ |
| ipcp_driver, /* execute */ |
| NULL, /* sub */ |
| NULL, /* next */ |
| 0, /* static_pass_number */ |
| TV_IPA_CONSTANT_PROP, /* tv_id */ |
| 0, /* properties_required */ |
| 0, /* properties_provided */ |
| 0, /* properties_destroyed */ |
| 0, /* todo_flags_start */ |
| TODO_dump_cgraph | TODO_dump_func | |
| TODO_remove_functions | TODO_ggc_collect /* todo_flags_finish */ |
| }, |
| ipcp_generate_summary, /* generate_summary */ |
| ipcp_write_summary, /* write_summary */ |
| ipcp_read_summary, /* read_summary */ |
| NULL, /* write_optimization_summary */ |
| NULL, /* read_optimization_summary */ |
| NULL, /* stmt_fixup */ |
| 0, /* TODOs */ |
| NULL, /* function_transform */ |
| NULL, /* variable_transform */ |
| }; |