| /* Interprocedural constant propagation |
| Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 |
| Free Software Foundation, Inc. |
| Contributed by Razya Ladelsky <RAZYA@il.ibm.com> |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* Interprocedural constant propagation. The aim of interprocedural constant |
| propagation (IPCP) is to find which function's argument has the same |
| constant value in each invocation throughout the whole program. For example, |
| consider the following program: |
| |
| int g (int y) |
| { |
| printf ("value is %d",y); |
| } |
| |
| int f (int x) |
| { |
| g (x); |
| } |
| |
| int h (int y) |
| { |
| g (y); |
| } |
| |
| void main (void) |
| { |
| f (3); |
| h (3); |
| } |
| |
| |
| The IPCP algorithm will find that g's formal argument y is always called |
| with the value 3. |
| |
| The algorithm used is based on "Interprocedural Constant Propagation", by |
| Challahan David, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg |
| 152-161 |
| |
| The optimization is divided into three stages: |
| |
| First stage - intraprocedural analysis |
| ======================================= |
| This phase computes jump_function and modification flags. |
| |
| A jump function for a callsite represents the values passed as an actual |
| arguments of a given callsite. There are three types of values: |
| Pass through - the caller's formal parameter is passed as an actual argument. |
| Constant - a constant is passed as an actual argument. |
| Unknown - neither of the above. |
| |
| The jump function info, ipa_jump_func, is stored in ipa_edge_args |
| structure (defined in ipa_prop.h and pointed to by cgraph_node->aux) |
| modified_flags are defined in ipa_node_params structure |
| (defined in ipa_prop.h and pointed to by cgraph_edge->aux). |
| |
| -ipcp_init_stage() is the first stage driver. |
| |
| Second stage - interprocedural analysis |
| ======================================== |
| This phase does the interprocedural constant propagation. |
| It computes lattices for all formal parameters in the program |
| and their value that may be: |
| TOP - unknown. |
| BOTTOM - non constant. |
| CONSTANT - constant value. |
| |
| Lattice describing a formal parameter p will have a constant value if all |
| callsites invoking this function have the same constant value passed to p. |
| |
| The lattices are stored in ipcp_lattice which is itself in ipa_node_params |
| structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux). |
| |
| -ipcp_iterate_stage() is the second stage driver. |
| |
| Third phase - transformation of function code |
| ============================================ |
| Propagates the constant-valued formals into the function. |
| For each function whose parameters are constants, we create its clone. |
| |
| Then we process the clone in two ways: |
| 1. We insert an assignment statement 'parameter = const' at the beginning |
| of the cloned function. |
| 2. For read-only parameters that do not live in memory, we replace all their |
| uses with the constant. |
| |
| We also need to modify some callsites to call the cloned functions instead |
| of the original ones. For a callsite passing an argument found to be a |
| constant by IPCP, there are two different cases to handle: |
| 1. A constant is passed as an argument. In this case the callsite in the |
| should be redirected to call the cloned callee. |
| 2. A parameter (of the caller) passed as an argument (pass through |
| argument). In such cases both the caller and the callee have clones and |
| only the callsite in the cloned caller is redirected to call to the |
| cloned callee. |
| |
| This update is done in two steps: First all cloned functions are created |
| during a traversal of the call graph, during which all callsites are |
| redirected to call the cloned function. Then the callsites are traversed |
| and many calls redirected back to fit the description above. |
| |
| -ipcp_insert_stage() is the third phase driver. |
| |
| */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tree.h" |
| #include "target.h" |
| #include "cgraph.h" |
| #include "ipa-prop.h" |
| #include "tree-flow.h" |
| #include "tree-pass.h" |
| #include "flags.h" |
| #include "timevar.h" |
| #include "diagnostic.h" |
| #include "tree-dump.h" |
| #include "tree-inline.h" |
| #include "fibheap.h" |
| #include "params.h" |
| |
| /* Number of functions identified as candidates for cloning. When not cloning |
| we can simplify iterate stage not forcing it to go through the decision |
| on what is profitable and what not. */ |
| static int n_cloning_candidates; |
| |
| /* Maximal count found in program. */ |
| static gcov_type max_count; |
| |
| /* Cgraph nodes that has been completely replaced by cloning during iterate |
| * stage and will be removed after ipcp is finished. */ |
| static bitmap dead_nodes; |
| |
| static void ipcp_print_profile_data (FILE *); |
| static void ipcp_function_scale_print (FILE *); |
| |
| /* Get the original node field of ipa_node_params associated with node NODE. */ |
| static inline struct cgraph_node * |
| ipcp_get_orig_node (struct cgraph_node *node) |
| { |
| return IPA_NODE_REF (node)->ipcp_orig_node; |
| } |
| |
| /* Return true if NODE describes a cloned/versioned function. */ |
| static inline bool |
| ipcp_node_is_clone (struct cgraph_node *node) |
| { |
| return (ipcp_get_orig_node (node) != NULL); |
| } |
| |
| /* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE |
| as the ipcp_orig_node field in ipa_node_params. */ |
| static void |
| ipcp_init_cloned_node (struct cgraph_node *orig_node, |
| struct cgraph_node *new_node) |
| { |
| ipa_check_create_node_params (); |
| ipa_initialize_node_params (new_node); |
| IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node; |
| } |
| |
| /* Perform intraprocedrual analysis needed for ipcp. */ |
| static void |
| ipcp_analyze_node (struct cgraph_node *node) |
| { |
| /* Unreachable nodes should have been eliminated before ipcp. */ |
| gcc_assert (node->needed || node->reachable); |
| |
| ipa_initialize_node_params (node); |
| ipa_detect_param_modifications (node); |
| } |
| |
| /* Return scale for NODE. */ |
| static inline gcov_type |
| ipcp_get_node_scale (struct cgraph_node *node) |
| { |
| return IPA_NODE_REF (node)->count_scale; |
| } |
| |
| /* Set COUNT as scale for NODE. */ |
| static inline void |
| ipcp_set_node_scale (struct cgraph_node *node, gcov_type count) |
| { |
| IPA_NODE_REF (node)->count_scale = count; |
| } |
| |
| /* Return whether LAT is a constant lattice. */ |
| static inline bool |
| ipcp_lat_is_const (struct ipcp_lattice *lat) |
| { |
| if (lat->type == IPA_CONST_VALUE) |
| return true; |
| else |
| return false; |
| } |
| |
| /* Return whether LAT is a constant lattice that ipa-cp can actually insert |
| into the code (i.e. constants excluding member pointers and pointers). */ |
| static inline bool |
| ipcp_lat_is_insertable (struct ipcp_lattice *lat) |
| { |
| return lat->type == IPA_CONST_VALUE; |
| } |
| |
| /* Return true if LAT1 and LAT2 are equal. */ |
| static inline bool |
| ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2) |
| { |
| gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2)); |
| if (lat1->type != lat2->type) |
| return false; |
| |
| if (operand_equal_p (lat1->constant, lat2->constant, 0)) |
| return true; |
| |
| return false; |
| } |
| |
| /* Compute Meet arithmetics: |
| Meet (IPA_BOTTOM, x) = IPA_BOTTOM |
| Meet (IPA_TOP,x) = x |
| Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b. |
| MEET (const_a,const_b) = const_a, if const_a == const_b.*/ |
| static void |
| ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1, |
| struct ipcp_lattice *lat2) |
| { |
| if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM) |
| { |
| res->type = IPA_BOTTOM; |
| return; |
| } |
| if (lat1->type == IPA_TOP) |
| { |
| res->type = lat2->type; |
| res->constant = lat2->constant; |
| return; |
| } |
| if (lat2->type == IPA_TOP) |
| { |
| res->type = lat1->type; |
| res->constant = lat1->constant; |
| return; |
| } |
| if (!ipcp_lats_are_equal (lat1, lat2)) |
| { |
| res->type = IPA_BOTTOM; |
| return; |
| } |
| res->type = lat1->type; |
| res->constant = lat1->constant; |
| } |
| |
| /* Return the lattice corresponding to the Ith formal parameter of the function |
| described by INFO. */ |
| static inline struct ipcp_lattice * |
| ipcp_get_lattice (struct ipa_node_params *info, int i) |
| { |
| return &(info->params[i].ipcp_lattice); |
| } |
| |
| /* Given the jump function JFUNC, compute the lattice LAT that describes the |
| value coming down the callsite. INFO describes the caller node so that |
| pass-through jump functions can be evaluated. */ |
| static void |
| ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat, |
| struct ipa_jump_func *jfunc) |
| { |
| if (jfunc->type == IPA_JF_CONST) |
| { |
| lat->type = IPA_CONST_VALUE; |
| lat->constant = jfunc->value.constant; |
| } |
| else if (jfunc->type == IPA_JF_PASS_THROUGH) |
| { |
| struct ipcp_lattice *caller_lat; |
| tree cst; |
| |
| caller_lat = ipcp_get_lattice (info, jfunc->value.pass_through.formal_id); |
| lat->type = caller_lat->type; |
| if (caller_lat->type != IPA_CONST_VALUE) |
| return; |
| cst = caller_lat->constant; |
| |
| if (jfunc->value.pass_through.operation != NOP_EXPR) |
| { |
| tree restype; |
| if (TREE_CODE_CLASS (jfunc->value.pass_through.operation) |
| == tcc_comparison) |
| restype = boolean_type_node; |
| else |
| restype = TREE_TYPE (cst); |
| cst = fold_binary (jfunc->value.pass_through.operation, |
| restype, cst, jfunc->value.pass_through.operand); |
| } |
| if (!cst || !is_gimple_ip_invariant (cst)) |
| lat->type = IPA_BOTTOM; |
| lat->constant = cst; |
| } |
| else if (jfunc->type == IPA_JF_ANCESTOR) |
| { |
| struct ipcp_lattice *caller_lat; |
| tree t; |
| bool ok; |
| |
| caller_lat = ipcp_get_lattice (info, jfunc->value.ancestor.formal_id); |
| lat->type = caller_lat->type; |
| if (caller_lat->type != IPA_CONST_VALUE) |
| return; |
| if (TREE_CODE (caller_lat->constant) != ADDR_EXPR) |
| { |
| /* This can happen when the constant is a NULL pointer. */ |
| lat->type = IPA_BOTTOM; |
| return; |
| } |
| t = TREE_OPERAND (caller_lat->constant, 0); |
| ok = build_ref_for_offset (&t, TREE_TYPE (t), |
| jfunc->value.ancestor.offset, |
| jfunc->value.ancestor.type, false); |
| if (!ok) |
| { |
| lat->type = IPA_BOTTOM; |
| lat->constant = NULL_TREE; |
| } |
| else |
| lat->constant = build_fold_addr_expr (t); |
| } |
| else |
| lat->type = IPA_BOTTOM; |
| } |
| |
| /* True when OLD_LAT and NEW_LAT values are not the same. */ |
| |
| static bool |
| ipcp_lattice_changed (struct ipcp_lattice *old_lat, |
| struct ipcp_lattice *new_lat) |
| { |
| if (old_lat->type == new_lat->type) |
| { |
| if (!ipcp_lat_is_const (old_lat)) |
| return false; |
| if (ipcp_lats_are_equal (old_lat, new_lat)) |
| return false; |
| } |
| return true; |
| } |
| |
| /* Print all ipcp_lattices of all functions to F. */ |
| static void |
| ipcp_print_all_lattices (FILE * f) |
| { |
| struct cgraph_node *node; |
| int i, count; |
| |
| fprintf (f, "\nLattice:\n"); |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| struct ipa_node_params *info; |
| |
| if (!node->analyzed) |
| continue; |
| info = IPA_NODE_REF (node); |
| fprintf (f, " Node: %s:\n", cgraph_node_name (node)); |
| count = ipa_get_param_count (info); |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| |
| fprintf (f, " param [%d]: ", i); |
| if (lat->type == IPA_CONST_VALUE) |
| { |
| fprintf (f, "type is CONST "); |
| print_generic_expr (f, lat->constant, 0); |
| fprintf (f, "\n"); |
| } |
| else if (lat->type == IPA_TOP) |
| fprintf (f, "type is TOP\n"); |
| else |
| fprintf (f, "type is BOTTOM\n"); |
| } |
| } |
| } |
| |
| /* Return true if ipcp algorithms would allow cloning NODE. */ |
| |
| static bool |
| ipcp_versionable_function_p (struct cgraph_node *node) |
| { |
| tree decl = node->decl; |
| basic_block bb; |
| |
| /* There are a number of generic reasons functions cannot be versioned. */ |
| if (!tree_versionable_function_p (decl)) |
| return false; |
| |
| /* Removing arguments doesn't work if the function takes varargs. */ |
| if (DECL_STRUCT_FUNCTION (decl)->stdarg) |
| return false; |
| |
| /* Removing arguments doesn't work if we use __builtin_apply_args. */ |
| FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (decl)) |
| { |
| gimple_stmt_iterator gsi; |
| for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
| { |
| const_gimple stmt = gsi_stmt (gsi); |
| tree t; |
| |
| if (!is_gimple_call (stmt)) |
| continue; |
| t = gimple_call_fndecl (stmt); |
| if (t == NULL_TREE) |
| continue; |
| if (DECL_BUILT_IN_CLASS (t) == BUILT_IN_NORMAL |
| && DECL_FUNCTION_CODE (t) == BUILT_IN_APPLY_ARGS) |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| /* Return true if this NODE is viable candidate for cloning. */ |
| static bool |
| ipcp_cloning_candidate_p (struct cgraph_node *node) |
| { |
| int n_calls = 0; |
| int n_hot_calls = 0; |
| gcov_type direct_call_sum = 0; |
| struct cgraph_edge *e; |
| |
| /* We never clone functions that are not visible from outside. |
| FIXME: in future we should clone such functions when they are called with |
| different constants, but current ipcp implementation is not good on this. |
| */ |
| if (cgraph_only_called_directly_p (node) || !node->analyzed) |
| return false; |
| |
| if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; body is overwrittable.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| if (!ipcp_versionable_function_p (node)) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| for (e = node->callers; e; e = e->next_caller) |
| { |
| direct_call_sum += e->count; |
| n_calls ++; |
| if (cgraph_maybe_hot_edge_p (e)) |
| n_hot_calls ++; |
| } |
| |
| if (!n_calls) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| if (node->local.inline_summary.self_size < n_calls) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Considering %s for cloning; code would shrink.\n", |
| cgraph_node_name (node)); |
| return true; |
| } |
| |
| if (!flag_ipa_cp_clone) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| |
| if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl))) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| |
| /* When profile is available and function is hot, propagate into it even if |
| calls seems cold; constant propagation can improve function's speed |
| significandly. */ |
| if (max_count) |
| { |
| if (direct_call_sum > node->count * 90 / 100) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Considering %s for cloning; usually called directly.\n", |
| cgraph_node_name (node)); |
| return true; |
| } |
| } |
| if (!n_hot_calls) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n", |
| cgraph_node_name (node)); |
| return false; |
| } |
| if (dump_file) |
| fprintf (dump_file, "Considering %s for cloning.\n", |
| cgraph_node_name (node)); |
| return true; |
| } |
| |
| /* Initialize ipcp_lattices array. The lattices corresponding to supported |
| types (integers, real types and Fortran constants defined as const_decls) |
| are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */ |
| static void |
| ipcp_initialize_node_lattices (struct cgraph_node *node) |
| { |
| int i; |
| struct ipa_node_params *info = IPA_NODE_REF (node); |
| enum ipa_lattice_type type; |
| |
| if (ipa_is_called_with_var_arguments (info)) |
| type = IPA_BOTTOM; |
| else if (cgraph_only_called_directly_p (node)) |
| type = IPA_TOP; |
| /* When cloning is allowed, we can assume that externally visible functions |
| are not called. We will compensate this by cloning later. */ |
| else if (ipcp_cloning_candidate_p (node)) |
| type = IPA_TOP, n_cloning_candidates ++; |
| else |
| type = IPA_BOTTOM; |
| |
| for (i = 0; i < ipa_get_param_count (info) ; i++) |
| ipcp_get_lattice (info, i)->type = type; |
| } |
| |
| /* build INTEGER_CST tree with type TREE_TYPE and value according to LAT. |
| Return the tree. */ |
| static tree |
| build_const_val (struct ipcp_lattice *lat, tree tree_type) |
| { |
| tree val; |
| |
| gcc_assert (ipcp_lat_is_const (lat)); |
| val = lat->constant; |
| |
| if (!useless_type_conversion_p (tree_type, TREE_TYPE (val))) |
| { |
| if (fold_convertible_p (tree_type, val)) |
| return fold_build1 (NOP_EXPR, tree_type, val); |
| else |
| return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val); |
| } |
| return val; |
| } |
| |
| /* Compute the proper scale for NODE. It is the ratio between the number of |
| direct calls (represented on the incoming cgraph_edges) and sum of all |
| invocations of NODE (represented as count in cgraph_node). |
| |
| FIXME: This code is wrong. Since the callers can be also clones and |
| the clones are not scaled yet, the sums gets unrealistically high. |
| To properly compute the counts, we would need to do propagation across |
| callgraph (as external call to A might imply call to non-clonned B |
| if A's clone calls clonned B). */ |
| static void |
| ipcp_compute_node_scale (struct cgraph_node *node) |
| { |
| gcov_type sum; |
| struct cgraph_edge *cs; |
| |
| sum = 0; |
| /* Compute sum of all counts of callers. */ |
| for (cs = node->callers; cs != NULL; cs = cs->next_caller) |
| sum += cs->count; |
| /* Work around the unrealistically high sum problem. We just don't want |
| the non-cloned body to have negative or very low frequency. Since |
| majority of execution time will be spent in clones anyway, this should |
| give good enough profile. */ |
| if (sum > node->count * 9 / 10) |
| sum = node->count * 9 / 10; |
| if (node->count == 0) |
| ipcp_set_node_scale (node, 0); |
| else |
| ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count); |
| } |
| |
| /* Initialization and computation of IPCP data structures. This is the initial |
| intraprocedural analysis of functions, which gathers information to be |
| propagated later on. */ |
| static void |
| ipcp_init_stage (void) |
| { |
| struct cgraph_node *node; |
| struct cgraph_edge *cs; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| if (node->analyzed) |
| ipcp_analyze_node (node); |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| if (!node->analyzed) |
| continue; |
| /* building jump functions */ |
| for (cs = node->callees; cs; cs = cs->next_callee) |
| { |
| /* We do not need to bother analyzing calls to unknown |
| functions unless they may become known during lto/whopr. */ |
| if (!cs->callee->analyzed && !flag_lto && !flag_whopr) |
| continue; |
| ipa_count_arguments (cs); |
| if (ipa_get_cs_argument_count (IPA_EDGE_REF (cs)) |
| != ipa_get_param_count (IPA_NODE_REF (cs->callee))) |
| ipa_set_called_with_variable_arg (IPA_NODE_REF (cs->callee)); |
| ipa_compute_jump_functions (cs); |
| } |
| } |
| } |
| |
| /* Return true if there are some formal parameters whose value is IPA_TOP (in |
| the whole compilation unit). Change their values to IPA_BOTTOM, since they |
| most probably get their values from outside of this compilation unit. */ |
| static bool |
| ipcp_change_tops_to_bottom (void) |
| { |
| int i, count; |
| struct cgraph_node *node; |
| bool prop_again; |
| |
| prop_again = false; |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| struct ipa_node_params *info = IPA_NODE_REF (node); |
| count = ipa_get_param_count (info); |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| if (lat->type == IPA_TOP) |
| { |
| prop_again = true; |
| if (dump_file) |
| { |
| fprintf (dump_file, "Forcing param "); |
| print_generic_expr (dump_file, ipa_get_param (info, i), 0); |
| fprintf (dump_file, " of node %s to bottom.\n", |
| cgraph_node_name (node)); |
| } |
| lat->type = IPA_BOTTOM; |
| } |
| } |
| } |
| return prop_again; |
| } |
| |
| /* Interprocedural analysis. The algorithm propagates constants from the |
| caller's parameters to the callee's arguments. */ |
| static void |
| ipcp_propagate_stage (void) |
| { |
| int i; |
| struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL }; |
| struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL }; |
| struct ipcp_lattice *dest_lat; |
| struct cgraph_edge *cs; |
| struct ipa_jump_func *jump_func; |
| struct ipa_func_list *wl; |
| int count; |
| |
| ipa_check_create_node_params (); |
| ipa_check_create_edge_args (); |
| |
| /* Initialize worklist to contain all functions. */ |
| wl = ipa_init_func_list (); |
| while (wl) |
| { |
| struct cgraph_node *node = ipa_pop_func_from_list (&wl); |
| struct ipa_node_params *info = IPA_NODE_REF (node); |
| |
| for (cs = node->callees; cs; cs = cs->next_callee) |
| { |
| struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee); |
| struct ipa_edge_args *args = IPA_EDGE_REF (cs); |
| |
| if (ipa_is_called_with_var_arguments (callee_info) |
| || !cs->callee->analyzed |
| || ipa_is_called_with_var_arguments (callee_info)) |
| continue; |
| |
| count = ipa_get_cs_argument_count (args); |
| for (i = 0; i < count; i++) |
| { |
| jump_func = ipa_get_ith_jump_func (args, i); |
| ipcp_lattice_from_jfunc (info, &inc_lat, jump_func); |
| dest_lat = ipcp_get_lattice (callee_info, i); |
| ipa_lattice_meet (&new_lat, &inc_lat, dest_lat); |
| if (ipcp_lattice_changed (&new_lat, dest_lat)) |
| { |
| dest_lat->type = new_lat.type; |
| dest_lat->constant = new_lat.constant; |
| ipa_push_func_to_list (&wl, cs->callee); |
| } |
| } |
| } |
| } |
| } |
| |
| /* Call the constant propagation algorithm and re-call it if necessary |
| (if there are undetermined values left). */ |
| static void |
| ipcp_iterate_stage (void) |
| { |
| struct cgraph_node *node; |
| n_cloning_candidates = 0; |
| |
| if (dump_file) |
| fprintf (dump_file, "\nIPA iterate stage:\n\n"); |
| |
| if (in_lto_p) |
| ipa_update_after_lto_read (); |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| ipcp_initialize_node_lattices (node); |
| ipcp_compute_node_scale (node); |
| } |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| ipcp_print_all_lattices (dump_file); |
| ipcp_function_scale_print (dump_file); |
| } |
| |
| ipcp_propagate_stage (); |
| if (ipcp_change_tops_to_bottom ()) |
| /* Some lattices have changed from IPA_TOP to IPA_BOTTOM. |
| This change should be propagated. */ |
| { |
| gcc_assert (n_cloning_candidates); |
| ipcp_propagate_stage (); |
| } |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nIPA lattices after propagation:\n"); |
| ipcp_print_all_lattices (dump_file); |
| if (dump_flags & TDF_DETAILS) |
| ipcp_print_profile_data (dump_file); |
| } |
| } |
| |
| /* Check conditions to forbid constant insertion to function described by |
| NODE. */ |
| static inline bool |
| ipcp_node_modifiable_p (struct cgraph_node *node) |
| { |
| /* Once we will be able to do in-place replacement, we can be more |
| lax here. */ |
| return ipcp_versionable_function_p (node); |
| } |
| |
| /* Print count scale data structures. */ |
| static void |
| ipcp_function_scale_print (FILE * f) |
| { |
| struct cgraph_node *node; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| if (!node->analyzed) |
| continue; |
| fprintf (f, "printing scale for %s: ", cgraph_node_name (node)); |
| fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC |
| " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node)); |
| } |
| } |
| |
| /* Print counts of all cgraph nodes. */ |
| static void |
| ipcp_print_func_profile_counts (FILE * f) |
| { |
| struct cgraph_node *node; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| fprintf (f, "function %s: ", cgraph_node_name (node)); |
| fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC |
| " \n", (HOST_WIDE_INT) node->count); |
| } |
| } |
| |
| /* Print counts of all cgraph edges. */ |
| static void |
| ipcp_print_call_profile_counts (FILE * f) |
| { |
| struct cgraph_node *node; |
| struct cgraph_edge *cs; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| for (cs = node->callees; cs; cs = cs->next_callee) |
| { |
| fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller), |
| cgraph_node_name (cs->callee)); |
| fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n", |
| (HOST_WIDE_INT) cs->count); |
| } |
| } |
| } |
| |
| /* Print profile info for all functions. */ |
| static void |
| ipcp_print_profile_data (FILE * f) |
| { |
| fprintf (f, "\nNODE COUNTS :\n"); |
| ipcp_print_func_profile_counts (f); |
| fprintf (f, "\nCS COUNTS stage:\n"); |
| ipcp_print_call_profile_counts (f); |
| } |
| |
| /* Build and initialize ipa_replace_map struct according to LAT. This struct is |
| processed by versioning, which operates according to the flags set. |
| PARM_TREE is the formal parameter found to be constant. LAT represents the |
| constant. */ |
| static struct ipa_replace_map * |
| ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat) |
| { |
| struct ipa_replace_map *replace_map; |
| tree const_val; |
| |
| replace_map = GGC_NEW (struct ipa_replace_map); |
| const_val = build_const_val (lat, TREE_TYPE (parm_tree)); |
| if (dump_file) |
| { |
| fprintf (dump_file, " replacing param "); |
| print_generic_expr (dump_file, parm_tree, 0); |
| fprintf (dump_file, " with const "); |
| print_generic_expr (dump_file, const_val, 0); |
| fprintf (dump_file, "\n"); |
| } |
| replace_map->old_tree = parm_tree; |
| replace_map->new_tree = const_val; |
| replace_map->replace_p = true; |
| replace_map->ref_p = false; |
| |
| return replace_map; |
| } |
| |
| /* Return true if this callsite should be redirected to the original callee |
| (instead of the cloned one). */ |
| static bool |
| ipcp_need_redirect_p (struct cgraph_edge *cs) |
| { |
| struct ipa_node_params *orig_callee_info; |
| int i, count; |
| struct ipa_jump_func *jump_func; |
| struct cgraph_node *node = cs->callee, *orig; |
| |
| if (!n_cloning_candidates) |
| return false; |
| |
| if ((orig = ipcp_get_orig_node (node)) != NULL) |
| node = orig; |
| if (ipcp_get_orig_node (cs->caller)) |
| return false; |
| |
| orig_callee_info = IPA_NODE_REF (node); |
| count = ipa_get_param_count (orig_callee_info); |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i); |
| if (ipcp_lat_is_const (lat)) |
| { |
| jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i); |
| if (jump_func->type != IPA_JF_CONST) |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /* Fix the callsites and the call graph after function cloning was done. */ |
| static void |
| ipcp_update_callgraph (void) |
| { |
| struct cgraph_node *node; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| if (node->analyzed && ipcp_node_is_clone (node)) |
| { |
| bitmap args_to_skip = BITMAP_ALLOC (NULL); |
| struct cgraph_node *orig_node = ipcp_get_orig_node (node); |
| struct ipa_node_params *info = IPA_NODE_REF (orig_node); |
| int i, count = ipa_get_param_count (info); |
| struct cgraph_edge *cs, *next; |
| |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| tree parm_tree = ipa_get_param (info, i); |
| |
| /* We can proactively remove obviously unused arguments. */ |
| if (is_gimple_reg (parm_tree) |
| && !gimple_default_def (DECL_STRUCT_FUNCTION (orig_node->decl), |
| parm_tree)) |
| { |
| bitmap_set_bit (args_to_skip, i); |
| continue; |
| } |
| |
| if (lat->type == IPA_CONST_VALUE) |
| bitmap_set_bit (args_to_skip, i); |
| } |
| for (cs = node->callers; cs; cs = next) |
| { |
| next = cs->next_caller; |
| if (!ipcp_node_is_clone (cs->caller) && ipcp_need_redirect_p (cs)) |
| cgraph_redirect_edge_callee (cs, orig_node); |
| } |
| } |
| } |
| |
| /* Update profiling info for versioned functions and the functions they were |
| versioned from. */ |
| static void |
| ipcp_update_profiling (void) |
| { |
| struct cgraph_node *node, *orig_node; |
| gcov_type scale, scale_complement; |
| struct cgraph_edge *cs; |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| if (ipcp_node_is_clone (node)) |
| { |
| orig_node = ipcp_get_orig_node (node); |
| scale = ipcp_get_node_scale (orig_node); |
| node->count = orig_node->count * scale / REG_BR_PROB_BASE; |
| scale_complement = REG_BR_PROB_BASE - scale; |
| orig_node->count = |
| orig_node->count * scale_complement / REG_BR_PROB_BASE; |
| for (cs = node->callees; cs; cs = cs->next_callee) |
| cs->count = cs->count * scale / REG_BR_PROB_BASE; |
| for (cs = orig_node->callees; cs; cs = cs->next_callee) |
| cs->count = cs->count * scale_complement / REG_BR_PROB_BASE; |
| } |
| } |
| } |
| |
| /* If NODE was cloned, how much would program grow? */ |
| static long |
| ipcp_estimate_growth (struct cgraph_node *node) |
| { |
| struct cgraph_edge *cs; |
| int redirectable_node_callers = 0; |
| int removable_args = 0; |
| bool need_original = !cgraph_only_called_directly_p (node); |
| struct ipa_node_params *info; |
| int i, count; |
| int growth; |
| |
| for (cs = node->callers; cs != NULL; cs = cs->next_caller) |
| if (cs->caller == node || !ipcp_need_redirect_p (cs)) |
| redirectable_node_callers++; |
| else |
| need_original = true; |
| |
| /* If we will be able to fully replace orignal node, we never increase |
| program size. */ |
| if (!need_original) |
| return 0; |
| |
| info = IPA_NODE_REF (node); |
| count = ipa_get_param_count (info); |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| tree parm_tree = ipa_get_param (info, i); |
| |
| /* We can proactively remove obviously unused arguments. */ |
| if (is_gimple_reg (parm_tree) |
| && !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl), |
| parm_tree)) |
| removable_args++; |
| |
| if (lat->type == IPA_CONST_VALUE) |
| removable_args++; |
| } |
| |
| /* We make just very simple estimate of savings for removal of operand from |
| call site. Precise cost is dificult to get, as our size metric counts |
| constants and moves as free. Generally we are looking for cases that |
| small function is called very many times. */ |
| growth = node->local.inline_summary.self_size |
| - removable_args * redirectable_node_callers; |
| if (growth < 0) |
| return 0; |
| return growth; |
| } |
| |
| |
| /* Estimate cost of cloning NODE. */ |
| static long |
| ipcp_estimate_cloning_cost (struct cgraph_node *node) |
| { |
| int freq_sum = 1; |
| gcov_type count_sum = 1; |
| struct cgraph_edge *e; |
| int cost; |
| |
| cost = ipcp_estimate_growth (node) * 1000; |
| if (!cost) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Versioning of %s will save code size\n", |
| cgraph_node_name (node)); |
| return 0; |
| } |
| |
| for (e = node->callers; e; e = e->next_caller) |
| if (!bitmap_bit_p (dead_nodes, e->caller->uid) |
| && !ipcp_need_redirect_p (e)) |
| { |
| count_sum += e->count; |
| freq_sum += e->frequency + 1; |
| } |
| |
| if (max_count) |
| cost /= count_sum * 1000 / max_count + 1; |
| else |
| cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1; |
| if (dump_file) |
| fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n", |
| cgraph_node_name (node), cost, node->local.inline_summary.self_size, |
| freq_sum); |
| return cost + 1; |
| } |
| |
| /* Return number of live constant parameters. */ |
| static int |
| ipcp_const_param_count (struct cgraph_node *node) |
| { |
| int const_param = 0; |
| struct ipa_node_params *info = IPA_NODE_REF (node); |
| int count = ipa_get_param_count (info); |
| int i; |
| |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| tree parm_tree = ipa_get_param (info, i); |
| if (ipcp_lat_is_insertable (lat) |
| /* Do not count obviously unused arguments. */ |
| && (!is_gimple_reg (parm_tree) |
| || gimple_default_def (DECL_STRUCT_FUNCTION (node->decl), |
| parm_tree))) |
| const_param++; |
| } |
| return const_param; |
| } |
| |
| /* Propagate the constant parameters found by ipcp_iterate_stage() |
| to the function's code. */ |
| static void |
| ipcp_insert_stage (void) |
| { |
| struct cgraph_node *node, *node1 = NULL; |
| int i; |
| VEC (cgraph_edge_p, heap) * redirect_callers; |
| VEC (ipa_replace_map_p,gc)* replace_trees; |
| int node_callers, count; |
| tree parm_tree; |
| struct ipa_replace_map *replace_param; |
| fibheap_t heap; |
| long overall_size = 0, new_size = 0; |
| long max_new_size; |
| |
| ipa_check_create_node_params (); |
| ipa_check_create_edge_args (); |
| if (dump_file) |
| fprintf (dump_file, "\nIPA insert stage:\n\n"); |
| |
| dead_nodes = BITMAP_ALLOC (NULL); |
| |
| for (node = cgraph_nodes; node; node = node->next) |
| if (node->analyzed) |
| { |
| if (node->count > max_count) |
| max_count = node->count; |
| overall_size += node->local.inline_summary.self_size; |
| } |
| |
| max_new_size = overall_size; |
| if (max_new_size < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS)) |
| max_new_size = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS); |
| max_new_size = max_new_size * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1; |
| |
| /* First collect all functions we proved to have constant arguments to heap. */ |
| heap = fibheap_new (); |
| for (node = cgraph_nodes; node; node = node->next) |
| { |
| struct ipa_node_params *info; |
| /* Propagation of the constant is forbidden in certain conditions. */ |
| if (!node->analyzed || !ipcp_node_modifiable_p (node)) |
| continue; |
| info = IPA_NODE_REF (node); |
| if (ipa_is_called_with_var_arguments (info)) |
| continue; |
| if (ipcp_const_param_count (node)) |
| node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node); |
| } |
| |
| /* Now clone in priority order until code size growth limits are met or |
| heap is emptied. */ |
| while (!fibheap_empty (heap)) |
| { |
| struct ipa_node_params *info; |
| int growth = 0; |
| bitmap args_to_skip; |
| struct cgraph_edge *cs; |
| |
| node = (struct cgraph_node *)fibheap_extract_min (heap); |
| node->aux = NULL; |
| if (dump_file) |
| fprintf (dump_file, "considering function %s\n", |
| cgraph_node_name (node)); |
| |
| growth = ipcp_estimate_growth (node); |
| |
| if (new_size + growth > max_new_size) |
| break; |
| if (growth |
| && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl))) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not versioning, cold code would grow"); |
| continue; |
| } |
| |
| new_size += growth; |
| |
| /* Look if original function becomes dead after clonning. */ |
| for (cs = node->callers; cs != NULL; cs = cs->next_caller) |
| if (cs->caller == node || ipcp_need_redirect_p (cs)) |
| break; |
| if (!cs && cgraph_only_called_directly_p (node)) |
| bitmap_set_bit (dead_nodes, node->uid); |
| |
| info = IPA_NODE_REF (node); |
| count = ipa_get_param_count (info); |
| |
| replace_trees = VEC_alloc (ipa_replace_map_p, gc, 1); |
| args_to_skip = BITMAP_GGC_ALLOC (); |
| for (i = 0; i < count; i++) |
| { |
| struct ipcp_lattice *lat = ipcp_get_lattice (info, i); |
| parm_tree = ipa_get_param (info, i); |
| |
| /* We can proactively remove obviously unused arguments. */ |
| if (is_gimple_reg (parm_tree) |
| && !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl), |
| parm_tree)) |
| { |
| bitmap_set_bit (args_to_skip, i); |
| continue; |
| } |
| |
| if (lat->type == IPA_CONST_VALUE) |
| { |
| replace_param = |
| ipcp_create_replace_map (parm_tree, lat); |
| VEC_safe_push (ipa_replace_map_p, gc, replace_trees, replace_param); |
| bitmap_set_bit (args_to_skip, i); |
| } |
| } |
| |
| /* Compute how many callers node has. */ |
| node_callers = 0; |
| for (cs = node->callers; cs != NULL; cs = cs->next_caller) |
| node_callers++; |
| redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers); |
| for (cs = node->callers; cs != NULL; cs = cs->next_caller) |
| VEC_quick_push (cgraph_edge_p, redirect_callers, cs); |
| |
| /* Redirecting all the callers of the node to the |
| new versioned node. */ |
| node1 = |
| cgraph_create_virtual_clone (node, redirect_callers, replace_trees, |
| args_to_skip); |
| args_to_skip = NULL; |
| VEC_free (cgraph_edge_p, heap, redirect_callers); |
| replace_trees = NULL; |
| |
| if (node1 == NULL) |
| continue; |
| if (dump_file) |
| fprintf (dump_file, "versioned function %s with growth %i, overall %i\n", |
| cgraph_node_name (node), (int)growth, (int)new_size); |
| ipcp_init_cloned_node (node, node1); |
| |
| /* TODO: We can use indirect inlning info to produce new calls. */ |
| |
| if (dump_file) |
| dump_function_to_file (node1->decl, dump_file, dump_flags); |
| |
| for (cs = node->callees; cs; cs = cs->next_callee) |
| if (cs->callee->aux) |
| { |
| fibheap_delete_node (heap, (fibnode_t) cs->callee->aux); |
| cs->callee->aux = fibheap_insert (heap, |
| ipcp_estimate_cloning_cost (cs->callee), |
| cs->callee); |
| } |
| } |
| |
| while (!fibheap_empty (heap)) |
| { |
| if (dump_file) |
| fprintf (dump_file, "skipping function %s\n", |
| cgraph_node_name (node)); |
| node = (struct cgraph_node *) fibheap_extract_min (heap); |
| node->aux = NULL; |
| } |
| fibheap_delete (heap); |
| BITMAP_FREE (dead_nodes); |
| ipcp_update_callgraph (); |
| ipcp_update_profiling (); |
| } |
| |
| /* The IPCP driver. */ |
| static unsigned int |
| ipcp_driver (void) |
| { |
| cgraph_remove_unreachable_nodes (true,dump_file); |
| if (dump_file) |
| { |
| fprintf (dump_file, "\nIPA structures before propagation:\n"); |
| if (dump_flags & TDF_DETAILS) |
| ipa_print_all_params (dump_file); |
| ipa_print_all_jump_functions (dump_file); |
| } |
| /* 2. Do the interprocedural propagation. */ |
| ipcp_iterate_stage (); |
| /* 3. Insert the constants found to the functions. */ |
| ipcp_insert_stage (); |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| fprintf (dump_file, "\nProfiling info after insert stage:\n"); |
| ipcp_print_profile_data (dump_file); |
| } |
| /* Free all IPCP structures. */ |
| free_all_ipa_structures_after_ipa_cp (); |
| if (dump_file) |
| fprintf (dump_file, "\nIPA constant propagation end\n"); |
| return 0; |
| } |
| |
| /* Note function body size. */ |
| static void |
| ipcp_generate_summary (void) |
| { |
| if (dump_file) |
| fprintf (dump_file, "\nIPA constant propagation start:\n"); |
| ipa_check_create_node_params (); |
| ipa_check_create_edge_args (); |
| ipa_register_cgraph_hooks (); |
| /* 1. Call the init stage to initialize |
| the ipa_node_params and ipa_edge_args structures. */ |
| ipcp_init_stage (); |
| } |
| |
| /* Write ipcp summary for nodes in SET. */ |
| static void |
| ipcp_write_summary (cgraph_node_set set) |
| { |
| ipa_prop_write_jump_functions (set); |
| } |
| |
| /* Read ipcp summary. */ |
| static void |
| ipcp_read_summary (void) |
| { |
| ipa_prop_read_jump_functions (); |
| } |
| |
| /* Gate for IPCP optimization. */ |
| static bool |
| cgraph_gate_cp (void) |
| { |
| return flag_ipa_cp; |
| } |
| |
| struct ipa_opt_pass_d pass_ipa_cp = |
| { |
| { |
| IPA_PASS, |
| "cp", /* name */ |
| cgraph_gate_cp, /* gate */ |
| ipcp_driver, /* execute */ |
| NULL, /* sub */ |
| NULL, /* next */ |
| 0, /* static_pass_number */ |
| TV_IPA_CONSTANT_PROP, /* tv_id */ |
| 0, /* properties_required */ |
| 0, /* properties_provided */ |
| 0, /* properties_destroyed */ |
| 0, /* todo_flags_start */ |
| TODO_dump_cgraph | TODO_dump_func | |
| TODO_remove_functions /* todo_flags_finish */ |
| }, |
| ipcp_generate_summary, /* generate_summary */ |
| ipcp_write_summary, /* write_summary */ |
| ipcp_read_summary, /* read_summary */ |
| NULL, /* function_read_summary */ |
| lto_ipa_fixup_call_notes, /* stmt_fixup */ |
| 0, /* TODOs */ |
| NULL, /* function_transform */ |
| NULL, /* variable_transform */ |
| }; |