| /* Interprocedural scalar replacement of aggregates |
| Copyright (C) 2019-2022 Free Software Foundation, Inc. |
| Contributed by Martin Jambor <mjambor@suse.cz> |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| /* IPA-SRA is an interprocedural pass that removes unused function return |
| values (turning functions which return a value that is never used into void |
| functions) and removes unused function parameters. It can also replace an |
| aggregate parameter with a set of parameters representing parts of the |
| original, turning those passed by reference into new ones which pass the |
| value directly. |
| |
| The pass is a true IPA one, which means that it works in three stages in |
| order to be able to take advantage of LTO. First, summaries about functions |
| and all calls are generated. Function summaries (often called call graph |
| node summaries) contain mainly information about which parameters are |
| potential transformation candidates and which bits of candidates are |
| accessed. We differentiate between accesses done as a part of a call |
| statement (which might not be necessary if the callee is also transformed) |
| and others (which are mandatory). Call summaries (often called call graph |
| edge summaries) contain information about which formal parameters of the |
| caller feed into which actual call arguments, so that if two parameters are |
| only used in a sum which is then passed to another function which does not |
| use the corresponding parameter, all three parameters of the two functions |
| can be eliminated. Edge summaries also carry flags recording whether the |
| return value of the call is used at all and whether it is only passed on as |
| the caller's own return value. In LTO mode these summaries are streamed to |
| the object file in the compilation phase and streamed back in during the |
| WPA analysis stage. |
| |
| The interprocedural analysis phase traverses the graph in topological order |
| in two sweeps, one in each direction. First, parameter removal and |
| splitting are propagated from callees to callers, with each |
| strongly-connected component processed iteratively until the situation in |
| it stabilizes. A sweep from callers to callees is then carried out to |
| remove unused return values in a very similar fashion. |
| |
| Because parameter manipulation has big implications for call redirection, |
| which is done only after all call graph nodes materialize, the |
| transformation phase is not performed by this pass itself but is carried |
| out by the clone materialization and edge redirection code; see the |
| comments in ipa-param-manipulation.h for more details. */ |
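| |
| /* As an illustrative sketch (the names below are made up and do not come |
| from any particular testcase), a function such as |
| |
| static int |
| foo (struct S *p, int unused) |
| { |
| return p->a + p->c; |
| } |
| |
| can, if all of its call sites permit it, end up behaving like |
| |
| static int |
| foo (int a, int c) |
| { |
| return a + c; |
| } |
| |
| with the callers adjusted to load p->a and p->c themselves and to no longer |
| compute the unused argument. The actual clone with the new signature is |
| created by the generic clone materialization code mentioned above. */ |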
| |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "tree.h" |
| #include "gimple.h" |
| #include "predict.h" |
| #include "tree-pass.h" |
| #include "ssa.h" |
| #include "cgraph.h" |
| #include "gimple-pretty-print.h" |
| #include "alias.h" |
| #include "tree-eh.h" |
| #include "gimple-iterator.h" |
| #include "gimple-walk.h" |
| #include "tree-dfa.h" |
| #include "tree-sra.h" |
| #include "alloc-pool.h" |
| #include "symbol-summary.h" |
| #include "dbgcnt.h" |
| #include "tree-inline.h" |
| #include "ipa-utils.h" |
| #include "builtins.h" |
| #include "cfganal.h" |
| #include "tree-streamer.h" |
| #include "internal-fn.h" |
| #include "symtab-clones.h" |
| #include "attribs.h" |
| |
| static void ipa_sra_summarize_function (cgraph_node *); |
| |
| /* Bits used to track size of an aggregate in bytes interprocedurally. */ |
| #define ISRA_ARG_SIZE_LIMIT_BITS 16 |
| #define ISRA_ARG_SIZE_LIMIT (1 << ISRA_ARG_SIZE_LIMIT_BITS) |
| /* How many parameters can feed into a call actual argument and still be |
| tracked. */ |
| #define IPA_SRA_MAX_PARAM_FLOW_LEN 7 |
| |
| /* Structure describing accesses to a specific portion of an aggregate |
| parameter, as given by the offset and size. Smaller accesses that occur |
| within a function and fall within another access form a tree. The pass |
| cannot analyze parameters with only partially overlapping accesses. */ |
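| |
| /* For example (an illustrative sketch only), for a parameter of a type like |
| |
| struct S { int i; short a, b; }; |
| |
| an access to the whole of S, one to S.i and one to S.a can form a tree in |
| which the whole-of-S access is the root and the other two are its children, |
| whereas an additional four byte access at offset 2, straddling S.i and S.a, |
| would only partially overlap with them and make the parameter |
| unanalyzable. */ |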
| |
| struct GTY(()) param_access |
| { |
| /* Type that a potential replacement should have. This field only has |
| meaning in the summary building and transformation phases, when it is |
| reconstructed from the body. Must not be touched in IPA analysis |
| stage. */ |
| tree type; |
| |
| /* Alias reference type to be used in MEM_REFs when adjusting caller |
| arguments. */ |
| tree alias_ptr_type; |
| |
| /* Values returned by get_ref_base_and_extent but converted to bytes and |
| stored as unsigned ints. */ |
| unsigned unit_offset; |
| unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS; |
| |
| /* Set once we are sure that the access will really end up in a potentially |
| transformed function - initially not set for portions of formal parameters |
| that are only used as actual function arguments passed to callees. */ |
| unsigned certain : 1; |
| /* Set if the access has reverse scalar storage order. */ |
| unsigned reverse : 1; |
| }; |
| |
| /* This structure has the same purpose as the one above and additionally it |
| contains some fields that are only necessary in the summary generation |
| phase. */ |
| |
| struct gensum_param_access |
| { |
| /* Values returned by get_ref_base_and_extent. */ |
| HOST_WIDE_INT offset; |
| HOST_WIDE_INT size; |
| |
| /* If this access has any children (in terms of the definition above), this |
| points to the first one. */ |
| struct gensum_param_access *first_child; |
| /* Pointer to the next sibling in the access tree as described above. */ |
| struct gensum_param_access *next_sibling; |
| |
| /* Type that a potential replacement should have. This field only has |
| meaning in the summary building and transformation phases, when it is |
| reconstructed from the body. Must not be touched in IPA analysis |
| stage. */ |
| tree type; |
| /* Alias reference type to be used in MEM_REFs when adjusting caller |
| arguments. */ |
| tree alias_ptr_type; |
| |
| /* True if there have been writes to or reads from this exact location other |
| than as actual arguments to a function call that can be tracked. */ |
| bool nonarg; |
| |
| /* Set if the access has reverse scalar storage order. */ |
| bool reverse; |
| }; |
| |
| /* Summary describing a parameter in the IPA stages. */ |
| |
| struct GTY(()) isra_param_desc |
| { |
| /* List of access representatives to the parameter, sorted according to |
| their offset. */ |
| vec <param_access *, va_gc> *accesses; |
| |
| /* Unit size limit of total size of all replacements. */ |
| unsigned param_size_limit : ISRA_ARG_SIZE_LIMIT_BITS; |
| /* Sum of unit sizes of all certain replacements. */ |
| unsigned size_reached : ISRA_ARG_SIZE_LIMIT_BITS; |
| |
| /* A parameter that is used only in call arguments and can be removed if all |
| concerned actual arguments are removed. */ |
| unsigned locally_unused : 1; |
| /* An aggregate that is a candidate for breaking up or complete removal. */ |
| unsigned split_candidate : 1; |
| /* Is this a parameter passing data by reference? */ |
| unsigned by_ref : 1; |
| }; |
| |
| /* Structure used when generating summaries that describes a parameter. */ |
| |
| struct gensum_param_desc |
| { |
| /* Roots of param_accesses. */ |
| gensum_param_access *accesses; |
| /* Number of accesses in the access tree rooted in the field ACCESSES above. */ |
| unsigned access_count; |
| |
| /* If non-zero, this is the number of uses as actual call arguments. */ |
| int call_uses; |
| /* Number of times this pointer parameter has been passed directly to a |
| callee (pointer pass-through). */ |
| unsigned ptr_pt_count; |
| |
| /* Size limit of total size of all replacements. */ |
| unsigned param_size_limit; |
| /* Sum of sizes of nonarg accesses. */ |
| unsigned nonarg_acc_size; |
| |
| /* A parameter that is used only in call arguments and can be removed if all |
| concerned actual arguments are removed. */ |
| bool locally_unused; |
| /* An aggregate that is a candidate for breaking up or a pointer passing data |
| by reference that is a candidate for being converted to a set of |
| parameters passing those data by value. */ |
| bool split_candidate; |
| /* Is this a parameter passing data by reference? */ |
| bool by_ref; |
| |
| /* The index of this parameter in the order in which it appears in the |
| function decl. */ |
| int param_number; |
| /* For parameters passing data by reference, this is the index used to |
| compute offsets into bb_dereferences. */ |
| int deref_index; |
| }; |
| |
| /* Properly deallocate accesses of DESC. TODO: Since this data structure is |
| allocated in GC memory, this is not necessary and we can consider removing |
| the function. */ |
| |
| static void |
| free_param_decl_accesses (isra_param_desc *desc) |
| { |
| unsigned len = vec_safe_length (desc->accesses); |
| for (unsigned i = 0; i < len; ++i) |
| ggc_free ((*desc->accesses)[i]); |
| vec_free (desc->accesses); |
| } |
| |
| /* Class used to convey information about functions from the |
| intra-procedural analysis stage to the inter-procedural one. */ |
| |
| class GTY((for_user)) isra_func_summary |
| { |
| public: |
| /* Initialize the object. */ |
| |
| isra_func_summary () |
| : m_parameters (NULL), m_candidate (false), m_returns_value (false), |
| m_return_ignored (false), m_queued (false) |
| {} |
| |
| /* Destroy m_parameters. */ |
| |
| ~isra_func_summary (); |
| |
| /* Mark the function as not a candidate for any IPA-SRA transformation. |
| Return true if it was a candidate until now. */ |
| |
| bool zap (); |
| |
| /* Vector of parameter descriptors corresponding to the function being |
| analyzed. */ |
| vec<isra_param_desc, va_gc> *m_parameters; |
| |
| /* Whether the node is even a candidate for any IPA-SRA transformation at |
| all. */ |
| unsigned m_candidate : 1; |
| |
| /* Whether the original function returns any value. */ |
| unsigned m_returns_value : 1; |
| |
| /* Set to true if all call statements do not actually use the returned |
| value. */ |
| |
| unsigned m_return_ignored : 1; |
| |
| /* Whether the node is already queued in the IPA-SRA stack during processing |
| of call graph SCCs. */ |
| |
| unsigned m_queued : 1; |
| }; |
| |
| /* Deallocate the memory pointed to by isra_func_summary. TODO: Since this |
| data structure is allocated in GC memory, this is not necessary and we can |
| consider removing the destructor. */ |
| |
| isra_func_summary::~isra_func_summary () |
| { |
| unsigned len = vec_safe_length (m_parameters); |
| for (unsigned i = 0; i < len; ++i) |
| free_param_decl_accesses (&(*m_parameters)[i]); |
| vec_free (m_parameters); |
| } |
| |
| /* Mark the function as not a candidate for any IPA-SRA transformation. Return |
| true if it was a candidate until now. */ |
| |
| bool |
| isra_func_summary::zap () |
| { |
| bool ret = m_candidate; |
| m_candidate = false; |
| |
| /* TODO: see the destructor above. */ |
| unsigned len = vec_safe_length (m_parameters); |
| for (unsigned i = 0; i < len; ++i) |
| free_param_decl_accesses (&(*m_parameters)[i]); |
| vec_free (m_parameters); |
| |
| return ret; |
| } |
| |
| /* Structure to describe which formal parameters feed into a particular actual |
| argument. */ |
| |
| struct isra_param_flow |
| { |
| /* Number of elements in array inputs that contain valid data. */ |
| char length; |
| /* Indices of formal parameters that feed into the described actual argument. |
| If aggregate_pass_through or pointer_pass_through below are true, it must |
| contain exactly one element which is passed through from a formal |
| parameter of the given number. Otherwise, the array contains indices of |
| the caller's formal parameters which are used to calculate the value of |
| this actual argument. */ |
| unsigned char inputs[IPA_SRA_MAX_PARAM_FLOW_LEN]; |
| |
| /* Offset within the formal parameter. */ |
| unsigned unit_offset; |
| /* Size of the portion of the formal parameter that is being passed. */ |
| unsigned unit_size : ISRA_ARG_SIZE_LIMIT_BITS; |
| |
| /* True when the value of this actual argument is a portion of a formal |
| parameter. */ |
| unsigned aggregate_pass_through : 1; |
| /* True when the value of this actual argument is a verbatim pass through of |
| a pointer formal parameter. */ |
| unsigned pointer_pass_through : 1; |
| /* True when it is safe to copy access candidates here from the callee, which |
| would mean introducing dereferences into callers of the caller. */ |
| unsigned safe_to_import_accesses : 1; |
| }; |
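| |
| /* As an illustrative sketch (made-up names, not from any testcase), given a |
| caller |
| |
| void caller (int a, int b, int *p) |
| { |
| callee (a + b, p); |
| } |
| |
| the isra_param_flow of the first actual argument would have length 2 and |
| inputs {0, 1}, while that of the second would have length 1, inputs {2} and |
| pointer_pass_through set. */ |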
| |
| /* Structure used to convey information about calls from the intra-procedural |
| analysis stage to the inter-procedural one. */ |
| |
| class isra_call_summary |
| { |
| public: |
| isra_call_summary () |
| : m_arg_flow (), m_return_ignored (false), m_return_returned (false), |
| m_bit_aligned_arg (false), m_before_any_store (false) |
| {} |
| |
| void init_inputs (unsigned arg_count); |
| void dump (FILE *f); |
| |
| /* Information about what formal parameters of the caller are used to compute |
| individual actual arguments of this call. */ |
| auto_vec <isra_param_flow> m_arg_flow; |
| |
| /* Set to true if the call statement does not have a LHS. */ |
| unsigned m_return_ignored : 1; |
| |
| /* Set to true if the LHS of call statement is only used to construct the |
| return value of the caller. */ |
| unsigned m_return_returned : 1; |
| |
| /* Set when any of the call arguments are not byte-aligned. */ |
| unsigned m_bit_aligned_arg : 1; |
| |
| /* Set to true if the call happened before any (other) store to memory in |
| the caller. */ |
| unsigned m_before_any_store : 1; |
| }; |
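| |
| /* For instance (an illustrative sketch only), in |
| |
| int bar (int); |
| int foo (int i) { return bar (i); } |
| |
| the summary of the call to bar would have m_return_returned set because foo |
| only passes the value on, and if no caller of foo uses its return value |
| either, the interprocedural analysis can conclude that the return values of |
| both functions are removable. */ |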
| |
| /* Class to manage function summaries. */ |
| |
| class GTY((user)) ipa_sra_function_summaries |
| : public function_summary <isra_func_summary *> |
| { |
| public: |
| ipa_sra_function_summaries (symbol_table *table, bool ggc): |
| function_summary<isra_func_summary *> (table, ggc) { } |
| |
| virtual void duplicate (cgraph_node *, cgraph_node *, |
| isra_func_summary *old_sum, |
| isra_func_summary *new_sum); |
| virtual void insert (cgraph_node *, isra_func_summary *); |
| }; |
| |
| /* Hook that is called by summary when a node is duplicated. */ |
| |
| void |
| ipa_sra_function_summaries::duplicate (cgraph_node *, cgraph_node *, |
| isra_func_summary *old_sum, |
| isra_func_summary *new_sum) |
| { |
| /* TODO: Somehow stop copying when ISRA is doing the cloning, it is |
| useless. */ |
| new_sum->m_candidate = old_sum->m_candidate; |
| new_sum->m_returns_value = old_sum->m_returns_value; |
| new_sum->m_return_ignored = old_sum->m_return_ignored; |
| gcc_assert (!old_sum->m_queued); |
| new_sum->m_queued = false; |
| |
| unsigned param_count = vec_safe_length (old_sum->m_parameters); |
| if (!param_count) |
| return; |
| vec_safe_reserve_exact (new_sum->m_parameters, param_count); |
| new_sum->m_parameters->quick_grow_cleared (param_count); |
| for (unsigned i = 0; i < param_count; i++) |
| { |
| isra_param_desc *s = &(*old_sum->m_parameters)[i]; |
| isra_param_desc *d = &(*new_sum->m_parameters)[i]; |
| |
| d->param_size_limit = s->param_size_limit; |
| d->size_reached = s->size_reached; |
| d->locally_unused = s->locally_unused; |
| d->split_candidate = s->split_candidate; |
| d->by_ref = s->by_ref; |
| |
| unsigned acc_count = vec_safe_length (s->accesses); |
| vec_safe_reserve_exact (d->accesses, acc_count); |
| for (unsigned j = 0; j < acc_count; j++) |
| { |
| param_access *from = (*s->accesses)[j]; |
| param_access *to = ggc_cleared_alloc<param_access> (); |
| to->type = from->type; |
| to->alias_ptr_type = from->alias_ptr_type; |
| to->unit_offset = from->unit_offset; |
| to->unit_size = from->unit_size; |
| to->certain = from->certain; |
| to->reverse = from->reverse; |
| d->accesses->quick_push (to); |
| } |
| } |
| } |
| |
| /* Pointer to the pass function summary holder. */ |
| |
| static GTY(()) ipa_sra_function_summaries *func_sums; |
| |
| /* Hook that is called by summary when a new node appears. */ |
| |
| void |
| ipa_sra_function_summaries::insert (cgraph_node *node, isra_func_summary *) |
| { |
| if (opt_for_fn (node->decl, flag_ipa_sra)) |
| { |
| push_cfun (DECL_STRUCT_FUNCTION (node->decl)); |
| ipa_sra_summarize_function (node); |
| pop_cfun (); |
| } |
| else |
| func_sums->remove (node); |
| } |
| |
| /* Class to manage call summaries. */ |
| |
| class ipa_sra_call_summaries: public call_summary <isra_call_summary *> |
| { |
| public: |
| ipa_sra_call_summaries (symbol_table *table): |
| call_summary<isra_call_summary *> (table) { } |
| |
| /* Duplicate info when an edge is cloned. */ |
| virtual void duplicate (cgraph_edge *, cgraph_edge *, |
| isra_call_summary *old_sum, |
| isra_call_summary *new_sum); |
| }; |
| |
| static ipa_sra_call_summaries *call_sums; |
| |
| |
| /* Initialize m_arg_flow of a particular instance of isra_call_summary. |
| ARG_COUNT is the number of actual arguments passed. */ |
| |
| void |
| isra_call_summary::init_inputs (unsigned arg_count) |
| { |
| if (arg_count == 0) |
| { |
| gcc_checking_assert (m_arg_flow.length () == 0); |
| return; |
| } |
| if (m_arg_flow.length () == 0) |
| { |
| m_arg_flow.reserve_exact (arg_count); |
| m_arg_flow.quick_grow_cleared (arg_count); |
| } |
| else |
| gcc_checking_assert (arg_count == m_arg_flow.length ()); |
| } |
| |
| /* Dump all information in call summary to F. */ |
| |
| void |
| isra_call_summary::dump (FILE *f) |
| { |
| if (m_return_ignored) |
| fprintf (f, " return value ignored\n"); |
| if (m_return_returned) |
| fprintf (f, " return value used only to compute caller return value\n"); |
| if (m_before_any_store) |
| fprintf (f, " happens before any store to memory\n"); |
| for (unsigned i = 0; i < m_arg_flow.length (); i++) |
| { |
| fprintf (f, " Parameter %u:\n", i); |
| isra_param_flow *ipf = &m_arg_flow[i]; |
| |
| if (ipf->length) |
| { |
| bool first = true; |
| fprintf (f, " Scalar param sources: "); |
| for (int j = 0; j < ipf->length; j++) |
| { |
| if (!first) |
| fprintf (f, ", "); |
| else |
| first = false; |
| fprintf (f, "%i", (int) ipf->inputs[j]); |
| } |
| fprintf (f, "\n"); |
| } |
| if (ipf->aggregate_pass_through) |
| fprintf (f, " Aggregate pass through from the param given above, " |
| "unit offset: %u , unit size: %u\n", |
| ipf->unit_offset, ipf->unit_size); |
| if (ipf->pointer_pass_through) |
| fprintf (f, " Pointer pass through from the param given above, " |
| "safe_to_import_accesses: %u\n", ipf->safe_to_import_accesses); |
| } |
| } |
| |
| /* Duplicate edge summary when an edge is cloned. */ |
| |
| void |
| ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *, |
| isra_call_summary *old_sum, |
| isra_call_summary *new_sum) |
| { |
| unsigned arg_count = old_sum->m_arg_flow.length (); |
| new_sum->init_inputs (arg_count); |
| for (unsigned i = 0; i < arg_count; i++) |
| new_sum->m_arg_flow[i] = old_sum->m_arg_flow[i]; |
| |
| new_sum->m_return_ignored = old_sum->m_return_ignored; |
| new_sum->m_return_returned = old_sum->m_return_returned; |
| new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg; |
| new_sum->m_before_any_store = old_sum->m_before_any_store; |
| } |
| |
| |
| /* With all the GTY stuff done, we can move to an anonymous namespace. */ |
| namespace { |
| /* Quick mapping from a decl to its param descriptor. */ |
| |
| hash_map<tree, gensum_param_desc *> *decl2desc; |
| |
| /* Countdown of allowed Alias Analysis steps during summary building. */ |
| |
| int aa_walking_limit; |
| |
| /* This is a table in which, for each basic block and parameter, there is a |
| distance (offset + size) up to which the parameter is known to be |
| dereferenced and accessed in that BB. */ |
| HOST_WIDE_INT *bb_dereferences = NULL; |
| /* How many by-reference parameters there are in the current function. */ |
| int by_ref_count; |
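| |
| /* For instance, the dereference distance recorded for the parameter with |
| deref_index D in basic block B lives at |
| bb_dereferences[B->index * by_ref_count + D], matching the computation in |
| mark_param_dereference below. */ |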
| |
| /* Bitmap of BBs that can cause the function to "stop" progressing by |
| returning, throwing externally, looping infinitely or calling a function |
| which might abort, etc. */ |
| bitmap final_bbs; |
| |
| /* Obstack to allocate various small structures required only when generating |
| summary for a function. */ |
| struct obstack gensum_obstack; |
| |
| /* Return false if the function is apparently unsuitable for IPA-SRA based on |
| its attributes, return true otherwise. NODE is the cgraph node of the |
| current function. */ |
| |
| static bool |
| ipa_sra_preliminary_function_checks (cgraph_node *node) |
| { |
| if (!node->can_change_signature) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Function cannot change signature.\n"); |
| return false; |
| } |
| |
| if (!tree_versionable_function_p (node->decl)) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Function is not versionable.\n"); |
| return false; |
| } |
| |
| if (!opt_for_fn (node->decl, optimize) |
| || !opt_for_fn (node->decl, flag_ipa_sra)) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Not optimizing or IPA-SRA turned off for this " |
| "function.\n"); |
| return false; |
| } |
| |
| if (DECL_VIRTUAL_P (node->decl)) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Function is a virtual method.\n"); |
| return false; |
| } |
| |
| struct function *fun = DECL_STRUCT_FUNCTION (node->decl); |
| if (fun->stdarg) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Function uses stdarg. \n"); |
| return false; |
| } |
| |
| if (DECL_DISREGARD_INLINE_LIMITS (node->decl)) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Always inline function will be inlined " |
| "anyway. \n"); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* Print the access tree starting at ACCESS to F, indented by INDENT. */ |
| |
| static void |
| dump_gensum_access (FILE *f, gensum_param_access *access, unsigned indent) |
| { |
| fprintf (f, " "); |
| for (unsigned i = 0; i < indent; i++) |
| fprintf (f, " "); |
| fprintf (f, " * Access to offset: " HOST_WIDE_INT_PRINT_DEC, |
| access->offset); |
| fprintf (f, ", size: " HOST_WIDE_INT_PRINT_DEC, access->size); |
| fprintf (f, ", type: "); |
| print_generic_expr (f, access->type); |
| fprintf (f, ", alias_ptr_type: "); |
| print_generic_expr (f, access->alias_ptr_type); |
| fprintf (f, ", nonarg: %u, reverse: %u\n", access->nonarg, access->reverse); |
| for (gensum_param_access *ch = access->first_child; |
| ch; |
| ch = ch->next_sibling) |
| dump_gensum_access (f, ch, indent + 2); |
| } |
| |
| |
| /* Print the access ACCESS to F. */ |
| |
| static void |
| dump_isra_access (FILE *f, param_access *access) |
| { |
| fprintf (f, " * Access to unit offset: %u", access->unit_offset); |
| fprintf (f, ", unit size: %u", access->unit_size); |
| fprintf (f, ", type: "); |
| print_generic_expr (f, access->type); |
| fprintf (f, ", alias_ptr_type: "); |
| print_generic_expr (f, access->alias_ptr_type); |
| if (access->certain) |
| fprintf (f, ", certain"); |
| else |
| fprintf (f, ", not certain"); |
| if (access->reverse) |
| fprintf (f, ", reverse"); |
| fprintf (f, "\n"); |
| } |
| |
| /* Dump the access ACCESS to stderr. */ |
| |
| DEBUG_FUNCTION void |
| debug_isra_access (param_access *access) |
| { |
| dump_isra_access (stderr, access); |
| } |
| |
| /* Dump DESC to F. */ |
| |
| static void |
| dump_gensum_param_descriptor (FILE *f, gensum_param_desc *desc) |
| { |
| if (desc->locally_unused) |
| fprintf (f, " unused with %i call_uses\n", desc->call_uses); |
| if (!desc->split_candidate) |
| { |
| fprintf (f, " not a candidate\n"); |
| return; |
| } |
| if (desc->by_ref) |
| fprintf (f, " by_ref with %u pass throughs\n", desc->ptr_pt_count); |
| |
| for (gensum_param_access *acc = desc->accesses; acc; acc = acc->next_sibling) |
| dump_gensum_access (f, acc, 2); |
| } |
| |
| /* Dump all parameter descriptors in PARAM_DESCRIPTIONS, assuming they |
| describe FNDECL, to F. */ |
| |
| static void |
| dump_gensum_param_descriptors (FILE *f, tree fndecl, |
| vec<gensum_param_desc> *param_descriptions) |
| { |
| tree parm = DECL_ARGUMENTS (fndecl); |
| for (unsigned i = 0; |
| i < param_descriptions->length (); |
| ++i, parm = DECL_CHAIN (parm)) |
| { |
| fprintf (f, " Descriptor for parameter %i ", i); |
| print_generic_expr (f, parm, TDF_UID); |
| fprintf (f, "\n"); |
| dump_gensum_param_descriptor (f, &(*param_descriptions)[i]); |
| } |
| } |
| |
| |
| /* Dump DESC to F. */ |
| |
| static void |
| dump_isra_param_descriptor (FILE *f, isra_param_desc *desc) |
| { |
| if (desc->locally_unused) |
| { |
| fprintf (f, " (locally) unused\n"); |
| } |
| if (!desc->split_candidate) |
| { |
| fprintf (f, " not a candidate for splitting\n"); |
| return; |
| } |
| fprintf (f, " param_size_limit: %u, size_reached: %u%s\n", |
| desc->param_size_limit, desc->size_reached, |
| desc->by_ref ? ", by_ref" : ""); |
| |
| for (unsigned i = 0; i < vec_safe_length (desc->accesses); ++i) |
| { |
| param_access *access = (*desc->accesses)[i]; |
| dump_isra_access (f, access); |
| } |
| } |
| |
| /* Dump all parameter descriptors in IFS, assuming it describes FNDECL, to |
| F. */ |
| |
| static void |
| dump_isra_param_descriptors (FILE *f, tree fndecl, |
| isra_func_summary *ifs) |
| { |
| tree parm = DECL_ARGUMENTS (fndecl); |
| if (!ifs->m_parameters) |
| { |
| fprintf (f, " parameter descriptors not available\n"); |
| return; |
| } |
| |
| for (unsigned i = 0; |
| i < ifs->m_parameters->length (); |
| ++i, parm = DECL_CHAIN (parm)) |
| { |
| fprintf (f, " Descriptor for parameter %i ", i); |
| print_generic_expr (f, parm, TDF_UID); |
| fprintf (f, "\n"); |
| dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]); |
| } |
| } |
| |
| /* Add SRC to the inputs of PARAM_FLOW, unless that would exceed storage, in |
| which case add nothing and return false; otherwise return true. SRC must |
| fit into an unsigned char. Used for purposes of transitive unused |
| parameter removal. */ |
| |
| static bool |
| add_src_to_param_flow (isra_param_flow *param_flow, int src) |
| { |
| gcc_checking_assert (src >= 0 && src <= UCHAR_MAX); |
| if (param_flow->length == IPA_SRA_MAX_PARAM_FLOW_LEN) |
| return false; |
| |
| param_flow->inputs[(int) param_flow->length] = src; |
| param_flow->length++; |
| return true; |
| } |
| |
| /* Add SRC to the inputs of PARAM_FLOW unless it is already there, and assert |
| that it is the only input. Used for purposes of transitive parameter |
| splitting. */ |
| |
| static void |
| set_single_param_flow_source (isra_param_flow *param_flow, int src) |
| { |
| gcc_checking_assert (src >= 0 && src <= UCHAR_MAX); |
| if (param_flow->length == 0) |
| { |
| param_flow->inputs[0] = src; |
| param_flow->length = 1; |
| } |
| else if (param_flow->length == 1) |
| gcc_assert (param_flow->inputs[0] == src); |
| else |
| gcc_unreachable (); |
| } |
| |
| /* Assert that there is only a single value in PARAM_FLOW's inputs and return |
| it. */ |
| |
| static unsigned |
| get_single_param_flow_source (const isra_param_flow *param_flow) |
| { |
| gcc_assert (param_flow->length == 1); |
| return param_flow->inputs[0]; |
| } |
| |
| /* Inspect all uses of NAME in FUN represented with NODE, including simple |
| arithmetic calculations involving NAME, and return a negative number if any |
| of them is used for anything other than an actual call argument, a simple |
| arithmetic operation or a debug statement. If there are no such uses, |
| return the number of actual arguments that this parameter eventually feeds |
| into (or zero if there are none). For each such actual argument, mark |
| PARM_NUM as one of its sources. ANALYZED is a bitmap that tracks which SSA |
| names we have already started investigating. */ |
| |
| static int |
| isra_track_scalar_value_uses (function *fun, cgraph_node *node, tree name, |
| int parm_num, bitmap analyzed) |
| { |
| int res = 0; |
| imm_use_iterator imm_iter; |
| gimple *stmt; |
| |
| FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name) |
| { |
| if (is_gimple_debug (stmt)) |
| continue; |
| |
| /* TODO: We could handle at least const builtin functions like arithmetic |
| operations below. */ |
| if (is_gimple_call (stmt)) |
| { |
| int all_uses = 0; |
| use_operand_p use_p; |
| FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) |
| all_uses++; |
| |
| gcall *call = as_a <gcall *> (stmt); |
| unsigned arg_count; |
| if (gimple_call_internal_p (call) |
| || (arg_count = gimple_call_num_args (call)) == 0) |
| { |
| res = -1; |
| break; |
| } |
| |
| cgraph_edge *cs = node->get_edge (stmt); |
| gcc_checking_assert (cs); |
| isra_call_summary *csum = call_sums->get_create (cs); |
| csum->init_inputs (arg_count); |
| |
| int simple_uses = 0; |
| for (unsigned i = 0; i < arg_count; i++) |
| if (gimple_call_arg (call, i) == name) |
| { |
| if (!add_src_to_param_flow (&csum->m_arg_flow[i], parm_num)) |
| { |
| simple_uses = -1; |
| break; |
| } |
| simple_uses++; |
| } |
| |
| if (simple_uses < 0 |
| || all_uses != simple_uses) |
| { |
| res = -1; |
| break; |
| } |
| res += all_uses; |
| } |
| else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt) |
| && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt)) |
| || gimple_code (stmt) == GIMPLE_PHI)) |
| { |
| tree lhs; |
| if (gimple_code (stmt) == GIMPLE_PHI) |
| lhs = gimple_phi_result (stmt); |
| else |
| lhs = gimple_assign_lhs (stmt); |
| |
| if (TREE_CODE (lhs) != SSA_NAME) |
| { |
| res = -1; |
| break; |
| } |
| gcc_assert (!gimple_vdef (stmt)); |
| if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs))) |
| { |
| int tmp = isra_track_scalar_value_uses (fun, node, lhs, parm_num, |
| analyzed); |
| if (tmp < 0) |
| { |
| res = tmp; |
| break; |
| } |
| res += tmp; |
| } |
| } |
| else |
| { |
| res = -1; |
| break; |
| } |
| } |
| return res; |
| } |
| |
| /* Inspect all uses of PARM, which must be a gimple register, in FUN (which is |
| also described by NODE) and simple arithmetic calculations involving PARM, |
| and return true if any of them is used for anything other than an actual |
| call argument, a simple arithmetic operation or a debug statement. If |
| there are no such uses, return false and store the number of actual |
| arguments that this parameter eventually feeds into (or zero if there are |
| none) to *CALL_USES_P. For each such actual argument, mark PARM_NUM as one |
| of its sources. |
| |
| This function is similar to ptr_parm_has_nonarg_uses but its results are |
| meant for unused parameter removal, as opposed to splitting of parameters |
| passed by reference or converting them to be passed by value. */ |
| |
| static bool |
| isra_track_scalar_param_local_uses (function *fun, cgraph_node *node, tree parm, |
| int parm_num, int *call_uses_p) |
| { |
| gcc_checking_assert (is_gimple_reg (parm)); |
| |
| tree name = ssa_default_def (fun, parm); |
| if (!name || has_zero_uses (name)) |
| { |
| *call_uses_p = 0; |
| return false; |
| } |
| |
| /* Edge summaries can only handle callers with fewer than 256 parameters. */ |
| if (parm_num > UCHAR_MAX) |
| return true; |
| |
| bitmap analyzed = BITMAP_ALLOC (NULL); |
| int call_uses = isra_track_scalar_value_uses (fun, node, name, parm_num, |
| analyzed); |
| BITMAP_FREE (analyzed); |
| if (call_uses < 0) |
| return true; |
| *call_uses_p = call_uses; |
| return false; |
| } |
| |
| /* Scan immediate uses of a default definition SSA name of a parameter PARM |
| and examine whether there are any uses that are not actual call arguments |
| or are otherwise infeasible. If so, return true, otherwise return false. |
| Create pass-through IPA flow records for any direct uses as call arguments |
| and, if returning false, store their number into *PT_COUNT_P. NODE and FUN |
| must represent the function that is currently analyzed, PARM_NUM must be |
| the index of the analyzed parameter. |
| |
| This function is similar to isra_track_scalar_param_local_uses but its |
| results are meant for splitting parameters passed by reference or turning |
| them into bits passed by value, as opposed to generic unused parameter |
| removal. */ |
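| |
| /* As an illustrative sketch (made-up names; at the GIMPLE level the loads |
| below are of course separate statements), in |
| |
| void foo (struct S *p) |
| { |
| int i = p->i; // OK, a load through the pointer |
| bar (p); // OK, a direct use as a call argument |
| escape = p; // a nonarg use, the pointer escapes |
| } |
| |
| the first two uses of P are acceptable while the third one makes this |
| function return true. */ |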
| |
| static bool |
| ptr_parm_has_nonarg_uses (cgraph_node *node, function *fun, tree parm, |
| int parm_num, unsigned *pt_count_p) |
| { |
| imm_use_iterator ui; |
| gimple *stmt; |
| tree name = ssa_default_def (fun, parm); |
| bool ret = false; |
| unsigned pt_count = 0; |
| |
| if (!name || has_zero_uses (name)) |
| return false; |
| |
| /* Edge summaries can only handle callers with fewer than 256 parameters. */ |
| if (parm_num > UCHAR_MAX) |
| return true; |
| |
| FOR_EACH_IMM_USE_STMT (stmt, ui, name) |
| { |
| unsigned uses_ok = 0; |
| use_operand_p use_p; |
| |
| if (is_gimple_debug (stmt)) |
| continue; |
| |
| if (gimple_assign_single_p (stmt)) |
| { |
| tree rhs = gimple_assign_rhs1 (stmt); |
| if (!TREE_THIS_VOLATILE (rhs)) |
| { |
| while (handled_component_p (rhs)) |
| rhs = TREE_OPERAND (rhs, 0); |
| if (TREE_CODE (rhs) == MEM_REF |
| && TREE_OPERAND (rhs, 0) == name |
| && integer_zerop (TREE_OPERAND (rhs, 1)) |
| && types_compatible_p (TREE_TYPE (rhs), |
| TREE_TYPE (TREE_TYPE (name)))) |
| uses_ok++; |
| } |
| } |
| else if (is_gimple_call (stmt)) |
| { |
| gcall *call = as_a <gcall *> (stmt); |
| unsigned arg_count; |
| if (gimple_call_internal_p (call) |
| || (arg_count = gimple_call_num_args (call)) == 0) |
| { |
| ret = true; |
| break; |
| } |
| |
| cgraph_edge *cs = node->get_edge (stmt); |
| gcc_checking_assert (cs); |
| isra_call_summary *csum = call_sums->get_create (cs); |
| csum->init_inputs (arg_count); |
| |
| for (unsigned i = 0; i < arg_count; ++i) |
| { |
| tree arg = gimple_call_arg (stmt, i); |
| |
| if (arg == name) |
| { |
| /* TODO: Allow &MEM_REF[name + offset] here, |
| ipa_param_body_adjustments::modify_call_stmt has to be |
| adjusted too. */ |
| csum->m_arg_flow[i].pointer_pass_through = true; |
| set_single_param_flow_source (&csum->m_arg_flow[i], parm_num); |
| pt_count++; |
| uses_ok++; |
| continue; |
| } |
| |
| if (!TREE_THIS_VOLATILE (arg)) |
| { |
| while (handled_component_p (arg)) |
| arg = TREE_OPERAND (arg, 0); |
| if (TREE_CODE (arg) == MEM_REF |
| && TREE_OPERAND (arg, 0) == name |
| && integer_zerop (TREE_OPERAND (arg, 1)) |
| && types_compatible_p (TREE_TYPE (arg), |
| TREE_TYPE (TREE_TYPE (name)))) |
| uses_ok++; |
| } |
| } |
| } |
| |
| /* If the number of valid uses does not match the number of |
| uses in this stmt there is an unhandled use. */ |
| unsigned all_uses = 0; |
| FOR_EACH_IMM_USE_ON_STMT (use_p, ui) |
| all_uses++; |
| |
| gcc_checking_assert (uses_ok <= all_uses); |
| if (uses_ok != all_uses) |
| { |
| ret = true; |
| break; |
| } |
| } |
| |
| *pt_count_p = pt_count; |
| return ret; |
| } |
| |
| /* Initialize vector of parameter descriptors of NODE. Return true if there |
| are any candidates for splitting or unused aggregate parameter removal, in |
| which case the function body must be scanned (the function can still return |
| false when there are only candidates for removal of register |
| parameters). */ |
| |
| static bool |
| create_parameter_descriptors (cgraph_node *node, |
| vec<gensum_param_desc> *param_descriptions) |
| { |
| function *fun = DECL_STRUCT_FUNCTION (node->decl); |
| bool ret = false; |
| |
| int num = 0; |
| for (tree parm = DECL_ARGUMENTS (node->decl); |
| parm; |
| parm = DECL_CHAIN (parm), num++) |
| { |
| const char *msg; |
| gensum_param_desc *desc = &(*param_descriptions)[num]; |
| /* param_descriptions vector is grown cleared in the caller. */ |
| desc->param_number = num; |
| decl2desc->put (parm, desc); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| print_generic_expr (dump_file, parm, TDF_UID); |
| |
| int scalar_call_uses; |
| tree type = TREE_TYPE (parm); |
| if (TREE_THIS_VOLATILE (parm)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, is volatile\n"); |
| continue; |
| } |
| if (!is_gimple_reg_type (type) && is_va_list_type (type)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, is a va_list type\n"); |
| continue; |
| } |
| if (TREE_ADDRESSABLE (parm)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, is addressable\n"); |
| continue; |
| } |
| if (TREE_ADDRESSABLE (type)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, type cannot be split\n"); |
| continue; |
| } |
| |
| if (is_gimple_reg (parm) |
| && !isra_track_scalar_param_local_uses (fun, node, parm, num, |
| &scalar_call_uses)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " is a scalar with only %i call uses\n", |
| scalar_call_uses); |
| |
| desc->locally_unused = true; |
| desc->call_uses = scalar_call_uses; |
| } |
| |
| if (POINTER_TYPE_P (type)) |
| { |
| type = TREE_TYPE (type); |
| |
| if (TREE_CODE (type) == FUNCTION_TYPE |
| || TREE_CODE (type) == METHOD_TYPE) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, reference to " |
| "a function\n"); |
| continue; |
| } |
| if (TYPE_VOLATILE (type)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, reference to " |
| "a volatile type\n"); |
| continue; |
| } |
| if (TREE_CODE (type) == ARRAY_TYPE |
| && TYPE_NONALIASED_COMPONENT (type)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, reference to " |
| "a nonaliased component array\n"); |
| continue; |
| } |
| if (!is_gimple_reg (parm)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, a reference which is " |
| "not a gimple register (probably addressable)\n"); |
| continue; |
| } |
| if (is_va_list_type (type)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, reference to " |
| "a va list\n"); |
| continue; |
| } |
| if (ptr_parm_has_nonarg_uses (node, fun, parm, num, |
| &desc->ptr_pt_count)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, reference has " |
| "nonarg uses\n"); |
| continue; |
| } |
| desc->by_ref = true; |
| } |
| else if (!AGGREGATE_TYPE_P (type)) |
| { |
| /* This is in an else branch because scalars passed by reference are |
| still candidates to be passed by value. */ |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, not an aggregate\n"); |
| continue; |
| } |
| |
| if (!COMPLETE_TYPE_P (type)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, not a complete type\n"); |
| continue; |
| } |
| if (!tree_fits_uhwi_p (TYPE_SIZE (type))) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, size not representable\n"); |
| continue; |
| } |
| unsigned HOST_WIDE_INT type_size |
| = tree_to_uhwi (TYPE_SIZE (type)) / BITS_PER_UNIT; |
| if (type_size == 0 |
| || type_size >= ISRA_ARG_SIZE_LIMIT) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, has zero or huge size\n"); |
| continue; |
| } |
| if (type_internals_preclude_sra_p (type, &msg)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " not a candidate, %s\n", msg); |
| continue; |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " is a candidate\n"); |
| |
| ret = true; |
| desc->split_candidate = true; |
| if (desc->by_ref) |
| desc->deref_index = by_ref_count++; |
| } |
| return ret; |
| } |
| |
| /* Return pointer to descriptor of parameter DECL or NULL if it cannot be |
| found, which happens if DECL is for a static chain. */ |
| |
| static gensum_param_desc * |
| get_gensum_param_desc (tree decl) |
| { |
| gcc_checking_assert (TREE_CODE (decl) == PARM_DECL); |
| gensum_param_desc **slot = decl2desc->get (decl); |
| if (!slot) |
| /* This can happen for static chains which we cannot handle so far. */ |
| return NULL; |
| gcc_checking_assert (*slot); |
| return *slot; |
| } |
| |
| |
| /* Remove parameter described by DESC from candidates for IPA-SRA splitting and |
| write REASON to the dump file if there is one. */ |
| |
| static void |
| disqualify_split_candidate (gensum_param_desc *desc, const char *reason) |
| { |
| if (!desc->split_candidate) |
| return; |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "! Disqualifying parameter number %i - %s\n", |
| desc->param_number, reason); |
| |
| desc->split_candidate = false; |
| } |
| |
| /* Remove DECL from candidates for IPA-SRA and write REASON to the dump file if |
| there is one. */ |
| |
| static void |
| disqualify_split_candidate (tree decl, const char *reason) |
| { |
| gensum_param_desc *desc = get_gensum_param_desc (decl); |
| if (desc) |
| disqualify_split_candidate (desc, reason); |
| } |
| |
| /* Allocate a new access to DESC and fill it in with OFFSET and SIZE. But |
| first check that there are not too many of them already; if there are, do |
| not allocate anything and return NULL. */ |
| |
| static gensum_param_access * |
| allocate_access (gensum_param_desc *desc, |
| HOST_WIDE_INT offset, HOST_WIDE_INT size) |
| { |
| if (desc->access_count |
| == (unsigned) param_ipa_sra_max_replacements) |
| { |
| disqualify_split_candidate (desc, "Too many replacement candidates"); |
| return NULL; |
| } |
| |
| gensum_param_access *access |
| = (gensum_param_access *) obstack_alloc (&gensum_obstack, |
| sizeof (gensum_param_access)); |
| memset (access, 0, sizeof (*access)); |
| access->offset = offset; |
| access->size = size; |
| return access; |
| } |
| |
| /* The context in which scan_expr_access has been called: whether it deals |
| with a load, a function argument, or a store. Please note that in rare |
| circumstances when it is not clear whether an access is a load or a store, |
| ISRA_CTX_STORE is used too. */ |
| |
| enum isra_scan_context {ISRA_CTX_LOAD, ISRA_CTX_ARG, ISRA_CTX_STORE}; |
| |
| /* Return an access describing memory access to the variable described by DESC |
| at OFFSET with SIZE in context CTX, starting at pointer FIRST to the linked |
| list of accesses at the given tree level. If the access does not exist, |
| attempt to create it and put it into the appropriate place in the access |
| tree, but fail and return NULL if there are already too many accesses, if |
| it would create a partially overlapping access or if an access would end up |
| within a pre-existing non-call access. */ |
| |
| static gensum_param_access * |
| get_access_1 (gensum_param_desc *desc, gensum_param_access **first, |
| HOST_WIDE_INT offset, HOST_WIDE_INT size, isra_scan_context ctx) |
| { |
| gensum_param_access *access = *first, **ptr = first; |
| |
| if (!access) |
| { |
| /* No pre-existing access at this level, just create it. */ |
| gensum_param_access *a = allocate_access (desc, offset, size); |
| if (!a) |
| return NULL; |
| *first = a; |
| return *first; |
| } |
| |
| if (access->offset >= offset + size) |
| { |
| /* We want to squeeze it in front of the very first access, just do |
| it. */ |
| gensum_param_access *r = allocate_access (desc, offset, size); |
| if (!r) |
| return NULL; |
| r->next_sibling = access; |
| *first = r; |
| return r; |
| } |
| |
| /* Skip all accesses that have to come before us until the next sibling is |
| already too far. */ |
| while (offset >= access->offset + access->size |
| && access->next_sibling |
| && access->next_sibling->offset < offset + size) |
| { |
| ptr = &access->next_sibling; |
| access = access->next_sibling; |
| } |
| |
| /* At this point we know we do not belong before access. */ |
| gcc_assert (access->offset < offset + size); |
| |
| if (access->offset == offset && access->size == size) |
| /* We found what we were looking for. */ |
| return access; |
| |
| if (access->offset <= offset |
| && access->offset + access->size >= offset + size) |
| { |
| /* We fit into access which is larger than us. We need to find/create |
| something below access. But we only allow nesting in call |
| arguments. */ |
| if (access->nonarg) |
| return NULL; |
| |
| return get_access_1 (desc, &access->first_child, offset, size, ctx); |
| } |
| |
| if (offset <= access->offset |
| && offset + size >= access->offset + access->size) |
| /* We are actually bigger than access, which fully fits into us, take its |
| place and make all accesses fitting into it its children. */ |
| { |
| /* But first, we only allow nesting in call arguments so check if that is |
| what we are trying to represent. */ |
| if (ctx != ISRA_CTX_ARG) |
| return NULL; |
| |
| gensum_param_access *r = allocate_access (desc, offset, size); |
| if (!r) |
| return NULL; |
| r->first_child = access; |
| |
| while (access->next_sibling |
| && access->next_sibling->offset < offset + size) |
| access = access->next_sibling; |
| if (access->offset + access->size > offset + size) |
| { |
| /* This must be a different access and, since accesses are sorted, the |
| following assertion must hold, signalling a partial overlap. */ |
| gcc_assert (access->offset > offset); |
| return NULL; |
| } |
| |
| r->next_sibling = access->next_sibling; |
| access->next_sibling = NULL; |
| *ptr = r; |
| return r; |
| } |
| |
| if (offset >= access->offset + access->size) |
| { |
| /* We belong after access. */ |
| gensum_param_access *r = allocate_access (desc, offset, size); |
| if (!r) |
| return NULL; |
| r->next_sibling = access->next_sibling; |
| access->next_sibling = r; |
| return r; |
| } |
| |
| if (offset < access->offset) |
| { |
| /* We know the following, otherwise we would have created a |
| super-access. */ |
| gcc_checking_assert (offset + size < access->offset + access->size); |
| return NULL; |
| } |
| |
| if (offset + size > access->offset + access->size) |
| { |
| /* Likewise. */ |
| gcc_checking_assert (offset > access->offset); |
| return NULL; |
| } |
| |
| gcc_unreachable (); |
| } |
| |
| /* Return an access describing memory access to the variable described by DESC |
| at OFFSET with SIZE, and mark it as used in context CTX. Attempt to create |
| it if it does not exist, but fail and return NULL if there are already too |
| many accesses, if it would create a partially overlapping access or if an |
| access would end up in a non-call access. */ |
| |
| static gensum_param_access * |
| get_access (gensum_param_desc *desc, HOST_WIDE_INT offset, HOST_WIDE_INT size, |
| isra_scan_context ctx) |
| { |
| gcc_checking_assert (desc->split_candidate); |
| |
| gensum_param_access *access = get_access_1 (desc, &desc->accesses, offset, |
| size, ctx); |
| if (!access) |
| { |
| disqualify_split_candidate (desc, |
| "Bad access overlap or too many accesses"); |
| return NULL; |
| } |
| |
| switch (ctx) |
| { |
| case ISRA_CTX_STORE: |
| gcc_assert (!desc->by_ref); |
| /* Fall-through */ |
| case ISRA_CTX_LOAD: |
| access->nonarg = true; |
| break; |
| case ISRA_CTX_ARG: |
| break; |
| } |
| |
| return access; |
| } |
| |
| /* Verify that the parameter access tree starting with ACCESS is in good |
| shape. PARENT_OFFSET and PARENT_SIZE are the corresponding fields of the |
| parent of ACCESS, or zero if there is none. Return true if a problem was |
| found. */ |
| |
| static bool |
| verify_access_tree_1 (gensum_param_access *access, HOST_WIDE_INT parent_offset, |
| HOST_WIDE_INT parent_size) |
| { |
| while (access) |
| { |
| gcc_assert (access->offset >= 0 && access->size >= 0); |
| |
| if (parent_size != 0) |
| { |
| if (access->offset < parent_offset) |
| { |
| error ("Access offset before parent offset"); |
| return true; |
| } |
| if (access->size >= parent_size) |
| { |
| error ("Access size greater or equal to its parent size"); |
| return true; |
| } |
| if (access->offset + access->size > parent_offset + parent_size) |
| { |
| error ("Access terminates outside of its parent"); |
| return true; |
| } |
| } |
| |
| if (verify_access_tree_1 (access->first_child, access->offset, |
| access->size)) |
| return true; |
| |
| if (access->next_sibling |
| && (access->next_sibling->offset < access->offset + access->size)) |
| { |
| error ("Access overlaps with its sibling"); |
| return true; |
| } |
| |
| access = access->next_sibling; |
| } |
| return false; |
| } |
| |
| /* Verify that parameter access tree starting with ACCESS is in good shape, |
| halt compilation and dump the tree to stderr if not. */ |
| |
| DEBUG_FUNCTION void |
| isra_verify_access_tree (gensum_param_access *access) |
| { |
| if (verify_access_tree_1 (access, 0, 0)) |
| { |
| for (; access; access = access->next_sibling) |
| dump_gensum_access (stderr, access, 2); |
| internal_error ("IPA-SRA access verification failed"); |
| } |
| } |
| |
| |
| /* Callback of walk_stmt_load_store_addr_ops visit_addr used to detect |
| GIMPLE_ASM operands with memory constraints which cannot be scalarized. */ |
| |
| static bool |
| asm_visit_addr (gimple *, tree op, tree, void *) |
| { |
| op = get_base_address (op); |
| if (op |
| && TREE_CODE (op) == PARM_DECL) |
| disqualify_split_candidate (op, "Non-scalarizable GIMPLE_ASM operand."); |
| |
| return false; |
| } |
| |
| /* Mark a dereference of parameter identified by DESC of distance DIST in a |
| basic block BB, unless the BB has already been marked as potentially |
| final. */ |
| |
| static void |
| mark_param_dereference (gensum_param_desc *desc, HOST_WIDE_INT dist, |
| basic_block bb) |
| { |
| gcc_assert (desc->by_ref); |
| gcc_checking_assert (desc->split_candidate); |
| |
| if (bitmap_bit_p (final_bbs, bb->index)) |
| return; |
| |
| int idx = bb->index * by_ref_count + desc->deref_index; |
| if (bb_dereferences[idx] < dist) |
| bb_dereferences[idx] = dist; |
| } |
| |
| /* Return true if any potential replacements should use NEW_TYPE as opposed to |
| the previously recorded OLD_TYPE. */ |
| |
| static bool |
| type_prevails_p (tree old_type, tree new_type) |
| { |
| if (old_type == new_type) |
| return false; |
| |
| /* Non-aggregates are always better. */ |
| if (!is_gimple_reg_type (old_type) |
| && is_gimple_reg_type (new_type)) |
| return true; |
| if (is_gimple_reg_type (old_type) |
| && !is_gimple_reg_type (new_type)) |
| return false; |
| |
| /* Prefer any complex or vector type over any other scalar type. */ |
| if (TREE_CODE (old_type) != COMPLEX_TYPE |
| && TREE_CODE (old_type) != VECTOR_TYPE |
| && (TREE_CODE (new_type) == COMPLEX_TYPE |
| || TREE_CODE (new_type) == VECTOR_TYPE)) |
| return true; |
| if ((TREE_CODE (old_type) == COMPLEX_TYPE |
| || TREE_CODE (old_type) == VECTOR_TYPE) |
| && TREE_CODE (new_type) != COMPLEX_TYPE |
| && TREE_CODE (new_type) != VECTOR_TYPE) |
| return false; |
| |
| /* Use the integral type with the bigger precision. */ |
| if (INTEGRAL_TYPE_P (old_type) |
| && INTEGRAL_TYPE_P (new_type)) |
| return (TYPE_PRECISION (new_type) > TYPE_PRECISION (old_type)); |
| |
| /* Attempt to disregard any integral type with non-full precision. */ |
| if (INTEGRAL_TYPE_P (old_type) |
| && (TREE_INT_CST_LOW (TYPE_SIZE (old_type)) |
| != TYPE_PRECISION (old_type))) |
| return true; |
| if (INTEGRAL_TYPE_P (new_type) |
| && (TREE_INT_CST_LOW (TYPE_SIZE (new_type)) |
| != TYPE_PRECISION (new_type))) |
| return false; |
| /* Stabilize the selection. */ |
| return TYPE_UID (old_type) < TYPE_UID (new_type); |
| } |
| |
| /* When scanning an expression which is a call argument, this structure |
| specifies the call and the position of the argument. */ |
| |
| struct scan_call_info |
| { |
| /* Call graph edge representing the call. */ |
| cgraph_edge *cs; |
| /* Total number of arguments in the call. */ |
| unsigned argument_count; |
| /* Number of the actual argument being scanned. */ |
| unsigned arg_idx; |
| }; |
| |
| /* Record use of ACCESS which belongs to a parameter described by DESC in a |
| call argument described by CALL_INFO. */ |
| |
| static void |
| record_nonregister_call_use (gensum_param_desc *desc, |
| scan_call_info *call_info, |
| unsigned unit_offset, unsigned unit_size) |
| { |
| isra_call_summary *csum = call_sums->get_create (call_info->cs); |
| csum->init_inputs (call_info->argument_count); |
| |
| isra_param_flow *param_flow = &csum->m_arg_flow[call_info->arg_idx]; |
| param_flow->aggregate_pass_through = true; |
| set_single_param_flow_source (param_flow, desc->param_number); |
| param_flow->unit_offset = unit_offset; |
| param_flow->unit_size = unit_size; |
| desc->call_uses++; |
| } |
| |
| /* Callback of walk_aliased_vdefs, just mark that there was a possible |
| modification. */ |
| |
| static bool |
| mark_maybe_modified (ao_ref *, tree, void *data) |
| { |
| bool *maybe_modified = (bool *) data; |
| *maybe_modified = true; |
| return true; |
| } |
| |
| /* Analyze expression EXPR from GIMPLE for accesses to parameters. CTX |
| specifies whether EXPR is used in a load, a store or as a call argument. |
| BB must be the basic block in which EXPR resides. If CTX specifies call |
| argument context, CALL_INFO must describe that call and argument position, |
| otherwise it is ignored. */ |
| |
| static void |
| scan_expr_access (tree expr, gimple *stmt, isra_scan_context ctx, |
| basic_block bb, scan_call_info *call_info = NULL) |
| { |
| poly_int64 poffset, psize, pmax_size; |
| HOST_WIDE_INT offset, size, max_size; |
| tree base; |
| bool deref = false; |
| bool reverse; |
| |
| if (TREE_CODE (expr) == BIT_FIELD_REF |
| || TREE_CODE (expr) == IMAGPART_EXPR |
| || TREE_CODE (expr) == REALPART_EXPR) |
| expr = TREE_OPERAND (expr, 0); |
| |
| base = get_ref_base_and_extent (expr, &poffset, &psize, &pmax_size, &reverse); |
| |
| if (TREE_CODE (base) == MEM_REF) |
| { |
| tree op = TREE_OPERAND (base, 0); |
| if (TREE_CODE (op) != SSA_NAME |
| || !SSA_NAME_IS_DEFAULT_DEF (op)) |
| return; |
| base = SSA_NAME_VAR (op); |
| if (!base) |
| return; |
| deref = true; |
| } |
| if (TREE_CODE (base) != PARM_DECL) |
| return; |
| |
| gensum_param_desc *desc = get_gensum_param_desc (base); |
| if (!desc || !desc->split_candidate) |
| return; |
| |
| if (!poffset.is_constant (&offset) |
| || !psize.is_constant (&size) |
| || !pmax_size.is_constant (&max_size)) |
| { |
| disqualify_split_candidate (desc, "Encountered a polynomial-sized " |
| "access."); |
| return; |
| } |
| if (size < 0 || size != max_size) |
| { |
| disqualify_split_candidate (desc, "Encountered a variable sized access."); |
| return; |
| } |
| if (TREE_CODE (expr) == COMPONENT_REF |
| && DECL_BIT_FIELD (TREE_OPERAND (expr, 1))) |
| { |
| disqualify_split_candidate (desc, "Encountered a bit-field access."); |
| return; |
| } |
| if (offset < 0) |
| { |
| disqualify_split_candidate (desc, "Encountered an access at a " |
| "negative offset."); |
| return; |
| } |
| gcc_assert ((offset % BITS_PER_UNIT) == 0); |
| gcc_assert ((size % BITS_PER_UNIT) == 0); |
| if ((offset / BITS_PER_UNIT) >= (UINT_MAX - ISRA_ARG_SIZE_LIMIT) |
| || (size / BITS_PER_UNIT) >= ISRA_ARG_SIZE_LIMIT) |
| { |
| disqualify_split_candidate (desc, "Encountered an access with too big " |
| "offset or size"); |
| return; |
| } |
| |
| tree type = TREE_TYPE (expr); |
| unsigned int exp_align = get_object_alignment (expr); |
| |
| if (exp_align < TYPE_ALIGN (type)) |
| { |
| disqualify_split_candidate (desc, "Underaligned access."); |
| return; |
| } |
| |
| if (deref) |
| { |
| if (!desc->by_ref) |
| { |
| disqualify_split_candidate (desc, "Dereferencing a non-reference."); |
| return; |
| } |
| else if (ctx == ISRA_CTX_STORE) |
| { |
| disqualify_split_candidate (desc, "Storing to data passed by " |
| "reference."); |
| return; |
| } |
| |
| if (!aa_walking_limit) |
| { |
| disqualify_split_candidate (desc, "Out of alias analysis step " |
| "limit."); |
| return; |
| } |
| |
| gcc_checking_assert (gimple_vuse (stmt)); |
| bool maybe_modified = false; |
| ao_ref ar; |
| |
| ao_ref_init (&ar, expr); |
| bitmap visited = BITMAP_ALLOC (NULL); |
| int walked = walk_aliased_vdefs (&ar, gimple_vuse (stmt), |
| mark_maybe_modified, &maybe_modified, |
| &visited, NULL, aa_walking_limit); |
| BITMAP_FREE (visited); |
| if (walked > 0) |
| { |
| gcc_assert (aa_walking_limit > walked); |
| aa_walking_limit = aa_walking_limit - walked; |
| } |
| if (walked < 0) |
| aa_walking_limit = 0; |
| if (maybe_modified || walked < 0) |
| { |
| disqualify_split_candidate (desc, "Data passed by reference possibly " |
| "modified through an alias."); |
| return; |
| } |
| else |
| mark_param_dereference (desc, offset + size, bb); |
| } |
| else |
| /* Pointer parameters with direct uses should have been ruled out by |
| analyzing SSA default def when looking at the parameters. */ |
| gcc_assert (!desc->by_ref); |
| |
| gensum_param_access *access = get_access (desc, offset, size, ctx); |
| if (!access) |
| return; |
| |
| if (ctx == ISRA_CTX_ARG) |
| { |
| gcc_checking_assert (call_info); |
| |
| if (!deref) |
| record_nonregister_call_use (desc, call_info, offset / BITS_PER_UNIT, |
| size / BITS_PER_UNIT); |
| else |
| /* This is not a pass-through of a pointer; this is a use like any |
| other. */ |
| access->nonarg = true; |
| } |
| |
| if (!access->type) |
| { |
| access->type = type; |
| access->alias_ptr_type = reference_alias_ptr_type (expr); |
| access->reverse = reverse; |
| } |
| else |
| { |
| if (exp_align < TYPE_ALIGN (access->type)) |
| { |
| disqualify_split_candidate (desc, "Reference has lower alignment " |
| "than a previous one."); |
| return; |
| } |
| if (access->alias_ptr_type != reference_alias_ptr_type (expr)) |
| { |
| disqualify_split_candidate (desc, "Multiple alias pointer types."); |
| return; |
| } |
| if (access->reverse != reverse) |
| { |
| disqualify_split_candidate (desc, "Both normal and reverse " |
| "scalar storage order."); |
| return; |
| } |
| if (!deref |
| && (AGGREGATE_TYPE_P (type) || AGGREGATE_TYPE_P (access->type)) |
| && (TYPE_MAIN_VARIANT (access->type) != TYPE_MAIN_VARIANT (type))) |
| { |
| /* We need the same aggregate type on all accesses to be able to |
| distinguish transformation spots from pass-through arguments in |
| the transformation phase. */ |
| disqualify_split_candidate (desc, "We do not support aggregate " |
| "type punning."); |
| return; |
| } |
| |
| if (type_prevails_p (access->type, type)) |
| access->type = type; |
| } |
| } |
| |
| /* Scan the body of the function described by NODE and FUN and create access |
| trees for its parameters. */ |
| |
| static void |
| scan_function (cgraph_node *node, struct function *fun) |
| { |
| basic_block bb; |
| |
| FOR_EACH_BB_FN (bb, fun) |
| { |
| gimple_stmt_iterator gsi; |
| for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
| { |
| gimple *stmt = gsi_stmt (gsi); |
| |
| if (stmt_can_throw_external (fun, stmt)) |
| bitmap_set_bit (final_bbs, bb->index); |
| switch (gimple_code (stmt)) |
| { |
| case GIMPLE_RETURN: |
| { |
| tree t = gimple_return_retval (as_a <greturn *> (stmt)); |
| if (t != NULL_TREE) |
| scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb); |
| bitmap_set_bit (final_bbs, bb->index); |
| } |
| break; |
| |
| case GIMPLE_ASSIGN: |
| if (gimple_assign_single_p (stmt) |
| && !gimple_clobber_p (stmt)) |
| { |
| tree rhs = gimple_assign_rhs1 (stmt); |
| scan_expr_access (rhs, stmt, ISRA_CTX_LOAD, bb); |
| tree lhs = gimple_assign_lhs (stmt); |
| scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb); |
| } |
| break; |
| |
| case GIMPLE_CALL: |
| { |
| unsigned argument_count = gimple_call_num_args (stmt); |
| isra_scan_context ctx = ISRA_CTX_ARG; |
| scan_call_info call_info, *call_info_p = &call_info; |
| if (gimple_call_internal_p (stmt)) |
| { |
| call_info_p = NULL; |
| ctx = ISRA_CTX_LOAD; |
| internal_fn ifn = gimple_call_internal_fn (stmt); |
| if (internal_store_fn_p (ifn)) |
| ctx = ISRA_CTX_STORE; |
| } |
| else |
| { |
| call_info.cs = node->get_edge (stmt); |
| call_info.argument_count = argument_count; |
| } |
| |
| for (unsigned i = 0; i < argument_count; i++) |
| { |
| call_info.arg_idx = i; |
| scan_expr_access (gimple_call_arg (stmt, i), stmt, |
| ctx, bb, call_info_p); |
| } |
| |
| tree lhs = gimple_call_lhs (stmt); |
| if (lhs) |
| scan_expr_access (lhs, stmt, ISRA_CTX_STORE, bb); |
| int flags = gimple_call_flags (stmt); |
| if (((flags & (ECF_CONST | ECF_PURE)) == 0) |
| || (flags & ECF_LOOPING_CONST_OR_PURE)) |
| bitmap_set_bit (final_bbs, bb->index); |
| } |
| break; |
| |
| case GIMPLE_ASM: |
| { |
| gasm *asm_stmt = as_a <gasm *> (stmt); |
| walk_stmt_load_store_addr_ops (asm_stmt, NULL, NULL, NULL, |
| asm_visit_addr); |
| bitmap_set_bit (final_bbs, bb->index); |
| |
| for (unsigned i = 0; i < gimple_asm_ninputs (asm_stmt); i++) |
| { |
| tree t = TREE_VALUE (gimple_asm_input_op (asm_stmt, i)); |
| scan_expr_access (t, stmt, ISRA_CTX_LOAD, bb); |
| } |
| for (unsigned i = 0; i < gimple_asm_noutputs (asm_stmt); i++) |
| { |
| tree t = TREE_VALUE (gimple_asm_output_op (asm_stmt, i)); |
| scan_expr_access (t, stmt, ISRA_CTX_STORE, bb); |
| } |
| } |
| break; |
| |
| default: |
| break; |
| } |
| } |
| } |
| } |
| |
| /* Return true if SSA_NAME NAME of function described by FUN is only used in |
| return statements, or if results of any operations it is involved in are |
| only used in return statements. ANALYZED is a bitmap that tracks which SSA |
| names we have already started investigating. */ |
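| |
| /* For instance, in the following illustrative sketch (hypothetical code, |
| not taken from any testcase), the SSA name holding the result of the call |
| feeds only an addition whose result is returned, so this predicate holds |
| for it: |
| |
| int foo (void) |
| { |
| int tmp = bar (); |
| return tmp + 1; |
| } */ |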
| |
| static bool |
| ssa_name_only_returned_p (function *fun, tree name, bitmap analyzed) |
| { |
| bool res = true; |
| imm_use_iterator imm_iter; |
| gimple *stmt; |
| |
| FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name) |
| { |
| if (is_gimple_debug (stmt)) |
| continue; |
| |
| if (gimple_code (stmt) == GIMPLE_RETURN) |
| { |
| tree t = gimple_return_retval (as_a <greturn *> (stmt)); |
| if (t != name) |
| { |
| res = false; |
| break; |
| } |
| } |
| else if (!stmt_unremovable_because_of_non_call_eh_p (fun, stmt) |
| && ((is_gimple_assign (stmt) && !gimple_has_volatile_ops (stmt)) |
| || gimple_code (stmt) == GIMPLE_PHI)) |
| { |
| /* TODO: And perhaps for const function calls too? */ |
| tree lhs; |
| if (gimple_code (stmt) == GIMPLE_PHI) |
| lhs = gimple_phi_result (stmt); |
| else |
| lhs = gimple_assign_lhs (stmt); |
| |
| if (TREE_CODE (lhs) != SSA_NAME) |
| { |
| res = false; |
| break; |
| } |
| gcc_assert (!gimple_vdef (stmt)); |
| if (bitmap_set_bit (analyzed, SSA_NAME_VERSION (lhs)) |
| && !ssa_name_only_returned_p (fun, lhs, analyzed)) |
| { |
| res = false; |
| break; |
| } |
| } |
| else |
| { |
| res = false; |
| break; |
| } |
| } |
| return res; |
| } |
| |
| /* Inspect the uses of the return value of the call associated with CS, and if |
| it is not used or if it is only used to construct the return value of the |
| caller, mark it as such in the call summary. Also check for bit-misaligned |
| arguments. */ |
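| |
| /* For example, in an illustrative sketch like |
| |
| int wrap (int x) { return compute (x); } |
| |
| (where compute is hypothetical), the LHS of the call is an SSA name used |
| only by the return statement, so m_return_returned is set on the edge; if |
| wrap dropped the result instead, m_return_ignored would be set. */ |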
| |
| static void |
| isra_analyze_call (cgraph_edge *cs) |
| { |
| gcall *call_stmt = cs->call_stmt; |
| unsigned count = gimple_call_num_args (call_stmt); |
| isra_call_summary *csum = call_sums->get_create (cs); |
| |
| for (unsigned i = 0; i < count; i++) |
| { |
| tree arg = gimple_call_arg (call_stmt, i); |
| if (is_gimple_reg (arg)) |
| continue; |
| |
| tree offset; |
| poly_int64 bitsize, bitpos; |
| machine_mode mode; |
| int unsignedp, reversep, volatilep = 0; |
| get_inner_reference (arg, &bitsize, &bitpos, &offset, &mode, |
| &unsignedp, &reversep, &volatilep); |
| if (!multiple_p (bitpos, BITS_PER_UNIT)) |
| { |
| csum->m_bit_aligned_arg = true; |
| break; |
| } |
| } |
| |
| tree lhs = gimple_call_lhs (call_stmt); |
| if (lhs) |
| { |
| /* TODO: Also detect aggregates on a LHS of a call that are only returned |
| from this function (without being read anywhere). */ |
| if (TREE_CODE (lhs) == SSA_NAME) |
| { |
| bitmap analyzed = BITMAP_ALLOC (NULL); |
| if (ssa_name_only_returned_p (DECL_STRUCT_FUNCTION (cs->caller->decl), |
| lhs, analyzed)) |
| csum->m_return_returned = true; |
| BITMAP_FREE (analyzed); |
| } |
| } |
| else |
| csum->m_return_ignored = true; |
| } |
| |
| /* Look at all calls going out of NODE and perform all analyses necessary for |
| IPA-SRA that are not done at body scan time, or that must be done even when |
| the body is not scanned because the function is not a candidate. */ |
| |
| static void |
| isra_analyze_all_outgoing_calls (cgraph_node *node) |
| { |
| for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee) |
| isra_analyze_call (cs); |
| for (cgraph_edge *cs = node->indirect_calls; cs; cs = cs->next_callee) |
| isra_analyze_call (cs); |
| } |
| |
| /* Dump a dereferences table with heading STR to file F. */ |
| |
| static void |
| dump_dereferences_table (FILE *f, struct function *fun, const char *str) |
| { |
| basic_block bb; |
| |
| fprintf (f, "%s", str); |
| FOR_BB_BETWEEN (bb, ENTRY_BLOCK_PTR_FOR_FN (fun), |
| EXIT_BLOCK_PTR_FOR_FN (fun), next_bb) |
| { |
| fprintf (f, "%4i %i ", bb->index, bitmap_bit_p (final_bbs, bb->index)); |
| if (bb != EXIT_BLOCK_PTR_FOR_FN (fun)) |
| { |
| int i; |
| for (i = 0; i < by_ref_count; i++) |
| { |
| int idx = bb->index * by_ref_count + i; |
| fprintf (f, " %4" HOST_WIDE_INT_PRINT "d", bb_dereferences[idx]); |
| } |
| } |
| fprintf (f, "\n"); |
| } |
| fprintf (f, "\n"); |
| } |
| |
| /* Propagate distances in bb_dereferences in the opposite direction to the |
| control flow edges, in each step storing the maximum of the current value |
| and the minimum of all successors. These steps are repeated until the table |
| stabilizes. Note that BBs which might terminate the function (according to |
| the final_bbs bitmap) are never updated in this way. */ |
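| |
| /* As an illustrative example (hypothetical code), consider a by-reference |
| parameter P dereferenced on both arms of a conditional: |
| |
| if (c) |
| x = p->a; <-- covers bits [0, 32) of *p, distance 32 |
| else |
| y = p->b; <-- covers bits [32, 64) of *p, distance 64 |
| |
| The block with the condition inherits min (32, 64) = 32 from its |
| successors and so, eventually, does the entry BB, meaning that only the |
| first four bytes of *P are known to be dereferenced on every path and may |
| be safely loaded in callers. */ |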
| |
| static void |
| propagate_dereference_distances (struct function *fun) |
| { |
| basic_block bb; |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| dump_dereferences_table (dump_file, fun, |
| "Dereference table before propagation:\n"); |
| |
| auto_vec<basic_block> queue (last_basic_block_for_fn (fun)); |
| queue.quick_push (ENTRY_BLOCK_PTR_FOR_FN (fun)); |
| FOR_EACH_BB_FN (bb, fun) |
| { |
| queue.quick_push (bb); |
| bb->aux = bb; |
| } |
| |
| while (!queue.is_empty ()) |
| { |
| edge_iterator ei; |
| edge e; |
| bool change = false; |
| int i; |
| |
| bb = queue.pop (); |
| bb->aux = NULL; |
| |
| if (bitmap_bit_p (final_bbs, bb->index)) |
| continue; |
| |
| for (i = 0; i < by_ref_count; i++) |
| { |
| int idx = bb->index * by_ref_count + i; |
| bool first = true; |
| HOST_WIDE_INT inh = 0; |
| |
| FOR_EACH_EDGE (e, ei, bb->succs) |
| { |
| int succ_idx = e->dest->index * by_ref_count + i; |
| |
| if (e->dest == EXIT_BLOCK_PTR_FOR_FN (fun)) |
| continue; |
| |
| if (first) |
| { |
| first = false; |
| inh = bb_dereferences [succ_idx]; |
| } |
| else if (bb_dereferences [succ_idx] < inh) |
| inh = bb_dereferences [succ_idx]; |
| } |
| |
| if (!first && bb_dereferences[idx] < inh) |
| { |
| bb_dereferences[idx] = inh; |
| change = true; |
| } |
| } |
| |
| if (change) |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| { |
| if (e->src->aux) |
| continue; |
| |
| e->src->aux = e->src; |
| queue.quick_push (e->src); |
| } |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| dump_dereferences_table (dump_file, fun, |
| "Dereference table after propagation:\n"); |
| } |
| |
| /* Perform basic checks on ACCESS to PARM described by DESC and all its |
| children, return true if the parameter cannot be split, otherwise return |
| false and update *NONARG_ACC_SIZE and *ONLY_CALLS. ENTRY_BB_INDEX must be |
| the index of the entry BB in the function of PARM. */ |
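| |
| /* For example (an illustrative sketch), if PARM is a pointer (a scalar mode |
| such as DImode) to a hypothetical struct s { char buf[3]; }, an access |
| covering the whole of buf has the BLKmode type char[3], so such a split |
| is rejected: replacing a register-passed pointer with a BLKmode piece is |
| hardly ever a win. */ |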
| |
| static bool |
| check_gensum_access (tree parm, gensum_param_desc *desc, |
| gensum_param_access *access, |
| HOST_WIDE_INT *nonarg_acc_size, bool *only_calls, |
| int entry_bb_index) |
| { |
| if (access->nonarg) |
| { |
| *only_calls = false; |
| *nonarg_acc_size += access->size; |
| |
| if (access->first_child) |
| { |
| disqualify_split_candidate (desc, "Overlapping non-call uses."); |
| return true; |
| } |
| } |
| /* Do not decompose a non-BLKmode param in a way that would create |
| BLKmode params. Especially for by-reference passing (thus, |
| pointer-type param) this is hardly worthwhile. */ |
| if (DECL_MODE (parm) != BLKmode |
| && TYPE_MODE (access->type) == BLKmode) |
| { |
| disqualify_split_candidate (desc, "Would convert a non-BLK to a BLK."); |
| return true; |
| } |
| |
| if (desc->by_ref) |
| { |
| int idx = (entry_bb_index * by_ref_count + desc->deref_index); |
| if ((access->offset + access->size) > bb_dereferences[idx]) |
| { |
| disqualify_split_candidate (desc, "Would create a possibly " |
| "illegal dereference in a caller."); |
| return true; |
| } |
| } |
| |
| for (gensum_param_access *ch = access->first_child; |
| ch; |
| ch = ch->next_sibling) |
| if (check_gensum_access (parm, desc, ch, nonarg_acc_size, only_calls, |
| entry_bb_index)) |
| return true; |
| |
| return false; |
| } |
| |
| /* Copy data from FROM and all of its children to a vector of accesses in IPA |
| descriptor DESC. */ |
| |
| static void |
| copy_accesses_to_ipa_desc (gensum_param_access *from, isra_param_desc *desc) |
| { |
| param_access *to = ggc_cleared_alloc<param_access> (); |
| gcc_checking_assert ((from->offset % BITS_PER_UNIT) == 0); |
| gcc_checking_assert ((from->size % BITS_PER_UNIT) == 0); |
| to->unit_offset = from->offset / BITS_PER_UNIT; |
| to->unit_size = from->size / BITS_PER_UNIT; |
| to->type = from->type; |
| to->alias_ptr_type = from->alias_ptr_type; |
| to->certain = from->nonarg; |
| to->reverse = from->reverse; |
| vec_safe_push (desc->accesses, to); |
| |
| for (gensum_param_access *ch = from->first_child; |
| ch; |
| ch = ch->next_sibling) |
| copy_accesses_to_ipa_desc (ch, desc); |
| } |
| |
| /* Analyze function body scan results stored in PARAM_DESCRIPTIONS, detect |
| possible transformations and store information about them in the function |
| summary. NODE, FUN and IFS are all various structures describing the |
| currently analyzed function. */ |
| |
| static void |
| process_scan_results (cgraph_node *node, struct function *fun, |
| isra_func_summary *ifs, |
| vec<gensum_param_desc> *param_descriptions) |
| { |
| bool check_pass_throughs = false; |
| bool dereferences_propagated = false; |
| tree parm = DECL_ARGUMENTS (node->decl); |
| unsigned param_count = param_descriptions->length(); |
| |
| for (unsigned desc_index = 0; |
| desc_index < param_count; |
| desc_index++, parm = DECL_CHAIN (parm)) |
| { |
| gensum_param_desc *desc = &(*param_descriptions)[desc_index]; |
| if (!desc->split_candidate) |
| continue; |
| |
| if (flag_checking) |
| isra_verify_access_tree (desc->accesses); |
| |
| if (!dereferences_propagated |
| && desc->by_ref |
| && desc->accesses) |
| { |
| propagate_dereference_distances (fun); |
| dereferences_propagated = true; |
| } |
| |
| HOST_WIDE_INT nonarg_acc_size = 0; |
| bool only_calls = true; |
| bool check_failed = false; |
| |
| int entry_bb_index = ENTRY_BLOCK_PTR_FOR_FN (fun)->index; |
| for (gensum_param_access *acc = desc->accesses; |
| acc; |
| acc = acc->next_sibling) |
| if (check_gensum_access (parm, desc, acc, &nonarg_acc_size, &only_calls, |
| entry_bb_index)) |
| { |
| check_failed = true; |
| break; |
| } |
| if (check_failed) |
| continue; |
| |
| if (only_calls) |
| desc->locally_unused = true; |
| |
| HOST_WIDE_INT cur_param_size |
| = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (parm))); |
| HOST_WIDE_INT param_size_limit; |
| if (!desc->by_ref || optimize_function_for_size_p (fun)) |
| param_size_limit = cur_param_size; |
| else |
| param_size_limit |
| = opt_for_fn (node->decl, |
| param_ipa_sra_ptr_growth_factor) * cur_param_size; |
| if (nonarg_acc_size > param_size_limit |
| || (!desc->by_ref && nonarg_acc_size == param_size_limit)) |
| { |
| disqualify_split_candidate (desc, "Would result in too big a set " |
| "of replacements."); |
| } |
| else |
| { |
| /* create_parameter_descriptors makes sure unit sizes of all |
| candidate parameters fit unsigned integers restricted to |
| ISRA_ARG_SIZE_LIMIT. */ |
| desc->param_size_limit = param_size_limit / BITS_PER_UNIT; |
| desc->nonarg_acc_size = nonarg_acc_size / BITS_PER_UNIT; |
| if (desc->split_candidate && desc->ptr_pt_count) |
| { |
| gcc_assert (desc->by_ref); |
| check_pass_throughs = true; |
| } |
| } |
| } |
| |
| /* When a pointer parameter is passed through to a callee in which it is only |
| used to read one or a few items, we can attempt to transform it to |
| obtaining and passing through the items instead of the pointer. But we |
| must take extra care that 1) we do not introduce any segfault by moving |
| dereferences above control flow and that 2) the data is not modified |
| through an alias in this function. The IPA analysis must not introduce |
| any access candidates unless it can prove both. |
| |
| The current solution is very crude as it consists of ensuring that the |
| call post-dominates the entry BB and that the definition of the VUSE of |
| the call is a default definition. TODO: For non-recursive callees in the |
| same compilation unit we could do better by doing the analysis in |
| topological order and looking into access candidates of callees, using |
| their alias_ptr_types to attempt real AA. We could also use the maximum |
| known dereferenced offset in this function at the IPA level. |
| |
| TODO: Measure the overhead and the effect of just being pessimistic. |
| Maybe this is only -O3 material? */ |
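| |
| /* As an illustrative sketch (hypothetical code), splitting the pointer |
| pass-through below would be unsafe because the call does not post-dominate |
| the entry BB; P might even be NULL whenever the call is skipped: |
| |
| void foo (int *p, int c) |
| { |
| if (c) |
| use_pointee (p); <-- the callee only reads *p |
| } |
| |
| Loading *P at foo's entry, or in foo's callers, would introduce a |
| dereference the original program never performs. */ |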
| |
| bool pdoms_calculated = false; |
| if (check_pass_throughs) |
| for (cgraph_edge *cs = node->callees; cs; cs = cs->next_callee) |
| { |
| gcall *call_stmt = cs->call_stmt; |
| tree vuse = gimple_vuse (call_stmt); |
| |
| /* If the callee is a const function, we don't get a VUSE. In such |
| a case there will be no memory accesses in the called function (or |
| the const attribute is wrong) and then we just don't care. */ |
| bool uses_memory_as_obtained = vuse && SSA_NAME_IS_DEFAULT_DEF (vuse); |
| |
| unsigned count = gimple_call_num_args (call_stmt); |
| isra_call_summary *csum = call_sums->get_create (cs); |
| csum->init_inputs (count); |
| csum->m_before_any_store = uses_memory_as_obtained; |
| for (unsigned argidx = 0; argidx < count; argidx++) |
| { |
| if (!csum->m_arg_flow[argidx].pointer_pass_through) |
| continue; |
| unsigned pidx |
| = get_single_param_flow_source (&csum->m_arg_flow[argidx]); |
| gensum_param_desc *desc = &(*param_descriptions)[pidx]; |
| if (!desc->split_candidate) |
| { |
| csum->m_arg_flow[argidx].pointer_pass_through = false; |
| continue; |
| } |
| if (!uses_memory_as_obtained) |
| continue; |
| |
| /* Post-dominator check placed last, hoping that it usually won't |
| be needed. */ |
| if (!pdoms_calculated) |
| { |
| gcc_checking_assert (cfun); |
| connect_infinite_loops_to_exit (); |
| calculate_dominance_info (CDI_POST_DOMINATORS); |
| pdoms_calculated = true; |
| } |
| if (dominated_by_p (CDI_POST_DOMINATORS, |
| gimple_bb (call_stmt), |
| single_succ (ENTRY_BLOCK_PTR_FOR_FN (fun)))) |
| csum->m_arg_flow[argidx].safe_to_import_accesses = true; |
| } |
| |
| } |
| if (pdoms_calculated) |
| { |
| free_dominance_info (CDI_POST_DOMINATORS); |
| remove_fake_exit_edges (); |
| } |
| |
| /* TODO: Add early exit if we disqualified everything. This also requires |
| that we either relax the restriction that |
| ipa_param_adjustments.m_always_copy_start must be the number of PARM_DECLs |
| or store the number of parameters to IPA-SRA function summary and use that |
| when just removing params. */ |
| |
| vec_safe_reserve_exact (ifs->m_parameters, param_count); |
| ifs->m_parameters->quick_grow_cleared (param_count); |
| for (unsigned desc_index = 0; desc_index < param_count; desc_index++) |
| { |
| gensum_param_desc *s = &(*param_descriptions)[desc_index]; |
| isra_param_desc *d = &(*ifs->m_parameters)[desc_index]; |
| |
| d->param_size_limit = s->param_size_limit; |
| d->size_reached = s->nonarg_acc_size; |
| d->locally_unused = s->locally_unused; |
| d->split_candidate = s->split_candidate; |
| d->by_ref = s->by_ref; |
| |
| for (gensum_param_access *acc = s->accesses; |
| acc; |
| acc = acc->next_sibling) |
| copy_accesses_to_ipa_desc (acc, d); |
| } |
| |
| if (dump_file) |
| dump_isra_param_descriptors (dump_file, node->decl, ifs); |
| } |
| |
| /* Return true if there are any overlaps among certain accesses of DESC. If |
| CERTAIN_ACCESS_PRESENT_P is non-NULL, also set it upon encountering a |
| certain access. DESC is assumed to be a split candidate that is not |
| locally unused. */ |
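| |
| /* For example (hypothetical numbers), a certain access with unit_offset 0 |
| and unit_size 8 overlaps a certain access with unit_offset 4 and unit_size |
| 4, because 0 < 4 + 4 and 0 + 8 > 4. */ |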
| |
| static bool |
| overlapping_certain_accesses_p (isra_param_desc *desc, |
| bool *certain_access_present_p) |
| { |
| unsigned pclen = vec_safe_length (desc->accesses); |
| for (unsigned i = 0; i < pclen; i++) |
| { |
| param_access *a1 = (*desc->accesses)[i]; |
| |
| if (!a1->certain) |
| continue; |
| if (certain_access_present_p) |
| *certain_access_present_p = true; |
| for (unsigned j = i + 1; j < pclen; j++) |
| { |
| param_access *a2 = (*desc->accesses)[j]; |
| if (a2->certain |
| && a1->unit_offset < a2->unit_offset + a2->unit_size |
| && a1->unit_offset + a1->unit_size > a2->unit_offset) |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| /* Check for any overlaps of certain param accesses among splitting candidates |
| and signal an ICE if there are any. If CERTAIN_MUST_EXIST is set, also |
| check that used splitting candidates have at least one certain access. */ |
| |
| static void |
| verify_splitting_accesses (cgraph_node *node, bool certain_must_exist) |
| { |
| isra_func_summary *ifs = func_sums->get (node); |
| if (!ifs || !ifs->m_candidate) |
| return; |
| unsigned param_count = vec_safe_length (ifs->m_parameters); |
| for (unsigned pidx = 0; pidx < param_count; pidx++) |
| { |
| isra_param_desc *desc = &(*ifs->m_parameters)[pidx]; |
| if (!desc->split_candidate || desc->locally_unused) |
| continue; |
| |
| bool certain_access_present = !certain_must_exist; |
| if (overlapping_certain_accesses_p (desc, &certain_access_present)) |
| internal_error ("function %qs, parameter %u, has IPA-SRA accesses " |
| "which overlap", node->dump_name (), pidx); |
| if (!certain_access_present) |
| internal_error ("function %qs, parameter %u, is used but does not " |
| "have any certain IPA-SRA access", |
| node->dump_name (), pidx); |
| } |
| } |
| |
| /* Intraprocedural part of IPA-SRA analysis. Scan bodies of all functions in |
| this compilation unit and create summary structures describing IPA-SRA |
| opportunities and constraints in them. */ |
| |
| static void |
| ipa_sra_generate_summary (void) |
| { |
| struct cgraph_node *node; |
| |
| gcc_checking_assert (!func_sums); |
| gcc_checking_assert (!call_sums); |
| func_sums |
| = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ()) |
| ipa_sra_function_summaries (symtab, true)); |
| call_sums = new ipa_sra_call_summaries (symtab); |
| |
| FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
| ipa_sra_summarize_function (node); |
| } |
| |
| /* Write intraprocedural analysis information about call graph edge E into |
| stream OB for LTO WPA. */ |
| |
| static void |
| isra_write_edge_summary (output_block *ob, cgraph_edge *e) |
| { |
| isra_call_summary *csum = call_sums->get (e); |
| unsigned input_count = csum->m_arg_flow.length (); |
| streamer_write_uhwi (ob, input_count); |
| for (unsigned i = 0; i < input_count; i++) |
| { |
| isra_param_flow *ipf = &csum->m_arg_flow[i]; |
| streamer_write_hwi (ob, ipf->length); |
| bitpack_d bp = bitpack_create (ob->main_stream); |
| for (int j = 0; j < ipf->length; j++) |
| bp_pack_value (&bp, ipf->inputs[j], 8); |
| bp_pack_value (&bp, ipf->aggregate_pass_through, 1); |
| bp_pack_value (&bp, ipf->pointer_pass_through, 1); |
| bp_pack_value (&bp, ipf->safe_to_import_accesses, 1); |
| streamer_write_bitpack (&bp); |
| streamer_write_uhwi (ob, ipf->unit_offset); |
| streamer_write_uhwi (ob, ipf->unit_size); |
| } |
| bitpack_d bp = bitpack_create (ob->main_stream); |
| bp_pack_value (&bp, csum->m_return_ignored, 1); |
| bp_pack_value (&bp, csum->m_return_returned, 1); |
| bp_pack_value (&bp, csum->m_bit_aligned_arg, 1); |
| bp_pack_value (&bp, csum->m_before_any_store, 1); |
| streamer_write_bitpack (&bp); |
| } |
| |
| /* Write intraprocedural analysis information about NODE and all of its outgoing |
| edges into a stream for LTO WPA. */ |
| |
| static void |
| isra_write_node_summary (output_block *ob, cgraph_node *node) |
| { |
| isra_func_summary *ifs = func_sums->get (node); |
| lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder; |
| int node_ref = lto_symtab_encoder_encode (encoder, node); |
| streamer_write_uhwi (ob, node_ref); |
| |
| unsigned param_desc_count = vec_safe_length (ifs->m_parameters); |
| streamer_write_uhwi (ob, param_desc_count); |
| for (unsigned i = 0; i < param_desc_count; i++) |
| { |
| isra_param_desc *desc = &(*ifs->m_parameters)[i]; |
| unsigned access_count = vec_safe_length (desc->accesses); |
| streamer_write_uhwi (ob, access_count); |
| for (unsigned j = 0; j < access_count; j++) |
| { |
| param_access *acc = (*desc->accesses)[j]; |
| stream_write_tree (ob, acc->type, true); |
| stream_write_tree (ob, acc->alias_ptr_type, true); |
| streamer_write_uhwi (ob, acc->unit_offset); |
| streamer_write_uhwi (ob, acc->unit_size); |
| bitpack_d bp = bitpack_create (ob->main_stream); |
| bp_pack_value (&bp, acc->certain, 1); |
| bp_pack_value (&bp, acc->reverse, 1); |
| streamer_write_bitpack (&bp); |
| } |
| streamer_write_uhwi (ob, desc->param_size_limit); |
| streamer_write_uhwi (ob, desc->size_reached); |
| bitpack_d bp = bitpack_create (ob->main_stream); |
| bp_pack_value (&bp, desc->locally_unused, 1); |
| bp_pack_value (&bp, desc->split_candidate, 1); |
| bp_pack_value (&bp, desc->by_ref, 1); |
| streamer_write_bitpack (&bp); |
| } |
| bitpack_d bp = bitpack_create (ob->main_stream); |
| bp_pack_value (&bp, ifs->m_candidate, 1); |
| bp_pack_value (&bp, ifs->m_returns_value, 1); |
| bp_pack_value (&bp, ifs->m_return_ignored, 1); |
| gcc_assert (!ifs->m_queued); |
| streamer_write_bitpack (&bp); |
| |
| for (cgraph_edge *e = node->callees; e; e = e->next_callee) |
| isra_write_edge_summary (ob, e); |
| for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) |
| isra_write_edge_summary (ob, e); |
| } |
| |
| /* Write intraprocedural analysis information into a stream for LTO WPA. */ |
| |
| static void |
| ipa_sra_write_summary (void) |
| { |
| if (!func_sums || !call_sums) |
| return; |
| |
| struct output_block *ob = create_output_block (LTO_section_ipa_sra); |
| lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder; |
| ob->symbol = NULL; |
| |
| unsigned int count = 0; |
| lto_symtab_encoder_iterator lsei; |
| for (lsei = lsei_start_function_in_partition (encoder); |
| !lsei_end_p (lsei); |
| lsei_next_function_in_partition (&lsei)) |
| { |
| cgraph_node *node = lsei_cgraph_node (lsei); |
| if (node->has_gimple_body_p () |
| && func_sums->get (node) != NULL) |
| count++; |
| } |
| streamer_write_uhwi (ob, count); |
| |
| /* Process all of the functions. */ |
| for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei); |
| lsei_next_function_in_partition (&lsei)) |
| { |
| cgraph_node *node = lsei_cgraph_node (lsei); |
| if (node->has_gimple_body_p () |
| && func_sums->get (node) != NULL) |
| isra_write_node_summary (ob, node); |
| } |
| streamer_write_char_stream (ob->main_stream, 0); |
| produce_asm (ob, NULL); |
| destroy_output_block (ob); |
| } |
| |
| /* Read intraprocedural analysis information about call graph edge CS from |
| stream IB for LTO WPA. */ |
| |
| static void |
| isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs) |
| { |
| isra_call_summary *csum = call_sums->get_create (cs); |
| unsigned input_count = streamer_read_uhwi (ib); |
| csum->init_inputs (input_count); |
| for (unsigned i = 0; i < input_count; i++) |
| { |
| isra_param_flow *ipf = &csum->m_arg_flow[i]; |
| ipf->length = streamer_read_hwi (ib); |
| bitpack_d bp = streamer_read_bitpack (ib); |
| for (int j = 0; j < ipf->length; j++) |
| ipf->inputs[j] = bp_unpack_value (&bp, 8); |
| ipf->aggregate_pass_through = bp_unpack_value (&bp, 1); |
| ipf->pointer_pass_through = bp_unpack_value (&bp, 1); |
| ipf->safe_to_import_accesses = bp_unpack_value (&bp, 1); |
| ipf->unit_offset = streamer_read_uhwi (ib); |
| ipf->unit_size = streamer_read_uhwi (ib); |
| } |
| bitpack_d bp = streamer_read_bitpack (ib); |
| csum->m_return_ignored = bp_unpack_value (&bp, 1); |
| csum->m_return_returned = bp_unpack_value (&bp, 1); |
| csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1); |
| csum->m_before_any_store = bp_unpack_value (&bp, 1); |
| } |
| |
| /* Read intraprocedural analysis information about NODE and all of its |
| outgoing edges from stream IB for LTO WPA. */ |
| |
| static void |
| isra_read_node_info (struct lto_input_block *ib, cgraph_node *node, |
| struct data_in *data_in) |
| { |
| isra_func_summary *ifs = func_sums->get_create (node); |
| unsigned param_desc_count = streamer_read_uhwi (ib); |
| if (param_desc_count > 0) |
| { |
| vec_safe_reserve_exact (ifs->m_parameters, param_desc_count); |
| ifs->m_parameters->quick_grow_cleared (param_desc_count); |
| } |
| for (unsigned i = 0; i < param_desc_count; i++) |
| { |
| isra_param_desc *desc = &(*ifs->m_parameters)[i]; |
| unsigned access_count = streamer_read_uhwi (ib); |
| for (unsigned j = 0; j < access_count; j++) |
| { |
| param_access *acc = ggc_cleared_alloc<param_access> (); |
| acc->type = stream_read_tree (ib, data_in); |
| acc->alias_ptr_type = stream_read_tree (ib, data_in); |
| acc->unit_offset = streamer_read_uhwi (ib); |
| acc->unit_size = streamer_read_uhwi (ib); |
| bitpack_d bp = streamer_read_bitpack (ib); |
| acc->certain = bp_unpack_value (&bp, 1); |
| acc->reverse = bp_unpack_value (&bp, 1); |
| vec_safe_push (desc->accesses, acc); |
| } |
| desc->param_size_limit = streamer_read_uhwi (ib); |
| desc->size_reached = streamer_read_uhwi (ib); |
| bitpack_d bp = streamer_read_bitpack (ib); |
| desc->locally_unused = bp_unpack_value (&bp, 1); |
| desc->split_candidate = bp_unpack_value (&bp, 1); |
| desc->by_ref = bp_unpack_value (&bp, 1); |
| } |
| bitpack_d bp = streamer_read_bitpack (ib); |
| ifs->m_candidate = bp_unpack_value (&bp, 1); |
| ifs->m_returns_value = bp_unpack_value (&bp, 1); |
| ifs->m_return_ignored = bp_unpack_value (&bp, 1); |
| ifs->m_queued = 0; |
| |
| for (cgraph_edge *e = node->callees; e; e = e->next_callee) |
| isra_read_edge_summary (ib, e); |
| for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee) |
| isra_read_edge_summary (ib, e); |
| } |
| |
| /* Read IPA-SRA summaries from a section in file FILE_DATA of length LEN with |
| data DATA. TODO: This function was copied almost verbatim from ipa-prop.cc; |
| it should be possible to unify them somehow. */ |
| |
| static void |
| isra_read_summary_section (struct lto_file_decl_data *file_data, |
| const char *data, size_t len) |
| { |
| const struct lto_function_header *header = |
| (const struct lto_function_header *) data; |
| const int cfg_offset = sizeof (struct lto_function_header); |
| const int main_offset = cfg_offset + header->cfg_size; |
| const int string_offset = main_offset + header->main_size; |
| struct data_in *data_in; |
| unsigned int i; |
| unsigned int count; |
| |
| lto_input_block ib_main ((const char *) data + main_offset, |
| header->main_size, file_data->mode_table); |
| |
| data_in = |
| lto_data_in_create (file_data, (const char *) data + string_offset, |
| header->string_size, vNULL); |
| count = streamer_read_uhwi (&ib_main); |
| |
| for (i = 0; i < count; i++) |
| { |
| unsigned int index; |
| struct cgraph_node *node; |
| lto_symtab_encoder_t encoder; |
| |
| index = streamer_read_uhwi (&ib_main); |
| encoder = file_data->symtab_node_encoder; |
| node = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, |
| index)); |
| gcc_assert (node->definition); |
| isra_read_node_info (&ib_main, node, data_in); |
| } |
| lto_free_section_data (file_data, LTO_section_ipa_sra, NULL, data, |
| len); |
| lto_data_in_delete (data_in); |
| } |
| |
| /* Read intraprocedural analysis information from a stream for LTO WPA. */ |
| |
| static void |
| ipa_sra_read_summary (void) |
| { |
| struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data (); |
| struct lto_file_decl_data *file_data; |
| unsigned int j = 0; |
| |
| gcc_checking_assert (!func_sums); |
| gcc_checking_assert (!call_sums); |
| func_sums |
| = (new (ggc_alloc_no_dtor <ipa_sra_function_summaries> ()) |
| ipa_sra_function_summaries (symtab, true)); |
| call_sums = new ipa_sra_call_summaries (symtab); |
| |
| while ((file_data = file_data_vec[j++])) |
| { |
| size_t len; |
| const char *data |
| = lto_get_summary_section_data (file_data, LTO_section_ipa_sra, &len); |
| if (data) |
| isra_read_summary_section (file_data, data, len); |
| } |
| } |
| |
| /* Dump all IPA-SRA summary data for all cgraph nodes and edges to file F. */ |
| |
| static void |
| ipa_sra_dump_all_summaries (FILE *f) |
| { |
| cgraph_node *node; |
| FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) |
| { |
| fprintf (f, "\nSummary for node %s:\n", node->dump_name ()); |
| |
| isra_func_summary *ifs = func_sums->get (node); |
| if (!ifs) |
| fprintf (f, " Function does not have any associated IPA-SRA " |
| "summary\n"); |
| else |
| { |
| if (!ifs->m_candidate) |
| { |
| fprintf (f, " Not a candidate function\n"); |
| continue; |
| } |
| if (ifs->m_returns_value) |
| fprintf (f, " Returns value\n"); |
| if (vec_safe_is_empty (ifs->m_parameters)) |
| fprintf (f, " No parameter information.\n"); |
| else |
| for (unsigned i = 0; i < ifs->m_parameters->length (); ++i) |
| { |
| fprintf (f, " Descriptor for parameter %i:\n", i); |
| dump_isra_param_descriptor (f, &(*ifs->m_parameters)[i]); |
| } |
| fprintf (f, "\n"); |
| } |
| |
| struct cgraph_edge *cs; |
| for (cs = node->callees; cs; cs = cs->next_callee) |
| { |
| fprintf (f, " Summary for edge %s->%s:\n", cs->caller->dump_name (), |
| cs->callee->dump_name ()); |
| isra_call_summary *csum = call_sums->get (cs); |
| if (csum) |
| csum->dump (f); |
| else |
| fprintf (f, " Call summary is MISSING!\n"); |
| } |
| |
| } |
| fprintf (f, "\n\n"); |
| } |
| |
| /* Perform function-scope viability tests that can only be made at the IPA |
| level and return false if the function is deemed unsuitable for IPA-SRA. */ |
| |
| static bool |
| ipa_sra_ipa_function_checks (cgraph_node *node) |
| { |
| if (!node->can_be_local_p ()) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Function %s disqualified because it cannot be " |
| "made local.\n", node->dump_name ()); |
| return false; |
| } |
| if (!node->can_change_signature) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Function cannot change its signature.\n"); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* Issues found out by check_callers_for_issues. */ |
| |
| struct caller_issues |
| { |
| /* The candidate being considered. */ |
| cgraph_node *candidate; |
| /* There is a thunk among callers. */ |
| bool thunk; |
| /* Call site with no available information. */ |
| bool unknown_callsite; |
| /* Call from outside the candidate's comdat group. */ |
| bool call_from_outside_comdat; |
| /* There is a bit-aligned load into one of the non-gimple-typed arguments. */ |
| bool bit_aligned_aggregate_argument; |
| }; |
| |
| /* Worker for call_for_symbol_and_aliases, setting any flags of the passed |
| caller_issues structure that apply. */ |
| |
| static bool |
| check_for_caller_issues (struct cgraph_node *node, void *data) |
| { |
| struct caller_issues *issues = (struct caller_issues *) data; |
| |
| for (cgraph_edge *cs = node->callers; cs; cs = cs->next_caller) |
| { |
| if (cs->caller->thunk) |
| { |
| issues->thunk = true; |
| /* TODO: We should be able to process at least some types of |
| thunks. */ |
| return true; |
| } |
| if (issues->candidate->calls_comdat_local |
| && issues->candidate->same_comdat_group |
| && !issues->candidate->in_same_comdat_group_p (cs->caller)) |
| { |
| issues->call_from_outside_comdat = true; |
| return true; |
| } |
| |
| isra_call_summary *csum = call_sums->get (cs); |
| if (!csum) |
| { |
| issues->unknown_callsite = true; |
| return true; |
| } |
| |
| if (csum->m_bit_aligned_arg) |
| issues->bit_aligned_aggregate_argument = true; |
| } |
| return false; |
| } |
| |
| /* Look at all incoming edges to NODE, including aliases and thunks, and look |
| for problems. Return true if the type of NODE should not be modified at |
| all. */ |
| |
| static bool |
| check_all_callers_for_issues (cgraph_node *node) |
| { |
| struct caller_issues issues; |
| memset (&issues, 0, sizeof (issues)); |
| issues.candidate = node; |
| |
| node->call_for_symbol_and_aliases (check_for_caller_issues, &issues, true); |
| if (issues.unknown_callsite) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "A call of %s has not been analyzed. Disabling " |
| "all modifications.\n", node->dump_name ()); |
| return true; |
| } |
| /* TODO: We should be able to process at least some types of thunks. */ |
| if (issues.thunk) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "A call of %s is through a thunk, which is not" |
| " handled yet. Disabling all modifications.\n", |
| node->dump_name ()); |
| return true; |
| } |
| if (issues.call_from_outside_comdat) |
| { |
| if (dump_file) |
| fprintf (dump_file, "Function would become private comdat called " |
| "outside of its comdat group.\n"); |
| return true; |
| } |
| |
| if (issues.bit_aligned_aggregate_argument) |
| { |
| /* Let's only remove parameters/return values from such functions. |
| TODO: We could only prevent splitting the problematic parameters if |
| anybody thinks it is worth it. */ |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "A call of %s has bit-aligned aggregate argument," |
| " disabling parameter splitting.\n", node->dump_name ()); |
| |
| isra_func_summary *ifs = func_sums->get (node); |
| gcc_checking_assert (ifs); |
| unsigned param_count = vec_safe_length (ifs->m_parameters); |
| for (unsigned i = 0; i < param_count; i++) |
| (*ifs->m_parameters)[i].split_candidate = false; |
| } |
| return false; |
| } |
| |
| /* Find the access with the corresponding unit OFFSET and unit SIZE among |
| accesses in PARAM_DESC and return it, or NULL if there is no such |
| access. */ |
| |
| static param_access * |
| find_param_access (isra_param_desc *param_desc, unsigned offset, unsigned size) |
| { |
| unsigned pclen = vec_safe_length (param_desc->accesses); |
| |
| /* The search is linear but the number of stored accesses is bound by |
| PARAM_IPA_SRA_MAX_REPLACEMENTS, so most probably 8. */ |
| |
| for (unsigned i = 0; i < pclen; i++) |
| if ((*param_desc->accesses)[i]->unit_offset == offset |
| && (*param_desc->accesses)[i]->unit_size == size) |
| return (*param_desc->accesses)[i]; |
| |
| return NULL; |
| } |
| |
| /* Return true iff the total size SIZE of definite replacements would violate |
| the limit set for the parameter in DESC. */ |
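| |
| /* For instance (hypothetical numbers), with a limit of 8 units, a total |
| replacement size of 8 already violates the limit of a by-value parameter, |
| since the replacements would be no smaller than the original, whereas for |
| a by-reference parameter only sizes greater than 8 do. */ |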
| |
| static bool |
| size_would_violate_limit_p (isra_param_desc *desc, unsigned size) |
| { |
| unsigned limit = desc->param_size_limit; |
| if (size > limit |
| || (!desc->by_ref && size == limit)) |
| return true; |
| return false; |
| } |
| |
| /* Increase the reached size of DESC by SIZE or disqualify it if that would |
| violate the set limit. IDX is the parameter number, used only when dumping |
| the disqualification. */ |
| |
| static void |
| bump_reached_size (isra_param_desc *desc, unsigned size, unsigned idx) |
| { |
| unsigned after = desc->size_reached + size; |
| if (size_would_violate_limit_p (desc, after)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " ...size limit reached, disqualifying " |
| "candidate parameter %u\n", idx); |
| desc->split_candidate = false; |
| return; |
| } |
| desc->size_reached = after; |
| } |
| |
| /* Take all actions required to deal with an edge CS that represents a call to |
| an unknown or un-analyzed function, for both parameter removal and |
| splitting. */ |
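| |
| /* For example (an illustrative sketch), if a caller parameter P is passed |
| through to an indirect call: |
| |
| void foo (int *p, void (*fn) (int *)) |
| { |
| fn (p); |
| } |
| |
| nothing is known about what fn does with it, so P must be conservatively |
| marked as locally used and removed from the set of split candidates. */ |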
| |
| static void |
| process_edge_to_unknown_caller (cgraph_edge *cs) |
| { |
| isra_func_summary *from_ifs = func_sums->get (cs->caller); |
| gcc_checking_assert (from_ifs); |
| isra_call_summary *csum = call_sums->get (cs); |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, "Processing an edge to an unknown callee from %s:\n", |
| cs->caller->dump_name ()); |
| |
| unsigned args_count = csum->m_arg_flow.length (); |
| for (unsigned i = 0; i < args_count; i++) |
| { |
| isra_param_flow *ipf = &csum->m_arg_flow[i]; |
| |
| if (ipf->pointer_pass_through) |
| { |
| isra_param_desc *param_desc |
| = &(*from_ifs->m_parameters)[get_single_param_flow_source (ipf)]; |
| param_desc->locally_unused = false; |
| param_desc->split_candidate = false; |
| continue; |
| } |
| if (ipf->aggregate_pass_through) |
| { |
| unsigned idx = get_single_param_flow_source (ipf); |
| isra_param_desc *param_desc = &(*from_ifs->m_parameters)[idx]; |
| |
| param_desc->locally_unused = false; |
| if (!param_desc->split_candidate) |
| continue; |
| gcc_assert (!param_desc->by_ref); |
| param_access *pacc = find_param_access (param_desc, ipf->unit_offset, |
| ipf->unit_size); |
| gcc_checking_assert (pacc); |
| pacc->certain = true; |
| if (overlapping_certain_accesses_p (param_desc, NULL)) |
| { |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| fprintf (dump_file, " ...leading to overlap, " |
| " disqualifying candidate parameter %u\n", |
| idx); |
| param_desc->split_candidate = false; |
| } |
| else |
| bump_reached_size (param_desc, pacc->unit_size, idx); |
| ipf->aggregate_pass_through = false; |
| continue; |
| } |
| |
| for (int j = 0; j < ipf->length; j++) |
| { |
| int input_idx = ipf->inputs[j]; |
| (*from_ifs->m_parameters)[input_idx].locally_unused = false; |
| } |
| } |
| } |
| |
| /* Propagate parameter removal information through cross-SCC edge CS, i.e. |
| mark as locally used all parameters of the caller that feed an argument |
| which the callee actually uses. */ |
| |
| static void |
| param_removal_cross_scc_edge (cgraph_edge *cs) |
| { |
| enum availability availability; |
| cgraph_node *callee = cs->callee->function_symbol (&availability); |
| isra_func_summary *to_ifs = func_sums->get (callee); |
| if (!to_ifs || !to_ifs->m_candidate |
| || (availability < AVAIL_AVAILABLE) |
| || vec_safe_is_empty (to_ifs->m_parameters)) |
| { |
| process_edge_to_unknown_caller (cs); |
| return; |
| } |
| isra_func_summary *from_ifs = func_sums->get (cs->caller); |
| gcc_checking_assert (from_ifs); |
| |
| isra_call_summary *csum = call_sums->get (cs); |
| unsigned args_count = csum->m_arg_flow.length (); |
| unsigned param_count = vec_safe_length (to_ifs->m_parameters); |
| |
| for (unsigned i = 0; i < args_count; i++) |
| { |
| bool unused_in_callee; |
| if (i < param_count) |
| unused_in_callee = (*to_ifs->m_parameters)[i].locally_unused; |
| else |
| unused_in_callee = false; |
| |
| if (!unused_in_callee) |
| { |
| isra_param_flow *ipf = &csum->m_arg_flow[i]; |
| for (int j = 0; j < ipf->length; j++) |
| { |
| int input_idx = ipf->inputs[j]; |
| (*from_ifs->m_parameters)[input_idx].locally_unused = false; |
| } |
| } |
| } |
| } |
| |
| /* Unless it is already there, push NODE, which is also described by IFS, |
| onto STACK. */ |
| |
| static void |
| isra_push_node_to_stack (cgraph_node *node, isra_func_summary *ifs, |
| vec<cgraph_node *> *stack) |
| { |
| if (!ifs->m_queued) |
| { |
| ifs->m_queued = true; |
| |