| /* SLP - Basic Block Vectorization |
| Copyright (C) 2007-2021 Free Software Foundation, Inc. |
| Contributed by Dorit Naishlos <dorit@il.ibm.com> |
| and Ira Rosen <irar@il.ibm.com> |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "gimple.h" |
| #include "tree-pass.h" |
| #include "ssa.h" |
| #include "optabs-tree.h" |
| #include "insn-config.h" |
| #include "recog.h" /* FIXME: for insn_data */ |
| #include "fold-const.h" |
| #include "stor-layout.h" |
| #include "gimple-iterator.h" |
| #include "cfgloop.h" |
| #include "tree-vectorizer.h" |
| #include "langhooks.h" |
| #include "gimple-walk.h" |
| #include "dbgcnt.h" |
| #include "tree-vector-builder.h" |
| #include "vec-perm-indices.h" |
| #include "gimple-fold.h" |
| #include "internal-fn.h" |
| #include "dump-context.h" |
| #include "cfganal.h" |
| #include "tree-eh.h" |
| #include "tree-cfg.h" |
| #include "alloc-pool.h" |
| |
| static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *, |
| slp_tree, stmt_vector_for_cost *); |
| static void vect_print_slp_tree (dump_flags_t, dump_location_t, slp_tree); |
| |
| static object_allocator<_slp_tree> *slp_tree_pool; |
| static slp_tree slp_first_node; |
| |
| void |
| vect_slp_init (void) |
| { |
| slp_tree_pool = new object_allocator<_slp_tree> ("SLP nodes"); |
| } |
| |
| void |
| vect_slp_fini (void) |
| { |
| while (slp_first_node) |
| delete slp_first_node; |
| delete slp_tree_pool; |
| slp_tree_pool = NULL; |
| } |
| |
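| /* Allocate an SLP node from the SLP node pool. */ |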
| void * |
| _slp_tree::operator new (size_t n) |
| { |
| gcc_assert (n == sizeof (_slp_tree)); |
| return slp_tree_pool->allocate_raw (); |
| } |
| |
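| /* Return an SLP node to the SLP node pool. */ |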
| void |
| _slp_tree::operator delete (void *node, size_t n) |
| { |
| gcc_assert (n == sizeof (_slp_tree)); |
| slp_tree_pool->remove_raw (node); |
| } |
| |
| |
| /* Initialize an SLP node. */ |
| |
| _slp_tree::_slp_tree () |
| { |
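| /* Link the node into the global list of allocated nodes so that |
| vect_slp_fini can release nodes that are still live. */ |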
| this->prev_node = NULL; |
| if (slp_first_node) |
| slp_first_node->prev_node = this; |
| this->next_node = slp_first_node; |
| slp_first_node = this; |
| SLP_TREE_SCALAR_STMTS (this) = vNULL; |
| SLP_TREE_SCALAR_OPS (this) = vNULL; |
| SLP_TREE_VEC_STMTS (this) = vNULL; |
| SLP_TREE_VEC_DEFS (this) = vNULL; |
| SLP_TREE_NUMBER_OF_VEC_STMTS (this) = 0; |
| SLP_TREE_CHILDREN (this) = vNULL; |
| SLP_TREE_LOAD_PERMUTATION (this) = vNULL; |
| SLP_TREE_LANE_PERMUTATION (this) = vNULL; |
| SLP_TREE_DEF_TYPE (this) = vect_uninitialized_def; |
| SLP_TREE_CODE (this) = ERROR_MARK; |
| SLP_TREE_VECTYPE (this) = NULL_TREE; |
| SLP_TREE_REPRESENTATIVE (this) = NULL; |
| SLP_TREE_REF_COUNT (this) = 1; |
| this->failed = NULL; |
| this->max_nunits = 1; |
| this->lanes = 0; |
| } |
| |
| /* Tear down an SLP node. */ |
| |
| _slp_tree::~_slp_tree () |
| { |
| if (this->prev_node) |
| this->prev_node->next_node = this->next_node; |
| else |
| slp_first_node = this->next_node; |
| if (this->next_node) |
| this->next_node->prev_node = this->prev_node; |
| SLP_TREE_CHILDREN (this).release (); |
| SLP_TREE_SCALAR_STMTS (this).release (); |
| SLP_TREE_SCALAR_OPS (this).release (); |
| SLP_TREE_VEC_STMTS (this).release (); |
| SLP_TREE_VEC_DEFS (this).release (); |
| SLP_TREE_LOAD_PERMUTATION (this).release (); |
| SLP_TREE_LANE_PERMUTATION (this).release (); |
| if (this->failed) |
| free (failed); |
| } |
| |
| /* Recursively free the memory allocated for the SLP tree rooted at NODE. */ |
| |
| void |
| vect_free_slp_tree (slp_tree node) |
| { |
| int i; |
| slp_tree child; |
| |
| if (--SLP_TREE_REF_COUNT (node) != 0) |
| return; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| if (child) |
| vect_free_slp_tree (child); |
| |
| /* If the node defines any SLP-only patterns then those patterns are no |
| longer valid and should be removed. */ |
| stmt_vec_info rep_stmt_info = SLP_TREE_REPRESENTATIVE (node); |
| if (rep_stmt_info && STMT_VINFO_SLP_VECT_ONLY_PATTERN (rep_stmt_info)) |
| { |
| stmt_vec_info stmt_info = vect_orig_stmt (rep_stmt_info); |
| STMT_VINFO_IN_PATTERN_P (stmt_info) = false; |
| STMT_SLP_TYPE (stmt_info) = STMT_SLP_TYPE (rep_stmt_info); |
| } |
| |
| delete node; |
| } |
| |
| /* Return a location suitable for dumps related to the SLP instance. */ |
| |
| dump_user_location_t |
| _slp_instance::location () const |
| { |
| if (!root_stmts.is_empty ()) |
| return root_stmts[0]->stmt; |
| else |
| return SLP_TREE_SCALAR_STMTS (root)[0]->stmt; |
| } |
| |
| |
| /* Free the memory allocated for the SLP instance. */ |
| |
| void |
| vect_free_slp_instance (slp_instance instance) |
| { |
| vect_free_slp_tree (SLP_INSTANCE_TREE (instance)); |
| SLP_INSTANCE_LOADS (instance).release (); |
| SLP_INSTANCE_ROOT_STMTS (instance).release (); |
| instance->subgraph_entries.release (); |
| instance->cost_vec.release (); |
| free (instance); |
| } |
| |
| |
| /* Create an SLP node with NOPS children and operation code CODE. */ |
| |
| slp_tree |
| vect_create_new_slp_node (unsigned nops, tree_code code) |
| { |
| slp_tree node = new _slp_tree; |
| SLP_TREE_SCALAR_STMTS (node) = vNULL; |
| SLP_TREE_CHILDREN (node).create (nops); |
| SLP_TREE_DEF_TYPE (node) = vect_internal_def; |
| SLP_TREE_CODE (node) = code; |
| return node; |
| } |
| |
| /* Initialize NODE as an SLP node for SCALAR_STMTS with NOPS children. */ |
| |
| static slp_tree |
| vect_create_new_slp_node (slp_tree node, |
| vec<stmt_vec_info> scalar_stmts, unsigned nops) |
| { |
| SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; |
| SLP_TREE_CHILDREN (node).create (nops); |
| SLP_TREE_DEF_TYPE (node) = vect_internal_def; |
| SLP_TREE_REPRESENTATIVE (node) = scalar_stmts[0]; |
| SLP_TREE_LANES (node) = scalar_stmts.length (); |
| return node; |
| } |
| |
| /* Create an SLP node for SCALAR_STMTS. */ |
| |
| static slp_tree |
| vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts, unsigned nops) |
| { |
| return vect_create_new_slp_node (new _slp_tree, scalar_stmts, nops); |
| } |
| |
| /* Initialize NODE as an external SLP node for OPS. */ |
| |
| static slp_tree |
| vect_create_new_slp_node (slp_tree node, vec<tree> ops) |
| { |
| SLP_TREE_SCALAR_OPS (node) = ops; |
| SLP_TREE_DEF_TYPE (node) = vect_external_def; |
| SLP_TREE_LANES (node) = ops.length (); |
| return node; |
| } |
| |
| /* Create an SLP node for OPS. */ |
| |
| static slp_tree |
| vect_create_new_slp_node (vec<tree> ops) |
| { |
| return vect_create_new_slp_node (new _slp_tree, ops); |
| } |
| |
| |
| /* This structure is used in creation of an SLP tree. Each instance |
| corresponds to the same operand in a group of scalar stmts in an SLP |
| node. */ |
| typedef struct _slp_oprnd_info |
| { |
| /* Def-stmts for the operands. */ |
| vec<stmt_vec_info> def_stmts; |
| /* Operands. */ |
| vec<tree> ops; |
| /* Information about the defs in the first statement: the vector def-type, |
| the operand type, and whether any def comes from a pattern stmt. */ |
| tree first_op_type; |
| enum vect_def_type first_dt; |
| bool any_pattern; |
| } *slp_oprnd_info; |
| |
| |
| /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each |
| operand. */ |
| static vec<slp_oprnd_info> |
| vect_create_oprnd_info (int nops, int group_size) |
| { |
| int i; |
| slp_oprnd_info oprnd_info; |
| vec<slp_oprnd_info> oprnds_info; |
| |
| oprnds_info.create (nops); |
| for (i = 0; i < nops; i++) |
| { |
| oprnd_info = XNEW (struct _slp_oprnd_info); |
| oprnd_info->def_stmts.create (group_size); |
| oprnd_info->ops.create (group_size); |
| oprnd_info->first_dt = vect_uninitialized_def; |
| oprnd_info->first_op_type = NULL_TREE; |
| oprnd_info->any_pattern = false; |
| oprnds_info.quick_push (oprnd_info); |
| } |
| |
| return oprnds_info; |
| } |
| |
| |
| /* Free operands info. */ |
| |
| static void |
| vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info) |
| { |
| int i; |
| slp_oprnd_info oprnd_info; |
| |
| FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) |
| { |
| oprnd_info->def_stmts.release (); |
| oprnd_info->ops.release (); |
| XDELETE (oprnd_info); |
| } |
| |
| oprnds_info.release (); |
| } |
| |
| |
| /* Return true if STMTS contains a pattern statement. */ |
| |
| static bool |
| vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts) |
| { |
| stmt_vec_info stmt_info; |
| unsigned int i; |
| FOR_EACH_VEC_ELT (stmts, i, stmt_info) |
| if (is_pattern_stmt_p (stmt_info)) |
| return true; |
| return false; |
| } |
| |
| /* Return true when all lanes in the external or constant NODE have |
| the same value. */ |
| |
| static bool |
| vect_slp_tree_uniform_p (slp_tree node) |
| { |
| gcc_assert (SLP_TREE_DEF_TYPE (node) == vect_constant_def |
| || SLP_TREE_DEF_TYPE (node) == vect_external_def); |
| |
| /* Pre-existing vectors. */ |
| if (SLP_TREE_SCALAR_OPS (node).is_empty ()) |
| return false; |
| |
| unsigned i; |
| tree op, first = NULL_TREE; |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) |
| if (!first) |
| first = op; |
| else if (!operand_equal_p (first, op, 0)) |
| return false; |
| |
| return true; |
| } |
| |
| /* Find the place of the data-ref in STMT_INFO in the interleaving chain |
| that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part |
| of the chain. */ |
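| /* For example, given the chain of loads a[0], a[1], a[3] the returned |
| places are 0, 1 and 3; DR_GROUP_GAP of a non-first member is its |
| distance from the previous member. */ |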
| |
| int |
| vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info, |
| stmt_vec_info first_stmt_info) |
| { |
| stmt_vec_info next_stmt_info = first_stmt_info; |
| int result = 0; |
| |
| if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info)) |
| return -1; |
| |
| do |
| { |
| if (next_stmt_info == stmt_info) |
| return result; |
| next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
| if (next_stmt_info) |
| result += DR_GROUP_GAP (next_stmt_info); |
| } |
| while (next_stmt_info); |
| |
| return -1; |
| } |
| |
| /* Check whether it is possible to load COUNT elements of type ELT_TYPE |
| using the method implemented by duplicate_and_interleave. Return true |
| if so, returning the number of intermediate vectors in *NVECTORS_OUT |
| (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT |
| (if nonnull). */ |
| |
| bool |
| can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, |
| tree elt_type, unsigned int *nvectors_out, |
| tree *vector_type_out, |
| tree *permutes) |
| { |
| tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count); |
| if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type))) |
| return false; |
| |
| machine_mode base_vector_mode = TYPE_MODE (base_vector_type); |
| poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode); |
| unsigned int nvectors = 1; |
| for (;;) |
| { |
| scalar_int_mode int_mode; |
| poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT; |
| if (int_mode_for_size (elt_bits, 1).exists (&int_mode)) |
| { |
| /* Get the natural vector type for this SLP group size. */ |
| tree int_type = build_nonstandard_integer_type |
| (GET_MODE_BITSIZE (int_mode), 1); |
| tree vector_type |
| = get_vectype_for_scalar_type (vinfo, int_type, count); |
| if (vector_type |
| && VECTOR_MODE_P (TYPE_MODE (vector_type)) |
| && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)), |
| GET_MODE_SIZE (base_vector_mode))) |
| { |
| /* Try fusing consecutive sequences of COUNT / NVECTORS elements |
| together into elements of type INT_TYPE and using the result |
| to build NVECTORS vectors. */ |
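| /* For example, with NELTS == 4 the stepped encodings below |
| expand to SEL1 == { 0, 4, 1, 5 } and SEL2 == { 2, 6, 3, 7 }, |
| i.e. the interleave-low and interleave-high permutations of |
| the two input vectors. */ |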
| poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type)); |
| vec_perm_builder sel1 (nelts, 2, 3); |
| vec_perm_builder sel2 (nelts, 2, 3); |
| poly_int64 half_nelts = exact_div (nelts, 2); |
| for (unsigned int i = 0; i < 3; ++i) |
| { |
| sel1.quick_push (i); |
| sel1.quick_push (i + nelts); |
| sel2.quick_push (half_nelts + i); |
| sel2.quick_push (half_nelts + i + nelts); |
| } |
| vec_perm_indices indices1 (sel1, 2, nelts); |
| vec_perm_indices indices2 (sel2, 2, nelts); |
| if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1) |
| && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2)) |
| { |
| if (nvectors_out) |
| *nvectors_out = nvectors; |
| if (vector_type_out) |
| *vector_type_out = vector_type; |
| if (permutes) |
| { |
| permutes[0] = vect_gen_perm_mask_checked (vector_type, |
| indices1); |
| permutes[1] = vect_gen_perm_mask_checked (vector_type, |
| indices2); |
| } |
| return true; |
| } |
| } |
| } |
| if (!multiple_p (elt_bytes, 2, &elt_bytes)) |
| return false; |
| nvectors *= 2; |
| } |
| } |
| |
| /* Return true if DTA and DTB match, treating external and constant |
| defs as interchangeable. */ |
| |
| static bool |
| vect_def_types_match (enum vect_def_type dta, enum vect_def_type dtb) |
| { |
| return (dta == dtb |
| || ((dta == vect_external_def || dta == vect_constant_def) |
| && (dtb == vect_external_def || dtb == vect_constant_def))); |
| } |
| |
| /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that |
| they are of a valid type and that they match the defs of the first stmt of |
| the SLP group (stored in OPRNDS_INFO). This function tries to match stmts |
| by swapping operands of STMTS[STMT_NUM] when possible. A non-zero SWAP |
| indicates that a swap is required for cond_expr stmts: SWAP is 1 if STMT |
| is a cond_expr and the operands of its comparison need to be swapped; |
| SWAP is 2 if STMT is a cond_expr and the code of its comparison needs to |
| be inverted. |
| If there was a fatal error return -1; if the error could be corrected by |
| swapping operands of this node's parent, return 1; if everything is |
| ok return 0. */ |
| static int |
| vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char swap, |
| bool *skip_args, |
| vec<stmt_vec_info> stmts, unsigned stmt_num, |
| vec<slp_oprnd_info> *oprnds_info) |
| { |
| stmt_vec_info stmt_info = stmts[stmt_num]; |
| tree oprnd; |
| unsigned int i, number_of_oprnds; |
| enum vect_def_type dt = vect_uninitialized_def; |
| slp_oprnd_info oprnd_info; |
| int first_op_idx = 1; |
| unsigned int commutative_op = -1U; |
| bool first_op_cond = false; |
| bool first = stmt_num == 0; |
| |
| if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) |
| { |
| number_of_oprnds = gimple_call_num_args (stmt); |
| first_op_idx = 3; |
| if (gimple_call_internal_p (stmt)) |
| { |
| internal_fn ifn = gimple_call_internal_fn (stmt); |
| commutative_op = first_commutative_argument (ifn); |
| |
| /* Masked load, only look at mask. */ |
| if (ifn == IFN_MASK_LOAD) |
| { |
| number_of_oprnds = 1; |
| /* Mask operand index. */ |
| first_op_idx = 5; |
| } |
| } |
| } |
| else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt)) |
| { |
| enum tree_code code = gimple_assign_rhs_code (stmt); |
| number_of_oprnds = gimple_num_ops (stmt) - 1; |
| /* Swap can only be done for cond_expr if asked to, otherwise we |
| could end up with a different comparison code from the first stmt. */ |
| if (code == COND_EXPR |
| && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt))) |
| { |
| first_op_cond = true; |
| number_of_oprnds++; |
| } |
| else |
| commutative_op = commutative_tree_code (code) ? 0U : -1U; |
| } |
| else if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt)) |
| number_of_oprnds = gimple_phi_num_args (stmt); |
| else |
| return -1; |
| |
| bool swapped = (swap != 0); |
| bool backedge = false; |
| gcc_assert (!swapped || first_op_cond); |
| enum vect_def_type *dts = XALLOCAVEC (enum vect_def_type, number_of_oprnds); |
| for (i = 0; i < number_of_oprnds; i++) |
| { |
| if (first_op_cond) |
| { |
| /* Map indicating how operands of cond_expr should be swapped. */ |
| int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}}; |
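| /* SWAP == 1 uses map 1 which exchanges the two comparison |
| operands; SWAP == 2 uses map 2 which exchanges the then/else |
| arms, matching an inverted comparison code. */ |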
| int *map = maps[swap]; |
| |
| if (i < 2) |
| oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt, |
| first_op_idx), map[i]); |
| else |
| oprnd = gimple_op (stmt_info->stmt, map[i]); |
| } |
| else if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt)) |
| { |
| oprnd = gimple_phi_arg_def (stmt, i); |
| backedge = dominated_by_p (CDI_DOMINATORS, |
| gimple_phi_arg_edge (stmt, i)->src, |
| gimple_bb (stmt_info->stmt)); |
| } |
| else |
| oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i)); |
| if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR) |
| oprnd = TREE_OPERAND (oprnd, 0); |
| |
| oprnd_info = (*oprnds_info)[i]; |
| |
| stmt_vec_info def_stmt_info; |
| if (!vect_is_simple_use (oprnd, vinfo, &dts[i], &def_stmt_info)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: can't analyze def for %T\n", |
| oprnd); |
| |
| return -1; |
| } |
| |
| if (skip_args[i]) |
| { |
| oprnd_info->def_stmts.quick_push (NULL); |
| oprnd_info->ops.quick_push (NULL_TREE); |
| oprnd_info->first_dt = vect_uninitialized_def; |
| continue; |
| } |
| |
| oprnd_info->def_stmts.quick_push (def_stmt_info); |
| oprnd_info->ops.quick_push (oprnd); |
| |
| if (def_stmt_info |
| && is_pattern_stmt_p (def_stmt_info)) |
| { |
| if (STMT_VINFO_RELATED_STMT (vect_orig_stmt (def_stmt_info)) |
| != def_stmt_info) |
| oprnd_info->any_pattern = true; |
| else |
| /* If we promote this to external use the original stmt def. */ |
| oprnd_info->ops.last () |
| = gimple_get_lhs (vect_orig_stmt (def_stmt_info)->stmt); |
| } |
| |
| /* If there's an extern def on a backedge make sure we can |
| code-generate at the region start. |
| ??? This is another case that could be fixed by adjusting |
| how we split the function but at the moment we'd have conflicting |
| goals there. */ |
| if (backedge |
| && dts[i] == vect_external_def |
| && is_a <bb_vec_info> (vinfo) |
| && TREE_CODE (oprnd) == SSA_NAME |
| && !SSA_NAME_IS_DEFAULT_DEF (oprnd) |
| && !dominated_by_p (CDI_DOMINATORS, |
| as_a <bb_vec_info> (vinfo)->bbs[0], |
| gimple_bb (SSA_NAME_DEF_STMT (oprnd)))) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: extern def %T only defined " |
| "on backedge\n", oprnd); |
| return -1; |
| } |
| |
| if (first) |
| { |
| tree type = TREE_TYPE (oprnd); |
| dt = dts[i]; |
| if ((dt == vect_constant_def |
| || dt == vect_external_def) |
| && !GET_MODE_SIZE (vinfo->vector_mode).is_constant () |
| && (TREE_CODE (type) == BOOLEAN_TYPE |
| || !can_duplicate_and_interleave_p (vinfo, stmts.length (), |
| type))) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: invalid type of def " |
| "for variable-length SLP %T\n", oprnd); |
| return -1; |
| } |
| |
| /* For the swapping logic below force vect_reduction_def |
| for the reduction op in a SLP reduction group. */ |
| if (!STMT_VINFO_DATA_REF (stmt_info) |
| && REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
| && (int)i == STMT_VINFO_REDUC_IDX (stmt_info) |
| && def_stmt_info) |
| dts[i] = dt = vect_reduction_def; |
| |
| /* Check the types of the definition. */ |
| switch (dt) |
| { |
| case vect_external_def: |
| case vect_constant_def: |
| case vect_internal_def: |
| case vect_reduction_def: |
| case vect_induction_def: |
| case vect_nested_cycle: |
| break; |
| |
| default: |
| /* FORNOW: Not supported. */ |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: illegal type of def %T\n", |
| oprnd); |
| return -1; |
| } |
| |
| oprnd_info->first_dt = dt; |
| oprnd_info->first_op_type = type; |
| } |
| } |
| if (first) |
| return 0; |
| |
| /* Now match the operand definition types to that of the first stmt. */ |
| for (i = 0; i < number_of_oprnds;) |
| { |
| if (skip_args[i]) |
| { |
| ++i; |
| continue; |
| } |
| |
| oprnd_info = (*oprnds_info)[i]; |
| dt = dts[i]; |
| stmt_vec_info def_stmt_info = oprnd_info->def_stmts[stmt_num]; |
| oprnd = oprnd_info->ops[stmt_num]; |
| tree type = TREE_TYPE (oprnd); |
| |
| if (!types_compatible_p (oprnd_info->first_op_type, type)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different operand types\n"); |
| return 1; |
| } |
| |
| /* Not the first stmt of the group: check that the def-stmt/s match |
| the def-stmt/s of the first stmt. Allow different definition |
| types for reduction chains: the first stmt must be a |
| vect_reduction_def (a phi node), and the rest must belong to |
| the same reduction chain. */ |
| if ((!vect_def_types_match (oprnd_info->first_dt, dt) |
| && !(oprnd_info->first_dt == vect_reduction_def |
| && !STMT_VINFO_DATA_REF (stmt_info) |
| && REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
| && def_stmt_info |
| && !STMT_VINFO_DATA_REF (def_stmt_info) |
| && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info) |
| == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))) |
| || (!STMT_VINFO_DATA_REF (stmt_info) |
| && REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
| && ((!def_stmt_info |
| || STMT_VINFO_DATA_REF (def_stmt_info) |
| || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info) |
| != REDUC_GROUP_FIRST_ELEMENT (stmt_info))) |
| != (oprnd_info->first_dt != vect_reduction_def)))) |
| { |
| /* Try swapping operands if we got a mismatch. For BB |
| vectorization only in case it will clearly improve things. */ |
| if (i == commutative_op && !swapped |
| && (!is_a <bb_vec_info> (vinfo) |
| || (!vect_def_types_match ((*oprnds_info)[i+1]->first_dt, |
| dts[i+1]) |
| && (vect_def_types_match (oprnd_info->first_dt, dts[i+1]) |
| || vect_def_types_match |
| ((*oprnds_info)[i+1]->first_dt, dts[i]))))) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "trying swapped operands\n"); |
| std::swap (dts[i], dts[i+1]); |
| std::swap ((*oprnds_info)[i]->def_stmts[stmt_num], |
| (*oprnds_info)[i+1]->def_stmts[stmt_num]); |
| std::swap ((*oprnds_info)[i]->ops[stmt_num], |
| (*oprnds_info)[i+1]->ops[stmt_num]); |
| swapped = true; |
| continue; |
| } |
| |
| if (is_a <bb_vec_info> (vinfo) |
| && !oprnd_info->any_pattern) |
| { |
| /* Now for commutative ops we should see whether we can |
| make the other operand match. */ |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "treating operand as external\n"); |
| oprnd_info->first_dt = dt = vect_external_def; |
| } |
| else |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different types\n"); |
| return 1; |
| } |
| } |
| |
| /* Make sure to demote the overall operand to external. */ |
| if (dt == vect_external_def) |
| oprnd_info->first_dt = vect_external_def; |
| /* For a SLP reduction chain we want to duplicate the reduction to |
| each of the chain members. That gets us a sane SLP graph (still |
| the stmts are not 100% correct wrt the initial values). */ |
| else if ((dt == vect_internal_def |
| || dt == vect_reduction_def) |
| && oprnd_info->first_dt == vect_reduction_def |
| && !STMT_VINFO_DATA_REF (stmt_info) |
| && REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
| && !STMT_VINFO_DATA_REF (def_stmt_info) |
| && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info) |
| == REDUC_GROUP_FIRST_ELEMENT (stmt_info))) |
| { |
| oprnd_info->def_stmts[stmt_num] = oprnd_info->def_stmts[0]; |
| oprnd_info->ops[stmt_num] = oprnd_info->ops[0]; |
| } |
| |
| ++i; |
| } |
| |
| /* If operands were swapped, note it in the dump. */ |
| if (swapped) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "swapped operands to match def types in %G", |
| stmt_info->stmt); |
| } |
| |
| return 0; |
| } |
| |
| /* Return true if call statements CALL1 and CALL2 are similar enough |
| to be combined into the same SLP group. */ |
| |
| static bool |
| compatible_calls_p (gcall *call1, gcall *call2) |
| { |
| unsigned int nargs = gimple_call_num_args (call1); |
| if (nargs != gimple_call_num_args (call2)) |
| return false; |
| |
| if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2)) |
| return false; |
| |
| if (gimple_call_internal_p (call1)) |
| { |
| if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)), |
| TREE_TYPE (gimple_call_lhs (call2)))) |
| return false; |
| for (unsigned int i = 0; i < nargs; ++i) |
| if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)), |
| TREE_TYPE (gimple_call_arg (call2, i)))) |
| return false; |
| } |
| else |
| { |
| if (!operand_equal_p (gimple_call_fn (call1), |
| gimple_call_fn (call2), 0)) |
| return false; |
| |
| if (gimple_call_fntype (call1) != gimple_call_fntype (call2)) |
| return false; |
| } |
| return true; |
| } |
| |
| /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the |
| caller's attempt to find the vector type in STMT_INFO with the narrowest |
| element type. Return true if VECTYPE is nonnull and if it is valid |
| for STMT_INFO. When returning true, update MAX_NUNITS to reflect the |
| number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for |
| vect_build_slp_tree. */ |
| |
| static bool |
| vect_record_max_nunits (vec_info *vinfo, stmt_vec_info stmt_info, |
| unsigned int group_size, |
| tree vectype, poly_uint64 *max_nunits) |
| { |
| if (!vectype) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: unsupported data-type in %G\n", |
| stmt_info->stmt); |
| /* Fatal mismatch. */ |
| return false; |
| } |
| |
| /* If populating the vector type requires unrolling then fail |
| before adjusting *max_nunits for basic-block vectorization. */ |
| if (is_a <bb_vec_info> (vinfo) |
| && !multiple_p (group_size, TYPE_VECTOR_SUBPARTS (vectype))) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: unrolling required " |
| "in basic block SLP\n"); |
| /* Fatal mismatch. */ |
| return false; |
| } |
| |
| /* In case of multiple types we need to detect the smallest type. */ |
| vect_update_max_nunits (max_nunits, vectype); |
| return true; |
| } |
| |
| /* Verify that the scalar stmts STMTS are isomorphic, do not require any |
| data permutation and are not of unsupported types of operation. Return |
| true if so, otherwise return false and indicate in *MATCHES |
| which stmts are not isomorphic to the first one. If MATCHES[0] |
| is false then this indicates the comparison could not be |
| carried out or the stmts will never be vectorized by SLP. |
| |
| Note COND_EXPR is possibly isomorphic to another one after swapping its |
| operands. Set SWAP[i] to 1 if stmt I is a COND_EXPR and isomorphic to |
| the first stmt by swapping the two operands of its comparison; set SWAP[i] |
| to 2 if stmt I is isomorphic to the first stmt by inverting the code |
| of the comparison. Take A1 >= B1 ? X1 : Y1 as an example: it can be |
| swapped to B1 <= A1 ? X1 : Y1, or inverted to A1 < B1 ? Y1 : X1. */ |
| |
| static bool |
| vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, |
| vec<stmt_vec_info> stmts, unsigned int group_size, |
| poly_uint64 *max_nunits, bool *matches, |
| bool *two_operators, tree *node_vectype) |
| { |
| unsigned int i; |
| stmt_vec_info first_stmt_info = stmts[0]; |
| enum tree_code first_stmt_code = ERROR_MARK; |
| enum tree_code alt_stmt_code = ERROR_MARK; |
| enum tree_code rhs_code = ERROR_MARK; |
| enum tree_code first_cond_code = ERROR_MARK; |
| tree lhs; |
| bool need_same_oprnds = false; |
| tree vectype = NULL_TREE, first_op1 = NULL_TREE; |
| optab optab; |
| int icode; |
| machine_mode optab_op2_mode; |
| machine_mode vec_mode; |
| stmt_vec_info first_load = NULL, prev_first_load = NULL; |
| bool first_stmt_load_p = false, load_p = false; |
| bool first_stmt_phi_p = false, phi_p = false; |
| bool maybe_soft_fail = false; |
| tree soft_fail_nunits_vectype = NULL_TREE; |
| |
| /* For every stmt in NODE find its def stmt/s. */ |
| stmt_vec_info stmt_info; |
| FOR_EACH_VEC_ELT (stmts, i, stmt_info) |
| { |
| gimple *stmt = stmt_info->stmt; |
| swap[i] = 0; |
| matches[i] = false; |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt); |
| |
| /* Fail to vectorize statements marked as unvectorizable, statements |
| that may throw or that have volatile operands. */ |
| if (!STMT_VINFO_VECTORIZABLE (stmt_info) |
| || stmt_can_throw_internal (cfun, stmt) |
| || gimple_has_volatile_ops (stmt)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: unvectorizable statement %G", |
| stmt); |
| /* ??? For BB vectorization we want to commutate operands in a way |
| to shuffle all unvectorizable defs into one operand and have |
| the other still vectorized. The following doesn't reliably |
| achieve that, but it's the easiest we can do here. */ |
| if (is_a <bb_vec_info> (vinfo) && i != 0) |
| continue; |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| |
| lhs = gimple_get_lhs (stmt); |
| if (lhs == NULL_TREE) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: not GIMPLE_ASSIGN nor " |
| "GIMPLE_CALL %G", stmt); |
| if (is_a <bb_vec_info> (vinfo) && i != 0) |
| continue; |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| |
| tree nunits_vectype; |
| if (!vect_get_vector_types_for_stmt (vinfo, stmt_info, &vectype, |
| &nunits_vectype, group_size)) |
| { |
| if (is_a <bb_vec_info> (vinfo) && i != 0) |
| continue; |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| /* Record the required nunits but continue the analysis, producing |
| matches[] as if nunits were not an issue. This allows splitting |
| of groups to happen. */ |
| if (nunits_vectype |
| && !vect_record_max_nunits (vinfo, stmt_info, group_size, |
| nunits_vectype, max_nunits)) |
| { |
| gcc_assert (is_a <bb_vec_info> (vinfo)); |
| maybe_soft_fail = true; |
| soft_fail_nunits_vectype = nunits_vectype; |
| } |
| |
| gcc_assert (vectype); |
| |
| gcall *call_stmt = dyn_cast <gcall *> (stmt); |
| if (call_stmt) |
| { |
| rhs_code = CALL_EXPR; |
| |
| if (gimple_call_internal_p (stmt, IFN_MASK_LOAD)) |
| load_p = true; |
| else if ((gimple_call_internal_p (call_stmt) |
| && (!vectorizable_internal_fn_p |
| (gimple_call_internal_fn (call_stmt)))) |
| || gimple_call_tail_p (call_stmt) |
| || gimple_call_noreturn_p (call_stmt) |
| || !gimple_call_nothrow_p (call_stmt) |
| || gimple_call_chain (call_stmt)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: unsupported call type %G", |
| call_stmt); |
| if (is_a <bb_vec_info> (vinfo) && i != 0) |
| continue; |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| } |
| else if (gimple_code (stmt) == GIMPLE_PHI) |
| { |
| rhs_code = ERROR_MARK; |
| phi_p = true; |
| } |
| else |
| { |
| rhs_code = gimple_assign_rhs_code (stmt); |
| load_p = gimple_vuse (stmt); |
| } |
| |
| /* Check the operation. */ |
| if (i == 0) |
| { |
| *node_vectype = vectype; |
| first_stmt_code = rhs_code; |
| first_stmt_load_p = load_p; |
| first_stmt_phi_p = phi_p; |
| |
| /* Shift arguments should be equal in all the packed stmts for a |
| vector shift with scalar shift operand. */ |
| if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR |
| || rhs_code == LROTATE_EXPR |
| || rhs_code == RROTATE_EXPR) |
| { |
| vec_mode = TYPE_MODE (vectype); |
| |
| /* First see if we have a vector/vector shift. */ |
| optab = optab_for_tree_code (rhs_code, vectype, |
| optab_vector); |
| |
| if (!optab |
| || optab_handler (optab, vec_mode) == CODE_FOR_nothing) |
| { |
| /* No vector/vector shift, try for a vector/scalar shift. */ |
| optab = optab_for_tree_code (rhs_code, vectype, |
| optab_scalar); |
| |
| if (!optab) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: no optab.\n"); |
| if (is_a <bb_vec_info> (vinfo) && i != 0) |
| continue; |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| icode = (int) optab_handler (optab, vec_mode); |
| if (icode == CODE_FOR_nothing) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: " |
| "op not supported by target.\n"); |
| if (is_a <bb_vec_info> (vinfo) && i != 0) |
| continue; |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| optab_op2_mode = insn_data[icode].operand[2].mode; |
| if (!VECTOR_MODE_P (optab_op2_mode)) |
| { |
| need_same_oprnds = true; |
| first_op1 = gimple_assign_rhs2 (stmt); |
| } |
| } |
| } |
| else if (rhs_code == WIDEN_LSHIFT_EXPR) |
| { |
| need_same_oprnds = true; |
| first_op1 = gimple_assign_rhs2 (stmt); |
| } |
| else if (!load_p |
| && rhs_code == BIT_FIELD_REF) |
| { |
| tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt), 0); |
| if (!is_a <bb_vec_info> (vinfo) |
| || TREE_CODE (vec) != SSA_NAME |
| || !operand_equal_p (TYPE_SIZE (vectype), |
| TYPE_SIZE (TREE_TYPE (vec)))) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: " |
| "BIT_FIELD_REF not supported\n"); |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| } |
| else if (call_stmt |
| && gimple_call_internal_p (call_stmt, IFN_DIV_POW2)) |
| { |
| need_same_oprnds = true; |
| first_op1 = gimple_call_arg (call_stmt, 1); |
| } |
| } |
| else |
| { |
| if (first_stmt_code != rhs_code |
| && alt_stmt_code == ERROR_MARK) |
| alt_stmt_code = rhs_code; |
| if ((first_stmt_code != rhs_code |
| && (first_stmt_code != IMAGPART_EXPR |
| || rhs_code != REALPART_EXPR) |
| && (first_stmt_code != REALPART_EXPR |
| || rhs_code != IMAGPART_EXPR) |
| /* Handle mismatches in plus/minus by computing both |
| and merging the results. */ |
| && !((first_stmt_code == PLUS_EXPR |
| || first_stmt_code == MINUS_EXPR) |
| && (alt_stmt_code == PLUS_EXPR |
| || alt_stmt_code == MINUS_EXPR) |
| && rhs_code == alt_stmt_code) |
| && !(STMT_VINFO_GROUPED_ACCESS (stmt_info) |
| && (first_stmt_code == ARRAY_REF |
| || first_stmt_code == BIT_FIELD_REF |
| || first_stmt_code == INDIRECT_REF |
| || first_stmt_code == COMPONENT_REF |
| || first_stmt_code == MEM_REF))) |
| || first_stmt_load_p != load_p |
| || first_stmt_phi_p != phi_p) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different operation " |
| "in stmt %G", stmt); |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "original stmt %G", first_stmt_info->stmt); |
| } |
| /* Mismatch. */ |
| continue; |
| } |
| |
| if (!load_p |
| && first_stmt_code == BIT_FIELD_REF |
| && (TREE_OPERAND (gimple_assign_rhs1 (first_stmt_info->stmt), 0) |
| != TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0))) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different BIT_FIELD_REF " |
| "arguments in %G", stmt); |
| /* Mismatch. */ |
| continue; |
| } |
| |
| if (!load_p && rhs_code == CALL_EXPR) |
| { |
| if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt), |
| as_a <gcall *> (stmt))) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different calls in %G", |
| stmt); |
| /* Mismatch. */ |
| continue; |
| } |
| } |
| |
| if ((phi_p || gimple_could_trap_p (stmt_info->stmt)) |
| && (gimple_bb (first_stmt_info->stmt) |
| != gimple_bb (stmt_info->stmt))) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different BB for PHI " |
| "or possibly trapping operation in %G", stmt); |
| /* Mismatch. */ |
| continue; |
| } |
| |
| if (need_same_oprnds) |
| { |
| tree other_op1 = (call_stmt |
| ? gimple_call_arg (call_stmt, 1) |
| : gimple_assign_rhs2 (stmt)); |
| if (!operand_equal_p (first_op1, other_op1, 0)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different shift " |
| "arguments in %G", stmt); |
| /* Mismatch. */ |
| continue; |
| } |
| } |
| |
| if (!types_compatible_p (vectype, *node_vectype)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different vector type " |
| "in %G", stmt); |
| /* Mismatch. */ |
| continue; |
| } |
| } |
| |
| /* Grouped store or load. */ |
| if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
| { |
| if (REFERENCE_CLASS_P (lhs)) |
| { |
| /* Store. */ |
| ; |
| } |
| else |
| { |
| /* Load. */ |
| first_load = DR_GROUP_FIRST_ELEMENT (stmt_info); |
| if (prev_first_load) |
| { |
| /* Check that there are no loads from different interleaving |
| chains in the same node. */ |
| if (prev_first_load != first_load) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
| vect_location, |
| "Build SLP failed: different " |
| "interleaving chains in one node %G", |
| stmt); |
| /* Mismatch. */ |
| continue; |
| } |
| } |
| else |
| prev_first_load = first_load; |
| } |
| } /* Grouped access. */ |
| else |
| { |
| if (load_p) |
| { |
| /* Not a grouped load. */ |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: not grouped load %G", stmt); |
| |
| /* FORNOW: Non-grouped loads are not supported. */ |
| if (is_a <bb_vec_info> (vinfo) && i != 0) |
| continue; |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| |
| /* Not a memory operation. */ |
| if (!phi_p |
| && TREE_CODE_CLASS (rhs_code) != tcc_binary |
| && TREE_CODE_CLASS (rhs_code) != tcc_unary |
| && TREE_CODE_CLASS (rhs_code) != tcc_expression |
| && TREE_CODE_CLASS (rhs_code) != tcc_comparison |
| && rhs_code != VIEW_CONVERT_EXPR |
| && rhs_code != CALL_EXPR |
| && rhs_code != BIT_FIELD_REF) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: operation unsupported %G", |
| stmt); |
| if (is_a <bb_vec_info> (vinfo) && i != 0) |
| continue; |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| |
| if (rhs_code == COND_EXPR) |
| { |
| tree cond_expr = gimple_assign_rhs1 (stmt); |
| enum tree_code cond_code = TREE_CODE (cond_expr); |
| enum tree_code swap_code = ERROR_MARK; |
| enum tree_code invert_code = ERROR_MARK; |
| |
| if (i == 0) |
| first_cond_code = TREE_CODE (cond_expr); |
| else if (TREE_CODE_CLASS (cond_code) == tcc_comparison) |
| { |
| bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0)); |
| swap_code = swap_tree_comparison (cond_code); |
| invert_code = invert_tree_comparison (cond_code, honor_nans); |
| } |
| |
| if (first_cond_code == cond_code) |
| ; |
| /* Isomorphic can be achieved by swapping. */ |
| else if (first_cond_code == swap_code) |
| swap[i] = 1; |
| /* Isomorphic can be achieved by inverting. */ |
| else if (first_cond_code == invert_code) |
| swap[i] = 2; |
| else |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different" |
| " operation %G", stmt); |
| /* Mismatch. */ |
| continue; |
| } |
| } |
| } |
| |
| matches[i] = true; |
| } |
| |
| for (i = 0; i < group_size; ++i) |
| if (!matches[i]) |
| return false; |
| |
| /* If we allowed a two-operator SLP node, record that fact; the blend |
| permutation this requires is verified later. */ |
| if (alt_stmt_code != ERROR_MARK |
| && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) |
| *two_operators = true; |
| |
| if (maybe_soft_fail) |
| { |
| unsigned HOST_WIDE_INT const_nunits; |
| if (!TYPE_VECTOR_SUBPARTS |
| (soft_fail_nunits_vectype).is_constant (&const_nunits) |
| || const_nunits > group_size) |
| matches[0] = false; |
| else |
| { |
| /* With a constant number of vector elements simulate a mismatch |
| at the point we need to split. */ |
| unsigned tail = group_size & (const_nunits - 1); |
| memset (&matches[group_size - tail], 0, sizeof (bool) * tail); |
| } |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* Traits for the hash_set to record failed SLP builds for a stmt set. |
| Note we never remove entries apart from at destruction time, so we do |
| not need a special value for deleted that differs from empty. */ |
| struct bst_traits |
| { |
| typedef vec <stmt_vec_info> value_type; |
| typedef vec <stmt_vec_info> compare_type; |
| static inline hashval_t hash (value_type); |
| static inline bool equal (value_type existing, value_type candidate); |
| static inline bool is_empty (value_type x) { return !x.exists (); } |
| static inline bool is_deleted (value_type x) { return !x.exists (); } |
| static const bool empty_zero_p = true; |
| static inline void mark_empty (value_type &x) { x.release (); } |
| static inline void mark_deleted (value_type &x) { x.release (); } |
| static inline void remove (value_type &x) { x.release (); } |
| }; |
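| /* Hash a group of stmts by combining the UIDs of its members. */ |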
| inline hashval_t |
| bst_traits::hash (value_type x) |
| { |
| inchash::hash h; |
| for (unsigned i = 0; i < x.length (); ++i) |
| h.add_int (gimple_uid (x[i]->stmt)); |
| return h.end (); |
| } |
| inline bool |
| bst_traits::equal (value_type existing, value_type candidate) |
| { |
| if (existing.length () != candidate.length ()) |
| return false; |
| for (unsigned i = 0; i < existing.length (); ++i) |
| if (existing[i] != candidate[i]) |
| return false; |
| return true; |
| } |
| |
| /* ??? This was std::pair<std::pair<tree_code, vect_def_type>, tree> |
| but then vec::insert does memmove and that's not compatible with |
| std::pair. */ |
| struct chain_op_t |
| { |
| chain_op_t (tree_code code_, vect_def_type dt_, tree op_) |
| : code (code_), dt (dt_), op (op_) {} |
| tree_code code; |
| vect_def_type dt; |
| tree op; |
| }; |
| |
| /* Comparator for sorting associatable chains. */ |
| |
| static int |
| dt_sort_cmp (const void *op1_, const void *op2_, void *) |
| { |
| auto *op1 = (const chain_op_t *) op1_; |
| auto *op2 = (const chain_op_t *) op2_; |
| if (op1->dt != op2->dt) |
| return (int)op1->dt - (int)op2->dt; |
| return (int)op1->code - (int)op2->code; |
| } |
| |
| /* Linearize the associatable expression chain at START with the |
| associatable operation CODE (where PLUS_EXPR also allows MINUS_EXPR), |
| filling CHAIN with the result and using WORKLIST as intermediate storage. |
| CODE_STMT and ALT_CODE_STMT are filled with the first stmt using CODE |
| or MINUS_EXPR. If CHAIN_STMTS is not NULL it is filled with all |
| computation stmts, starting with START. */ |
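| /* For example, assuming each intermediate result is used only once, |
| linearizing r = ((a - b) + c) - d with CODE == PLUS_EXPR fills CHAIN |
| with the leaf operands { +a, -b, +c, -d } (in some order). */ |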
| |
| static void |
| vect_slp_linearize_chain (vec_info *vinfo, |
| vec<std::pair<tree_code, gimple *> > &worklist, |
| vec<chain_op_t> &chain, |
| enum tree_code code, gimple *start, |
| gimple *&code_stmt, gimple *&alt_code_stmt, |
| vec<gimple *> *chain_stmts) |
| { |
| /* For each lane linearize the addition/subtraction (or other |
| uniform associatable operation) expression tree. */ |
| worklist.safe_push (std::make_pair (code, start)); |
| while (!worklist.is_empty ()) |
| { |
| auto entry = worklist.pop (); |
| gassign *stmt = as_a <gassign *> (entry.second); |
| enum tree_code in_code = entry.first; |
| enum tree_code this_code = gimple_assign_rhs_code (stmt); |
| /* Pick some stmts suitable for SLP_TREE_REPRESENTATIVE. */ |
| if (!code_stmt |
| && gimple_assign_rhs_code (stmt) == code) |
| code_stmt = stmt; |
| else if (!alt_code_stmt |
| && gimple_assign_rhs_code (stmt) == MINUS_EXPR) |
| alt_code_stmt = stmt; |
| if (chain_stmts) |
| chain_stmts->safe_push (stmt); |
| for (unsigned opnum = 1; opnum <= 2; ++opnum) |
| { |
| tree op = gimple_op (stmt, opnum); |
| vect_def_type dt; |
| stmt_vec_info def_stmt_info; |
| bool res = vect_is_simple_use (op, vinfo, &dt, &def_stmt_info); |
| gcc_assert (res); |
| if (dt == vect_internal_def |
| && is_pattern_stmt_p (def_stmt_info)) |
| op = gimple_get_lhs (def_stmt_info->stmt); |
| gimple *use_stmt; |
| use_operand_p use_p; |
| if (dt == vect_internal_def |
| && single_imm_use (op, &use_p, &use_stmt) |
| && is_gimple_assign (def_stmt_info->stmt) |
| && (gimple_assign_rhs_code (def_stmt_info->stmt) == code |
| || (code == PLUS_EXPR |
| && (gimple_assign_rhs_code (def_stmt_info->stmt) |
| == MINUS_EXPR)))) |
| { |
| tree_code op_def_code = this_code; |
| if (op_def_code == MINUS_EXPR && opnum == 1) |
| op_def_code = PLUS_EXPR; |
| if (in_code == MINUS_EXPR) |
| op_def_code = op_def_code == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR; |
| worklist.safe_push (std::make_pair (op_def_code, |
| def_stmt_info->stmt)); |
| } |
| else |
| { |
| tree_code op_def_code = this_code; |
| if (op_def_code == MINUS_EXPR && opnum == 1) |
| op_def_code = PLUS_EXPR; |
| if (in_code == MINUS_EXPR) |
| op_def_code = op_def_code == PLUS_EXPR ? MINUS_EXPR : PLUS_EXPR; |
| chain.safe_push (chain_op_t (op_def_code, dt, op)); |
| } |
| } |
| } |
| } |
| |
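| /* Map from a group of scalar stmts to the SLP node discovered for it, |
| or to a node recording that discovery failed. */ |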
| typedef hash_map <vec <stmt_vec_info>, slp_tree, |
| simple_hashmap_traits <bst_traits, slp_tree> > |
| scalar_stmts_to_slp_tree_map_t; |
| |
| static slp_tree |
| vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, |
| vec<stmt_vec_info> stmts, unsigned int group_size, |
| poly_uint64 *max_nunits, |
| bool *matches, unsigned *limit, unsigned *tree_size, |
| scalar_stmts_to_slp_tree_map_t *bst_map); |
| |
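| /* Build an SLP tree for the group of stmts STMTS, re-using a cached |
| discovery result (also a failed one) from BST_MAP where possible and |
| recording the new result there. The parameters are as for |
| vect_build_slp_tree_2. */ |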
| static slp_tree |
| vect_build_slp_tree (vec_info *vinfo, |
| vec<stmt_vec_info> stmts, unsigned int group_size, |
| poly_uint64 *max_nunits, |
| bool *matches, unsigned *limit, unsigned *tree_size, |
| scalar_stmts_to_slp_tree_map_t *bst_map) |
| { |
| if (slp_tree *leader = bst_map->get (stmts)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n", |
| !(*leader)->failed ? "" : "failed ", *leader); |
| if (!(*leader)->failed) |
| { |
| SLP_TREE_REF_COUNT (*leader)++; |
| vect_update_max_nunits (max_nunits, (*leader)->max_nunits); |
| stmts.release (); |
| return *leader; |
| } |
| memcpy (matches, (*leader)->failed, sizeof (bool) * group_size); |
| return NULL; |
| } |
| |
| /* Seed the bst_map with a stub node to be filled by vect_build_slp_tree_2 |
| so we can pick up backedge destinations during discovery. */ |
| slp_tree res = new _slp_tree; |
| SLP_TREE_DEF_TYPE (res) = vect_internal_def; |
| SLP_TREE_SCALAR_STMTS (res) = stmts; |
| bst_map->put (stmts.copy (), res); |
| |
| if (*limit == 0) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "SLP discovery limit exceeded\n"); |
| /* Mark the node invalid so we can detect those when still in use |
| as backedge destinations. */ |
| SLP_TREE_SCALAR_STMTS (res) = vNULL; |
| SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def; |
| res->failed = XNEWVEC (bool, group_size); |
| memset (res->failed, 0, sizeof (bool) * group_size); |
| memset (matches, 0, sizeof (bool) * group_size); |
| return NULL; |
| } |
| --*limit; |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "starting SLP discovery for node %p\n", res); |
| |
| poly_uint64 this_max_nunits = 1; |
| slp_tree res_ = vect_build_slp_tree_2 (vinfo, res, stmts, group_size, |
| &this_max_nunits, |
| matches, limit, tree_size, bst_map); |
| if (!res_) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "SLP discovery for node %p failed\n", res); |
| /* Mark the node invalid so we can detect those when still in use |
| as backedge destinations. */ |
| SLP_TREE_SCALAR_STMTS (res) = vNULL; |
| SLP_TREE_DEF_TYPE (res) = vect_uninitialized_def; |
| res->failed = XNEWVEC (bool, group_size); |
| if (flag_checking) |
| { |
| unsigned i; |
| for (i = 0; i < group_size; ++i) |
| if (!matches[i]) |
| break; |
| gcc_assert (i < group_size); |
| } |
| memcpy (res->failed, matches, sizeof (bool) * group_size); |
| } |
| else |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "SLP discovery for node %p succeeded\n", res); |
| gcc_assert (res_ == res); |
| res->max_nunits = this_max_nunits; |
| vect_update_max_nunits (max_nunits, this_max_nunits); |
| /* Keep a reference for the bst_map use. */ |
| SLP_TREE_REF_COUNT (res)++; |
| } |
| return res_; |
| } |
| |
| /* Helper for building a two-operator SLP node: create two children that |
| compute all lanes with OPER1 resp. OPER2 from OP0 and OP1, and make |
| PERM a VEC_PERM node blending their lanes according to LPERM. */ |
| |
| static void |
| vect_slp_build_two_operator_nodes (slp_tree perm, tree vectype, |
| slp_tree op0, slp_tree op1, |
| stmt_vec_info oper1, stmt_vec_info oper2, |
| vec<std::pair<unsigned, unsigned> > lperm) |
| { |
| unsigned group_size = SLP_TREE_LANES (op1); |
| |
| slp_tree child1 = new _slp_tree; |
| SLP_TREE_DEF_TYPE (child1) = vect_internal_def; |
| SLP_TREE_VECTYPE (child1) = vectype; |
| SLP_TREE_LANES (child1) = group_size; |
| SLP_TREE_CHILDREN (child1).create (2); |
| SLP_TREE_CHILDREN (child1).quick_push (op0); |
| SLP_TREE_CHILDREN (child1).quick_push (op1); |
| SLP_TREE_REPRESENTATIVE (child1) = oper1; |
| |
| slp_tree child2 = new _slp_tree; |
| SLP_TREE_DEF_TYPE (child2) = vect_internal_def; |
| SLP_TREE_VECTYPE (child2) = vectype; |
| SLP_TREE_LANES (child2) = group_size; |
| SLP_TREE_CHILDREN (child2).create (2); |
| SLP_TREE_CHILDREN (child2).quick_push (op0); |
| SLP_TREE_REF_COUNT (op0)++; |
| SLP_TREE_CHILDREN (child2).quick_push (op1); |
| SLP_TREE_REF_COUNT (op1)++; |
| SLP_TREE_REPRESENTATIVE (child2) = oper2; |
| |
| SLP_TREE_DEF_TYPE (perm) = vect_internal_def; |
| SLP_TREE_CODE (perm) = VEC_PERM_EXPR; |
| SLP_TREE_VECTYPE (perm) = vectype; |
| SLP_TREE_LANES (perm) = group_size; |
| /* ??? We should set this to NULL but that's not expected. */ |
| SLP_TREE_REPRESENTATIVE (perm) = oper1; |
| SLP_TREE_LANE_PERMUTATION (perm) = lperm; |
| SLP_TREE_CHILDREN (perm).quick_push (child1); |
| SLP_TREE_CHILDREN (perm).quick_push (child2); |
| } |
| |
| /* Recursively build an SLP tree for the group of stmts STMTS, filling |
| in the preallocated NODE. Fail and return NULL if the def-stmts are |
| not isomorphic, require data permutation or are of unsupported types |
| of operation, recording in MATCHES[] which stmts are not isomorphic |
| to the first one. Otherwise return the built SLP node. */ |
| |
| static slp_tree |
| vect_build_slp_tree_2 (vec_info *vinfo, slp_tree node, |
| vec<stmt_vec_info> stmts, unsigned int group_size, |
| poly_uint64 *max_nunits, |
| bool *matches, unsigned *limit, unsigned *tree_size, |
| scalar_stmts_to_slp_tree_map_t *bst_map) |
| { |
| unsigned nops, i, this_tree_size = 0; |
| poly_uint64 this_max_nunits = *max_nunits; |
| |
| matches[0] = false; |
| |
| stmt_vec_info stmt_info = stmts[0]; |
| if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) |
| nops = gimple_call_num_args (stmt); |
| else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt)) |
| { |
| nops = gimple_num_ops (stmt) - 1; |
| if (gimple_assign_rhs_code (stmt) == COND_EXPR) |
| nops++; |
| } |
| else if (gphi *phi = dyn_cast <gphi *> (stmt_info->stmt)) |
| nops = gimple_phi_num_args (phi); |
| else |
| return NULL; |
| |
| /* If the SLP node is a PHI (induction or reduction), terminate |
| the recursion. */ |
| bool *skip_args = XALLOCAVEC (bool, nops); |
| memset (skip_args, 0, sizeof (bool) * nops); |
| if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) |
| if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt)) |
| { |
| tree scalar_type = TREE_TYPE (PHI_RESULT (stmt)); |
| tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, |
| group_size); |
| if (!vect_record_max_nunits (vinfo, stmt_info, group_size, vectype, |
| max_nunits)) |
| return NULL; |
| |
| vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info); |
| if (def_type == vect_induction_def) |
| { |
| /* Induction PHIs are not cycles but walk the initial |
| value. Only for inner loops though; for outer loops |
| we need to pick up the value from the actual PHIs |
| to more easily support peeling and epilogue vectorization. */ |
| class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
| if (!nested_in_vect_loop_p (loop, stmt_info)) |
| skip_args[loop_preheader_edge (loop)->dest_idx] = true; |
| else |
| loop = loop->inner; |
| skip_args[loop_latch_edge (loop)->dest_idx] = true; |
| } |
| else if (def_type == vect_reduction_def |
| || def_type == vect_double_reduction_def |
| || def_type == vect_nested_cycle) |
| { |
| /* Else def types have to match. */ |
| stmt_vec_info other_info; |
| bool all_same = true; |
| FOR_EACH_VEC_ELT (stmts, i, other_info) |
| { |
| if (STMT_VINFO_DEF_TYPE (other_info) != def_type) |
| return NULL; |
| if (other_info != stmt_info) |
| all_same = false; |
| } |
| class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
| /* Reduction initial values are not explicitly represented. */ |
| if (!nested_in_vect_loop_p (loop, stmt_info)) |
| skip_args[loop_preheader_edge (loop)->dest_idx] = true; |
| /* Reduction chain backedge defs are filled manually. |
| ??? Need a better way to identify a SLP reduction chain PHI. |
| Or a better overall way to SLP match those. */ |
| if (all_same && def_type == vect_reduction_def) |
| skip_args[loop_latch_edge (loop)->dest_idx] = true; |
| } |
| else if (def_type != vect_internal_def) |
| return NULL; |
| } |
| |
| bool two_operators = false; |
| unsigned char *swap = XALLOCAVEC (unsigned char, group_size); |
| tree vectype = NULL_TREE; |
| if (!vect_build_slp_tree_1 (vinfo, swap, stmts, group_size, |
| &this_max_nunits, matches, &two_operators, |
| &vectype)) |
| return NULL; |
| |
| /* If the SLP node is a load, terminate the recursion unless masked. */ |
| if (STMT_VINFO_GROUPED_ACCESS (stmt_info) |
| && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) |
| { |
| if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) |
| { |
| /* Masked load. */ |
| gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)); |
| nops = 1; |
| } |
| else |
| { |
| *max_nunits = this_max_nunits; |
| (*tree_size)++; |
| node = vect_create_new_slp_node (node, stmts, 0); |
| SLP_TREE_VECTYPE (node) = vectype; |
| /* And compute the load permutation. Whether it is actually |
| a permutation depends on the unrolling factor which is |
| decided later. */ |
| vec<unsigned> load_permutation; |
| int j; |
| stmt_vec_info load_info; |
| load_permutation.create (group_size); |
| stmt_vec_info first_stmt_info |
| = DR_GROUP_FIRST_ELEMENT (SLP_TREE_SCALAR_STMTS (node)[0]); |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info) |
| { |
| int load_place = vect_get_place_in_interleaving_chain |
| (load_info, first_stmt_info); |
| gcc_assert (load_place != -1); |
| load_permutation.safe_push (load_place); |
| } |
| SLP_TREE_LOAD_PERMUTATION (node) = load_permutation; |
| return node; |
| } |
| } |
| else if (gimple_assign_single_p (stmt_info->stmt) |
| && !gimple_vuse (stmt_info->stmt) |
| && gimple_assign_rhs_code (stmt_info->stmt) == BIT_FIELD_REF) |
| { |
| /* vect_build_slp_tree_1 determined all BIT_FIELD_REFs reference |
| the same SSA name vector with a type compatible with VECTYPE. */ |
| vec<std::pair<unsigned, unsigned> > lperm = vNULL; |
| tree vec = TREE_OPERAND (gimple_assign_rhs1 (stmt_info->stmt), 0); |
| stmt_vec_info estmt_info; |
| FOR_EACH_VEC_ELT (stmts, i, estmt_info) |
| { |
| gassign *estmt = as_a <gassign *> (estmt_info->stmt); |
| tree bfref = gimple_assign_rhs1 (estmt); |
| HOST_WIDE_INT lane; |
| if (!known_eq (bit_field_size (bfref), |
| tree_to_poly_uint64 (TYPE_SIZE (TREE_TYPE (vectype)))) |
| || !constant_multiple_p (bit_field_offset (bfref), |
| bit_field_size (bfref), &lane)) |
| { |
| lperm.release (); |
| matches[0] = false; |
| return NULL; |
| } |
| lperm.safe_push (std::make_pair (0, (unsigned)lane)); |
| } |
| slp_tree vnode = vect_create_new_slp_node (vNULL); |
| /* ??? We record vectype here but we hide eventually necessary |
| punning and instead rely on code generation to materialize |
| VIEW_CONVERT_EXPRs as necessary. We instead should make |
| this explicit somehow. */ |
| SLP_TREE_VECTYPE (vnode) = vectype; |
| SLP_TREE_VEC_DEFS (vnode).safe_push (vec); |
| /* We are always building a permutation node even if it is an identity |
| permute to shield the rest of the vectorizer from the odd node |
| representing an actual vector without any scalar ops. |
| ??? We could hide it completely by making the permute node |
| external? */ |
| node = vect_create_new_slp_node (node, stmts, 1); |
| SLP_TREE_CODE (node) = VEC_PERM_EXPR; |
| SLP_TREE_LANE_PERMUTATION (node) = lperm; |
| SLP_TREE_VECTYPE (node) = vectype; |
| SLP_TREE_CHILDREN (node).quick_push (vnode); |
| return node; |
| } |
| /* When discovery reaches an associatable operation see whether we can |
| improve on the operand-swapping code, which looks at no more than |
| two defs, by matching up lanes across the whole chain. |
| ??? For BB vectorization we cannot do this brute-force search |
| for matching since we can succeed by means of builds from scalars |
| and have no good way to "cost" one build against another. */ |
| else if (is_a <loop_vec_info> (vinfo) |
| /* ??? We don't handle !vect_internal_def defs below. */ |
| && STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def |
| && is_gimple_assign (stmt_info->stmt) |
| && (associative_tree_code (gimple_assign_rhs_code (stmt_info->stmt)) |
| || gimple_assign_rhs_code (stmt_info->stmt) == MINUS_EXPR) |
| && ((FLOAT_TYPE_P (vectype) && flag_associative_math) |
| || (INTEGRAL_TYPE_P (TREE_TYPE (vectype)) |
| && TYPE_OVERFLOW_WRAPS (TREE_TYPE (vectype))))) |
| { |
| /* See if we have a chain of (mixed) adds or subtracts or other |
| associatable ops. */ |
| enum tree_code code = gimple_assign_rhs_code (stmt_info->stmt); |
| if (code == MINUS_EXPR) |
| code = PLUS_EXPR; |
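| /* A minimal sketch of the linearization, assuming a lane like |
| a - b + c: the chain becomes { (PLUS, a), (MINUS, b), (PLUS, c) } |
| with CODE canonicalized to PLUS_EXPR. */ |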
| stmt_vec_info other_op_stmt_info = NULL; |
| stmt_vec_info op_stmt_info = NULL; |
| unsigned chain_len = 0; |
| auto_vec<chain_op_t> chain; |
| auto_vec<std::pair<tree_code, gimple *> > worklist; |
| auto_vec<vec<chain_op_t> > chains (group_size); |
| auto_vec<slp_tree, 4> children; |
| bool hard_fail = true; |
| for (unsigned lane = 0; lane < group_size; ++lane) |
| { |
| /* For each lane linearize the addition/subtraction (or other |
| uniform associatable operation) expression tree. */ |
| gimple *op_stmt = NULL, *other_op_stmt = NULL; |
| vect_slp_linearize_chain (vinfo, worklist, chain, code, |
| stmts[lane]->stmt, op_stmt, other_op_stmt, |
| NULL); |
| if (!op_stmt_info && op_stmt) |
| op_stmt_info = vinfo->lookup_stmt (op_stmt); |
| if (!other_op_stmt_info && other_op_stmt) |
| other_op_stmt_info = vinfo->lookup_stmt (other_op_stmt); |
| if (chain.length () == 2) |
| { |
| /* For a chain of just two elements resort to the regular |
| operand-swapping scheme. If we run into a length |
| mismatch still hard-fail. */ |
| if (chain_len == 0) |
| hard_fail = false; |
| else |
| { |
| matches[lane] = false; |
| /* ??? We might want to process the other lanes, but |
| make sure to not give false matching hints to the |
| caller for lanes we did not process. */ |
| if (lane != group_size - 1) |
| matches[0] = false; |
| } |
| break; |
| } |
| else if (chain_len == 0) |
| chain_len = chain.length (); |
| else if (chain.length () != chain_len) |
| { |
| /* ??? Here we could slip in magic to compensate with |
| neutral operands. */ |
| matches[lane] = false; |
| if (lane != group_size - 1) |
| matches[0] = false; |
| break; |
| } |
| chains.quick_push (chain.copy ()); |
| chain.truncate (0); |
| } |
| if (chains.length () == group_size) |
| { |
| /* We cannot yet use SLP_TREE_CODE to communicate the operation. */ |
| if (!op_stmt_info) |
| { |
| hard_fail = false; |
| goto out; |
| } |
| /* Now we have a set of chains with the same length. */ |
| /* 1. pre-sort according to def_type and operation. */ |
| for (unsigned lane = 0; lane < group_size; ++lane) |
| chains[lane].stablesort (dt_sort_cmp, vinfo); |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "pre-sorted chains of %s\n", |
| get_tree_code_name (code)); |
| for (unsigned lane = 0; lane < group_size; ++lane) |
| { |
| for (unsigned opnum = 0; opnum < chain_len; ++opnum) |
| dump_printf (MSG_NOTE, "%s %T ", |
| get_tree_code_name (chains[lane][opnum].code), |
| chains[lane][opnum].op); |
| dump_printf (MSG_NOTE, "\n"); |
| } |
| } |
| /* 2. try to build children nodes, associating as necessary. */ |
| for (unsigned n = 0; n < chain_len; ++n) |
| { |
| vect_def_type dt = chains[0][n].dt; |
| unsigned lane; |
| for (lane = 0; lane < group_size; ++lane) |
| if (chains[lane][n].dt != dt) |
| { |
| if (dt == vect_constant_def |
| && chains[lane][n].dt == vect_external_def) |
| dt = vect_external_def; |
| else if (dt == vect_external_def |
| && chains[lane][n].dt == vect_constant_def) |
| ; |
| else |
| break; |
| } |
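| /* A mix of constant and external defs in one operand position is |
| canonicalized to vect_external_def above; any other disagreement |
| falls through as a mismatch. */ |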
| if (lane != group_size) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "giving up on chain due to mismatched " |
| "def types\n"); |
| matches[lane] = false; |
| if (lane != group_size - 1) |
| matches[0] = false; |
| goto out; |
| } |
| if (dt == vect_constant_def |
| || dt == vect_external_def) |
| { |
| /* We can always build those. Might want to sort last |
| or defer building. */ |
| vec<tree> ops; |
| ops.create (group_size); |
| for (lane = 0; lane < group_size; ++lane) |
| ops.quick_push (chains[lane][n].op); |
| slp_tree child = vect_create_new_slp_node (ops); |
| SLP_TREE_DEF_TYPE (child) = dt; |
| children.safe_push (child); |
| } |
| else if (dt != vect_internal_def) |
| { |
| /* Not sure, we might need something special here; |
| gcc.dg/vect/pr96854.c, |
| gfortran.dg/vect/fast-math-pr37021.f90 |
| and gfortran.dg/vect/pr61171.f trigger this. */ |
| /* Soft-fail for now. */ |
| hard_fail = false; |
| goto out; |
| } |
| else |
| { |
| vec<stmt_vec_info> op_stmts; |
| op_stmts.create (group_size); |
| slp_tree child = NULL; |
| /* Brute-force our way. We have to consider a lane |
| failing after we fixed up an earlier fail in the |
| SLP discovery recursion, so track the current |
| permute per lane. */ |
| unsigned *perms = XALLOCAVEC (unsigned, group_size); |
| memset (perms, 0, sizeof (unsigned) * group_size); |
| do |
| { |
| op_stmts.truncate (0); |
| for (lane = 0; lane < group_size; ++lane) |
| op_stmts.quick_push |
| (vinfo->lookup_def (chains[lane][n].op)); |
| child = vect_build_slp_tree (vinfo, op_stmts, |
| group_size, &this_max_nunits, |
| matches, limit, |
| &this_tree_size, bst_map); |
| /* ??? We're likely getting too many fatal mismatches |
| here so maybe we want to ignore them (but then we |
| have no idea which lanes fatally mismatched). */ |
| if (child || !matches[0]) |
| break; |
| /* Swap another lane we have not yet matched up into |
| lanes that did not match. If we run out of |
| permute possibilities for a lane terminate the |
| search. */ |
| bool term = false; |
| for (lane = 1; lane < group_size; ++lane) |
| if (!matches[lane]) |
| { |
| if (n + perms[lane] + 1 == chain_len) |
| { |
| term = true; |
| break; |
| } |
| std::swap (chains[lane][n], |
| chains[lane][n + perms[lane] + 1]); |
| perms[lane]++; |
| } |
| if (term) |
| break; |
| } |
| while (1); |
| if (!child) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "failed to match up op %d\n", n); |
| op_stmts.release (); |
| if (lane != group_size - 1) |
| matches[0] = false; |
| else |
| matches[lane] = false; |
| goto out; |
| } |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "matched up op %d to\n", n); |
| vect_print_slp_tree (MSG_NOTE, vect_location, child); |
| } |
| children.safe_push (child); |
| } |
| } |
| /* 3. build SLP nodes to combine the chain. */ |
| for (unsigned lane = 0; lane < group_size; ++lane) |
| if (chains[lane][0].code != code) |
| { |
| /* See if there's any alternate all-PLUS entry. */ |
| unsigned n; |
| for (n = 1; n < chain_len; ++n) |
| { |
| for (lane = 0; lane < group_size; ++lane) |
| if (chains[lane][n].code != code) |
| break; |
| if (lane == group_size) |
| break; |
| } |
| if (n != chain_len) |
| { |
| /* Swap that in at first position. */ |
| std::swap (children[0], children[n]); |
| for (lane = 0; lane < group_size; ++lane) |
| std::swap (chains[lane][0], chains[lane][n]); |
| } |
| else |
| { |
| /* ??? When this triggers we end up with two |
| vect_constant/external_def operands up front and things break |
| (ICE) spectacularly when finding an insertion place for the |
| all-constant op. We should have a fully |
| vect_internal_def operand though(?) so we can swap |
| that into first place and then prepend the all-zero |
| constant. */ |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "inserting constant zero to compensate " |
| "for (partially) negated first " |
| "operand\n"); |
| chain_len++; |
| for (lane = 0; lane < group_size; ++lane) |
| chains[lane].safe_insert |
| (0, chain_op_t (code, vect_constant_def, NULL_TREE)); |
| vec<tree> zero_ops; |
| zero_ops.create (group_size); |
| zero_ops.quick_push (build_zero_cst (TREE_TYPE (vectype))); |
| for (lane = 1; lane < group_size; ++lane) |
| zero_ops.quick_push (zero_ops[0]); |
| slp_tree zero = vect_create_new_slp_node (zero_ops); |
| SLP_TREE_DEF_TYPE (zero) = vect_constant_def; |
| children.safe_insert (0, zero); |
| } |
| break; |
| } |
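| /* Combine the chain operands left to right, i.e. |
| t1 = op0 . op1; t2 = t1 . op2; ... with the final |
| combination becoming NODE itself. */ |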
| for (unsigned i = 1; i < children.length (); ++i) |
| { |
| slp_tree op0 = children[i - 1]; |
| slp_tree op1 = children[i]; |
| bool this_two_op = false; |
| for (unsigned lane = 0; lane < group_size; ++lane) |
| if (chains[lane][i].code != chains[0][i].code) |
| { |
| this_two_op = true; |
| break; |
| } |
| slp_tree child; |
| if (i == children.length () - 1) |
| child = vect_create_new_slp_node (node, stmts, 2); |
| else |
| child = vect_create_new_slp_node (2, ERROR_MARK); |
| if (this_two_op) |
| { |
| vec<std::pair<unsigned, unsigned> > lperm; |
| lperm.create (group_size); |
| for (unsigned lane = 0; lane < group_size; ++lane) |
| lperm.quick_push (std::make_pair |
| (chains[lane][i].code != chains[0][i].code, lane)); |
| vect_slp_build_two_operator_nodes (child, vectype, op0, op1, |
| (chains[0][i].code == code |
| ? op_stmt_info |
| : other_op_stmt_info), |
| (chains[0][i].code == code |
| ? other_op_stmt_info |
| : op_stmt_info), |
| lperm); |
| } |
| else |
| { |
| SLP_TREE_DEF_TYPE (child) = vect_internal_def; |
| SLP_TREE_VECTYPE (child) = vectype; |
| SLP_TREE_LANES (child) = group_size; |
| SLP_TREE_CHILDREN (child).quick_push (op0); |
| SLP_TREE_CHILDREN (child).quick_push (op1); |
| SLP_TREE_REPRESENTATIVE (child) |
| = (chains[0][i].code == code |
| ? op_stmt_info : other_op_stmt_info); |
| } |
| children[i] = child; |
| } |
| *tree_size += this_tree_size + 1; |
| *max_nunits = this_max_nunits; |
| while (!chains.is_empty ()) |
| chains.pop ().release (); |
| return node; |
| } |
| out: |
| while (!children.is_empty ()) |
| vect_free_slp_tree (children.pop ()); |
| while (!chains.is_empty ()) |
| chains.pop ().release (); |
| /* Hard-fail; otherwise we might run into quadratic processing of the |
| chains by restarting discovery one stmt into the chain again. */ |
| if (hard_fail) |
| return NULL; |
| /* Fall thru to normal processing. */ |
| } |
| |
| /* Get at the operands, verifying they are compatible. */ |
| vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size); |
| slp_oprnd_info oprnd_info; |
| FOR_EACH_VEC_ELT (stmts, i, stmt_info) |
| { |
| int res = vect_get_and_check_slp_defs (vinfo, swap[i], skip_args, |
| stmts, i, &oprnds_info); |
| if (res != 0) |
| matches[(res == -1) ? 0 : i] = false; |
| if (!matches[0]) |
| break; |
| } |
| for (i = 0; i < group_size; ++i) |
| if (!matches[i]) |
| { |
| vect_free_oprnd_info (oprnds_info); |
| return NULL; |
| } |
| swap = NULL; |
| |
| auto_vec<slp_tree, 4> children; |
| |
| stmt_info = stmts[0]; |
| |
| /* Create SLP_TREE nodes for the definition node/s. */ |
| FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) |
| { |
| slp_tree child; |
| unsigned int j; |
| |
| /* We're skipping certain operands from processing, for example |
| outer loop reduction initial defs. */ |
| if (skip_args[i]) |
| { |
| children.safe_push (NULL); |
| continue; |
| } |
| |
| if (oprnd_info->first_dt == vect_uninitialized_def) |
| { |
| /* COND_EXPRs eventually have one operand too many if the condition |
| is an SSA name. */ |
| gcc_assert (i == 3 && nops == 4); |
| continue; |
| } |
| |
| if (is_a <bb_vec_info> (vinfo) |
| && oprnd_info->first_dt == vect_internal_def |
| && !oprnd_info->any_pattern) |
| { |
| /* For BB vectorization, if all defs are the same do not |
| bother to continue the build along the single-lane |
| graph but use a splat of the scalar value. */ |
| stmt_vec_info first_def = oprnd_info->def_stmts[0]; |
| for (j = 1; j < group_size; ++j) |
| if (oprnd_info->def_stmts[j] != first_def) |
| break; |
| if (j == group_size |
| /* But avoid doing this for loads where we may be |
| able to CSE things, unless the stmt is not |
| vectorizable. */ |
| && (!STMT_VINFO_VECTORIZABLE (first_def) |
| || !gimple_vuse (first_def->stmt))) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Using a splat of the uniform operand\n"); |
| oprnd_info->first_dt = vect_external_def; |
| } |
| } |
| |
| if (oprnd_info->first_dt == vect_external_def |
| || oprnd_info->first_dt == vect_constant_def) |
| { |
| slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops); |
| SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt; |
| oprnd_info->ops = vNULL; |
| children.safe_push (invnode); |
| continue; |
| } |
| |
| if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, |
| group_size, &this_max_nunits, |
| matches, limit, |
| &this_tree_size, bst_map)) != NULL) |
| { |
| oprnd_info->def_stmts = vNULL; |
| children.safe_push (child); |
| continue; |
| } |
| |
| /* If the SLP build for operand zero failed and operands zero |
| and one can be commuted try that for the scalar stmts |
| that failed the match. */ |
| if (i == 0 |
| /* A first scalar stmt mismatch signals a fatal mismatch. */ |
| && matches[0] |
| /* ??? For COND_EXPRs we can swap the comparison operands |
| as well as the arms under some constraints. */ |
| && nops == 2 |
| && oprnds_info[1]->first_dt == vect_internal_def |
| && is_gimple_assign (stmt_info->stmt) |
| /* Swapping operands for reductions breaks assumptions later on. */ |
| && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def |
| && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def) |
| { |
| /* See whether we can swap the matching or the non-matching |
| stmt operands. */ |
| bool swap_not_matching = true; |
| do |
| { |
| for (j = 0; j < group_size; ++j) |
| { |
| if (matches[j] != !swap_not_matching) |
| continue; |
| stmt_vec_info stmt_info = stmts[j]; |
| /* Verify if we can swap operands of this stmt. */ |
| gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); |
| if (!stmt |
| || !commutative_tree_code (gimple_assign_rhs_code (stmt))) |
| { |
| if (!swap_not_matching) |
| goto fail; |
| swap_not_matching = false; |
| break; |
| } |
| } |
| } |
| while (j != group_size); |
| |
| /* Swap mismatched definition stmts. */ |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Re-trying with swapped operands of stmts "); |
| for (j = 0; j < group_size; ++j) |
| if (matches[j] == !swap_not_matching) |
| { |
| std::swap (oprnds_info[0]->def_stmts[j], |
| oprnds_info[1]->def_stmts[j]); |
| std::swap (oprnds_info[0]->ops[j], |
| oprnds_info[1]->ops[j]); |
| if (dump_enabled_p ()) |
| dump_printf (MSG_NOTE, "%d ", j); |
| } |
| if (dump_enabled_p ()) |
| dump_printf (MSG_NOTE, "\n"); |
| /* After swapping some operands we lost track whether an |
| operand has any pattern defs so be conservative here. */ |
| if (oprnds_info[0]->any_pattern || oprnds_info[1]->any_pattern) |
| oprnds_info[0]->any_pattern = oprnds_info[1]->any_pattern = true; |
| /* And try again with scratch 'matches' ... */ |
| bool *tem = XALLOCAVEC (bool, group_size); |
| if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, |
| group_size, &this_max_nunits, |
| tem, limit, |
| &this_tree_size, bst_map)) != NULL) |
| { |
| oprnd_info->def_stmts = vNULL; |
| children.safe_push (child); |
| continue; |
| } |
| } |
| fail: |
| |
| /* If the SLP build failed and we are analyzing a basic block, |
| simply treat nodes we fail to build as externally defined |
| (and thus build vectors from the scalar defs). |
| The cost model will reject outright expensive cases. |
| ??? This doesn't treat cases where permutation ultimately |
| fails (or we don't try permutation below). Ideally we'd |
| even compute a permutation that will end up with the maximum |
| SLP tree size... */ |
| if (is_a <bb_vec_info> (vinfo) |
| /* ??? Rejecting patterns this way doesn't work. We'd have to |
| do extra work to cancel the pattern so the uses see the |
| scalar version. */ |
| && !is_pattern_stmt_p (stmt_info) |
| && !oprnd_info->any_pattern) |
| { |
| /* But if there's a leading vector-sized set of matching stmts, |
| fail here so we can split the group. This matches the condition |
| vect_analyze_slp_instance uses. */ |
| /* ??? We might want to split here and combine the results to support |
| multiple vector sizes better. */ |
| for (j = 0; j < group_size; ++j) |
| if (!matches[j]) |
| break; |
| if (!known_ge (j, TYPE_VECTOR_SUBPARTS (vectype))) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Building vector operands from scalars\n"); |
| this_tree_size++; |
| child = vect_create_new_slp_node (oprnd_info->ops); |
| children.safe_push (child); |
| oprnd_info->ops = vNULL; |
| continue; |
| } |
| } |
| |
| gcc_assert (child == NULL); |
| FOR_EACH_VEC_ELT (children, j, child) |
| if (child) |
| vect_free_slp_tree (child); |
| vect_free_oprnd_info (oprnds_info); |
| return NULL; |
| } |
| |
| vect_free_oprnd_info (oprnds_info); |
| |
| /* If all children of this node are built up from uniform scalars, |
| or building it requires more than one possibly expensive vector |
| construction, then just throw the node away, causing it to be |
| built up from scalars instead. |
| The exception is the SLP node for the vector store. */ |
| if (is_a <bb_vec_info> (vinfo) |
| && !STMT_VINFO_GROUPED_ACCESS (stmt_info) |
| /* ??? Rejecting patterns this way doesn't work. We'd have to |
| do extra work to cancel the pattern so the uses see the |
| scalar version. */ |
| && !is_pattern_stmt_p (stmt_info)) |
| { |
| slp_tree child; |
| unsigned j; |
| bool all_uniform_p = true; |
| unsigned n_vector_builds = 0; |
| FOR_EACH_VEC_ELT (children, j, child) |
| { |
| if (!child) |
| ; |
| else if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) |
| all_uniform_p = false; |
| else if (!vect_slp_tree_uniform_p (child)) |
| { |
| all_uniform_p = false; |
| if (SLP_TREE_DEF_TYPE (child) == vect_external_def) |
| n_vector_builds++; |
| } |
| } |
| if (all_uniform_p |
| || n_vector_builds > 1 |
| || (n_vector_builds == children.length () |
| && is_a <gphi *> (stmt_info->stmt))) |
| { |
| /* Roll back. */ |
| matches[0] = false; |
| FOR_EACH_VEC_ELT (children, j, child) |
| if (child) |
| vect_free_slp_tree (child); |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Building parent vector operands from " |
| "scalars instead\n"); |
| return NULL; |
| } |
| } |
| |
| *tree_size += this_tree_size + 1; |
| *max_nunits = this_max_nunits; |
| |
| if (two_operators) |
| { |
| /* ??? We'd likely want to either cache in bst_map something like |
| { a+b, NULL, a+b, NULL } and { NULL, a-b, NULL, a-b } or |
| the true { a+b, a+b, a+b, a+b } ... but there we don't have |
| explicit stmts to put in so the keying on 'stmts' doesn't |
| work (but we have the same issue with nodes that use 'ops'). */ |
| slp_tree one = new _slp_tree; |
| slp_tree two = new _slp_tree; |
| SLP_TREE_DEF_TYPE (one) = vect_internal_def; |
| SLP_TREE_DEF_TYPE (two) = vect_internal_def; |
| SLP_TREE_VECTYPE (one) = vectype; |
| SLP_TREE_VECTYPE (two) = vectype; |
| SLP_TREE_CHILDREN (one).safe_splice (children); |
| SLP_TREE_CHILDREN (two).safe_splice (children); |
| slp_tree child; |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (two), i, child) |
| SLP_TREE_REF_COUNT (child)++; |
| |
| /* Here we record the original defs since this |
| node represents the final lane configuration. */ |
| node = vect_create_new_slp_node (node, stmts, 2); |
| SLP_TREE_VECTYPE (node) = vectype; |
| SLP_TREE_CODE (node) = VEC_PERM_EXPR; |
| SLP_TREE_CHILDREN (node).quick_push (one); |
| SLP_TREE_CHILDREN (node).quick_push (two); |
| gassign *stmt = as_a <gassign *> (stmts[0]->stmt); |
| enum tree_code code0 = gimple_assign_rhs_code (stmt); |
| enum tree_code ocode = ERROR_MARK; |
| stmt_vec_info ostmt_info; |
| unsigned j = 0; |
| FOR_EACH_VEC_ELT (stmts, i, ostmt_info) |
| { |
| gassign *ostmt = as_a <gassign *> (ostmt_info->stmt); |
| if (gimple_assign_rhs_code (ostmt) != code0) |
| { |
| SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (1, i)); |
| ocode = gimple_assign_rhs_code (ostmt); |
| j = i; |
| } |
| else |
| SLP_TREE_LANE_PERMUTATION (node).safe_push (std::make_pair (0, i)); |
| } |
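| /* E.g. for the group { a0+b0, a1-b1, a2+b2, a3-b3 } node ONE computes |
| all lanes with code0 (PLUS) and node TWO all lanes with OCODE |
| (MINUS); the lane permutation { (0,0), (1,1), (0,2), (1,3) } |
| blends the two. */ |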
| SLP_TREE_CODE (one) = code0; |
| SLP_TREE_CODE (two) = ocode; |
| SLP_TREE_LANES (one) = stmts.length (); |
| SLP_TREE_LANES (two) = stmts.length (); |
| SLP_TREE_REPRESENTATIVE (one) = stmts[0]; |
| SLP_TREE_REPRESENTATIVE (two) = stmts[j]; |
| return node; |
| } |
| |
| node = vect_create_new_slp_node (node, stmts, nops); |
| SLP_TREE_VECTYPE (node) = vectype; |
| SLP_TREE_CHILDREN (node).splice (children); |
| return node; |
| } |
| |
| /* Dump a single SLP tree NODE. */ |
| |
| static void |
| vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, |
| slp_tree node) |
| { |
| unsigned i, j; |
| slp_tree child; |
| stmt_vec_info stmt_info; |
| tree op; |
| |
| dump_metadata_t metadata (dump_kind, loc.get_impl_location ()); |
| dump_user_location_t user_loc = loc.get_user_location (); |
| dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u, refcnt=%u)\n", |
| SLP_TREE_DEF_TYPE (node) == vect_external_def |
| ? " (external)" |
| : (SLP_TREE_DEF_TYPE (node) == vect_constant_def |
| ? " (constant)" |
| : ""), node, |
| estimated_poly_value (node->max_nunits), |
| SLP_TREE_REF_COUNT (node)); |
| if (SLP_TREE_DEF_TYPE (node) == vect_internal_def) |
| { |
| if (SLP_TREE_CODE (node) == VEC_PERM_EXPR) |
| dump_printf_loc (metadata, user_loc, "op: VEC_PERM_EXPR\n"); |
| else |
| dump_printf_loc (metadata, user_loc, "op template: %G", |
| SLP_TREE_REPRESENTATIVE (node)->stmt); |
| } |
| if (SLP_TREE_SCALAR_STMTS (node).exists ()) |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) |
| dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt); |
| else |
| { |
| dump_printf_loc (metadata, user_loc, "\t{ "); |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) |
| dump_printf (metadata, "%T%s ", op, |
| i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : ""); |
| dump_printf (metadata, "}\n"); |
| } |
| if (SLP_TREE_LOAD_PERMUTATION (node).exists ()) |
| { |
| dump_printf_loc (metadata, user_loc, "\tload permutation {"); |
| FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), i, j) |
| dump_printf (dump_kind, " %u", j); |
| dump_printf (dump_kind, " }\n"); |
| } |
| if (SLP_TREE_LANE_PERMUTATION (node).exists ()) |
| { |
| dump_printf_loc (metadata, user_loc, "\tlane permutation {"); |
| for (i = 0; i < SLP_TREE_LANE_PERMUTATION (node).length (); ++i) |
| dump_printf (dump_kind, " %u[%u]", |
| SLP_TREE_LANE_PERMUTATION (node)[i].first, |
| SLP_TREE_LANE_PERMUTATION (node)[i].second); |
| dump_printf (dump_kind, " }\n"); |
| } |
| if (SLP_TREE_CHILDREN (node).is_empty ()) |
| return; |
| dump_printf_loc (metadata, user_loc, "\tchildren"); |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| dump_printf (dump_kind, " %p", (void *)child); |
| dump_printf (dump_kind, "\n"); |
| } |
| |
| DEBUG_FUNCTION void |
| debug (slp_tree node) |
| { |
| debug_dump_context ctx; |
| vect_print_slp_tree (MSG_NOTE, |
| dump_location_t::from_location_t (UNKNOWN_LOCATION), |
| node); |
| } |
| |
| /* Recursive helper for the dot producer below. */ |
| |
| static void |
| dot_slp_tree (FILE *f, slp_tree node, hash_set<slp_tree> &visited) |
| { |
| if (visited.add (node)) |
| return; |
| |
| fprintf (f, "\"%p\" [label=\"", (void *)node); |
| vect_print_slp_tree (MSG_NOTE, |
| dump_location_t::from_location_t (UNKNOWN_LOCATION), |
| node); |
| fprintf (f, "\"];\n"); |
| |
| |
| for (slp_tree child : SLP_TREE_CHILDREN (node)) |
| fprintf (f, "\"%p\" -> \"%p\";", (void *)node, (void *)child); |
| |
| for (slp_tree child : SLP_TREE_CHILDREN (node)) |
| dot_slp_tree (f, child, visited); |
| } |
| |
| DEBUG_FUNCTION void |
| dot_slp_tree (const char *fname, slp_tree node) |
| { |
| FILE *f = fopen (fname, "w"); |
| fprintf (f, "digraph {\n"); |
| fflush (f); |
| { |
| debug_dump_context ctx (f); |
| hash_set<slp_tree> visited; |
| dot_slp_tree (f, node, visited); |
| } |
| fflush (f); |
| fprintf (f, "}\n"); |
| fclose (f); |
| } |
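| /* Usage sketch for dot_slp_tree above (from a debugger, assuming a |
| live NODE): call dot_slp_tree ("/tmp/slp.dot", node) and render the |
| result with e.g. "dot -Tpdf /tmp/slp.dot". */ |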
| |
| /* Dump the SLP graph rooted at NODE using flags specified in DUMP_KIND. */ |
| |
| static void |
| vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc, |
| slp_tree node, hash_set<slp_tree> &visited) |
| { |
| unsigned i; |
| slp_tree child; |
| |
| if (visited.add (node)) |
| return; |
| |
| vect_print_slp_tree (dump_kind, loc, node); |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| if (child) |
| vect_print_slp_graph (dump_kind, loc, child, visited); |
| } |
| |
| static void |
| vect_print_slp_graph (dump_flags_t dump_kind, dump_location_t loc, |
| slp_tree entry) |
| { |
| hash_set<slp_tree> visited; |
| vect_print_slp_graph (dump_kind, loc, entry, visited); |
| } |
| |
| /* Mark the tree rooted at NODE with PURE_SLP. */ |
| |
| static void |
| vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited) |
| { |
| int i; |
| stmt_vec_info stmt_info; |
| slp_tree child; |
| |
| if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) |
| return; |
| |
| if (visited.add (node)) |
| return; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) |
| STMT_SLP_TYPE (stmt_info) = pure_slp; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| if (child) |
| vect_mark_slp_stmts (child, visited); |
| } |
| |
| static void |
| vect_mark_slp_stmts (slp_tree node) |
| { |
| hash_set<slp_tree> visited; |
| vect_mark_slp_stmts (node, visited); |
| } |
| |
| /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */ |
| |
| static void |
| vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited) |
| { |
| int i; |
| stmt_vec_info stmt_info; |
| slp_tree child; |
| |
| if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) |
| return; |
| |
| if (visited.add (node)) |
| return; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) |
| { |
| gcc_assert (!STMT_VINFO_RELEVANT (stmt_info) |
| || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope); |
| STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; |
| } |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| if (child) |
| vect_mark_slp_stmts_relevant (child, visited); |
| } |
| |
| static void |
| vect_mark_slp_stmts_relevant (slp_tree node) |
| { |
| hash_set<slp_tree> visited; |
| vect_mark_slp_stmts_relevant (node, visited); |
| } |
| |
| |
| /* Gather the load nodes in the SLP graph rooted at NODE into LOADS. */ |
| |
| static void |
| vect_gather_slp_loads (vec<slp_tree> &loads, slp_tree node, |
| hash_set<slp_tree> &visited) |
| { |
| if (!node || visited.add (node)) |
| return; |
| |
| if (SLP_TREE_CHILDREN (node).length () == 0) |
| { |
| if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) |
| return; |
| stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; |
| if (STMT_VINFO_GROUPED_ACCESS (stmt_info) |
| && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) |
| loads.safe_push (node); |
| } |
| else |
| { |
| unsigned i; |
| slp_tree child; |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| vect_gather_slp_loads (loads, child, visited); |
| } |
| } |
| |
| |
| /* Find the last scalar stmt in SLP tree NODE. */ |
| |
| stmt_vec_info |
| vect_find_last_scalar_stmt_in_slp (slp_tree node) |
| { |
| stmt_vec_info last = NULL; |
| stmt_vec_info stmt_vinfo; |
| |
| for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++) |
| { |
| stmt_vinfo = vect_orig_stmt (stmt_vinfo); |
| last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo; |
| } |
| |
| return last; |
| } |
| |
| /* Find the first scalar stmt in SLP tree NODE. */ |
| |
| stmt_vec_info |
| vect_find_first_scalar_stmt_in_slp (slp_tree node) |
| { |
| stmt_vec_info first = NULL; |
| stmt_vec_info stmt_vinfo; |
| |
| for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++) |
| { |
| stmt_vinfo = vect_orig_stmt (stmt_vinfo); |
| if (!first |
| || get_later_stmt (stmt_vinfo, first) == first) |
| first = stmt_vinfo; |
| } |
| |
| return first; |
| } |
| |
| /* Splits a group of stores, currently beginning at FIRST_VINFO, into |
| two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE |
| (also containing the first GROUP1_SIZE stmts, since stores are |
| consecutive), the second containing the remainder. |
| Return the first stmt in the second group. */ |
| |
| static stmt_vec_info |
| vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size) |
| { |
| gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo); |
| gcc_assert (group1_size > 0); |
| int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size; |
| gcc_assert (group2_size > 0); |
| DR_GROUP_SIZE (first_vinfo) = group1_size; |
| |
| stmt_vec_info stmt_info = first_vinfo; |
| for (unsigned i = group1_size; i > 1; i--) |
| { |
| stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info); |
| gcc_assert (DR_GROUP_GAP (stmt_info) == 1); |
| } |
| /* STMT is now the last element of the first group. */ |
| stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info); |
| DR_GROUP_NEXT_ELEMENT (stmt_info) = 0; |
| |
| DR_GROUP_SIZE (group2) = group2_size; |
| for (stmt_info = group2; stmt_info; |
| stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info)) |
| { |
| DR_GROUP_FIRST_ELEMENT (stmt_info) = group2; |
| gcc_assert (DR_GROUP_GAP (stmt_info) == 1); |
| } |
| |
| /* For the second group, the DR_GROUP_GAP is that before the original group, |
| plus skipping over the first vector. */ |
| DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size; |
| |
| /* DR_GROUP_GAP of the first group now has to skip over the second group too. */ |
| DR_GROUP_GAP (first_vinfo) += group2_size; |
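| /* Worked example (hypothetical numbers): splitting an 8-element group |
| with no initial gap at GROUP1_SIZE == 6 gives DR_GROUP_GAP (group2) |
| == 6 and DR_GROUP_GAP (first_vinfo) == 2, so each group skips the |
| other's elements when advancing to the next scalar iteration. */ |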
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n", |
| group1_size, group2_size); |
| |
| return group2; |
| } |
| |
| /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE |
| statements and a vector of NUNITS elements. */ |
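| /* For example, with NUNITS == 4 and GROUP_SIZE == 6 the common |
| multiple is 12, giving an unrolling factor of 12 / 6 == 2. */ |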
| |
| static poly_uint64 |
| calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size) |
| { |
| return exact_div (common_multiple (nunits, group_size), group_size); |
| } |
| |
| /* Helper that checks whether ROOT is a load node. */ |
| |
| static inline bool |
| vect_is_slp_load_node (slp_tree root) |
| { |
| return SLP_TREE_DEF_TYPE (root) == vect_internal_def |
| && STMT_VINFO_GROUPED_ACCESS (SLP_TREE_REPRESENTATIVE (root)) |
| && DR_IS_READ (STMT_VINFO_DATA_REF (SLP_TREE_REPRESENTATIVE (root))); |
| } |
| |
| |
| /* Helper function of optimize_load_redistribution that performs the operation |
| recursively. */ |
| |
| static slp_tree |
| optimize_load_redistribution_1 (scalar_stmts_to_slp_tree_map_t *bst_map, |
| vec_info *vinfo, unsigned int group_size, |
| hash_map<slp_tree, slp_tree> *load_map, |
| slp_tree root) |
| { |
| if (slp_tree *leader = load_map->get (root)) |
| return *leader; |
| |
| slp_tree node; |
| unsigned i; |
| |
| /* For now, we don't know anything about externals so do not do anything. */ |
| if (!root || SLP_TREE_DEF_TYPE (root) != vect_internal_def) |
| return NULL; |
| else if (SLP_TREE_CODE (root) == VEC_PERM_EXPR) |
| { |
| /* First convert this node into a load node and add it to the leaves |
| list, flattening the permute from a lane permutation into a load |
| permutation. If it's unneeded it will be elided later. */ |
| vec<stmt_vec_info> stmts; |
| stmts.create (SLP_TREE_LANES (root)); |
| lane_permutation_t lane_perm = SLP_TREE_LANE_PERMUTATION (root); |
| for (unsigned j = 0; j < lane_perm.length (); j++) |
| { |
| std::pair<unsigned, unsigned> perm = lane_perm[j]; |
| node = SLP_TREE_CHILDREN (root)[perm.first]; |
| |
| if (!vect_is_slp_load_node (node) |
| || SLP_TREE_CHILDREN (node).exists ()) |
| { |
| stmts.release (); |
| goto next; |
| } |
| |
| stmts.quick_push (SLP_TREE_SCALAR_STMTS (node)[perm.second]); |
| } |
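| /* At this point STMTS holds, per output lane of the permute, the |
| scalar load feeding that lane; re-running SLP discovery over them |
| below builds a plain (possibly load-permuted) load node. */ |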
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "converting stmts on permute node %p\n", root); |
| |
| bool *matches = XALLOCAVEC (bool, group_size); |
| poly_uint64 max_nunits = 1; |
| unsigned tree_size = 0, limit = 1; |
| node = vect_build_slp_tree (vinfo, stmts, group_size, &max_nunits, |
| matches, &limit, &tree_size, bst_map); |
| if (!node) |
| stmts.release (); |
| |
| load_map->put (root, node); |
| return node; |
| } |
| |
| next: |
| load_map->put (root, NULL); |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node) |
| { |
| slp_tree value |
| = optimize_load_redistribution_1 (bst_map, vinfo, group_size, load_map, |
| node); |
| if (value) |
| { |
| SLP_TREE_REF_COUNT (value)++; |
| SLP_TREE_CHILDREN (root)[i] = value; |
| /* ??? We know the original leaves of the replaced nodes will |
| be referenced by bst_map, only the permutes created by |
| pattern matching are not. */ |
| if (SLP_TREE_REF_COUNT (node) == 1) |
| load_map->remove (node); |
| vect_free_slp_tree (node); |
| } |
| } |
| |
| return NULL; |
| } |
| |
| /* Temporary workaround for loads not being CSEd during SLP build. This |
| function will traverse the SLP tree rooted in ROOT for INSTANCE and find |
| VEC_PERM nodes that blend vectors from multiple nodes that all read from |
| the same DR such that the final operation is equal to a permuted load. |
| Such nodes are then directly converted into load nodes themselves. The |
| nodes are CSEd using BST_MAP. */ |
| |
| static void |
| optimize_load_redistribution (scalar_stmts_to_slp_tree_map_t *bst_map, |
| vec_info *vinfo, unsigned int group_size, |
| hash_map<slp_tree, slp_tree> *load_map, |
| slp_tree root) |
| { |
| slp_tree node; |
| unsigned i; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i , node) |
| { |
| slp_tree value |
| = optimize_load_redistribution_1 (bst_map, vinfo, group_size, load_map, |
| node); |
| if (value) |
| { |
| SLP_TREE_REF_COUNT (value)++; |
| SLP_TREE_CHILDREN (root)[i] = value; |
| /* ??? We know the original leaves of the replaced nodes will |
| be referenced by bst_map, only the permutes created by |
| pattern matching are not. */ |
| if (SLP_TREE_REF_COUNT (node) == 1) |
| load_map->remove (node); |
| vect_free_slp_tree (node); |
| } |
| } |
| } |
| |
| /* Helper function of vect_match_slp_patterns. |
| |
| Attempts to match patterns against the SLP tree rooted in *REF_NODE |
| using VINFO. Patterns are matched in post-order traversal. |
| |
| If a pattern matches, the node in *REF_NODE is updated in place. |
| Return true if any pattern matched. */ |
| |
| static bool |
| vect_match_slp_patterns_2 (slp_tree *ref_node, vec_info *vinfo, |
| slp_tree_to_load_perm_map_t *perm_cache, |
| hash_set<slp_tree> *visited) |
| { |
| unsigned i; |
| slp_tree node = *ref_node; |
| bool found_p = false; |
| if (!node || visited->add (node)) |
| return false; |
| |
| slp_tree child; |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| found_p |= vect_match_slp_patterns_2 (&SLP_TREE_CHILDREN (node)[i], |
| vinfo, perm_cache, visited); |
| |
| for (unsigned x = 0; x < num__slp_patterns; x++) |
| { |
| vect_pattern *pattern = slp_patterns[x] (perm_cache, ref_node); |
| if (pattern) |
| { |
| pattern->build (vinfo); |
| delete pattern; |
| found_p = true; |
| } |
| } |
| |
| return found_p; |
| } |
| |
| /* Applies pattern matching to the SLP tree of INSTANCE using |
| vec_info VINFO. |
| |
| The tree is modified in place. Patterns are tried in order and |
| multiple patterns may match; return true if any did. */ |
| |
| static bool |
| vect_match_slp_patterns (slp_instance instance, vec_info *vinfo, |
| hash_set<slp_tree> *visited, |
| slp_tree_to_load_perm_map_t *perm_cache) |
| { |
| DUMP_VECT_SCOPE ("vect_match_slp_patterns"); |
| slp_tree *ref_node = &SLP_INSTANCE_TREE (instance); |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Analyzing SLP tree %p for patterns\n", |
| SLP_INSTANCE_TREE (instance)); |
| |
| return vect_match_slp_patterns_2 (ref_node, vinfo, perm_cache, visited); |
| } |
| |
| /* STMT_INFO is a store group of size GROUP_SIZE that we are considering |
| splitting into two, with the first split group having size NEW_GROUP_SIZE. |
| Return true if we could use IFN_STORE_LANES instead and if that appears |
| to be the better approach. */ |
| |
| static bool |
| vect_slp_prefer_store_lanes_p (vec_info *vinfo, stmt_vec_info stmt_info, |
| unsigned int group_size, |
| unsigned int new_group_size) |
| { |
| tree scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); |
| tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type); |
| if (!vectype) |
| return false; |
| /* Allow the split if one of the two new groups would operate on full |
| vectors *within* rather than across one scalar loop iteration. |
| This is purely a heuristic, but it should work well for group |
| sizes of 3 and 4, where the possible splits are: |
| |
| 3->2+1: OK if the vector has exactly two elements |
| 4->2+2: Likewise |
| 4->3+1: Less clear-cut. */ |
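| /* For instance, with two-element vectors, GROUP_SIZE == 3 and |
| NEW_GROUP_SIZE == 2, new_group_size is a multiple of the vector |
| size, so the split is allowed and store-lanes is not preferred. */ |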
| if (multiple_p (group_size - new_group_size, TYPE_VECTOR_SUBPARTS (vectype)) |
| || multiple_p (new_group_size, TYPE_VECTOR_SUBPARTS (vectype))) |
| return false; |
| return vect_store_lanes_supported (vectype, group_size, false); |
| } |
| |
| /* Analyze an SLP instance starting from a group of grouped stores. Call |
| vect_build_slp_tree to build a tree of packed stmts if possible. |
| Return FALSE if it's impossible to SLP any stmt in the loop. */ |
| |
| static bool |
| vect_analyze_slp_instance (vec_info *vinfo, |
| scalar_stmts_to_slp_tree_map_t *bst_map, |
| stmt_vec_info stmt_info, slp_instance_kind kind, |
| unsigned max_tree_size, unsigned *limit); |
| |
| /* Analyze an SLP instance starting from SCALAR_STMTS which are a group |
| of KIND. Return true if successful. */ |
| |
| static bool |
| vect_build_slp_instance (vec_info *vinfo, |
| slp_instance_kind kind, |
| vec<stmt_vec_info> &scalar_stmts, |
| vec<stmt_vec_info> &root_stmt_infos, |
| unsigned max_tree_size, unsigned *limit, |
| scalar_stmts_to_slp_tree_map_t *bst_map, |
| /* ??? We need stmt_info for group splitting. */ |
| stmt_vec_info stmt_info_) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Starting SLP discovery for\n"); |
| for (unsigned i = 0; i < scalar_stmts.length (); ++i) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| " %G", scalar_stmts[i]->stmt); |
| } |
| |
| /* Build the tree for the SLP instance. */ |
| unsigned int group_size = scalar_stmts.length (); |
| bool *matches = XALLOCAVEC (bool, group_size); |
| poly_uint64 max_nunits = 1; |
| unsigned tree_size = 0; |
| unsigned i; |
| slp_tree node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, |
| &max_nunits, matches, limit, |
| &tree_size, bst_map); |
| if (node != NULL) |
| { |
| /* Calculate the unrolling factor based on the smallest type. */ |
| poly_uint64 unrolling_factor |
| = calculate_unrolling_factor (max_nunits, group_size); |
| |
| if (maybe_ne (unrolling_factor, 1U) |
| && is_a <bb_vec_info> (vinfo)) |
| { |
| unsigned HOST_WIDE_INT const_max_nunits; |
| if (!max_nunits.is_constant (&const_max_nunits) |
| || const_max_nunits > group_size) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: store group " |
| "size not a multiple of the vector size " |
| "in basic block SLP\n"); |
| vect_free_slp_tree (node); |
| return false; |
| } |
| /* Fatal mismatch. */ |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "SLP discovery succeeded but node needs " |
| |