| /* SLP - Basic Block Vectorization |
| Copyright (C) 2007-2018 Free Software Foundation, Inc. |
| Contributed by Dorit Naishlos <dorit@il.ibm.com> |
| and Ira Rosen <irar@il.ibm.com> |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "gimple.h" |
| #include "tree-pass.h" |
| #include "ssa.h" |
| #include "optabs-tree.h" |
| #include "insn-config.h" |
| #include "recog.h" /* FIXME: for insn_data */ |
| #include "params.h" |
| #include "fold-const.h" |
| #include "stor-layout.h" |
| #include "gimple-iterator.h" |
| #include "cfgloop.h" |
| #include "tree-vectorizer.h" |
| #include "langhooks.h" |
| #include "gimple-walk.h" |
| #include "dbgcnt.h" |
| #include "tree-vector-builder.h" |
| #include "vec-perm-indices.h" |
| #include "gimple-fold.h" |
| #include "internal-fn.h" |
| |
| |
| /* Recursively free the memory allocated for the SLP tree rooted at NODE. */ |
| |
| static void |
| vect_free_slp_tree (slp_tree node) |
| { |
| int i; |
| slp_tree child; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| vect_free_slp_tree (child); |
| |
| gimple *stmt; |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) |
| /* After transform some stmts are removed and thus their vinfo is gone. */ |
| if (vinfo_for_stmt (stmt)) |
| { |
| gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0); |
| STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--; |
| } |
| |
| SLP_TREE_CHILDREN (node).release (); |
| SLP_TREE_SCALAR_STMTS (node).release (); |
| SLP_TREE_VEC_STMTS (node).release (); |
| SLP_TREE_LOAD_PERMUTATION (node).release (); |
| |
| free (node); |
| } |
| |
| |
| /* Free the memory allocated for the SLP instance. */ |
| |
| void |
| vect_free_slp_instance (slp_instance instance) |
| { |
| vect_free_slp_tree (SLP_INSTANCE_TREE (instance)); |
| SLP_INSTANCE_LOADS (instance).release (); |
| free (instance); |
| } |
| |
| |
| /* Create an SLP node for SCALAR_STMTS. */ |
| |
| static slp_tree |
| vect_create_new_slp_node (vec<gimple *> scalar_stmts) |
| { |
| slp_tree node; |
| gimple *stmt = scalar_stmts[0]; |
| unsigned int nops; |
| |
| if (is_gimple_call (stmt)) |
| nops = gimple_call_num_args (stmt); |
| else if (is_gimple_assign (stmt)) |
| { |
| nops = gimple_num_ops (stmt) - 1; |
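| /* A COND_EXPR needs one extra operand slot: its condition is usually a |
| comparison whose two operands are collected separately. */ |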
| if (gimple_assign_rhs_code (stmt) == COND_EXPR) |
| nops++; |
| } |
| else if (gimple_code (stmt) == GIMPLE_PHI) |
| nops = 0; |
| else |
| return NULL; |
| |
| node = XNEW (struct _slp_tree); |
| SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; |
| SLP_TREE_VEC_STMTS (node).create (0); |
| SLP_TREE_CHILDREN (node).create (nops); |
| SLP_TREE_LOAD_PERMUTATION (node) = vNULL; |
| SLP_TREE_TWO_OPERATORS (node) = false; |
| SLP_TREE_DEF_TYPE (node) = vect_internal_def; |
| |
| unsigned i; |
| FOR_EACH_VEC_ELT (scalar_stmts, i, stmt) |
| STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++; |
| |
| return node; |
| } |
| |
| |
| /* This structure is used in creation of an SLP tree. Each instance |
| corresponds to the same operand in a group of scalar stmts in an SLP |
| node. */ |
| typedef struct _slp_oprnd_info |
| { |
| /* Def-stmts for the operands. */ |
| vec<gimple *> def_stmts; |
| /* Information about the first statement, its vector def-type, type, the |
| operand itself in case it's constant, and an indication if it's a pattern |
| stmt. */ |
| tree first_op_type; |
| enum vect_def_type first_dt; |
| bool first_pattern; |
| bool second_pattern; |
| } *slp_oprnd_info; |
| |
| |
| /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each |
| operand. */ |
| static vec<slp_oprnd_info> |
| vect_create_oprnd_info (int nops, int group_size) |
| { |
| int i; |
| slp_oprnd_info oprnd_info; |
| vec<slp_oprnd_info> oprnds_info; |
| |
| oprnds_info.create (nops); |
| for (i = 0; i < nops; i++) |
| { |
| oprnd_info = XNEW (struct _slp_oprnd_info); |
| oprnd_info->def_stmts.create (group_size); |
| oprnd_info->first_dt = vect_uninitialized_def; |
| oprnd_info->first_op_type = NULL_TREE; |
| oprnd_info->first_pattern = false; |
| oprnd_info->second_pattern = false; |
| oprnds_info.quick_push (oprnd_info); |
| } |
| |
| return oprnds_info; |
| } |
| |
| |
| /* Free operands info. */ |
| |
| static void |
| vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info) |
| { |
| int i; |
| slp_oprnd_info oprnd_info; |
| |
| FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) |
| { |
| oprnd_info->def_stmts.release (); |
| XDELETE (oprnd_info); |
| } |
| |
| oprnds_info.release (); |
| } |
| |
| |
| /* Find the place of the data-ref in STMT in the interleaving chain that starts |
| from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */ |
| |
| int |
| vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt) |
| { |
| gimple *next_stmt = first_stmt; |
| int result = 0; |
| |
| if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) |
| return -1; |
| |
| do |
| { |
| if (next_stmt == stmt) |
| return result; |
| next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); |
| if (next_stmt) |
| result += GROUP_GAP (vinfo_for_stmt (next_stmt)); |
| } |
| while (next_stmt); |
| |
| return -1; |
| } |
| |
| /* Check whether it is possible to load COUNT elements of type ELT_MODE |
| using the method implemented by duplicate_and_interleave. Return true |
| if so, returning the number of intermediate vectors in *NVECTORS_OUT |
| (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT |
| (if nonnull). */ |
| |
| bool |
| can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode, |
| unsigned int *nvectors_out, |
| tree *vector_type_out, |
| tree *permutes) |
| { |
| poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode); |
| poly_int64 nelts; |
| unsigned int nvectors = 1; |
| for (;;) |
| { |
| scalar_int_mode int_mode; |
| poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT; |
| if (multiple_p (current_vector_size, elt_bytes, &nelts) |
| && int_mode_for_size (elt_bits, 0).exists (&int_mode)) |
| { |
| tree int_type = build_nonstandard_integer_type |
| (GET_MODE_BITSIZE (int_mode), 1); |
| tree vector_type = build_vector_type (int_type, nelts); |
| if (VECTOR_MODE_P (TYPE_MODE (vector_type))) |
| { |
| vec_perm_builder sel1 (nelts, 2, 3); |
| vec_perm_builder sel2 (nelts, 2, 3); |
| poly_int64 half_nelts = exact_div (nelts, 2); |
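| /* Build interleave-low and interleave-high masks for two NELTS-element |
| vectors: SEL1 selects { 0, NELTS, 1, NELTS + 1, ... } and SEL2 selects |
| { NELTS/2, 3*NELTS/2, NELTS/2 + 1, ... }; e.g. for NELTS == 4 the masks |
| are { 0, 4, 1, 5 } and { 2, 6, 3, 7 }. */ |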
| for (unsigned int i = 0; i < 3; ++i) |
| { |
| sel1.quick_push (i); |
| sel1.quick_push (i + nelts); |
| sel2.quick_push (half_nelts + i); |
| sel2.quick_push (half_nelts + i + nelts); |
| } |
| vec_perm_indices indices1 (sel1, 2, nelts); |
| vec_perm_indices indices2 (sel2, 2, nelts); |
| if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1) |
| && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2)) |
| { |
| if (nvectors_out) |
| *nvectors_out = nvectors; |
| if (vector_type_out) |
| *vector_type_out = vector_type; |
| if (permutes) |
| { |
| permutes[0] = vect_gen_perm_mask_checked (vector_type, |
| indices1); |
| permutes[1] = vect_gen_perm_mask_checked (vector_type, |
| indices2); |
| } |
| return true; |
| } |
| } |
| } |
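| /* The current element size doesn't work; halve it and try again with |
| twice as many intermediate vectors. */ |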
| if (!multiple_p (elt_bytes, 2, &elt_bytes)) |
| return false; |
| nvectors *= 2; |
| } |
| } |
| |
| /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that |
| they are of a valid type and that they match the defs of the first stmt of |
| the SLP group (stored in OPRNDS_INFO). This function tries to match stmts |
| by swapping operands of STMTS[STMT_NUM] when possible. A non-zero *SWAP |
| indicates that a swap is required for cond_expr stmts: *SWAP is 1 if STMT |
| is a cond and the operands of its comparison need to be swapped; *SWAP is |
| 2 if STMT is a cond and the comparison code needs to be inverted. If any |
| operands are swapped in this function, *SWAP is set to a non-zero value. |
| Return -1 if there was a fatal error; return 1 if the error could be |
| corrected by swapping operands of this node's parent; return 0 if |
| everything is OK. */ |
| static int |
| vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap, |
| vec<gimple *> stmts, unsigned stmt_num, |
| vec<slp_oprnd_info> *oprnds_info) |
| { |
| gimple *stmt = stmts[stmt_num]; |
| tree oprnd; |
| unsigned int i, number_of_oprnds; |
| gimple *def_stmt; |
| enum vect_def_type dt = vect_uninitialized_def; |
| bool pattern = false; |
| slp_oprnd_info oprnd_info; |
| int first_op_idx = 1; |
| bool commutative = false; |
| bool first_op_cond = false; |
| bool first = stmt_num == 0; |
| bool second = stmt_num == 1; |
| |
| if (is_gimple_call (stmt)) |
| { |
| number_of_oprnds = gimple_call_num_args (stmt); |
| first_op_idx = 3; |
| } |
| else if (is_gimple_assign (stmt)) |
| { |
| enum tree_code code = gimple_assign_rhs_code (stmt); |
| number_of_oprnds = gimple_num_ops (stmt) - 1; |
| /* Swap can only be done for cond_expr if asked to, otherwise we |
| could result in different comparison code to the first stmt. */ |
| if (gimple_assign_rhs_code (stmt) == COND_EXPR |
| && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt))) |
| { |
| first_op_cond = true; |
| number_of_oprnds++; |
| } |
| else |
| commutative = commutative_tree_code (code); |
| } |
| else |
| return -1; |
| |
| bool swapped = (*swap != 0); |
| gcc_assert (!swapped || first_op_cond); |
| for (i = 0; i < number_of_oprnds; i++) |
| { |
| again: |
| if (first_op_cond) |
| { |
| /* Map indicating how operands of cond_expr should be swapped. */ |
| int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}}; |
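| /* maps[0] leaves the operands in place, maps[1] swaps the two operands |
| of the comparison (*SWAP == 1), and maps[2] swaps the then/else arms, |
| matching an inverted comparison code (*SWAP == 2). */ |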
| int *map = maps[*swap]; |
| |
| if (i < 2) |
| oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]); |
| else |
| oprnd = gimple_op (stmt, map[i]); |
| } |
| else |
| oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i)); |
| |
| oprnd_info = (*oprnds_info)[i]; |
| |
| if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt)) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: can't analyze def for "); |
| dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd); |
| dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); |
| } |
| |
| return -1; |
| } |
| |
| /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt |
| from the pattern. Check that all the stmts of the node are in the |
| pattern. */ |
| if (def_stmt && gimple_bb (def_stmt) |
| && vect_stmt_in_region_p (vinfo, def_stmt) |
| && vinfo_for_stmt (def_stmt) |
| && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) |
| && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) |
| && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) |
| { |
| pattern = true; |
| if (!first && !oprnd_info->first_pattern |
| /* Allow different pattern state for the defs of the |
| first stmt in reduction chains. */ |
| && (oprnd_info->first_dt != vect_reduction_def |
| || (!second && !oprnd_info->second_pattern))) |
| { |
| if (i == 0 |
| && !swapped |
| && commutative) |
| { |
| swapped = true; |
| goto again; |
| } |
| |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: some of the stmts" |
| " are in a pattern, and others are not "); |
| dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd); |
| dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); |
| } |
| |
| return 1; |
| } |
| |
| def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); |
| dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); |
| |
| if (dt == vect_unknown_def_type) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Unsupported pattern.\n"); |
| return -1; |
| } |
| |
| switch (gimple_code (def_stmt)) |
| { |
| case GIMPLE_PHI: |
| case GIMPLE_ASSIGN: |
| break; |
| |
| default: |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "unsupported defining stmt:\n"); |
| return -1; |
| } |
| } |
| |
| if (second) |
| oprnd_info->second_pattern = pattern; |
| |
| if (first) |
| { |
| oprnd_info->first_dt = dt; |
| oprnd_info->first_pattern = pattern; |
| oprnd_info->first_op_type = TREE_TYPE (oprnd); |
| } |
| else |
| { |
| /* Not first stmt of the group, check that the def-stmt/s match |
| the def-stmt/s of the first stmt. Allow different definition |
| types for reduction chains: the first stmt must be a |
| vect_reduction_def (a phi node), and the rest |
| vect_internal_def. */ |
| tree type = TREE_TYPE (oprnd); |
| if ((oprnd_info->first_dt != dt |
| && !(oprnd_info->first_dt == vect_reduction_def |
| && dt == vect_internal_def) |
| && !((oprnd_info->first_dt == vect_external_def |
| || oprnd_info->first_dt == vect_constant_def) |
| && (dt == vect_external_def |
| || dt == vect_constant_def))) |
| || !types_compatible_p (oprnd_info->first_op_type, type)) |
| { |
| /* Try swapping operands if we got a mismatch. */ |
| if (i == 0 |
| && !swapped |
| && commutative) |
| { |
| swapped = true; |
| goto again; |
| } |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different types\n"); |
| |
| return 1; |
| } |
| if ((dt == vect_constant_def |
| || dt == vect_external_def) |
| && !current_vector_size.is_constant () |
| && (TREE_CODE (type) == BOOLEAN_TYPE |
| || !can_duplicate_and_interleave_p (stmts.length (), |
| TYPE_MODE (type)))) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: invalid type of def " |
| "for variable-length SLP "); |
| dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd); |
| dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); |
| } |
| return -1; |
| } |
| } |
| |
| /* Check the types of the definitions. */ |
| switch (dt) |
| { |
| case vect_constant_def: |
| case vect_external_def: |
| break; |
| |
| case vect_reduction_def: |
| case vect_induction_def: |
| case vect_internal_def: |
| oprnd_info->def_stmts.quick_push (def_stmt); |
| break; |
| |
| default: |
| /* FORNOW: Not supported. */ |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: illegal type of def "); |
| dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd); |
| dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); |
| } |
| |
| return -1; |
| } |
| } |
| |
| /* Swap operands. */ |
| if (swapped) |
| { |
| /* If there are already uses of this stmt in a SLP instance then |
| we've committed to the operand order and can't swap it. */ |
| if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: cannot swap operands of " |
| "shared stmt "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); |
| } |
| return -1; |
| } |
| |
| if (first_op_cond) |
| { |
| tree cond = gimple_assign_rhs1 (stmt); |
| enum tree_code code = TREE_CODE (cond); |
| |
| /* Swap. */ |
| if (*swap == 1) |
| { |
| swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0), |
| &TREE_OPERAND (cond, 1)); |
| TREE_SET_CODE (cond, swap_tree_comparison (code)); |
| } |
| /* Invert. */ |
| else |
| { |
| swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt), |
| gimple_assign_rhs3_ptr (stmt)); |
| bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0)); |
| code = invert_tree_comparison (TREE_CODE (cond), honor_nans); |
| gcc_assert (code != ERROR_MARK); |
| TREE_SET_CODE (cond, code); |
| } |
| } |
| else |
| swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt), |
| gimple_assign_rhs2_ptr (stmt)); |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "swapped operands to match def types in "); |
| dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); |
| } |
| } |
| |
| *swap = swapped; |
| return 0; |
| } |
| |
| /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the |
| caller's attempt to find the vector type in STMT with the narrowest |
| element type. Return true if VECTYPE is nonnull and if it is valid |
| for VINFO. When returning true, update MAX_NUNITS to reflect the |
| number of units in VECTYPE. VINFO, GROUP_SIZE and MAX_NUNITS are |
| as for vect_build_slp_tree. */ |
| |
| static bool |
| vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size, |
| tree vectype, poly_uint64 *max_nunits) |
| { |
| if (!vectype) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: unsupported data-type in "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); |
| dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); |
| } |
| /* Fatal mismatch. */ |
| return false; |
| } |
| |
| /* If populating the vector type requires unrolling then fail |
| before adjusting *max_nunits for basic-block vectorization. */ |
| poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); |
| unsigned HOST_WIDE_INT const_nunits; |
| if (is_a <bb_vec_info> (vinfo) |
| && (!nunits.is_constant (&const_nunits) |
| || const_nunits > group_size)) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: unrolling required " |
| "in basic block SLP\n"); |
| /* Fatal mismatch. */ |
| return false; |
| } |
| |
| /* In case of multiple types we need to detect the smallest type. */ |
| vect_update_max_nunits (max_nunits, vectype); |
| return true; |
| } |
| |
| /* Verify that the scalar stmts STMTS are isomorphic, do not require data |
| permutation, and are not of unsupported types of operation. Return true |
| if so; otherwise return false and indicate in *MATCHES which stmts are |
| not isomorphic to the first one. If MATCHES[0] is false then this |
| indicates that the comparison could not be carried out or that the stmts |
| will never be vectorized by SLP. |
| |
| Note that a COND_EXPR is possibly isomorphic to another one after swapping |
| its operands. Set SWAP[i] to 1 if stmt I is a COND_EXPR and isomorphic to |
| the first stmt by swapping the two operands of the comparison; set SWAP[i] |
| to 2 if stmt I is isomorphic to the first stmt by inverting the code of |
| the comparison. Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped |
| to (B1 <= A1 ? X1 : Y1), or inverted to (A1 < B1) ? Y1 : X1. */ |
| |
| static bool |
| vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, |
| vec<gimple *> stmts, unsigned int group_size, |
| unsigned nops, poly_uint64 *max_nunits, |
| bool *matches, bool *two_operators) |
| { |
| unsigned int i; |
| gimple *first_stmt = stmts[0], *stmt = stmts[0]; |
| enum tree_code first_stmt_code = ERROR_MARK; |
| enum tree_code alt_stmt_code = ERROR_MARK; |
| enum tree_code rhs_code = ERROR_MARK; |
| enum tree_code first_cond_code = ERROR_MARK; |
| tree lhs; |
| bool need_same_oprnds = false; |
| tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE; |
| optab optab; |
| int icode; |
| machine_mode optab_op2_mode; |
| machine_mode vec_mode; |
| HOST_WIDE_INT dummy; |
| gimple *first_load = NULL, *prev_first_load = NULL; |
| |
| /* For every stmt in NODE find its def stmt/s. */ |
| FOR_EACH_VEC_ELT (stmts, i, stmt) |
| { |
| swap[i] = 0; |
| matches[i] = false; |
| |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for "); |
| dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); |
| } |
| |
| /* Fail to vectorize statements marked as unvectorizable. */ |
| if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: unvectorizable statement "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); |
| } |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| |
| lhs = gimple_get_lhs (stmt); |
| if (lhs == NULL_TREE) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: not GIMPLE_ASSIGN nor " |
| "GIMPLE_CALL "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); |
| } |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| |
| scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); |
| vectype = get_vectype_for_scalar_type (scalar_type); |
| if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype, |
| max_nunits)) |
| { |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| |
| if (gcall *call_stmt = dyn_cast <gcall *> (stmt)) |
| { |
| rhs_code = CALL_EXPR; |
| if (gimple_call_internal_p (call_stmt) |
| || gimple_call_tail_p (call_stmt) |
| || gimple_call_noreturn_p (call_stmt) |
| || !gimple_call_nothrow_p (call_stmt) |
| || gimple_call_chain (call_stmt)) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: unsupported call type "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, |
| call_stmt, 0); |
| } |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| } |
| else |
| rhs_code = gimple_assign_rhs_code (stmt); |
| |
| /* Check the operation. */ |
| if (i == 0) |
| { |
| first_stmt_code = rhs_code; |
| |
| /* Shift arguments should be equal in all the packed stmts for a |
| vector shift with scalar shift operand. */ |
| if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR |
| || rhs_code == LROTATE_EXPR |
| || rhs_code == RROTATE_EXPR) |
| { |
| vec_mode = TYPE_MODE (vectype); |
| |
| /* First see if we have a vector/vector shift. */ |
| optab = optab_for_tree_code (rhs_code, vectype, |
| optab_vector); |
| |
| if (!optab |
| || optab_handler (optab, vec_mode) == CODE_FOR_nothing) |
| { |
| /* No vector/vector shift, try for a vector/scalar shift. */ |
| optab = optab_for_tree_code (rhs_code, vectype, |
| optab_scalar); |
| |
| if (!optab) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: no optab.\n"); |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| icode = (int) optab_handler (optab, vec_mode); |
| if (icode == CODE_FOR_nothing) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: " |
| "op not supported by target.\n"); |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| optab_op2_mode = insn_data[icode].operand[2].mode; |
| if (!VECTOR_MODE_P (optab_op2_mode)) |
| { |
| need_same_oprnds = true; |
| first_op1 = gimple_assign_rhs2 (stmt); |
| } |
| } |
| } |
| else if (rhs_code == WIDEN_LSHIFT_EXPR) |
| { |
| need_same_oprnds = true; |
| first_op1 = gimple_assign_rhs2 (stmt); |
| } |
| } |
| else |
| { |
| if (first_stmt_code != rhs_code |
| && alt_stmt_code == ERROR_MARK) |
| alt_stmt_code = rhs_code; |
| if (first_stmt_code != rhs_code |
| && (first_stmt_code != IMAGPART_EXPR |
| || rhs_code != REALPART_EXPR) |
| && (first_stmt_code != REALPART_EXPR |
| || rhs_code != IMAGPART_EXPR) |
| /* Handle mismatches in plus/minus by computing both |
| and merging the results. */ |
| && !((first_stmt_code == PLUS_EXPR |
| || first_stmt_code == MINUS_EXPR) |
| && (alt_stmt_code == PLUS_EXPR |
| || alt_stmt_code == MINUS_EXPR) |
| && rhs_code == alt_stmt_code) |
| && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)) |
| && (first_stmt_code == ARRAY_REF |
| || first_stmt_code == BIT_FIELD_REF |
| || first_stmt_code == INDIRECT_REF |
| || first_stmt_code == COMPONENT_REF |
| || first_stmt_code == MEM_REF))) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different operation " |
| "in stmt "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "original stmt "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, |
| first_stmt, 0); |
| } |
| /* Mismatch. */ |
| continue; |
| } |
| |
| if (need_same_oprnds |
| && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0)) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different shift " |
| "arguments in "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); |
| } |
| /* Mismatch. */ |
| continue; |
| } |
| |
| if (rhs_code == CALL_EXPR) |
| { |
| gimple *first_stmt = stmts[0]; |
| if (gimple_call_num_args (stmt) != nops |
| || !operand_equal_p (gimple_call_fn (first_stmt), |
| gimple_call_fn (stmt), 0) |
| || gimple_call_fntype (first_stmt) |
| != gimple_call_fntype (stmt)) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different calls in "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, |
| stmt, 0); |
| } |
| /* Mismatch. */ |
| continue; |
| } |
| } |
| } |
| |
| /* Grouped store or load. */ |
| if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) |
| { |
| if (REFERENCE_CLASS_P (lhs)) |
| { |
| /* Store. */ |
| ; |
| } |
| else |
| { |
| /* Load. */ |
| first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)); |
| if (prev_first_load) |
| { |
| /* Check that there are no loads from different interleaving |
| chains in the same node. */ |
| if (prev_first_load != first_load) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
| vect_location, |
| "Build SLP failed: different " |
| "interleaving chains in one node "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, |
| stmt, 0); |
| } |
| /* Mismatch. */ |
| continue; |
| } |
| } |
| else |
| prev_first_load = first_load; |
| } |
| } /* Grouped access. */ |
| else |
| { |
| if (TREE_CODE_CLASS (rhs_code) == tcc_reference) |
| { |
| /* Non-grouped load. */ |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: not grouped load "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); |
| } |
| |
| /* FORNOW: Non-grouped loads are not supported. */ |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| |
| /* Not a memory operation. */ |
| if (TREE_CODE_CLASS (rhs_code) != tcc_binary |
| && TREE_CODE_CLASS (rhs_code) != tcc_unary |
| && TREE_CODE_CLASS (rhs_code) != tcc_expression |
| && TREE_CODE_CLASS (rhs_code) != tcc_comparison |
| && rhs_code != CALL_EXPR) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: operation"); |
| dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); |
| } |
| /* Fatal mismatch. */ |
| matches[0] = false; |
| return false; |
| } |
| |
| if (rhs_code == COND_EXPR) |
| { |
| tree cond_expr = gimple_assign_rhs1 (stmt); |
| enum tree_code cond_code = TREE_CODE (cond_expr); |
| enum tree_code swap_code = ERROR_MARK; |
| enum tree_code invert_code = ERROR_MARK; |
| |
| if (i == 0) |
| first_cond_code = TREE_CODE (cond_expr); |
| else if (TREE_CODE_CLASS (cond_code) == tcc_comparison) |
| { |
| bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0)); |
| swap_code = swap_tree_comparison (cond_code); |
| invert_code = invert_tree_comparison (cond_code, honor_nans); |
| } |
| |
| if (first_cond_code == cond_code) |
| ; |
| /* Isomorphism can be achieved by swapping the comparison operands. */ |
| else if (first_cond_code == swap_code) |
| swap[i] = 1; |
| /* Isomorphism can be achieved by inverting the comparison code. */ |
| else if (first_cond_code == invert_code) |
| swap[i] = 2; |
| else |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different" |
| " operation"); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, |
| stmt, 0); |
| } |
| /* Mismatch. */ |
| continue; |
| } |
| } |
| } |
| |
| matches[i] = true; |
| } |
| |
| for (i = 0; i < group_size; ++i) |
| if (!matches[i]) |
| return false; |
| |
| /* If we allowed a two-operation SLP node verify the target can cope |
| with the permute we are going to use. */ |
| poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); |
| if (alt_stmt_code != ERROR_MARK |
| && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) |
| { |
| unsigned HOST_WIDE_INT count; |
| if (!nunits.is_constant (&count)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different operations " |
| "not allowed with variable-length SLP.\n"); |
| return false; |
| } |
| vec_perm_builder sel (count, count, 1); |
| for (i = 0; i < count; ++i) |
| { |
| unsigned int elt = i; |
| if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code) |
| elt += count; |
| sel.quick_push (elt); |
| } |
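| /* Lanes whose scalar stmt uses ALT_STMT_CODE select from the second |
| vector input (element index I + COUNT). E.g. for an add/sub pair with |
| GROUP_SIZE 2 and COUNT 4 the mask is { 0, 5, 2, 7 }. */ |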
| vec_perm_indices indices (sel, 2, count); |
| if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices)) |
| { |
| for (i = 0; i < group_size; ++i) |
| if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code) |
| { |
| matches[i] = false; |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: different operation " |
| "in stmt "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, |
| stmts[i], 0); |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "original stmt "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, |
| first_stmt, 0); |
| } |
| } |
| return false; |
| } |
| *two_operators = true; |
| } |
| |
| return true; |
| } |
| |
| /* Traits for the hash_set to record failed SLP builds for a stmt set. |
| Note that we never remove entries except at destruction time, so we do |
| not need a special deleted value that differs from the empty one. */ |
| struct bst_traits |
| { |
| typedef vec <gimple *> value_type; |
| typedef vec <gimple *> compare_type; |
| static inline hashval_t hash (value_type); |
| static inline bool equal (value_type existing, value_type candidate); |
| static inline bool is_empty (value_type x) { return !x.exists (); } |
| static inline bool is_deleted (value_type x) { return !x.exists (); } |
| static inline void mark_empty (value_type &x) { x.release (); } |
| static inline void mark_deleted (value_type &x) { x.release (); } |
| static inline void remove (value_type &x) { x.release (); } |
| }; |
| inline hashval_t |
| bst_traits::hash (value_type x) |
| { |
| inchash::hash h; |
| for (unsigned i = 0; i < x.length (); ++i) |
| h.add_int (gimple_uid (x[i])); |
| return h.end (); |
| } |
| inline bool |
| bst_traits::equal (value_type existing, value_type candidate) |
| { |
| if (existing.length () != candidate.length ()) |
| return false; |
| for (unsigned i = 0; i < existing.length (); ++i) |
| if (existing[i] != candidate[i]) |
| return false; |
| return true; |
| } |
| |
| typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t; |
| static scalar_stmts_set_t *bst_fail; |
| |
| static slp_tree |
| vect_build_slp_tree_2 (vec_info *vinfo, |
| vec<gimple *> stmts, unsigned int group_size, |
| poly_uint64 *max_nunits, |
| vec<slp_tree> *loads, |
| bool *matches, unsigned *npermutes, unsigned *tree_size, |
| unsigned max_tree_size); |
| |
| static slp_tree |
| vect_build_slp_tree (vec_info *vinfo, |
| vec<gimple *> stmts, unsigned int group_size, |
| poly_uint64 *max_nunits, vec<slp_tree> *loads, |
| bool *matches, unsigned *npermutes, unsigned *tree_size, |
| unsigned max_tree_size) |
| { |
| if (bst_fail->contains (stmts)) |
| return NULL; |
| slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits, |
| loads, matches, npermutes, tree_size, |
| max_tree_size); |
| /* When the SLP build fails for STMTS record this; otherwise the SLP build |
| can become exponential in time when we allow constructing parts from |
| scalars, see PR81723. */ |
| if (! res) |
| { |
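| /* Enter a copy of STMTS into the set; the hash set owns its key |
| vectors and releases them at destruction time. */ |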
| vec <gimple *> x; |
| x.create (stmts.length ()); |
| x.splice (stmts); |
| bst_fail->add (x); |
| } |
| return res; |
| } |
| |
| /* Recursively build an SLP tree starting from NODE. |
| Fail (and return NULL) if def-stmts are not isomorphic, require data |
| permutation or are of unsupported types of operation. Otherwise, return |
| the built SLP tree; MATCHES indicates which scalar stmts failed to |
| match. */ |
| |
| static slp_tree |
| vect_build_slp_tree_2 (vec_info *vinfo, |
| vec<gimple *> stmts, unsigned int group_size, |
| poly_uint64 *max_nunits, |
| vec<slp_tree> *loads, |
| bool *matches, unsigned *npermutes, unsigned *tree_size, |
| unsigned max_tree_size) |
| { |
| unsigned nops, i, this_tree_size = 0; |
| poly_uint64 this_max_nunits = *max_nunits; |
| gimple *stmt; |
| slp_tree node; |
| |
| matches[0] = false; |
| |
| stmt = stmts[0]; |
| if (is_gimple_call (stmt)) |
| nops = gimple_call_num_args (stmt); |
| else if (is_gimple_assign (stmt)) |
| { |
| nops = gimple_num_ops (stmt) - 1; |
| if (gimple_assign_rhs_code (stmt) == COND_EXPR) |
| nops++; |
| } |
| else if (gimple_code (stmt) == GIMPLE_PHI) |
| nops = 0; |
| else |
| return NULL; |
| |
| /* If the SLP node is a PHI (induction or reduction), terminate |
| the recursion. */ |
| if (gimple_code (stmt) == GIMPLE_PHI) |
| { |
| tree scalar_type = TREE_TYPE (PHI_RESULT (stmt)); |
| tree vectype = get_vectype_for_scalar_type (scalar_type); |
| if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype, |
| max_nunits)) |
| return NULL; |
| |
| vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)); |
| /* Induction from different IVs is not supported. */ |
| if (def_type == vect_induction_def) |
| { |
| FOR_EACH_VEC_ELT (stmts, i, stmt) |
| if (stmt != stmts[0]) |
| return NULL; |
| } |
| else |
| { |
| /* Else def types have to match. */ |
| FOR_EACH_VEC_ELT (stmts, i, stmt) |
| { |
| /* But for reduction chains only check on the first stmt. */ |
| if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) |
| && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt) |
| continue; |
| if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type) |
| return NULL; |
| } |
| } |
| node = vect_create_new_slp_node (stmts); |
| return node; |
| } |
| |
| |
| bool two_operators = false; |
| unsigned char *swap = XALLOCAVEC (unsigned char, group_size); |
| if (!vect_build_slp_tree_1 (vinfo, swap, |
| stmts, group_size, nops, |
| &this_max_nunits, matches, &two_operators)) |
| return NULL; |
| |
| /* If the SLP node is a load, terminate the recursion. */ |
| if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)) |
| && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))) |
| { |
| *max_nunits = this_max_nunits; |
| node = vect_create_new_slp_node (stmts); |
| loads->safe_push (node); |
| return node; |
| } |
| |
| /* Get at the operands, verifying they are compatible. */ |
| vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size); |
| slp_oprnd_info oprnd_info; |
| FOR_EACH_VEC_ELT (stmts, i, stmt) |
| { |
| int res = vect_get_and_check_slp_defs (vinfo, &swap[i], |
| stmts, i, &oprnds_info); |
| if (res != 0) |
| matches[(res == -1) ? 0 : i] = false; |
| if (!matches[0]) |
| break; |
| } |
| for (i = 0; i < group_size; ++i) |
| if (!matches[i]) |
| { |
| vect_free_oprnd_info (oprnds_info); |
| return NULL; |
| } |
| |
| auto_vec<slp_tree, 4> children; |
| auto_vec<slp_tree> this_loads; |
| |
| stmt = stmts[0]; |
| |
| if (tree_size) |
| max_tree_size -= *tree_size; |
| |
| /* Create SLP_TREE nodes for the definition node/s. */ |
| FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) |
| { |
| slp_tree child; |
| unsigned old_nloads = this_loads.length (); |
| unsigned old_tree_size = this_tree_size; |
| unsigned int j; |
| |
| if (oprnd_info->first_dt != vect_internal_def |
| && oprnd_info->first_dt != vect_reduction_def |
| && oprnd_info->first_dt != vect_induction_def) |
| continue; |
| |
| if (++this_tree_size > max_tree_size) |
| { |
| FOR_EACH_VEC_ELT (children, j, child) |
| vect_free_slp_tree (child); |
| vect_free_oprnd_info (oprnds_info); |
| return NULL; |
| } |
| |
| if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, |
| group_size, &this_max_nunits, |
| &this_loads, matches, npermutes, |
| &this_tree_size, |
| max_tree_size)) != NULL) |
| { |
| /* If all children of CHILD are built up from scalars then just |
| throw them away and build CHILD from scalars instead. */ |
| if (!SLP_TREE_CHILDREN (child).is_empty () |
| /* ??? Rejecting patterns this way doesn't work. We'd have to |
| do extra work to cancel the pattern so the uses see the |
| scalar version. */ |
| && !is_pattern_stmt_p |
| (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))) |
| { |
| slp_tree grandchild; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) |
| if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def) |
| break; |
| if (!grandchild) |
| { |
| /* Roll back. */ |
| this_loads.truncate (old_nloads); |
| this_tree_size = old_tree_size; |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) |
| vect_free_slp_tree (grandchild); |
| SLP_TREE_CHILDREN (child).truncate (0); |
| |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Building parent vector operands from " |
| "scalars instead\n"); |
| oprnd_info->def_stmts = vNULL; |
| SLP_TREE_DEF_TYPE (child) = vect_external_def; |
| children.safe_push (child); |
| continue; |
| } |
| } |
| |
| oprnd_info->def_stmts = vNULL; |
| children.safe_push (child); |
| continue; |
| } |
| |
| /* If the SLP build failed fatally and we analyze a basic-block |
| simply treat nodes we fail to build as externally defined |
| (and thus build vectors from the scalar defs). |
| The cost model will reject outright expensive cases. |
| ??? This doesn't treat cases where permutation ultimately |
| fails (or we don't try permutation below). Ideally we'd |
| even compute a permutation that will end up with the maximum |
| SLP tree size... */ |
| if (is_a <bb_vec_info> (vinfo) |
| && !matches[0] |
| /* ??? Rejecting patterns this way doesn't work. We'd have to |
| do extra work to cancel the pattern so the uses see the |
| scalar version. */ |
| && !is_pattern_stmt_p (vinfo_for_stmt (stmt))) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Building vector operands from scalars\n"); |
| child = vect_create_new_slp_node (oprnd_info->def_stmts); |
| SLP_TREE_DEF_TYPE (child) = vect_external_def; |
| children.safe_push (child); |
| oprnd_info->def_stmts = vNULL; |
| continue; |
| } |
| |
| /* If the SLP build for operand zero failed and operands zero |
| and one can be commuted, try that for the scalar stmts |
| that failed the match. */ |
| if (i == 0 |
| /* A first scalar stmt mismatch signals a fatal mismatch. */ |
| && matches[0] |
| /* ??? For COND_EXPRs we can swap the comparison operands |
| as well as the arms under some constraints. */ |
| && nops == 2 |
| && oprnds_info[1]->first_dt == vect_internal_def |
| && is_gimple_assign (stmt) |
| /* Do so only if the number of unsuccessful permutes was not more |
| than a cut-off, as re-trying the recursive match on |
| possibly each level of the tree would expose exponential |
| behavior. */ |
| && *npermutes < 4) |
| { |
| /* See whether we can swap the matching or the non-matching |
| stmt operands. */ |
| bool swap_not_matching = true; |
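| /* First try swapping the operands of the non-matching stmts; if one |
| of those cannot be swapped, retry swapping the matching stmts |
| instead. */ |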
| do |
| { |
| for (j = 0; j < group_size; ++j) |
| { |
| if (matches[j] != !swap_not_matching) |
| continue; |
| gimple *stmt = stmts[j]; |
| /* Verify if we can swap operands of this stmt. */ |
| if (!is_gimple_assign (stmt) |
| || !commutative_tree_code (gimple_assign_rhs_code (stmt))) |
| { |
| if (!swap_not_matching) |
| goto fail; |
| swap_not_matching = false; |
| break; |
| } |
| /* Verify if we can safely swap or if we committed to a |
| specific operand order already. |
| ??? Instead of modifying GIMPLE stmts here we could |
| record whether we want to swap operands in the SLP |
| node and temporarily do that when processing it |
| (or wrap operand accessors in a helper). */ |
| else if (swap[j] != 0 |
| || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))) |
| { |
| if (!swap_not_matching) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
| vect_location, |
| "Build SLP failed: cannot swap " |
| "operands of shared stmt "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, |
| TDF_SLIM, stmts[j], 0); |
| } |
| goto fail; |
| } |
| swap_not_matching = false; |
| break; |
| } |
| } |
| } |
| while (j != group_size); |
| |
| /* Swap mismatched definition stmts. */ |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Re-trying with swapped operands of stmts "); |
| for (j = 0; j < group_size; ++j) |
| if (matches[j] == !swap_not_matching) |
| { |
| std::swap (oprnds_info[0]->def_stmts[j], |
| oprnds_info[1]->def_stmts[j]); |
| dump_printf (MSG_NOTE, "%d ", j); |
| } |
| dump_printf (MSG_NOTE, "\n"); |
| /* And try again with scratch 'matches' ... */ |
| bool *tem = XALLOCAVEC (bool, group_size); |
| if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, |
| group_size, &this_max_nunits, |
| &this_loads, tem, npermutes, |
| &this_tree_size, |
| max_tree_size)) != NULL) |
| { |
| /* ... so if successful we can apply the operand swapping |
| to the GIMPLE IL. This is necessary because for example |
| vect_get_slp_defs uses operand indexes and thus expects |
| canonical operand order. This is also necessary even |
| if we end up building the operand from scalars as |
| we'll continue to process swapped operand two. */ |
| for (j = 0; j < group_size; ++j) |
| { |
| gimple *stmt = stmts[j]; |
| gimple_set_plf (stmt, GF_PLF_1, false); |
| } |
| for (j = 0; j < group_size; ++j) |
| { |
| gimple *stmt = stmts[j]; |
| if (matches[j] == !swap_not_matching) |
| { |
| /* Avoid swapping operands twice. */ |
| if (gimple_plf (stmt, GF_PLF_1)) |
| continue; |
| swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt), |
| gimple_assign_rhs2_ptr (stmt)); |
| gimple_set_plf (stmt, GF_PLF_1, true); |
| } |
| } |
| /* Verify we swap all duplicates or none. */ |
| if (flag_checking) |
| for (j = 0; j < group_size; ++j) |
| { |
| gimple *stmt = stmts[j]; |
| gcc_assert (gimple_plf (stmt, GF_PLF_1) |
| == (matches[j] == !swap_not_matching)); |
| } |
| |
| /* If all children of CHILD are built up from scalars then |
| just throw them away and build CHILD from scalars instead. */ |
| if (!SLP_TREE_CHILDREN (child).is_empty () |
| /* ??? Rejecting patterns this way doesn't work. We'd have |
| to do extra work to cancel the pattern so the uses see the |
| scalar version. */ |
| && !is_pattern_stmt_p |
| (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))) |
| { |
| unsigned int j; |
| slp_tree grandchild; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) |
| if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def) |
| break; |
| if (!grandchild) |
| { |
| /* Roll back. */ |
| this_loads.truncate (old_nloads); |
| this_tree_size = old_tree_size; |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) |
| vect_free_slp_tree (grandchild); |
| SLP_TREE_CHILDREN (child).truncate (0); |
| |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Building parent vector operands from " |
| "scalars instead\n"); |
| oprnd_info->def_stmts = vNULL; |
| SLP_TREE_DEF_TYPE (child) = vect_external_def; |
| children.safe_push (child); |
| continue; |
| } |
| } |
| |
| oprnd_info->def_stmts = vNULL; |
| children.safe_push (child); |
| continue; |
| } |
| |
| ++*npermutes; |
| } |
| |
| fail: |
| gcc_assert (child == NULL); |
| FOR_EACH_VEC_ELT (children, j, child) |
| vect_free_slp_tree (child); |
| vect_free_oprnd_info (oprnds_info); |
| return NULL; |
| } |
| |
| vect_free_oprnd_info (oprnds_info); |
| |
| if (tree_size) |
| *tree_size += this_tree_size; |
| *max_nunits = this_max_nunits; |
| loads->safe_splice (this_loads); |
| |
| node = vect_create_new_slp_node (stmts); |
| SLP_TREE_TWO_OPERATORS (node) = two_operators; |
| SLP_TREE_CHILDREN (node).splice (children); |
| return node; |
| } |
| |
| /* Dump a slp tree NODE using flags specified in DUMP_KIND. */ |
| |
| static void |
| vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node) |
| { |
| int i; |
| gimple *stmt; |
| slp_tree child; |
| |
| dump_printf_loc (dump_kind, loc, "node%s\n", |
| SLP_TREE_DEF_TYPE (node) != vect_internal_def |
| ? " (external)" : ""); |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) |
| { |
| dump_printf_loc (dump_kind, loc, "\tstmt %d ", i); |
| dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0); |
| } |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| vect_print_slp_tree (dump_kind, loc, child); |
| } |
| |
| |
| /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID). |
| If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index |
| J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the |
| stmts in NODE are to be marked. */ |
| |
| static void |
| vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j) |
| { |
| int i; |
| gimple *stmt; |
| slp_tree child; |
| |
| if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) |
| return; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) |
| if (j < 0 || i == j) |
| STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| vect_mark_slp_stmts (child, mark, j); |
| } |
| |
| |
| /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */ |
| |
| static void |
| vect_mark_slp_stmts_relevant (slp_tree node) |
| { |
| int i; |
| gimple *stmt; |
| stmt_vec_info stmt_info; |
| slp_tree child; |
| |
| if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) |
| return; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) |
| { |
| stmt_info = vinfo_for_stmt (stmt); |
| gcc_assert (!STMT_VINFO_RELEVANT (stmt_info) |
| || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope); |
| STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; |
| } |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| vect_mark_slp_stmts_relevant (child); |
| } |
| |
| |
| /* Rearrange the statements of NODE according to PERMUTATION. */ |
| |
| static void |
| vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, |
| vec<unsigned> permutation) |
| { |
| gimple *stmt; |
| vec<gimple *> tmp_stmts; |
| unsigned int i; |
| slp_tree child; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| vect_slp_rearrange_stmts (child, group_size, permutation); |
| |
| gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ()); |
| tmp_stmts.create (group_size); |
| tmp_stmts.quick_grow_cleared (group_size); |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) |
| tmp_stmts[permutation[i]] = stmt; |
| |
| SLP_TREE_SCALAR_STMTS (node).release (); |
| SLP_TREE_SCALAR_STMTS (node) = tmp_stmts; |
| } |
| |
| |
| /* Attempt to reorder stmts in a reduction chain so that we don't |
| require any load permutation. Return true if that was possible, |
| otherwise return false. */ |
| |
| static bool |
| vect_attempt_slp_rearrange_stmts (slp_instance slp_instn) |
| { |
| unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn); |
| unsigned int i, j; |
| unsigned int lidx; |
| slp_tree node, load; |
| |
| /* Compare all the permutation sequences to the first one. We know |
| that at least one load is permuted. */ |
| node = SLP_INSTANCE_LOADS (slp_instn)[0]; |
| if (!node->load_permutation.exists ()) |
| return false; |
| for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i) |
| { |
| if (!load->load_permutation.exists ()) |
| return false; |
| FOR_EACH_VEC_ELT (load->load_permutation, j, lidx) |
| if (lidx != node->load_permutation[j]) |
| return false; |
| } |
| |
| /* Check that the loads in the first sequence are different and there |
| are no gaps between them. */ |
| auto_sbitmap load_index (group_size); |
| bitmap_clear (load_index); |
| FOR_EACH_VEC_ELT (node->load_permutation, i, lidx) |
| { |
| if (lidx >= group_size) |
| return false; |
| if (bitmap_bit_p (load_index, lidx)) |
| return false; |
| |
| bitmap_set_bit (load_index, lidx); |
| } |
| for (i = 0; i < group_size; i++) |
| if (!bitmap_bit_p (load_index, i)) |
| return false; |
| |
| /* This permutation is valid for reduction. Since the order of the |
| statements in the nodes is not important unless they are memory |
| accesses, we can rearrange the statements in all the nodes |
| according to the order of the loads. */ |
| vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size, |
| node->load_permutation); |
| |
| /* We are done, no actual permutations need to be generated. */ |
| poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn); |
| FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |
| { |
| gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |
| first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)); |
| /* But we have to keep those permutations that are required because |
| of handling of gaps. */ |
| if (known_eq (unrolling_factor, 1U) |
| || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt)) |
| && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)) |
| SLP_TREE_LOAD_PERMUTATION (node).release (); |
| else |
| for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j) |
| SLP_TREE_LOAD_PERMUTATION (node)[j] = j; |
| } |
| |
| return true; |
| } |
| |
| /* Check if the required load permutations in the SLP instance |
| SLP_INSTN are supported. */ |
| |
| static bool |
| vect_supported_load_permutation_p (slp_instance slp_instn) |
| { |
| unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn); |
| unsigned int i, j, k, next; |
| slp_tree node; |
| gimple *stmt, *load, *next_load; |
| |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, "Load permutation "); |
| FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |
| if (node->load_permutation.exists ()) |
| FOR_EACH_VEC_ELT (node->load_permutation, j, next) |
| dump_printf (MSG_NOTE, "%d ", next); |
| else |
| for (k = 0; k < group_size; ++k) |
| dump_printf (MSG_NOTE, "%d ", k); |
| dump_printf (MSG_NOTE, "\n"); |
| } |
| |
| /* In case of reduction every load permutation is allowed, since the order |
| of the reduction statements is not important (as opposed to the case of |
| grouped stores). The only condition we need to check is that all the |
| load nodes are of the same size and have the same permutation (and then |
| rearrange all the nodes of the SLP instance according to this |
| permutation). */ |
| |
| /* Check that all the load nodes are of the same size. */ |
| /* ??? Can't we assert this? */ |
| FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |
| if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size) |
| return false; |
| |
| node = SLP_INSTANCE_TREE (slp_instn); |
| stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |
| |
| /* Reduction (there are no data-refs in the root). |
| In reduction chain the order of the loads is not important. */ |
| if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)) |
| && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) |
| vect_attempt_slp_rearrange_stmts (slp_instn); |
| |
| /* In basic block vectorization we allow any subchain of an interleaving |
| chain. |
| FORNOW: not supported in loop SLP because of realignment complications. */ |
| if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt))) |
| { |
| /* Check whether the loads in an instance form a subchain and thus |
| no permutation is necessary. */ |
| FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |
| { |
| if (!SLP_TREE_LOAD_PERMUTATION (node).exists ()) |
| continue; |
| bool subchain_p = true; |
| next_load = NULL; |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load) |
| { |
| if (j != 0 |
| && (next_load != load |
| || GROUP_GAP (vinfo_for_stmt (load)) != 1)) |
| { |
| subchain_p = false; |
| break; |
| } |
| next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load)); |
| } |
| if (subchain_p) |
| SLP_TREE_LOAD_PERMUTATION (node).release (); |
| else |
| { |
| stmt_vec_info group_info |
| = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); |
| group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info)); |
| unsigned HOST_WIDE_INT nunits; |
| unsigned k, maxk = 0; |
| FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k) |
| if (k > maxk) |
| maxk = k; |
| /* In BB vectorization we may not actually use a loaded vector |
| accessing elements in excess of GROUP_SIZE. */ |
| tree vectype = STMT_VINFO_VECTYPE (group_info); |
| if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits) |
| || maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1))) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "BB vectorization with gaps at the end of " |
| "a load is not supported\n"); |
| return false; |
| } |
| |
| /* Verify the permutation can be generated. */ |
| vec<tree> tem; |
| unsigned n_perms; |
| if (!vect_transform_slp_perm_load (node, tem, NULL, |
| 1, slp_instn, true, &n_perms)) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
| vect_location, |
| "unsupported load permutation\n"); |
| return false; |
| } |
| } |
| } |
| return true; |
| } |
| |
| /* For loop vectorization verify we can generate the permutation. Be |
| conservative about the vectorization factor, there are permutations |
| that will use three vector inputs only starting from a specific factor |
| and the vectorization factor is not yet final. |
| ??? The SLP instance unrolling factor might not be the maximum one. */ |
| unsigned n_perms; |
| poly_uint64 test_vf |
| = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), |
| LOOP_VINFO_VECT_FACTOR |
| (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)))); |
| FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |
| if (node->load_permutation.exists () |
| && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf, |
| slp_instn, true, &n_perms)) |
| return false; |
| |
| return true; |
| } |
| |
| |
| /* Find the last store in SLP INSTANCE. */ |
| |
| gimple * |
| vect_find_last_scalar_stmt_in_slp (slp_tree node) |
| { |
| gimple *last = NULL, *stmt; |
| |
| for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++) |
| { |
| stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); |
| if (is_pattern_stmt_p (stmt_vinfo)) |
| last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last); |
| else |
| last = get_later_stmt (stmt, last); |
| } |
| |
| return last; |
| } |
| |
| /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */ |
| |
| static void |
| vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, |
| stmt_vector_for_cost *prologue_cost_vec, |
| stmt_vector_for_cost *body_cost_vec, |
| unsigned ncopies_for_cost, |
| scalar_stmts_set_t* visited) |
| { |
| unsigned i, j; |
| slp_tree child; |
| gimple *stmt; |
| stmt_vec_info stmt_info; |
| tree lhs; |
| |
| /* If we already costed the exact same set of scalar stmts we're done. |
| We share the generated vector stmts for those. */ |
| if (visited->contains (SLP_TREE_SCALAR_STMTS (node))) |
| return; |
| |
| visited->add (SLP_TREE_SCALAR_STMTS (node).copy ()); |
| |
| /* Recurse down the SLP tree. */ |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) |
| vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec, |
| body_cost_vec, ncopies_for_cost, visited); |
| |
| /* Look at the first scalar stmt to determine the cost. */ |
| stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |
| stmt_info = vinfo_for_stmt (stmt); |
| if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
| { |
| vect_memory_access_type memory_access_type |
| = (STMT_VINFO_STRIDED_P (stmt_info) |
| ? VMAT_STRIDED_SLP |
| : VMAT_CONTIGUOUS); |
| if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))) |
| vect_model_store_cost (stmt_info, ncopies_for_cost, |
| memory_access_type, VLS_STORE, |
| node, prologue_cost_vec, body_cost_vec); |
| else |
| { |
| gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))); |
| if (SLP_TREE_LOAD_PERMUTATION (node).exists ()) |
| { |
| /* If the load is permuted then the alignment is determined by |
| the first group element, not by the first scalar stmt DR. */ |
| stmt = GROUP_FIRST_ELEMENT (stmt_info); |
| stmt_info = vinfo_for_stmt (stmt); |
| /* Record the cost for the permutation. */ |
| unsigned n_perms; |
| vect_transform_slp_perm_load (node, vNULL, NULL, |
| ncopies_for_cost, instance, true, |
| &n_perms); |
| record_stmt_cost (body_cost_vec, n_perms, vec_perm, |
| stmt_info, 0, vect_body); |
| unsigned assumed_nunits |
| = vect_nunits_for_cost (STMT_VINFO_VECTYPE (stmt_info)); |
| /* And adjust the number of loads performed. This handles |
| redundancies as well as loads that are later dead. */ |
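| /* As an illustration (example values only, not tied to any target):
| with GROUP_SIZE == 8, assumed_nunits == 4 and a load permutation of
| { 0, 2, 1, 3 }, only the first chunk of four group elements is
| referenced, so the loop below counts one vector load instead of two. */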
| auto_sbitmap perm (GROUP_SIZE (stmt_info)); |
| bitmap_clear (perm); |
| for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i) |
| bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]); |
| ncopies_for_cost = 0; |
| bool load_seen = false; |
| for (i = 0; i < GROUP_SIZE (stmt_info); ++i) |
| { |
| if (i % assumed_nunits == 0) |
| { |
| if (load_seen) |
| ncopies_for_cost++; |
| load_seen = false; |
| } |
| if (bitmap_bit_p (perm, i)) |
| load_seen = true; |
| } |
| if (load_seen) |
| ncopies_for_cost++; |
| gcc_assert (ncopies_for_cost |
| <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info) |
| + assumed_nunits - 1) / assumed_nunits); |
| poly_uint64 uf = SLP_INSTANCE_UNROLLING_FACTOR (instance); |
| ncopies_for_cost *= estimated_poly_value (uf); |
| } |
| /* Record the cost for the vector loads. */ |
| vect_model_load_cost (stmt_info, ncopies_for_cost, |
| memory_access_type, node, prologue_cost_vec, |
| body_cost_vec); |
| return; |
| } |
| } |
| else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type) |
| { |
| /* ncopies_for_cost is the number of IVs we generate. */ |
| record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt, |
| stmt_info, 0, vect_body); |
| |
| /* Prologue cost for the initial values and step vector. */ |
| record_stmt_cost (prologue_cost_vec, ncopies_for_cost, |
| CONSTANT_CLASS_P |
| (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED |
| (stmt_info)) |
| ? vector_load : vec_construct, |
| stmt_info, 0, vect_prologue); |
| record_stmt_cost (prologue_cost_vec, 1, |
| CONSTANT_CLASS_P |
| (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info)) |
| ? vector_load : vec_construct, |
| stmt_info, 0, vect_prologue); |
| |
| /* ??? No easy way to get at the actual number of vector stmts |
| to be generated and thus the derived IVs. */
| } |
| else |
| { |
| record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt, |
| stmt_info, 0, vect_body); |
| if (SLP_TREE_TWO_OPERATORS (node)) |
| { |
| record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt, |
| stmt_info, 0, vect_body); |
| record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm, |
| stmt_info, 0, vect_body); |
| } |
| } |
| |
| /* Push SLP node def-type to stmts. */ |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt) |
| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child); |
| |
| /* Scan operands and account for prologue cost of constants/externals. |
| ??? This over-estimates cost for multiple uses and should be |
| re-engineered. */ |
| stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |
| lhs = gimple_get_lhs (stmt); |
| for (i = 0; i < gimple_num_ops (stmt); ++i) |
| { |
| tree op = gimple_op (stmt, i); |
| gimple *def_stmt; |
| enum vect_def_type dt; |
| if (!op || op == lhs) |
| continue; |
| if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt) |
| && (dt == vect_constant_def || dt == vect_external_def)) |
| { |
| /* Without looking at the actual initializer a vector of |
| constants can be implemented as a load from the constant pool.
| When all elements are the same we can use a splat. */ |
| tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op)); |
| unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length (); |
| unsigned num_vects_to_check; |
| unsigned HOST_WIDE_INT const_nunits; |
| unsigned nelt_limit; |
| if (TYPE_VECTOR_SUBPARTS (vectype).is_constant (&const_nunits) |
| && ! multiple_p (const_nunits, group_size)) |
| { |
| num_vects_to_check = SLP_TREE_NUMBER_OF_VEC_STMTS (node); |
| nelt_limit = const_nunits; |
| } |
| else |
| { |
| /* If either the vector has variable length or the vectors |
| are composed of repeated whole groups we only need to |
| cost construction once. All vectors will be the same. */ |
| num_vects_to_check = 1; |
| nelt_limit = group_size; |
| } |
| tree elt = NULL_TREE; |
| unsigned nelt = 0; |
| for (unsigned j = 0; j < num_vects_to_check * nelt_limit; ++j) |
| { |
| unsigned si = j % group_size; |
| if (nelt == 0) |
| elt = gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], i); |
| /* ??? We're just tracking whether all operands of a single |
| vector initializer are the same, ideally we'd check if |
| we emitted the same one already. */ |
| else if (elt != gimple_op (SLP_TREE_SCALAR_STMTS (node)[si], i)) |
| elt = NULL_TREE; |
| nelt++; |
| if (nelt == nelt_limit) |
| { |
| /* ??? We need to pass down stmt_info for a vector type |
| even if it points to the wrong stmt. */ |
| record_stmt_cost (prologue_cost_vec, 1, |
| dt == vect_external_def |
| ? (elt ? scalar_to_vec : vec_construct) |
| : vector_load, |
| stmt_info, 0, vect_prologue); |
| nelt = 0; |
| } |
| } |
| } |
| } |
| |
| /* Restore stmt def-types. */ |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt) |
| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def; |
| } |
| |
| /* Compute the cost for the SLP instance INSTANCE. */ |
| |
| static void |
| vect_analyze_slp_cost (slp_instance instance, void *data, scalar_stmts_set_t *visited) |
| { |
| stmt_vector_for_cost body_cost_vec, prologue_cost_vec; |
| unsigned ncopies_for_cost; |
| stmt_info_for_cost *si; |
| unsigned i; |
| |
| /* Calculate the number of vector stmts to create based on the unrolling |
| factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is |
| GROUP_SIZE / NUNITS otherwise). */
| unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance); |
| slp_tree node = SLP_INSTANCE_TREE (instance); |
| stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); |
| /* Get the estimated vectorization factor, which is always one for |
| basic-block vectorization. */ |
| unsigned int assumed_vf; |
| if (STMT_VINFO_LOOP_VINFO (stmt_info)) |
| assumed_vf = vect_vf_for_cost (STMT_VINFO_LOOP_VINFO (stmt_info)); |
| else |
| assumed_vf = 1; |
| /* For reductions look at a reduction operand in case the reduction |
| operation is widening like DOT_PROD or SAD. */ |
| tree vectype_for_cost = STMT_VINFO_VECTYPE (stmt_info); |
| if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
| { |
| gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |
| switch (gimple_assign_rhs_code (stmt)) |
| { |
| case DOT_PROD_EXPR: |
| case SAD_EXPR: |
| vectype_for_cost = get_vectype_for_scalar_type |
| (TREE_TYPE (gimple_assign_rhs1 (stmt))); |
| break; |
| default:; |
| } |
| } |
| unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost); |
| ncopies_for_cost = (least_common_multiple (assumed_nunits, |
| group_size * assumed_vf) |
| / assumed_nunits); |
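| /* Worked example (illustrative numbers only): with group_size == 6,
| assumed_vf == 2 and assumed_nunits == 4 this is
| least_common_multiple (4, 12) / 4 == 3 copies. */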
| |
| prologue_cost_vec.create (10); |
| body_cost_vec.create (10); |
| vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance), |
| &prologue_cost_vec, &body_cost_vec, |
| ncopies_for_cost, visited); |
| |
| /* Record the prologue costs, which were delayed until we were |
| sure that SLP was successful. */ |
| FOR_EACH_VEC_ELT (prologue_cost_vec, i, si) |
| { |
| struct _stmt_vec_info *stmt_info |
| = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; |
| (void) add_stmt_cost (data, si->count, si->kind, stmt_info, |
| si->misalign, vect_prologue); |
| } |
| |
| /* Record the instance's instructions in the target cost model. */ |
| FOR_EACH_VEC_ELT (body_cost_vec, i, si) |
| { |
| struct _stmt_vec_info *stmt_info |
| = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; |
| (void) add_stmt_cost (data, si->count, si->kind, stmt_info, |
| si->misalign, vect_body); |
| } |
| |
| prologue_cost_vec.release (); |
| body_cost_vec.release (); |
| } |
| |
| /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups: |
| one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing |
| the first GROUP1_SIZE stmts, since stores are consecutive), the second |
| containing the remainder. |
| Return the first stmt in the second group. */ |
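| For example (hypothetical sizes, assuming the group leader's GROUP_GAP
| starts at 0): splitting a group of 8 consecutive stores with
| GROUP1_SIZE == 4 leaves the first group with GROUP_SIZE == 4 and
| GROUP_GAP == 4 (skipping over the second group), and gives the second
| group GROUP_SIZE == 4 and GROUP_GAP == 4 (skipping over the first).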
| |
| static gimple * |
| vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size) |
| { |
| stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt); |
| gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt); |
| gcc_assert (group1_size > 0); |
| int group2_size = GROUP_SIZE (first_vinfo) - group1_size; |
| gcc_assert (group2_size > 0); |
| GROUP_SIZE (first_vinfo) = group1_size; |
| |
| gimple *stmt = first_stmt; |
| for (unsigned i = group1_size; i > 1; i--) |
| { |
| stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); |
| gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1); |
| } |
| /* STMT is now the last element of the first group. */ |
| gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); |
| GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0; |
| |
| GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size; |
| for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt))) |
| { |
| GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2; |
| gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1); |
| } |
| |
| /* For the second group, the GROUP_GAP is that before the original group, |
| plus skipping over the first group. */
| GROUP_GAP (vinfo_for_stmt (group2)) = |
| GROUP_GAP (first_vinfo) + group1_size; |
| |
| /* GROUP_GAP of the first group now has to skip over the second group too. */ |
| GROUP_GAP (first_vinfo) += group2_size; |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n", |
| group1_size, group2_size); |
| |
| return group2; |
| } |
| |
| /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE |
| statements and a vector of NUNITS elements. */ |
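| For instance, with NUNITS == 4 and GROUP_SIZE == 6 the common multiple is
| 12 and the result is 12 / 6 == 2; with GROUP_SIZE == 8 the factor is 1.
| (Illustrative constant values; NUNITS may be a runtime poly_uint64.)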
| |
| static poly_uint64 |
| calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size) |
| { |
| return exact_div (common_multiple (nunits, group_size), group_size); |
| } |
| |
| /* Analyze an SLP instance starting from a group of grouped stores. Call |
| vect_build_slp_tree to build a tree of packed stmts if possible. |
| Return FALSE if it's impossible to SLP any stmt in the loop. */ |
| |
| static bool |
| vect_analyze_slp_instance (vec_info *vinfo, |
| gimple *stmt, unsigned max_tree_size) |
| { |
| slp_instance new_instance; |
| slp_tree node; |
| unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt)); |
| tree vectype, scalar_type = NULL_TREE; |
| gimple *next; |
| unsigned int i; |
| vec<slp_tree> loads; |
| struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); |
| vec<gimple *> scalar_stmts; |
| |
| if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) |
| { |
| if (dr) |
| { |
| scalar_type = TREE_TYPE (DR_REF (dr)); |
| vectype = get_vectype_for_scalar_type (scalar_type); |
| } |
| else |
| { |
| gcc_assert (is_a <loop_vec_info> (vinfo)); |
| vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); |
| } |
| |
| group_size = GROUP_SIZE (vinfo_for_stmt (stmt)); |
| } |
| else |
| { |
| gcc_assert (is_a <loop_vec_info> (vinfo)); |
| vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); |
| group_size = as_a <loop_vec_info> (vinfo)->reductions.length (); |
| } |
| |
| if (!vectype) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: unsupported data-type "); |
| dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type); |
| dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); |
| } |
| |
| return false; |
| } |
| poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); |
| |
| /* Create a node (a root of the SLP tree) for the packed grouped stores. */ |
| scalar_stmts.create (group_size); |
| next = stmt; |
| if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) |
| { |
| /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ |
| while (next) |
| { |
| if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)) |
| && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))) |
| scalar_stmts.safe_push ( |
| STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))); |
| else |
| scalar_stmts.safe_push (next); |
| next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next)); |
| } |
| /* Mark the first element of the reduction chain as reduction to properly |
| transform the node. In the reduction analysis phase only the last |
| element of the chain is marked as reduction. */ |
| if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) |
| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def; |
| } |
| else |
| { |
| /* Collect reduction statements. */ |
| vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions; |
| for (i = 0; reductions.iterate (i, &next); i++) |
| scalar_stmts.safe_push (next); |
| } |
| |
| loads.create (group_size); |
| |
| /* Build the tree for the SLP instance. */ |
| bool *matches = XALLOCAVEC (bool, group_size); |
| unsigned npermutes = 0; |
| bst_fail = new scalar_stmts_set_t (); |
| poly_uint64 max_nunits = nunits; |
| node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, |
| &max_nunits, &loads, matches, &npermutes, |
| NULL, max_tree_size); |
| delete bst_fail; |
| if (node != NULL) |
| { |
| /* Calculate the unrolling factor based on the smallest type. */ |
| poly_uint64 unrolling_factor |
| = calculate_unrolling_factor (max_nunits, group_size); |
| |
| if (maybe_ne (unrolling_factor, 1U) |
| && is_a <bb_vec_info> (vinfo)) |
| { |
| unsigned HOST_WIDE_INT const_max_nunits; |
| if (!max_nunits.is_constant (&const_max_nunits) |
| || const_max_nunits > group_size) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: store group " |
| "size not a multiple of the vector size " |
| "in basic block SLP\n"); |
| vect_free_slp_tree (node); |
| loads.release (); |
| return false; |
| } |
| /* Fatal mismatch. */ |
| matches[group_size / const_max_nunits * const_max_nunits] = false; |
| vect_free_slp_tree (node); |
| loads.release (); |
| } |
| else |
| { |
| /* Create a new SLP instance. */ |
| new_instance = XNEW (struct _slp_instance); |
| SLP_INSTANCE_TREE (new_instance) = node; |
| SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; |
| SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; |
| SLP_INSTANCE_LOADS (new_instance) = loads; |
| |
| /* Compute the load permutation. */ |
| slp_tree load_node; |
| bool loads_permuted = false; |
| FOR_EACH_VEC_ELT (loads, i, load_node) |
| { |
| vec<unsigned> load_permutation; |
| int j; |
| gimple *load, *first_stmt; |
| bool this_load_permuted = false; |
| load_permutation.create (group_size); |
| first_stmt = GROUP_FIRST_ELEMENT |
| (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load) |
| { |
| int load_place = vect_get_place_in_interleaving_chain |
| (load, first_stmt); |
| gcc_assert (load_place != -1); |
| if (load_place != j) |
| this_load_permuted = true; |
| load_permutation.safe_push (load_place); |
| } |
| if (!this_load_permuted |
| /* The load requires permutation when unrolling exposes |
| a gap either because the group is larger than the SLP |
| group-size or because there is a gap between the groups. */ |
| && (known_eq (unrolling_factor, 1U) |
| || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt)) |
| && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))) |
| { |
| load_permutation.release (); |
| continue; |
| } |
| SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation; |
| loads_permuted = true; |
| } |
| |
| if (loads_permuted) |
| { |
| if (!vect_supported_load_permutation_p (new_instance)) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Build SLP failed: unsupported load " |
| "permutation "); |
| dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, |
| TDF_SLIM, stmt, 0); |
| } |
| vect_free_slp_instance (new_instance); |
| return false; |
| } |
| } |
| |
| /* If the loads and stores can be handled with load/store-lanes
| instructions, do not generate this SLP instance. */
| if (is_a <loop_vec_info> (vinfo) |
| && loads_permuted |
| && dr && vect_store_lanes_supported (vectype, group_size, false)) |
| { |
| slp_tree load_node; |
| FOR_EACH_VEC_ELT (loads, i, load_node) |
| { |
| gimple *first_stmt = GROUP_FIRST_ELEMENT |
| (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); |
| stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt); |
| /* Use SLP for strided accesses (or if we |
| can't load-lanes). */ |
| if (STMT_VINFO_STRIDED_P (stmt_vinfo) |
| || ! vect_load_lanes_supported |
| (STMT_VINFO_VECTYPE (stmt_vinfo), |
| GROUP_SIZE (stmt_vinfo), false)) |
| break; |
| } |
| if (i == loads.length ()) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Built SLP cancelled: can use " |
| "load/store-lanes\n"); |
| vect_free_slp_instance (new_instance); |
| return false; |
| } |
| } |
| |
| vinfo->slp_instances.safe_push (new_instance); |
| |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Final SLP tree for instance:\n"); |
| vect_print_slp_tree (MSG_NOTE, vect_location, node); |
| } |
| |
| return true; |
| } |
| } |
| else |
| { |
| /* Failed to SLP. */ |
| /* Free the allocated memory. */ |
| scalar_stmts.release (); |
| loads.release (); |
| } |
| |
| /* For basic block SLP, try to break the group up into multiples of the |
| vector size. */ |
| unsigned HOST_WIDE_INT const_nunits; |
| if (is_a <bb_vec_info> (vinfo) |
| && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) |
| && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)) |
| && nunits.is_constant (&const_nunits)) |
| { |
| /* We consider breaking the group only on VF boundaries from the existing |
| start. */ |
| for (i = 0; i < group_size; i++) |
| if (!matches[i]) break; |
| |
| if (i >= const_nunits && i < group_size) |
| { |
| /* Split into two groups at the first vector boundary before i. */ |
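| /* E.g. (example values only): with const_nunits == 4 and the first
| mismatch at i == 6, group1_size below becomes 4. */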
| gcc_assert ((const_nunits & (const_nunits - 1)) == 0); |
| unsigned group1_size = i & ~(const_nunits - 1); |
| |
| gimple *rest = vect_split_slp_store_group (stmt, group1_size); |
| bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size); |
| /* If the first non-match was in the middle of a vector, |
| skip the rest of that vector. */ |
| if (group1_size < i) |
| { |
| i = group1_size + const_nunits; |
| if (i < group_size) |
| rest = vect_split_slp_store_group (rest, const_nunits); |
| } |
| if (i < group_size) |
| res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size); |
| return res; |
| } |
| /* Even though the first vector did not all match, we might be able to SLP |
| (some) of the remainder. FORNOW ignore this possibility. */ |
| } |
| |
| return false; |
| } |
| |
| |
| /* Check if there are stmts in the loop that can be vectorized using SLP.
| Build SLP trees of packed scalar stmts if SLP is possible. */
| |
| bool |
| vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) |
| { |
| unsigned int i; |
| gimple *first_element; |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n"); |
| |
| /* Find SLP sequences starting from groups of grouped stores. */ |
| FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element) |
| vect_analyze_slp_instance (vinfo, first_element, max_tree_size); |
| |
| if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) |
| { |
| if (loop_vinfo->reduction_chains.length () > 0) |
| { |
| /* Find SLP sequences starting from reduction chains. */ |
| FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) |
| if (! vect_analyze_slp_instance (vinfo, first_element, |
| max_tree_size)) |
| { |
| /* Dissolve reduction chain group. */ |
| gimple *next, *stmt = first_element; |
| while (stmt) |
| { |
| stmt_vec_info vinfo = vinfo_for_stmt (stmt); |
| next = GROUP_NEXT_ELEMENT (vinfo); |
| GROUP_FIRST_ELEMENT (vinfo) = NULL; |
| GROUP_NEXT_ELEMENT (vinfo) = NULL; |
| stmt = next; |
| } |
| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element)) |
| = vect_internal_def; |
| } |
| } |
| |
| /* Find SLP sequences starting from groups of reductions. */ |
| if (loop_vinfo->reductions.length () > 1) |
| vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0], |
| max_tree_size); |
| } |
| |
| return true; |
| } |
| |
| |
| /* For each possible SLP instance decide whether to SLP it and calculate the
| overall unrolling factor needed to SLP the loop. Return TRUE if we decided
| to SLP at least one instance. */
| |
| bool |
| vect_make_slp_decision (loop_vec_info loop_vinfo) |
| { |
| unsigned int i; |
| poly_uint64 unrolling_factor = 1; |
| vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); |
| slp_instance instance; |
| int decided_to_slp = 0; |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===" |
| "\n"); |
| |
| FOR_EACH_VEC_ELT (slp_instances, i, instance) |
| { |
| /* FORNOW: SLP if you can. */ |
| /* All unroll factors have the form current_vector_size * X for some |
| rational X, so they must have a common multiple. */ |
| unrolling_factor |
| = force_common_multiple (unrolling_factor, |
| SLP_INSTANCE_UNROLLING_FACTOR (instance)); |
| |
| /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we |
| call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and |
| loop-based vectorization. Such stmts will be marked as HYBRID. */ |
| vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1); |
| decided_to_slp++; |
| } |
| |
| LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor; |
| |
| if (decided_to_slp && dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "Decided to SLP %d instances. Unrolling factor ", |
| decided_to_slp); |
| dump_dec (MSG_NOTE, unrolling_factor); |
| dump_printf (MSG_NOTE, "\n"); |
| } |
| |
| return (decided_to_slp > 0); |
| } |
| |
| |
| /* Find stmts that must be both vectorized and SLPed (since they feed stmts that |
| can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */ |
| |
| static void |
| vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype) |
| { |
| gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i]; |
| imm_use_iterator imm_iter; |
| gimple *use_stmt; |
| stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt); |
| slp_tree child; |
| loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); |
| struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
| int j; |
| |
| /* Propagate hybrid down the SLP tree. */ |
| if (stype == hybrid) |
| ; |
| else if (HYBRID_SLP_STMT (stmt_vinfo)) |
| stype = hybrid; |
| else |
| { |
| /* Check if a pure SLP stmt has uses in non-SLP stmts. */ |
| gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo)); |
| /* If we get a pattern stmt here we have to use the LHS of the |
| original stmt for immediate uses. */ |
| if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo) |
| && STMT_VINFO_RELATED_STMT (stmt_vinfo)) |
| stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); |
| tree def; |
| if (gimple_code (stmt) == GIMPLE_PHI) |
| def = gimple_phi_result (stmt); |
| else |
| def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF); |
| if (def) |
| FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def) |
| { |
| if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) |
| continue; |
| use_vinfo = vinfo_for_stmt (use_stmt); |
| if (STMT_VINFO_IN_PATTERN_P (use_vinfo) |
| && STMT_VINFO_RELATED_STMT (use_vinfo)) |
| use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo)); |
| if (!STMT_SLP_TYPE (use_vinfo) |
| && (STMT_VINFO_RELEVANT (use_vinfo) |
| || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))) |
| && !(gimple_code (use_stmt) == GIMPLE_PHI |
| && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def)) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, "use of SLP " |
| "def in non-SLP stmt: "); |
| dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0); |
| } |
| stype = hybrid; |
| } |
| } |
| } |
| |
| if (stype == hybrid |
| && !HYBRID_SLP_STMT (stmt_vinfo)) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: "); |
| dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); |
| } |
| STMT_SLP_TYPE (stmt_vinfo) = hybrid; |
| } |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) |
| if (SLP_TREE_DEF_TYPE (child) != vect_external_def) |
| vect_detect_hybrid_slp_stmts (child, i, stype); |
| } |
| |
| /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */ |
| |
| static tree |
| vect_detect_hybrid_slp_1 (tree *tp, int *, void *data) |
| { |
| walk_stmt_info *wi = (walk_stmt_info *)data; |
| struct loop *loopp = (struct loop *)wi->info; |
| |
| if (wi->is_lhs) |
| return NULL_TREE; |
| |
| if (TREE_CODE (*tp) == SSA_NAME |
| && !SSA_NAME_IS_DEFAULT_DEF (*tp)) |
| { |
| gimple *def_stmt = SSA_NAME_DEF_STMT (*tp); |
| if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt)) |
| && PURE_SLP_STMT (vinfo_for_stmt (def_stmt))) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: "); |
| dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0); |
| } |
| STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid; |
| } |
| } |
| |
| return NULL_TREE; |
| } |
| |
| static tree |
| vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled, |
| walk_stmt_info *) |
| { |
| stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi)); |
| /* If the stmt is in an SLP instance then this isn't a reason
| to mark use definitions in other SLP instances as hybrid. */ |
| if (! STMT_SLP_TYPE (use_vinfo) |
| && (STMT_VINFO_RELEVANT (use_vinfo) |
| || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))) |
| && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI |
| && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def)) |
| ; |
| else |
| *handled = true; |
| return NULL_TREE; |
| } |
| |
| /* Find stmts that must be both vectorized and SLPed. */ |
| |
| void |
| vect_detect_hybrid_slp (loop_vec_info loop_vinfo) |
| { |
| unsigned int i; |
| vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); |
| slp_instance instance; |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===" |
| "\n"); |
| |
| /* First walk all pattern stmts in the loop and mark the defs they use as
| hybrid, because immediate uses inside pattern stmts are not recorded. */
| for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) |
| { |
| basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; |
| for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); |
| gsi_next (&gsi)) |
| { |
| gimple *stmt = gsi_stmt (gsi); |
| stmt_vec_info stmt_info = vinfo_for_stmt (stmt); |
| if (STMT_VINFO_IN_PATTERN_P (stmt_info)) |
| { |
| walk_stmt_info wi; |
| memset (&wi, 0, sizeof (wi)); |
| wi.info = LOOP_VINFO_LOOP (loop_vinfo); |
| gimple_stmt_iterator gsi2 |
| = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); |
| walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2, |
| vect_detect_hybrid_slp_1, &wi); |
| walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), |
| vect_detect_hybrid_slp_2, |
| vect_detect_hybrid_slp_1, &wi); |
| } |
| } |
| } |
| |
| /* Then walk the SLP instance trees marking stmts with uses in |
| non-SLP stmts as hybrid, also propagating hybrid down the |
| SLP tree, collecting the above info on-the-fly. */ |
| FOR_EACH_VEC_ELT (slp_instances, i, instance) |
| { |
| for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i) |
| vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance), |
| i, pure_slp); |
| } |
| } |
| |
| |
| /* Initialize a bb_vec_info struct for the statements between |
| REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */ |
| |
| _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in, |
| gimple_stmt_iterator region_end_in) |
| : vec_info (vec_info::bb, init_cost (NULL)), |
| bb (gsi_bb (region_begin_in)), |
| region_begin (region_begin_in), |
| region_end (region_end_in) |
| { |
| gimple_stmt_iterator gsi; |
| |
| for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end); |
| gsi_next (&gsi)) |
| { |
| gimple *stmt = gsi_stmt (gsi); |
| gimple_set_uid (stmt, 0); |
| set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this)); |
| } |
| |
| bb->aux = this; |
| } |
| |
| |
| /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the |
| stmts in the basic block. */ |
| |
| _bb_vec_info::~_bb_vec_info () |
| { |
| for (gimple_stmt_iterator si = region_begin; |
| gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si)) |
| { |
| gimple *stmt = gsi_stmt (si); |
| stmt_vec_info stmt_info = vinfo_for_stmt (stmt); |
| |
| if (stmt_info) |
| /* Free stmt_vec_info. */ |
| free_stmt_vec_info (stmt); |
| |
| /* Reset region marker. */ |
| gimple_set_uid (stmt, -1); |
| } |
| |
| bb->aux = NULL; |
| } |
| |
| |
| /* Analyze statements contained in SLP tree NODE after recursively analyzing |
| the subtree. NODE_INSTANCE is the SLP instance containing NODE, and VINFO
| is the vectorization info containing NODE_INSTANCE.
| |
| Return true if the operations are supported. */ |
| |
| static bool |
| vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, |
| slp_instance node_instance) |
| { |
| bool dummy; |
| int i, j; |
| gimple *stmt; |
| slp_tree child; |
| |
| if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) |
| return true; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| if (!vect_slp_analyze_node_operations (vinfo, child, node_instance)) |
| return false; |
| |
| stmt = SLP_TREE_SCALAR_STMTS (node)[0]; |
| stmt_vec_info stmt_info = vinfo_for_stmt (stmt); |
| gcc_assert (stmt_info); |
| gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); |
| |
| /* For BB vectorization vector types are assigned here. |
| Memory accesses already got their vector type assigned |
| in vect_analyze_data_refs. */ |
| bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); |
| if (bb_vinfo |
| && ! STMT_VINFO_DATA_REF (stmt_info)) |
| { |
| gcc_assert (PURE_SLP_STMT (stmt_info)); |
| |
| tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "get vectype for scalar type: "); |
| dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); |
| dump_printf (MSG_NOTE, "\n"); |
| } |
| |
| tree vectype = get_vectype_for_scalar_type (scalar_type); |
| if (!vectype) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "not SLPed: unsupported data-type "); |
| dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, |
| scalar_type); |
| dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); |
| } |
| return false; |
| } |
| |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); |
| dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); |
| dump_printf (MSG_NOTE, "\n"); |
| } |
| |
| gimple *sstmt; |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt) |
| STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype; |
| } |
| |
| /* Calculate the number of vector statements to be created for the |
| scalar stmts in this node. For SLP reductions it is equal to the |
| number of vector statements in the children (which has already been |
| calculated by the recursive call). Otherwise it is the number of |
| scalar elements in one scalar iteration (GROUP_SIZE) multiplied by |
| VF divided by the number of elements in a vector. */ |
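| For example (illustrative values): with GROUP_SIZE == 4, VF == 4 and a
| vector type holding 8 elements this gives 16 / 8 == 2 vector stmts.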
| if (GROUP_FIRST_ELEMENT (stmt_info) |
| && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
| SLP_TREE_NUMBER_OF_VEC_STMTS (node) |
| = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]); |
| else |
| { |
| poly_uint64 vf; |
| if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) |
| vf = loop_vinfo->vectorization_factor; |
| else |
| vf = 1; |
| unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance); |
| tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
| SLP_TREE_NUMBER_OF_VEC_STMTS (node) |
| = vect_get_num_vectors (vf * group_size, vectype); |
| } |
| |
| /* Push SLP node def-type to stmt operands. */ |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) |
| if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |
| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) |
| = SLP_TREE_DEF_TYPE (child); |
| bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance); |
| /* Restore def-types. */ |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) |
| if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |
| STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) |
| = vect_internal_def; |
| if (! res) |
| return false; |
| |
| return true; |
| } |
| |
| |
| /* Analyze statements in SLP instances of VINFO. Return true if the |
| operations are supported. */ |
| |
| bool |
| vect_slp_analyze_operations (vec_info *vinfo) |
| { |
| slp_instance instance; |
| int i; |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "=== vect_slp_analyze_operations ===\n"); |
| |
| for (i = 0; vinfo->slp_instances.iterate (i, &instance); ) |
| { |
| if (!vect_slp_analyze_node_operations (vinfo, |
| SLP_INSTANCE_TREE (instance), |
| instance)) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "removing SLP instance operations starting from: "); |
| dump_gimple_stmt (MSG_NOTE, TDF_SLIM, |
| SLP_TREE_SCALAR_STMTS |
| (SLP_INSTANCE_TREE (instance))[0], 0); |
| vect_free_slp_instance (instance); |
| vinfo->slp_instances.ordered_remove (i); |
| } |
| else |
| i++; |
| } |
| |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_NOTE, vect_location, |
| "=== vect_analyze_slp_cost ===\n"); |
| |
| /* Compute the costs of the SLP instances. */ |
| scalar_stmts_set_t *visited = new scalar_stmts_set_t (); |
| for (i = 0; vinfo->slp_instances.iterate (i, &instance); ++i) |
| vect_analyze_slp_cost (instance, vinfo->target_cost_data, visited); |
| delete visited; |
| |
| return !vinfo->slp_instances.is_empty (); |
| } |
| |
| |
| /* Compute the scalar cost of the SLP node NODE and its children |
| and return it. Do not account defs that are marked in LIFE and |
| update LIFE according to uses of NODE. */ |
| |
| static unsigned |
| vect_bb_slp_scalar_cost (basic_block bb, |
| slp_tree node, vec<bool, va_heap> *life) |
| { |
| unsigned scalar_cost = 0; |
| unsigned i; |
| gimple *stmt; |
| slp_tree child; |
| |
| FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) |
| { |
| unsigned stmt_cost; |
| ssa_op_iter op_iter; |
| def_operand_p def_p; |
| stmt_vec_info stmt_info; |
| |
| if ((*life)[i]) |
| continue; |
| |
| /* If there is a non-vectorized use of the defs then the scalar |
| stmt is kept live in which case we do not account it or any |
| required defs in the SLP children in the scalar cost. This |
| way we make the vectorization more costly when compared to |
| the scalar cost. */ |
| FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF) |
| { |
| imm_use_iterator use_iter; |
| gimple *use_stmt; |
| FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p)) |
| if (!is_gimple_debug (use_stmt) |
| && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo, |
| use_stmt) |
| || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt)))) |
| { |
| (*life)[i] = true; |
| BREAK_FROM_IMM_USE_STMT (use_iter); |
| } |
| } |
| if ((*life)[i]) |
| continue; |
| |
| /* Count scalar stmts only once. */ |
| if (gimple_visited_p (stmt)) |
| continue; |
| gimple_set_visited (stmt, true); |
| |
| stmt_info = vinfo_for_stmt (stmt); |
| if (STMT_VINFO_DATA_REF (stmt_info)) |
| { |
| if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) |
| stmt_cost = vect_get_stmt_cost (scalar_load); |
| else |
| stmt_cost = vect_get_stmt_cost (scalar_store); |
| } |
| else |
| stmt_cost = vect_get_stmt_cost (scalar_stmt); |
| |
| scalar_cost += stmt_cost; |
| } |
| |
| auto_vec<bool, 20> subtree_life; |
| FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
| { |
| if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) |
| { |
| /* Do not directly pass LIFE to the recursive call, copy it to |
| confine changes in the callee to the current child/subtree. */ |
| subtree_life.safe_splice (*life); |
| scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life); |
| subtree_life.truncate (0); |
| } |
| } |
| |
| return scalar_cost; |
| } |
| |
| /* Check if vectorization of the basic block is profitable. */ |
| |
| static bool |
| vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo) |
| { |
| vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo); |
| slp_instance instance; |
| int i; |
| unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0; |
| unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0; |
| |
| /* Calculate scalar cost. */ |
| FOR_EACH_VEC_ELT (slp_instances, i, instance) |
| { |
| auto_vec<bool, 20> life; |
| life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance)); |
| scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo), |
| SLP_INSTANCE_TREE (instance), |
| &life); |
| } |
| |
| /* Unset visited flag. */ |
| for (gimple_stmt_iterator gsi = bb_vinfo->region_begin; |
| gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) |
| gimple_set_visited (gsi_stmt (gsi), false); |
| |
| /* Complete the target-specific cost calculation. */ |
| finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost, |
| &vec_inside_cost, &vec_epilogue_cost); |
| |
| vec_outside_cost = vec_prologue_cost + vec_epilogue_cost; |
| |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n"); |
| dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n", |
| vec_inside_cost); |
| dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost); |
| dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost); |
| dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost); |
| } |
| |
| /* Vectorization is profitable if its cost is less than the cost of the
| scalar version. Note that we err on the vector side for equal cost because
| the cost estimate is otherwise quite pessimistic (constant uses are |
| free on the scalar side but cost a load on the vector side for |
| example). */ |
| if (vec_outside_cost + vec_inside_cost > scalar_cost) |
| return false; |
| |
| return true; |
| } |
| |
| /* Check if the basic block can be vectorized. Returns a bb_vec_info |
| if so and sets fatal to true if failure is independent of |
| current_vector_size. */ |
| |
| static bb_vec_info |
| vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, |
| gimple_stmt_iterator region_end, |
| vec<data_reference_p> datarefs, int n_stmts, |
| bool &fatal) |
| { |
| bb_vec_info bb_vinfo; |
| slp_instance instance; |
| int i; |
| poly_uint64 min_vf = 2; |
| |
| /* The first group of checks is independent of the vector size. */ |
| fatal = true; |
| |
| if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "not vectorized: too many instructions in " |
| "basic block.\n"); |
| free_data_refs (datarefs); |
| return NULL; |
| } |
| |
| bb_vinfo = new _bb_vec_info (region_begin, region_end); |
| if (!bb_vinfo) |
| return NULL; |
| |
| BB_VINFO_DATAREFS (bb_vinfo) = datarefs; |
| |
| /* Analyze the data references. */ |
| |
| if (!vect_analyze_data_refs (bb_vinfo, &min_vf)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "not vectorized: unhandled data-ref in basic " |
| "block.\n"); |
| |
| delete bb_vinfo; |
| return NULL; |
| } |
| |
| if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "not vectorized: not enough data-refs in " |
| "basic block.\n"); |
| |
| delete bb_vinfo; |
| return NULL; |
| } |
| |
| if (!vect_analyze_data_ref_accesses (bb_vinfo)) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "not vectorized: unhandled data access in " |
| "basic block.\n"); |
| |
| delete bb_vinfo; |
| return NULL; |
| } |
| |
| /* If there are no grouped stores in the region there is no need |
| to continue with pattern recog as vect_analyze_slp will fail |
| anyway. */ |
| if (bb_vinfo->grouped_stores.is_empty ()) |
| { |
| if (dump_enabled_p ()) |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "not vectorized: no grouped stores in " |
| "basic block.\n"); |
| |
| delete bb_vinfo; |
| return NULL; |
| } |
| |
| /* The rest of the analysis below depends on the current vector size in
| some way, so from here on a failure is no longer considered fatal. */
| fatal = false; |
| |
| vect_pattern_recog (bb_vinfo); |
| |
| /* Check the SLP opportunities in the basic block, analyze and build SLP |
| trees. */ |
| if (!vect_analyze_slp (bb_vinfo, n_stmts)) |
| { |
| if (dump_enabled_p ()) |
| { |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "Failed to SLP the basic block.\n"); |
| dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
| "not vectorized: failed to find SLP opportunities " |
| "in basic block.\n"); |
| } |
| |
| delete bb_vinfo; |
| return NULL; |
| } |
| |
| vect_record_base_alignments (bb_vinfo); |
| |
| /* Analyze and verify the alignment of data references and the |
| dependence in the SLP instances. */ |
| for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); ) |
| { |
| if (! vect_slp_analyze_and_verify_instance_alignment (instance) |
| || ! vect_slp_analyze_instance_dependence (instance)) |
| { |
| dump_printf_loc (MSG_NOTE, vect_location, |
|