/* Analysis Utilities for Loop Vectorization. | |

Copyright (C) 2006-2021 Free Software Foundation, Inc. | |

Contributed by Dorit Nuzman <dorit@il.ibm.com> | |

This file is part of GCC. | |

GCC is free software; you can redistribute it and/or modify it under | |

the terms of the GNU General Public License as published by the Free | |

Software Foundation; either version 3, or (at your option) any later | |

version. | |

GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |

WARRANTY; without even the implied warranty of MERCHANTABILITY or | |

FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |

for more details. | |

You should have received a copy of the GNU General Public License | |

along with GCC; see the file COPYING3. If not see | |

<http://www.gnu.org/licenses/>. */ | |

#include "config.h" | |

#include "system.h" | |

#include "coretypes.h" | |

#include "backend.h" | |

#include "rtl.h" | |

#include "tree.h" | |

#include "gimple.h" | |

#include "ssa.h" | |

#include "expmed.h" | |

#include "optabs-tree.h" | |

#include "insn-config.h" | |

#include "recog.h" /* FIXME: for insn_data */ | |

#include "fold-const.h" | |

#include "stor-layout.h" | |

#include "tree-eh.h" | |

#include "gimplify.h" | |

#include "gimple-iterator.h" | |

#include "cfgloop.h" | |

#include "tree-vectorizer.h" | |

#include "dumpfile.h" | |

#include "builtins.h" | |

#include "internal-fn.h" | |

#include "case-cfn-macros.h" | |

#include "fold-const-call.h" | |

#include "attribs.h" | |

#include "cgraph.h" | |

#include "omp-simd-clone.h" | |

#include "predict.h" | |

#include "tree-vector-builder.h" | |

#include "vec-perm-indices.h" | |

#include "gimple-range.h" | |

/* Return true if we have a useful VR_RANGE range for VAR, storing it | |

in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */ | |

static bool | |

vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value) | |

{ | |

value_range vr; | |

get_range_query (cfun)->range_of_expr (vr, var); | |

if (vr.undefined_p ()) | |

vr.set_varying (TREE_TYPE (var)); | |

*min_value = wi::to_wide (vr.min ()); | |

*max_value = wi::to_wide (vr.max ()); | |

value_range_kind vr_type = vr.kind (); | |

wide_int nonzero = get_nonzero_bits (var); | |

signop sgn = TYPE_SIGN (TREE_TYPE (var)); | |

if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value, | |

nonzero, sgn) == VR_RANGE) | |

{ | |

if (dump_enabled_p ()) | |

{ | |

dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var); | |

dump_printf (MSG_NOTE, " has range ["); | |

dump_hex (MSG_NOTE, *min_value); | |

dump_printf (MSG_NOTE, ", "); | |

dump_hex (MSG_NOTE, *max_value); | |

dump_printf (MSG_NOTE, "]\n"); | |

} | |

return true; | |

} | |

else | |

{ | |

if (dump_enabled_p ()) | |

{ | |

dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var); | |

dump_printf (MSG_NOTE, " has no range info\n"); | |

} | |

return false; | |

} | |

} | |

/* Report that we've found an instance of pattern PATTERN in | |

statement STMT. */ | |

static void | |

vect_pattern_detected (const char *name, gimple *stmt) | |

{ | |

if (dump_enabled_p ()) | |

dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt); | |

} | |

/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and | |

return the pattern statement's stmt_vec_info. Set its vector type to | |

VECTYPE if it doesn't have one already. */ | |

static stmt_vec_info | |

vect_init_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt, | |

stmt_vec_info orig_stmt_info, tree vectype) | |

{ | |

stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt); | |

if (pattern_stmt_info == NULL) | |

pattern_stmt_info = vinfo->add_stmt (pattern_stmt); | |

gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt)); | |

pattern_stmt_info->pattern_stmt_p = true; | |

STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info; | |

STMT_VINFO_DEF_TYPE (pattern_stmt_info) | |

= STMT_VINFO_DEF_TYPE (orig_stmt_info); | |

if (!STMT_VINFO_VECTYPE (pattern_stmt_info)) | |

{ | |

gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype) | |

== vect_use_mask_type_p (orig_stmt_info)); | |

STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype; | |

pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision; | |

} | |

return pattern_stmt_info; | |

} | |

/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT. | |

Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't | |

have one already. */ | |

static void | |

vect_set_pattern_stmt (vec_info *vinfo, gimple *pattern_stmt, | |

stmt_vec_info orig_stmt_info, tree vectype) | |

{ | |

STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true; | |

STMT_VINFO_RELATED_STMT (orig_stmt_info) | |

= vect_init_pattern_stmt (vinfo, pattern_stmt, orig_stmt_info, vectype); | |

} | |

/* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE | |

is nonnull, record that NEW_STMT's vector type is VECTYPE, which might | |

be different from the vector type of the final pattern statement. | |

If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type | |

from which it was derived. */ | |

static inline void | |

append_pattern_def_seq (vec_info *vinfo, | |

stmt_vec_info stmt_info, gimple *new_stmt, | |

tree vectype = NULL_TREE, | |

tree scalar_type_for_mask = NULL_TREE) | |

{ | |

gcc_assert (!scalar_type_for_mask | |

== (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype))); | |

if (vectype) | |

{ | |

stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt); | |

STMT_VINFO_VECTYPE (new_stmt_info) = vectype; | |

if (scalar_type_for_mask) | |

new_stmt_info->mask_precision | |

= GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask)); | |

} | |

gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), | |

new_stmt); | |

} | |

/* The caller wants to perform new operations on vect_external variable | |

VAR, so that the result of the operations would also be vect_external. | |

Return the edge on which the operations can be performed, if one exists. | |

Return null if the operations should instead be treated as part of | |

the pattern that needs them. */ | |

static edge | |

vect_get_external_def_edge (vec_info *vinfo, tree var) | |

{ | |

edge e = NULL; | |

if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) | |

{ | |

e = loop_preheader_edge (loop_vinfo->loop); | |

if (!SSA_NAME_IS_DEFAULT_DEF (var)) | |

{ | |

basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var)); | |

if (bb == NULL | |

|| !dominated_by_p (CDI_DOMINATORS, e->dest, bb)) | |

e = NULL; | |

} | |

} | |

return e; | |

} | |

/* Return true if the target supports a vector version of CODE, | |

where CODE is known to map to a direct optab with the given SUBTYPE. | |

ITYPE specifies the type of (some of) the scalar inputs and OTYPE | |

specifies the type of the scalar result. | |

If CODE allows the inputs and outputs to have different type | |

(such as for WIDEN_SUM_EXPR), it is the input mode rather | |

than the output mode that determines the appropriate target pattern. | |

Operand 0 of the target pattern then specifies the mode that the output | |

must have. | |

When returning true, set *VECOTYPE_OUT to the vector version of OTYPE. | |

Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT | |

is nonnull. */ | |

static bool | |

vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code, | |

tree itype, tree *vecotype_out, | |

tree *vecitype_out = NULL, | |

enum optab_subtype subtype = optab_default) | |

{ | |

tree vecitype = get_vectype_for_scalar_type (vinfo, itype); | |

if (!vecitype) | |

return false; | |

tree vecotype = get_vectype_for_scalar_type (vinfo, otype); | |

if (!vecotype) | |

return false; | |

optab optab = optab_for_tree_code (code, vecitype, subtype); | |

if (!optab) | |

return false; | |

insn_code icode = optab_handler (optab, TYPE_MODE (vecitype)); | |

if (icode == CODE_FOR_nothing | |

|| insn_data[icode].operand[0].mode != TYPE_MODE (vecotype)) | |

return false; | |

*vecotype_out = vecotype; | |

if (vecitype_out) | |

*vecitype_out = vecitype; | |

return true; | |

} | |

/* Round bit precision PRECISION up to a full element. */ | |

static unsigned int | |

vect_element_precision (unsigned int precision) | |

{ | |

precision = 1 << ceil_log2 (precision); | |

return MAX (precision, BITS_PER_UNIT); | |

} | |

/* If OP is defined by a statement that's being considered for vectorization, | |

return information about that statement, otherwise return NULL. */ | |

static stmt_vec_info | |

vect_get_internal_def (vec_info *vinfo, tree op) | |

{ | |

stmt_vec_info def_stmt_info = vinfo->lookup_def (op); | |

if (def_stmt_info | |

&& STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def) | |

return def_stmt_info; | |

return NULL; | |

} | |

/* Check whether NAME, an ssa-name used in STMT_VINFO, | |

is a result of a type promotion, such that: | |

DEF_STMT: NAME = NOP (name0) | |

If CHECK_SIGN is TRUE, check that either both types are signed or both are | |

unsigned. */ | |

static bool | |

type_conversion_p (vec_info *vinfo, tree name, bool check_sign, | |

tree *orig_type, gimple **def_stmt, bool *promotion) | |

{ | |

tree type = TREE_TYPE (name); | |

tree oprnd0; | |

enum vect_def_type dt; | |

stmt_vec_info def_stmt_info; | |

if (!vect_is_simple_use (name, vinfo, &dt, &def_stmt_info, def_stmt)) | |

return false; | |

if (dt != vect_internal_def | |

&& dt != vect_external_def && dt != vect_constant_def) | |

return false; | |

if (!*def_stmt) | |

return false; | |

if (!is_gimple_assign (*def_stmt)) | |

return false; | |

if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt))) | |

return false; | |

oprnd0 = gimple_assign_rhs1 (*def_stmt); | |

*orig_type = TREE_TYPE (oprnd0); | |

if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type) | |

|| ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign)) | |

return false; | |

if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2)) | |

*promotion = true; | |

else | |

*promotion = false; | |

if (!vect_is_simple_use (oprnd0, vinfo, &dt)) | |

return false; | |

return true; | |

} | |

/* Holds information about an input operand after some sign changes | |

and type promotions have been peeled away. */ | |

class vect_unpromoted_value { | |

public: | |

vect_unpromoted_value (); | |

void set_op (tree, vect_def_type, stmt_vec_info = NULL); | |

/* The value obtained after peeling away zero or more casts. */ | |

tree op; | |

/* The type of OP. */ | |

tree type; | |

/* The definition type of OP. */ | |

vect_def_type dt; | |

/* If OP is the result of peeling at least one cast, and if the cast | |

of OP itself is a vectorizable statement, CASTER identifies that | |

statement, otherwise it is null. */ | |

stmt_vec_info caster; | |

}; | |

inline vect_unpromoted_value::vect_unpromoted_value () | |

: op (NULL_TREE), | |

type (NULL_TREE), | |

dt (vect_uninitialized_def), | |

caster (NULL) | |

{ | |

} | |

/* Set the operand to OP_IN, its definition type to DT_IN, and the | |

statement that casts it to CASTER_IN. */ | |

inline void | |

vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in, | |

stmt_vec_info caster_in) | |

{ | |

op = op_in; | |

type = TREE_TYPE (op); | |

dt = dt_in; | |

caster = caster_in; | |

} | |

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions | |

to reach some vectorizable inner operand OP', continuing as long as it | |

is possible to convert OP' back to OP using a possible sign change | |

followed by a possible promotion P. Return this OP', or null if OP is | |

not a vectorizable SSA name. If there is a promotion P, describe its | |

input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P | |

is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved | |

have more than one user. | |

A successful return means that it is possible to go from OP' to OP | |

via UNPROM. The cast from OP' to UNPROM is at most a sign change, | |

whereas the cast from UNPROM to OP might be a promotion, a sign | |

change, or a nop. | |

E.g. say we have: | |

signed short *ptr = ...; | |

signed short C = *ptr; | |

unsigned short B = (unsigned short) C; // sign change | |

signed int A = (signed int) B; // unsigned promotion | |

...possible other uses of A... | |

unsigned int OP = (unsigned int) A; // sign change | |

In this case it's possible to go directly from C to OP using: | |

OP = (unsigned int) (unsigned short) C; | |

+------------+ +--------------+ | |

promotion sign change | |

so OP' would be C. The input to the promotion is B, so UNPROM | |

would describe B. */ | |

static tree | |

vect_look_through_possible_promotion (vec_info *vinfo, tree op, | |

vect_unpromoted_value *unprom, | |

bool *single_use_p = NULL) | |

{ | |

tree res = NULL_TREE; | |

tree op_type = TREE_TYPE (op); | |

unsigned int orig_precision = TYPE_PRECISION (op_type); | |

unsigned int min_precision = orig_precision; | |

stmt_vec_info caster = NULL; | |

while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type)) | |

{ | |

/* See whether OP is simple enough to vectorize. */ | |

stmt_vec_info def_stmt_info; | |

gimple *def_stmt; | |

vect_def_type dt; | |

if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt)) | |

break; | |

/* If OP is the input of a demotion, skip over it to see whether | |

OP is itself the result of a promotion. If so, the combined | |

effect of the promotion and the demotion might fit the required | |

pattern, otherwise neither operation fits. | |

This copes with cases such as the result of an arithmetic | |

operation being truncated before being stored, and where that | |

arithmetic operation has been recognized as an over-widened one. */ | |

if (TYPE_PRECISION (op_type) <= min_precision) | |

{ | |

/* Use OP as the UNPROM described above if we haven't yet | |

found a promotion, or if using the new input preserves the | |

sign of the previous promotion. */ | |

if (!res | |

|| TYPE_PRECISION (unprom->type) == orig_precision | |

|| TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type)) | |

{ | |

unprom->set_op (op, dt, caster); | |

min_precision = TYPE_PRECISION (op_type); | |

} | |

/* Stop if we've already seen a promotion and if this | |

conversion does more than change the sign. */ | |

else if (TYPE_PRECISION (op_type) | |

!= TYPE_PRECISION (unprom->type)) | |

break; | |

/* The sequence now extends to OP. */ | |

res = op; | |

} | |

/* See whether OP is defined by a cast. Record it as CASTER if | |

the cast is potentially vectorizable. */ | |

if (!def_stmt) | |

break; | |

caster = def_stmt_info; | |

/* Ignore pattern statements, since we don't link uses for them. */ | |

if (caster | |

&& single_use_p | |

&& !STMT_VINFO_RELATED_STMT (caster) | |

&& !has_single_use (res)) | |

*single_use_p = false; | |

gassign *assign = dyn_cast <gassign *> (def_stmt); | |

if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt))) | |

break; | |

/* Continue with the input to the cast. */ | |

op = gimple_assign_rhs1 (def_stmt); | |

op_type = TREE_TYPE (op); | |

} | |

return res; | |

} | |

/* OP is an integer operand to an operation that returns TYPE, and we | |

want to treat the operation as a widening one. So far we can treat | |

it as widening from *COMMON_TYPE. | |

Return true if OP is suitable for such a widening operation, | |

either widening from *COMMON_TYPE or from some supertype of it. | |

Update *COMMON_TYPE to the supertype in the latter case. | |

SHIFT_P is true if OP is a shift amount. */ | |

static bool | |

vect_joust_widened_integer (tree type, bool shift_p, tree op, | |

tree *common_type) | |

{ | |

/* Calculate the minimum precision required by OP, without changing | |

the sign of either operand. */ | |

unsigned int precision; | |

if (shift_p) | |

{ | |

if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2)) | |

return false; | |

precision = TREE_INT_CST_LOW (op); | |

} | |

else | |

{ | |

precision = wi::min_precision (wi::to_widest (op), | |

TYPE_SIGN (*common_type)); | |

if (precision * 2 > TYPE_PRECISION (type)) | |

return false; | |

} | |

/* If OP requires a wider type, switch to that type. The checks | |

above ensure that this is still narrower than the result. */ | |

precision = vect_element_precision (precision); | |

if (TYPE_PRECISION (*common_type) < precision) | |

*common_type = build_nonstandard_integer_type | |

(precision, TYPE_UNSIGNED (*common_type)); | |

return true; | |

} | |

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE | |

is narrower than type, storing the supertype in *COMMON_TYPE if so. */ | |

static bool | |

vect_joust_widened_type (tree type, tree new_type, tree *common_type) | |

{ | |

if (types_compatible_p (*common_type, new_type)) | |

return true; | |

/* See if *COMMON_TYPE can hold all values of NEW_TYPE. */ | |

if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type)) | |

&& (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type))) | |

return true; | |

/* See if NEW_TYPE can hold all values of *COMMON_TYPE. */ | |

if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type) | |

&& (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type))) | |

{ | |

*common_type = new_type; | |

return true; | |

} | |

/* We have mismatched signs, with the signed type being | |

no wider than the unsigned type. In this case we need | |

a wider signed type. */ | |

unsigned int precision = MAX (TYPE_PRECISION (*common_type), | |

TYPE_PRECISION (new_type)); | |

precision *= 2; | |

if (precision * 2 > TYPE_PRECISION (type)) | |

return false; | |

*common_type = build_nonstandard_integer_type (precision, false); | |

return true; | |

} | |

/* Check whether STMT_INFO can be viewed as a tree of integer operations | |

in which each node either performs CODE or WIDENED_CODE, and where | |

each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS | |

specifies the maximum number of leaf operands. SHIFT_P says whether | |

CODE and WIDENED_CODE are some sort of shift. | |

If STMT_INFO is such a tree, return the number of leaf operands | |

and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE | |

to a type that (a) is narrower than the result of STMT_INFO and | |

(b) can hold all leaf operand values. | |

If SUBTYPE then allow that the signs of the operands | |

may differ in signs but not in precision. SUBTYPE is updated to reflect | |

this. | |

Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE | |

exists. */ | |

static unsigned int | |

vect_widened_op_tree (vec_info *vinfo, stmt_vec_info stmt_info, tree_code code, | |

tree_code widened_code, bool shift_p, | |

unsigned int max_nops, | |

vect_unpromoted_value *unprom, tree *common_type, | |

enum optab_subtype *subtype = NULL) | |

{ | |

/* Check for an integer operation with the right code. */ | |

gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); | |

if (!assign) | |

return 0; | |

tree_code rhs_code = gimple_assign_rhs_code (assign); | |

if (rhs_code != code && rhs_code != widened_code) | |

return 0; | |

tree type = TREE_TYPE (gimple_assign_lhs (assign)); | |

if (!INTEGRAL_TYPE_P (type)) | |

return 0; | |

/* Assume that both operands will be leaf operands. */ | |

max_nops -= 2; | |

/* Check the operands. */ | |

unsigned int next_op = 0; | |

for (unsigned int i = 0; i < 2; ++i) | |

{ | |

vect_unpromoted_value *this_unprom = &unprom[next_op]; | |

unsigned int nops = 1; | |

tree op = gimple_op (assign, i + 1); | |

if (i == 1 && TREE_CODE (op) == INTEGER_CST) | |

{ | |

/* We already have a common type from earlier operands. | |

Update it to account for OP. */ | |

this_unprom->set_op (op, vect_constant_def); | |

if (!vect_joust_widened_integer (type, shift_p, op, common_type)) | |

return 0; | |

} | |

else | |

{ | |

/* Only allow shifts by constants. */ | |

if (shift_p && i == 1) | |

return 0; | |

if (!vect_look_through_possible_promotion (vinfo, op, this_unprom)) | |

return 0; | |

if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type)) | |

{ | |

/* The operand isn't widened. If STMT_INFO has the code | |

for an unwidened operation, recursively check whether | |

this operand is a node of the tree. */ | |

if (rhs_code != code | |

|| max_nops == 0 | |

|| this_unprom->dt != vect_internal_def) | |

return 0; | |

/* Give back the leaf slot allocated above now that we're | |

not treating this as a leaf operand. */ | |

max_nops += 1; | |

/* Recursively process the definition of the operand. */ | |

stmt_vec_info def_stmt_info | |

= vinfo->lookup_def (this_unprom->op); | |

nops = vect_widened_op_tree (vinfo, def_stmt_info, code, | |

widened_code, shift_p, max_nops, | |

this_unprom, common_type, | |

subtype); | |

if (nops == 0) | |

return 0; | |

max_nops -= nops; | |

} | |

else | |

{ | |

/* Make sure that the operand is narrower than the result. */ | |

if (TYPE_PRECISION (this_unprom->type) * 2 | |

> TYPE_PRECISION (type)) | |

return 0; | |

/* Update COMMON_TYPE for the new operand. */ | |

if (i == 0) | |

*common_type = this_unprom->type; | |

else if (!vect_joust_widened_type (type, this_unprom->type, | |

common_type)) | |

{ | |

if (subtype) | |

{ | |

/* See if we can sign extend the smaller type. */ | |

if (TYPE_PRECISION (this_unprom->type) | |

> TYPE_PRECISION (*common_type)) | |

*common_type = this_unprom->type; | |

*subtype = optab_vector_mixed_sign; | |

} | |

else | |

return 0; | |

} | |

} | |

} | |

next_op += nops; | |

} | |

return next_op; | |

} | |

/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT | |

is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */ | |

static tree | |

vect_recog_temp_ssa_var (tree type, gimple *stmt) | |

{ | |

return make_temp_ssa_name (type, stmt, "patt"); | |

} | |

/* STMT2_INFO describes a type conversion that could be split into STMT1 | |

followed by a version of STMT2_INFO that takes NEW_RHS as its first | |

input. Try to do this using pattern statements, returning true on | |

success. */ | |

static bool | |

vect_split_statement (vec_info *vinfo, stmt_vec_info stmt2_info, tree new_rhs, | |

gimple *stmt1, tree vectype) | |

{ | |

if (is_pattern_stmt_p (stmt2_info)) | |

{ | |

/* STMT2_INFO is part of a pattern. Get the statement to which | |

the pattern is attached. */ | |

stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info); | |

vect_init_pattern_stmt (vinfo, stmt1, orig_stmt2_info, vectype); | |

if (dump_enabled_p ()) | |

dump_printf_loc (MSG_NOTE, vect_location, | |

"Splitting pattern statement: %G", stmt2_info->stmt); | |

/* Since STMT2_INFO is a pattern statement, we can change it | |

in-situ without worrying about changing the code for the | |

containing block. */ | |

gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs); | |

if (dump_enabled_p ()) | |

{ | |

dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1); | |

dump_printf_loc (MSG_NOTE, vect_location, "and: %G", | |

stmt2_info->stmt); | |

} | |

gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info); | |

if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info) | |

/* STMT2_INFO is the actual pattern statement. Add STMT1 | |

to the end of the definition sequence. */ | |

gimple_seq_add_stmt_without_update (def_seq, stmt1); | |

else | |

{ | |

/* STMT2_INFO belongs to the definition sequence. Insert STMT1 | |

before it. */ | |

gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq); | |

gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT); | |

} | |

return true; | |

} | |

else | |

{ | |

/* STMT2_INFO doesn't yet have a pattern. Try to create a | |

two-statement pattern now. */ | |

gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info)); | |

tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt)); | |

tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type); | |

if (!lhs_vectype) | |

return false; | |

if (dump_enabled_p ()) | |

dump_printf_loc (MSG_NOTE, vect_location, | |

"Splitting statement: %G", stmt2_info->stmt); | |

/* Add STMT1 as a singleton pattern definition sequence. */ | |

gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info); | |

vect_init_pattern_stmt (vinfo, stmt1, stmt2_info, vectype); | |

gimple_seq_add_stmt_without_update (def_seq, stmt1); | |

/* Build the second of the two pattern statements. */ | |

tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL); | |

gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs); | |

vect_set_pattern_stmt (vinfo, new_stmt2, stmt2_info, lhs_vectype); | |

if (dump_enabled_p ()) | |

{ | |

dump_printf_loc (MSG_NOTE, vect_location, | |

"into pattern statements: %G", stmt1); | |

dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2); | |

} | |

return true; | |

} | |

} | |

/* Convert UNPROM to TYPE and return the result, adding new statements | |

to STMT_INFO's pattern definition statements if no better way is | |

available. VECTYPE is the vector form of TYPE. | |

If SUBTYPE then convert the type based on the subtype. */ | |

static tree | |

vect_convert_input (vec_info *vinfo, stmt_vec_info stmt_info, tree type, | |

vect_unpromoted_value *unprom, tree vectype, | |

enum optab_subtype subtype = optab_default) | |

{ | |

/* Update the type if the signs differ. */ | |

if (subtype == optab_vector_mixed_sign | |

&& TYPE_SIGN (type) != TYPE_SIGN (TREE_TYPE (unprom->op))) | |

type = build_nonstandard_integer_type (TYPE_PRECISION (type), | |

TYPE_SIGN (unprom->type)); | |

/* Check for a no-op conversion. */ | |

if (types_compatible_p (type, TREE_TYPE (unprom->op))) | |

return unprom->op; | |

/* Allow the caller to create constant vect_unpromoted_values. */ | |

if (TREE_CODE (unprom->op) == INTEGER_CST) | |

return wide_int_to_tree (type, wi::to_widest (unprom->op)); | |

tree input = unprom->op; | |

if (unprom->caster) | |

{ | |

tree lhs = gimple_get_lhs (unprom->caster->stmt); | |

tree lhs_type = TREE_TYPE (lhs); | |

/* If the result of the existing cast is the right width, use it | |

instead of the source of the cast. */ | |

if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type)) | |

input = lhs; | |

/* If the precision we want is between the source and result | |

precisions of the existing cast, try splitting the cast into | |

two and tapping into a mid-way point. */ | |

else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type) | |

&& TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)) | |

{ | |

/* In order to preserve the semantics of the original cast, | |

give the mid-way point the same signedness as the input value. | |

It would be possible to use a signed type here instead if | |

TYPE is signed and UNPROM->TYPE is unsigned, but that would | |

make the sign of the midtype sensitive to the order in | |

which we process the statements, since the signedness of | |

TYPE is the signedness required by just one of possibly | |

many users. Also, unsigned promotions are usually as cheap | |

as or cheaper than signed ones, so it's better to keep an | |

unsigned promotion. */ | |

tree midtype = build_nonstandard_integer_type | |

(TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type)); | |

tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype); | |

if (vec_midtype) | |

{ | |

input = vect_recog_temp_ssa_var (midtype, NULL); | |

gassign *new_stmt = gimple_build_assign (input, NOP_EXPR, | |

unprom->op); | |

if (!vect_split_statement (vinfo, unprom->caster, input, new_stmt, | |

vec_midtype)) | |

append_pattern_def_seq (vinfo, stmt_info, | |

new_stmt, vec_midtype); | |

} | |

} | |

/* See if we can reuse an existing result. */ | |

if (types_compatible_p (type, TREE_TYPE (input))) | |

return input; | |

} | |

/* We need a new conversion statement. */ | |

tree new_op = vect_recog_temp_ssa_var (type, NULL); | |

gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input); | |

/* If OP is an external value, see if we can insert the new statement | |

on an incoming edge. */ | |

if (input == unprom->op && unprom->dt == vect_external_def) | |

if (edge e = vect_get_external_def_edge (vinfo, input)) | |

{ | |

basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt); | |

gcc_assert (!new_bb); | |

return new_op; | |

} | |

/* As a (common) last resort, add the statement to the pattern itself. */ | |

append_pattern_def_seq (vinfo, stmt_info, new_stmt, vectype); | |

return new_op; | |

} | |

/* Invoke vect_convert_input for N elements of UNPROM and store the | |

result in the corresponding elements of RESULT. | |

If SUBTYPE then convert the type based on the subtype. */ | |

static void | |

vect_convert_inputs (vec_info *vinfo, stmt_vec_info stmt_info, unsigned int n, | |

tree *result, tree type, vect_unpromoted_value *unprom, | |

tree vectype, enum optab_subtype subtype = optab_default) | |

{ | |

for (unsigned int i = 0; i < n; ++i) | |

{ | |

unsigned int j; | |

for (j = 0; j < i; ++j) | |

if (unprom[j].op == unprom[i].op) | |

break; | |

if (j < i) | |

result[i] = result[j]; | |

else | |

result[i] = vect_convert_input (vinfo, stmt_info, | |

type, &unprom[i], vectype, subtype); | |

} | |

} | |

/* The caller has created a (possibly empty) sequence of pattern definition | |

statements followed by a single statement PATTERN_STMT. Cast the result | |

of this final statement to TYPE. If a new statement is needed, add | |

PATTERN_STMT to the end of STMT_INFO's pattern definition statements | |

and return the new statement, otherwise return PATTERN_STMT as-is. | |

VECITYPE is the vector form of PATTERN_STMT's result type. */ | |

static gimple * | |

vect_convert_output (vec_info *vinfo, stmt_vec_info stmt_info, tree type, | |

gimple *pattern_stmt, tree vecitype) | |

{ | |

tree lhs = gimple_get_lhs (pattern_stmt); | |

if (!types_compatible_p (type, TREE_TYPE (lhs))) | |

{ | |

append_pattern_def_seq (vinfo, stmt_info, pattern_stmt, vecitype); | |

tree cast_var = vect_recog_temp_ssa_var (type, NULL); | |

pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs); | |

} | |

return pattern_stmt; | |

} | |

/* Return true if STMT_VINFO describes a reduction for which reassociation | |

is allowed. If STMT_INFO is part of a group, assume that it's part of | |

a reduction chain and optimistically assume that all statements | |

except the last allow reassociation. | |

Also require it to have code CODE and to be a reduction | |

in the outermost loop. When returning true, store the operands in | |

*OP0_OUT and *OP1_OUT. */ | |

static bool | |

vect_reassociating_reduction_p (vec_info *vinfo, | |

stmt_vec_info stmt_info, tree_code code, | |

tree *op0_out, tree *op1_out) | |

{ | |

loop_vec_info loop_info = dyn_cast <loop_vec_info> (vinfo); | |

if (!loop_info) | |

return false; | |

gassign *assign = dyn_cast <gassign *> (stmt_info->stmt); | |

if (!assign || gimple_assign_rhs_code (assign) != code) | |

return false; | |

/* We don't allow changing the order of the computation in the inner-loop | |

when doing outer-loop vectorization. */ | |

class loop *loop = LOOP_VINFO_LOOP (loop_info); | |

if (loop && nested_in_vect_loop_p (loop, stmt_info)) | |

return false; | |

if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) | |

{ | |

if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)), | |

code)) | |

return false; | |

} | |

else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL) | |

return false; | |

*op0_out = gimple_assign_rhs1 (assign); | |

*op1_out = gimple_assign_rhs2 (assign); | |

if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0) | |

std::swap (*op0_out, *op1_out); | |

return true; | |

} | |

/* Function vect_recog_dot_prod_pattern | |

Try to find the following pattern: | |

type1a x_t | |

type1b y_t; | |

TYPE1 prod; | |

TYPE2 sum = init; | |

loop: | |

sum_0 = phi <init, sum_1> | |

S1 x_t = ... | |

S2 y_t = ... | |

S3 x_T = (TYPE1) x_t; | |

S4 y_T = (TYPE1) y_t; | |

S5 prod = x_T * y_T; | |

[S6 prod = (TYPE2) prod; #optional] | |

S7 sum_1 = prod + sum_0; | |

where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b', | |

the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of | |

'type1a' and 'type1b' can differ. | |

Input: | |

* STMT_VINFO: The stmt from which the pattern search begins. In the | |

example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7} | |

will be detected. | |

Output: | |

* TYPE_OUT: The type of the output of this pattern. | |

* Return value: A new stmt that will be used to replace the sequence of | |

stmts that constitute the pattern. In this case it will be: | |

WIDEN_DOT_PRODUCT <x_t, y_t, sum_0> | |

Note: The dot-prod idiom is a widening reduction pattern that is | |

vectorized without preserving all the intermediate results. It | |

produces only N/2 (widened) results (by summing up pairs of | |

intermediate results) rather than all N results. Therefore, we | |

cannot allow this pattern when we want to get all the results and in | |

the correct order (as is the case when this computation is in an | |

inner-loop nested in an outer-loop that us being vectorized). */ | |

static gimple * | |

vect_recog_dot_prod_pattern (vec_info *vinfo, | |

stmt_vec_info stmt_vinfo, tree *type_out) | |

{ | |

tree oprnd0, oprnd1; | |

gimple *last_stmt = stmt_vinfo->stmt; | |

tree type, half_type; | |

gimple *pattern_stmt; | |

tree var; | |

/* Look for the following pattern | |

DX = (TYPE1) X; | |

DY = (TYPE1) Y; | |

DPROD = DX * DY; | |

DDPROD = (TYPE2) DPROD; | |

sum_1 = DDPROD + sum_0; | |

In which | |

- DX is double the size of X | |

- DY is double the size of Y | |

- DX, DY, DPROD all have the same type but the sign | |

between X, Y and DPROD can differ. | |

- sum is the same size of DPROD or bigger | |

- sum has been recognized as a reduction variable. | |

This is equivalent to: | |

DPROD = X w* Y; #widen mult | |

sum_1 = DPROD w+ sum_0; #widen summation | |

or | |

DPROD = X w* Y; #widen mult | |

sum_1 = DPROD + sum_0; #summation | |

*/ | |

/* Starting from LAST_STMT, follow the defs of its uses in search | |

of the above pattern. */ | |

if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR, | |

&oprnd0, &oprnd1)) | |

return NULL; | |

type = TREE_TYPE (gimple_get_lhs (last_stmt)); | |

vect_unpromoted_value unprom_mult; | |

oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult); | |

/* So far so good. Since last_stmt was detected as a (summation) reduction, | |

we know that oprnd1 is the reduction variable (defined by a loop-header | |

phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. | |

Left to check that oprnd0 is defined by a (widen_)mult_expr */ | |

if (!oprnd0) | |

return NULL; | |

stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0); | |

if (!mult_vinfo) | |

return NULL; | |

/* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi | |

inside the loop (in case we are analyzing an outer-loop). */ | |

vect_unpromoted_value unprom0[2]; | |

enum optab_subtype subtype = optab_vector; | |

if (!vect_widened_op_tree (vinfo, mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR, | |

false, 2, unprom0, &half_type, &subtype)) | |

return NULL; | |

/* If there are two widening operations, make sure they agree on the sign | |

of the extension. The result of an optab_vector_mixed_sign operation | |

is signed; otherwise, the result has the same sign as the operands. */ | |

if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type) | |

&& (subtype == optab_vector_mixed_sign | |

? TYPE_UNSIGNED (unprom_mult.type) | |

: TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))) | |

return NULL; | |

vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt); | |

tree half_vectype; | |

if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type, | |

type_out, &half_vectype, subtype)) | |

return NULL; | |

/* Get the inputs in the appropriate types. */ | |

tree mult_oprnd[2]; | |

vect_convert_inputs (vinfo, stmt_vinfo, 2, mult_oprnd, half_type, | |

unprom0, half_vectype, subtype); | |

var = vect_recog_temp_ssa_var (type, NULL); | |

pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR, | |

mult_oprnd[0], mult_oprnd[1], oprnd1); | |

return pattern_stmt; | |

} | |

/* Function vect_recog_sad_pattern | |

Try to find the following Sum of Absolute Difference (SAD) pattern: | |

type x_t, y_t; | |

signed TYPE1 diff, abs_diff; | |

TYPE2 sum = init; | |

loop: | |

sum_0 = phi <init, sum_1> | |

S1 x_t = ... | |

S2 y_t = ... | |

S3 x_T = (TYPE1) x_t; | |

S4 y_T = (TYPE1) y_t; | |

S5 diff = x_T - y_T; | |

S6 abs_diff = ABS_EXPR <diff>; | |

[S7 abs_diff = (TYPE2) abs_diff; #optional] | |

S8 sum_1 = abs_diff + sum_0; | |

where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the | |

same size of 'TYPE1' or bigger. This is a special case of a reduction | |

computation. | |

Input: | |

* STMT_VINFO: The stmt from which the pattern search begins. In the | |

example, when this function is called with S8, the pattern | |

{S3,S4,S5,S6,S7,S8} will be detected. | |

Output: | |

* TYPE_OUT: The type of the output of this pattern. | |

* Return value: A new stmt that will be used to replace the sequence of | |

stmts that constitute the pattern. In this case it will be: | |

SAD_EXPR <x_t, y_t, sum_0> | |

*/ | |

static gimple * | |

vect_recog_sad_pattern (vec_info *vinfo, | |

stmt_vec_info stmt_vinfo, tree *type_out) | |

{ | |

gimple *last_stmt = stmt_vinfo->stmt; | |

tree half_type; | |

/* Look for the following pattern | |

DX = (TYPE1) X; | |

DY = (TYPE1) Y; | |

DDIFF = DX - DY; | |

DAD = ABS_EXPR <DDIFF>; | |

DDPROD = (TYPE2) DPROD; | |

sum_1 = DAD + sum_0; | |

In which | |

- DX is at least double the size of X | |

- DY is at least double the size of Y | |

- DX, DY, DDIFF, DAD all have the same type | |

- sum is the same size of DAD or bigger | |

- sum has been recognized as a reduction variable. | |

This is equivalent to: | |

DDIFF = X w- Y; #widen sub | |

DAD = ABS_EXPR <DDIFF>; | |

sum_1 = DAD w+ sum_0; #widen summation | |

or | |

DDIFF = X w- Y; #widen sub | |

DAD = ABS_EXPR <DDIFF>; | |

sum_1 = DAD + sum_0; #summation | |

*/ | |

/* Starting from LAST_STMT, follow the defs of its uses in search | |

of the above pattern. */ | |

tree plus_oprnd0, plus_oprnd1; | |

if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR, | |

&plus_oprnd0, &plus_oprnd1)) | |

return NULL; | |

tree sum_type = TREE_TYPE (gimple_get_lhs (last_stmt)); | |

/* Any non-truncating sequence of conversions is OK here, since | |

with a successful match, the result of the ABS(U) is known to fit | |

within the nonnegative range of the result type. (It cannot be the | |

negative of the minimum signed value due to the range of the widening | |

MINUS_EXPR.) */ | |

vect_unpromoted_value unprom_abs; | |

plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0, | |

&unprom_abs); | |

/* So far so good. Since last_stmt was detected as a (summation) reduction, | |

we know that plus_oprnd1 is the reduction variable (defined by a loop-header | |

phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body. | |

Then check that plus_oprnd0 is defined by an abs_expr. */ | |

if (!plus_oprnd0) | |

return NULL; | |

stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0); | |

if (!abs_stmt_vinfo) | |

return NULL; | |

/* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi | |

inside the loop (in case we are analyzing an outer-loop). */ | |

gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt); | |

if (!abs_stmt | |

|| (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR | |

&& gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR)) | |

return NULL; | |

tree abs_oprnd = gimple_assign_rhs1 (abs_stmt); | |

tree abs_type = TREE_TYPE (abs_oprnd); | |

if (TYPE_UNSIGNED (abs_type)) | |

return NULL; | |

/* Peel off conversions from the ABS input. This can involve sign | |

changes (e.g. from an unsigned subtraction to a signed ABS input) | |

or signed promotion, but it can't include unsigned promotion. | |

(Note that ABS of an unsigned promotion should have been folded | |

away before now anyway.) */ | |

vect_unpromoted_value unprom_diff; | |

abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd, | |

&unprom_diff); | |

if (!abs_oprnd) | |

return NULL; | |

if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type) | |

&& TYPE_UNSIGNED (unprom_diff.type)) | |

return NULL; | |

/* We then detect if the operand of abs_expr is defined by a minus_expr. */ | |

stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd); | |

if (!diff_stmt_vinfo) | |

return NULL; | |

/* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi | |

inside the loop (in case we are analyzing an outer-loop). */ | |

vect_unpromoted_value unprom[2]; | |

if (!vect_widened_op_tree (vinfo, diff_stmt_vinfo, MINUS_EXPR, WIDEN_MINUS_EXPR, | |

false, 2, unprom, &half_type)) | |

return NULL; | |

vect_pattern_detected ("vect_recog_sad_pattern", last_stmt); | |

tree half_vectype; | |

if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type, | |

type_out, &half_vectype)) | |

return NULL; | |

/* Get the inputs to the SAD_EXPR in the appropriate types. */ | |

tree sad_oprnd[2]; | |

vect_convert_inputs (vinfo, stmt_vinfo, 2, sad_oprnd, half_type, | |

unprom, half_vectype); | |

tree var = vect_recog_temp_ssa_var (sum_type, NULL); | |

gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0], | |

sad_oprnd[1], plus_oprnd1); | |

return pattern_stmt; | |

} | |

/* Recognize an operation that performs ORIG_CODE on widened inputs, | |

so that it can be treated as though it had the form: | |

A_TYPE a; | |

B_TYPE b; | |

HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op | |

HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op | |

| RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE | |

| RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE | |

| RES_TYPE res = a_extend ORIG_CODE b_extend; | |

Try to replace the pattern with: | |

A_TYPE a; | |

B_TYPE b; | |

HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op | |

HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op | |

| EXT_TYPE ext = a_cast WIDE_CODE b_cast; | |

| RES_TYPE res = (EXT_TYPE) ext; // possible no-op | |

where EXT_TYPE is wider than HALF_TYPE but has the same signedness. | |

SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the | |

name of the pattern being matched, for dump purposes. */ | |

static gimple * | |

vect_recog_widen_op_pattern (vec_info *vinfo, | |

stmt_vec_info last_stmt_info, tree *type_out, | |

tree_code orig_code, tree_code wide_code, | |

bool shift_p, const char *name) | |

{ | |

gimple *last_stmt = last_stmt_info->stmt; | |

vect_unpromoted_value unprom[2]; | |

tree half_type; | |

if (!vect_widened_op_tree (vinfo, last_stmt_info, orig_code, orig_code, | |

shift_p, 2, unprom, &half_type)) | |

return NULL; | |

/* Pattern detected. */ | |

vect_pattern_detected (name, last_stmt); | |

tree type = TREE_TYPE (gimple_get_lhs (last_stmt)); | |

tree itype = type; | |

if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2 | |

|| TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type)) | |

itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2, | |

TYPE_UNSIGNED (half_type)); | |

/* Check target support */ | |

tree vectype = get_vectype_for_scalar_type (vinfo, half_type); | |

tree vecitype = get_vectype_for_scalar_type (vinfo, itype); | |

tree ctype = itype; | |

tree vecctype = vecitype; | |

if (orig_code == MINUS_EXPR | |

&& TYPE_UNSIGNED (itype) | |

&& TYPE_PRECISION (type) > TYPE_PRECISION (itype)) | |

{ | |

/* Subtraction is special, even if half_type is unsigned and no matter | |

whether type is signed or unsigned, if type is wider than itype, | |

we need to sign-extend from the widening operation result to the | |

result type. | |

Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff, | |

itype unsigned short and type either int or unsigned int. | |

Widened (unsigned short) 0xfe - (unsigned short) 0xff is | |

(unsigned short) 0xffff, but for type int we want the result -1 | |

and for type unsigned int 0xffffffff rather than 0xffff. */ | |

ctype = build_nonstandard_integer_type (TYPE_PRECISION (itype), 0); | |

vecctype = get_vectype_for_scalar_type (vinfo, ctype); | |

} | |

enum tree_code dummy_code; | |

int dummy_int; | |

auto_vec<tree> dummy_vec; | |

if (!vectype | |

|| !vecitype | |

|| !vecctype | |

|| !supportable_widening_operation (vinfo, wide_code, last_stmt_info, | |

vecitype, vectype, | |

&dummy_code, &dummy_code, | |

&dummy_int, &dummy_vec)) | |

return NULL; | |

*type_out = get_vectype_for_scalar_type (vinfo, type); | |

if (!*type_out) | |

return NULL; | |

tree oprnd[2]; | |

vect_convert_inputs (vinfo, last_stmt_info, | |

2, oprnd, half_type, unprom, vectype); | |

tree var = vect_recog_temp_ssa_var (itype, NULL); | |

gimple *pattern_stmt = gimple_build_assign (var, wide_code, | |

oprnd[0], oprnd[1]); | |

if (vecctype != vecitype) | |

pattern_stmt = vect_convert_output (vinfo, last_stmt_info, ctype, | |

pattern_stmt, vecitype); | |

return vect_convert_output (vinfo, last_stmt_info, | |

type, pattern_stmt, vecctype); | |

} | |

/* Try to detect multiplication on widened inputs, converting MULT_EXPR | |

to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */ | |

static gimple * | |

vect_recog_widen_mult_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info, | |

tree *type_out) | |

{ | |

return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out, | |

MULT_EXPR, WIDEN_MULT_EXPR, false, | |

"vect_recog_widen_mult_pattern"); | |

} | |

/* Try to detect addition on widened inputs, converting PLUS_EXPR | |

to WIDEN_PLUS_EXPR. See vect_recog_widen_op_pattern for details. */ | |

static gimple * | |

vect_recog_widen_plus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info, | |

tree *type_out) | |

{ | |

return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out, | |

PLUS_EXPR, WIDEN_PLUS_EXPR, false, | |

"vect_recog_widen_plus_pattern"); | |

} | |

/* Try to detect subtraction on widened inputs, converting MINUS_EXPR | |

to WIDEN_MINUS_EXPR. See vect_recog_widen_op_pattern for details. */ | |

static gimple * | |

vect_recog_widen_minus_pattern (vec_info *vinfo, stmt_vec_info last_stmt_info, | |

tree *type_out) | |

{ | |

return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out, | |

MINUS_EXPR, WIDEN_MINUS_EXPR, false, | |

"vect_recog_widen_minus_pattern"); | |

} | |

/* Function vect_recog_popcount_pattern | |

Try to find the following pattern: | |

UTYPE1 A; | |

TYPE1 B; | |

UTYPE2 temp_in; | |

TYPE3 temp_out; | |

temp_in = (UTYPE2)A; | |

temp_out = __builtin_popcount{,l,ll} (temp_in); | |

B = (TYPE1) temp_out; | |

TYPE2 may or may not be equal to TYPE3. | |

i.e. TYPE2 is equal to TYPE3 for __builtin_popcount | |

i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll | |

Input: | |

* STMT_VINFO: The stmt from which the pattern search begins. | |

here it starts with B = (TYPE1) temp_out; | |

Output: | |

* TYPE_OUT: The vector type of the output of this pattern. | |

* Return value: A new stmt that will be used to replace the sequence of | |

stmts that constitute the pattern. In this case it will be: | |

B = .POPCOUNT (A); | |

*/ | |

static gimple * | |

vect_recog_popcount_pattern (vec_info *vinfo, | |

stmt_vec_info stmt_vinfo, tree *type_out) | |

{ | |

gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt); | |

gimple *popcount_stmt, *pattern_stmt; | |

tree rhs_oprnd, rhs_origin, lhs_oprnd, lhs_type, vec_type, new_var; | |

auto_vec<tree> vargs; | |

/* Find B = (TYPE1) temp_out. */ | |

if (!last_stmt) | |

return NULL; | |

tree_code code = gimple_assign_rhs_code (last_stmt); | |

if (!CONVERT_EXPR_CODE_P (code)) | |

return NULL; | |

lhs_oprnd = gimple_assign_lhs (last_stmt); | |

lhs_type = TREE_TYPE (lhs_oprnd); | |

if (!INTEGRAL_TYPE_P (lhs_type)) | |

return NULL; | |

rhs_oprnd = gimple_assign_rhs1 (last_stmt); | |

if (TREE_CODE (rhs_oprnd) != SSA_NAME | |

|| !has_single_use (rhs_oprnd)) | |

return NULL; | |

popcount_stmt = SSA_NAME_DEF_STMT (rhs_oprnd); | |

/* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */ | |

if (!is_gimple_call (popcount_stmt)) | |

return NULL; | |

switch (gimple_call_combined_fn (popcount_stmt)) | |

{ | |

CASE_CFN_POPCOUNT: | |

break; | |

default: | |

return NULL; | |

} | |

if (gimple_call_num_args (popcount_stmt) != 1) | |

return NULL; | |

rhs_oprnd = gimple_call_arg (popcount_stmt, 0); | |

vect_unpromoted_value unprom_diff; | |

rhs_origin = vect_look_through_possible_promotion (vinfo, rhs_oprnd, | |

&unprom_diff); | |

if (!rhs_origin) | |

return NULL; | |

/* Input and output of .POPCOUNT should be same-precision integer. | |

Also A should be unsigned or same precision as temp_in, | |

otherwise there would be sign_extend from A to temp_in. */ | |

if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (lhs_type) | |

|| (!TYPE_UNSIGNED (unprom_diff.type) | |

&& (TYPE_PRECISION (unprom_diff.type) | |

!= TYPE_PRECISION (TREE_TYPE (rhs_oprnd))))) | |

return NULL; | |

vargs.safe_push (unprom_diff.op); | |

vect_pattern_detected ("vec_regcog_popcount_pattern", popcount_stmt); | |

vec_type = get_vectype_for_scalar_type (vinfo, lhs_type); | |

/* Do it only if the backend has popcount<vector_mode>2 pattern. */ | |

if (!vec_type | |

|| !direct_internal_fn_supported_p (IFN_POPCOUNT, vec_type, | |

OPTIMIZE_FOR_SPEED)) | |

return NULL; | |

/* Create B = .POPCOUNT (A). */ | |

new_var = vect_recog_temp_ssa_var (lhs_type, NULL); | |

pattern_stmt = gimple_build_call_internal_vec (IFN_POPCOUNT, vargs); | |

gimple_call_set_lhs (pattern_stmt, new_var); | |

gimple_set_location (pattern_stmt, gimple_location (last_stmt)); | |

*type_out = vec_type; | |

if (dump_enabled_p ()) | |

dump_printf_loc (MSG_NOTE, vect_location, | |

"created pattern stmt: %G", pattern_stmt); | |

return pattern_stmt; | |

} | |

/* Function vect_recog_pow_pattern | |

Try to find the following pattern: | |

x = POW (y, N); | |

with POW being one of pow, powf, powi, powif and N being | |

either 2 or 0.5. | |

Input: | |

* STMT_VINFO: The stmt from which the pattern search begins. | |

Output: | |

* TYPE_OUT: The type of the output of this pattern. | |

* Return value: A new stmt that will be used to replace the sequence of | |

stmts that constitute the pattern. In this case it will be: | |

x = x * x | |

or | |

x = sqrt (x) | |

*/ | |

static gimple * | |

vect_recog_pow_pattern (vec_info *vinfo, | |

stmt_vec_info stmt_vinfo, tree *type_out) | |

{ | |

gimple *last_stmt = stmt_vinfo->stmt; | |

tree base, exp; | |

gimple *stmt; | |

tree var; | |

if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL) | |

return NULL; | |

switch (gimple_call_combined_fn (last_stmt)) | |

{ | |

CASE_CFN_POW: | |

CASE_CFN_POWI: | |

break; | |

default: | |

return NULL; | |

} | |

base = gimple_call_arg (last_stmt, 0); | |

exp = gimple_call_arg (last_stmt, 1); | |

if (TREE_CODE (exp) != REAL_CST | |

&& TREE_CODE (exp) != INTEGER_CST) | |

{ | |

if (flag_unsafe_math_optimizations | |

&& TREE_CODE (base) == REAL_CST | |

&& gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL)) | |

{ | |

combined_fn log_cfn; | |

built_in_function exp_bfn; | |

switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt))) | |

{ | |

case BUILT_IN_POW: | |

log_cfn = CFN_BUILT_IN_LOG; | |

exp_bfn = BUILT_IN_EXP; | |

break; | |

case BUILT_IN_POWF: | |

log_cfn = CFN_BUILT_IN_LOGF; | |

exp_bfn = BUILT_IN_EXPF; | |

break; | |

case BUILT_IN_POWL: | |

log_cfn = CFN_BUILT_IN_LOGL; | |

exp_bfn = BUILT_IN_EXPL; | |

break; | |

default: | |

return NULL; | |

} | |

tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base); | |

tree exp_decl = builtin_decl_implicit (exp_bfn); | |

/* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd | |

does that, but if C is a power of 2, we want to use | |

exp2 (log2 (C) * x) in the non-vectorized version, but for | |

vectorization we don't have vectorized exp2. */ | |

if (logc | |

&& TREE_CODE (logc) == REAL_CST | |

&& exp_decl | |

&& lookup_attribute ("omp declare simd", | |

DECL_ATTRIBUTES (exp_decl))) | |

{ | |

cgraph_node *node = cgraph_node::get_create (exp_decl); | |

if (node->simd_clones == NULL) | |

{ | |

if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL | |

|| node->definition) | |

return NULL; | |

expand_simd_clones (node); | |

if (node->simd_clones == NULL) | |

return NULL; | |

} | |

*type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base)); | |

if (!*type_out) | |

return NULL; | |

tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); | |

gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc); | |

append_pattern_def_seq (vinfo, stmt_vinfo, g); | |

tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); | |

g = gimple_build_call (exp_decl, 1, def); | |

gimple_call_set_lhs (g, res); | |

return g; | |

} | |

} | |

return NULL; | |

} | |

/* We now have a pow or powi builtin function call with a constant | |

exponent. */ | |

/* Catch squaring. */ | |

if ((tree_fits_shwi_p (exp) | |

&& tree_to_shwi (exp) == 2) | |

|| (TREE_CODE (exp) == REAL_CST | |

&& real_equal (&TREE_REAL_CST (exp), &dconst2))) | |

{ | |

if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR, | |

TREE_TYPE (base), type_out)) | |

return NULL; | |

var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); | |

stmt = gimple_build_assign (var, MULT_EXPR, base, base); | |

return stmt; | |

} | |

/* Catch square root. */ | |

if (TREE_CODE (exp) == REAL_CST | |

&& real_equal (&TREE_REAL_CST (exp), &dconsthalf)) | |

{ | |

*type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base)); | |

if (*type_out | |

&& direct_internal_fn_supported_p (IFN_SQRT, *type_out, | |

OPTIMIZE_FOR_SPEED)) | |

{ | |

gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base); | |

var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt); | |

gimple_call_set_lhs (stmt, var); | |

gimple_call_set_nothrow (stmt, true); | |

return stmt; | |

} | |

} | |

return NULL; | |

} | |

/* Function vect_recog_widen_sum_pattern | |

Try to find the following pattern: | |

type x_t; | |

TYPE x_T, sum = init; | |

loop: | |

sum_0 = phi <init, sum_1> | |

S1 x_t = *p; | |

S2 x_T = (TYPE) x_t; | |

S3 sum_1 = x_T + sum_0; | |

where type 'TYPE' is at least double the size of type 'type', i.e - we're | |

summing elements of type 'type' into an accumulator of type 'TYPE'. This is | |

a special case of a reduction computation. | |

Input: | |

* STMT_VINFO: The stmt from which the pattern search begins. In the example, | |

when this function is called with S3, the pattern {S2,S3} will be detected. | |

Output: | |

* TYPE_OUT: The type of the output of this pattern. | |

* Return value: A new stmt that will be used to replace the sequence of | |

stmts that constitute the pattern. In this case it will be: | |

WIDEN_SUM <x_t, sum_0> | |

Note: The widening-sum idiom is a widening reduction pattern that is | |

vectorized without preserving all the intermediate results. It | |

produces only N/2 (widened) results (by summing up pairs of | |

intermediate results) rather than all N results. Therefore, we | |

cannot allow this pattern when we want to get all the results and in | |

the correct order (as is the case when this computation is in an | |

inner-loop nested in an outer-loop that us being vectorized). */ | |

static gimple * | |

vect_recog_widen_sum_pattern (vec_info *vinfo, | |

stmt_vec_info stmt_vinfo, tree *type_out) | |

{ | |

gimple *last_stmt = stmt_vinfo->stmt; | |

tree oprnd0, oprnd1; | |

tree type; | |

gimple *pattern_stmt; | |

tree var; | |

/* Look for the following pattern | |

DX = (TYPE) X; | |

sum_1 = DX + sum_0; | |

In which DX is at least double the size of X, and sum_1 has been | |

recognized as a reduction variable. | |

*/ | |

/* Starting from LAST_STMT, follow the defs of its uses in search | |

of the above pattern. */ | |

if (!vect_reassociating_reduction_p (vinfo, stmt_vinfo, PLUS_EXPR, | |

&oprnd0, &oprnd1)) | |

return NULL; | |

type = TREE_TYPE (gimple_get_lhs (last_stmt)); | |

/* So far so good. Since last_stmt was detected as a (summation) reduction, | |

we know that oprnd1 is the reduction variable (defined by a loop-header | |

phi), and oprnd0 is an ssa-name defined by a stmt in the loop body. | |

Left to check that oprnd0 is defined by a cast from type 'type' to type | |

'TYPE'. */ | |

vect_unpromoted_value unprom0; | |

if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0) | |

|| TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type)) | |

return NULL; | |

vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt); | |

if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR, | |

unprom0.type, type_out)) | |

return NULL; | |

var = vect_recog_temp_ssa_var (type, NULL); | |

pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1); | |

return pattern_stmt; | |

} | |

/* Recognize cases in which an operation is performed in one type WTYPE | |

but could be done more efficiently in a narrower type NTYPE. For example, | |

if we have: | |

ATYPE a; // narrower than NTYPE | |

BTYPE b; // narrower than NTYPE | |

WTYPE aw = (WTYPE) a; | |

WTYPE bw = (WTYPE) b; | |

WTYPE res = aw + bw; // only uses of aw and bw | |

then it would be more efficient to do: | |

NTYPE an = (NTYPE) a; | |

NTYPE bn = (NTYPE) b; | |

NTYPE resn = an + bn; | |

WTYPE res = (WTYPE) resn; | |

Other situations include things like: | |

ATYPE a; // NTYPE or narrower | |

WTYPE aw = (WTYPE) a; | |

WTYPE res = aw + b; | |

when only "(NTYPE) res" is significant. In that case it's more efficient | |

to truncate "b" and do the operation on NTYPE instead: | |

NTYPE an = (NTYPE) a; | |

NTYPE bn = (NTYPE) b; // truncation | |

NTYPE resn = an + bn; | |

WTYPE res = (WTYPE) resn; | |

All users of "res" should then use "resn" instead, making the final | |

statement dead (not marked as relevant). The final statement is still | |

needed to maintain the type correctness of the IR. | |

vect_determine_precisions has already determined the minimum | |

precison of the operation and the minimum precision required | |

by users of the result. */ | |

static gimple * | |

vect_recog_over_widening_pattern (vec_info *vinfo, | |

stmt_vec_info last_stmt_info, tree *type_out) | |

{ | |

gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt); | |

if (!last_stmt) | |

return NULL; | |

/* See whether we have found that this operation can be done on a | |

narrower type without changing its semantics. */ | |

unsigned int new_precision = last_stmt_info->operation_precision; | |

if (!new_precision) | |

return NULL; | |

tree lhs = gimple_assign_lhs (last_stmt); | |

tree type = TREE_TYPE (lhs); | |

tree_code code = gimple_assign_rhs_code (last_stmt); | |

/* Punt for reductions where we don't handle the type conversions. */ | |

if (STMT_VINFO_DEF_TYPE (last_stmt_info) == vect_reduction_def) | |

return NULL; | |

/* Keep the first operand of a COND_EXPR as-is: only the other two | |

operands are interesting. */ | |

unsigned int first_op = (code == COND_EXPR ? 2 : 1); | |

/* Check the operands. */ | |

unsigned int nops = gimple_num_ops (last_stmt) - first_op; | |

auto_vec <vect_unpromoted_value, 3> unprom (nops); | |

unprom.quick_grow (nops); | |

unsigned int min_precision = 0; | |

bool single_use_p = false; | |

for (unsigned int i = 0; i < nops; ++i) | |

{ | |

tree op = gimple_op (last_stmt, first_op + i); | |

if (TREE_CODE (op) == INTEGER_CST) | |

unprom[i].set_op (op, vect_constant_def); | |

else if (TREE_CODE (op) == SSA_NAME) | |

{ | |

bool op_single_use_p = true; | |

if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i], | |

&op_single_use_p)) | |

return NULL; | |

/* If: | |

(1) N bits of the result are needed; | |

(2) all inputs are widened from M<N bits; and | |

(3) one operand OP is a single-use SSA name | |

we can shift the M->N widening from OP to the output | |

without changing the number or type of extensions involved. | |

This then reduces the number of copies of STMT_INFO. | |

If instead of (3) more than one operand is a single-use SSA name, | |

shifting the extension to the output is even more of a win. | |

If instead: | |

(1) N bits of the result are needed; | |

(2) one operand OP2 is widened from M2<N bits; | |

(3) another operand OP1 is widened from M1<M2 bits; and | |

(4) both OP1 and OP2 are single-use | |

the choice is between: | |

(a) truncating OP2 to M1, doing the operation on M1, | |

and then widening the result to N | |

(b) widening OP1 to M2, doing the operation on M2, and then | |

widening the result to N | |

Both shift the M2->N widening of the inputs to the output. | |

(a) additionally shifts the M1->M2 widening to the output; | |

it requires fewer copies of STMT_INFO but requires an extra | |

M2->M1 truncation. | |

Which is better will depend on the complexity and cost of | |

STMT_INFO, which is hard to predict at this stage. However, | |

a clear tie-breaker in favor of (b) is the fact that the | |

truncation in (a) increases the length of the operation chain. | |

If instead of (4) only one of OP1 or OP2 is single-use, | |

(b) is still a win over doing the operation in N bits: | |

it still shifts the M2->N widening on the single-use operand | |

to the output and reduces the number of STMT_INFO copies. | |

If neither operand is single-use then operating on fewer than | |

N bits might lead to more extensions overall. Whether it does | |

or not depends on global information about the vectorization | |

region, and whether that's a good trade-off would again | |

depend on the complexity and cost of the statements involved, | |

as well as things like register pressure that are not normally | |

modelled at this stage. We therefore ignore these cases | |

and just optimize the clear single-use wins above. | |

Thus we take the maximum precision of the unpromoted operands | |

and record whether any operand is single-use. */ | |

if (unprom[i].dt == vect_internal_def) | |

{ | |

min_precision = MAX (min_precision, | |

TYPE_PRECISION (unprom[i].type)); | |

single_use_p |= op_single_use_p; | |

} | |

} | |

else | |

return NULL; | |

} | |

/* Although the operation could be done in operation_precision, we have | |

to balance that against introducing extra truncations or extensions. | |

Calculate the minimum precision that can be handled efficiently. | |

The loop above determined that the operation could be handled | |

efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an | |

extension from the inputs to the output without introducing more | |

instructions, and would reduce the number of instructions required | |

for STMT_INFO itself. | |

vect_determine_precisions has also determined that the result only | |

needs min_output_precision bits. Truncating by a factor of N times | |

requires a tree of N - 1 instructions, so if TYPE is N times wider | |

than min_output_precision, doing the operation in TYPE and truncating | |

the result requires N + (N - 1) = 2N - 1 instructions per output vector. | |

In contrast: | |

- truncating the input to a unary operation and doing the operation | |

in the new type requires at most N - 1 + 1 = N instructions per | |

output vector | |

- doing the same for a binary operation requires at most | |

(N - 1) * 2 + 1 = 2N - 1 instructions per output vector | |

Both unary and binary operations require fewer instructions than | |

this if the operands were extended from a suitable truncated form. | |

Thus there is usually nothing to lose by doing operations in | |

min_output_precision bits, but there can be something to gain. */ | |

if (!single_use_p) | |

min_precision = last_stmt_info->min_output_precision; | |

else | |

min_precision = MIN (min_precision, last_stmt_info->min_output_precision); | |

/* Apply the minimum efficient precision we just calculated. */ | |

if (new_precision < min_precision) | |

new_precision = min_precision; | |

new_precision = vect_element_precision (new_precision); | |

if (new_precision >= TYPE_PRECISION (type)) | |

return NULL; | |

vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt); | |

*type_out = get_vectype_for_scalar_type (vinfo, type); | |

if (!*type_out) | |

return NULL; | |

/* We've found a viable pattern. Get the new type of the operation. */ | |

bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED); | |

tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p); | |

/* If we're truncating an operation, we need to make sure that we | |

don't introduce new undefined overflow. The codes tested here are | |

a subset of those accepted by vect_truncatable_operation_p. */ | |

tree op_type = new_type; | |

if (TYPE_OVERFLOW_UNDEFINED (new_type) | |

&& (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR)) | |

op_type = build_nonstandard_integer_type (new_precision, true); | |

/* We specifically don't check here whether the target supports the | |

new operation, since it might be something that a later pattern | |

wants to rewrite anyway. If targets have a minimum element size | |

for some optabs, we should pattern-match smaller ops to larger ops | |

where beneficial. */ | |

tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); | |

tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type); | |

if (!new_vectype || !op_vectype) | |

return NULL; | |

if (dump_enabled_p ()) | |

dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n", | |

type, new_type); | |

/* Calculate the rhs operands for an operation on OP_TYPE. */ | |

tree ops[3] = {}; | |

for (unsigned int i = 1; i < first_op; ++i) | |

ops[i - 1] = gimple_op (last_stmt, i); | |

vect_convert_inputs (vinfo, last_stmt_info, nops, &ops[first_op - 1], | |

op_type, &unprom[0], op_vectype); | |

/* Use the operation to produce a result of type OP_TYPE. */ | |

tree new_var = vect_recog_temp_ssa_var (op_type, NULL); | |

gimple *pattern_stmt = gimple_build_assign (new_var, code, | |

ops[0], ops[1], ops[2]); | |

gimple_set_location (pattern_stmt, gimple_location (last_stmt)); | |

if (dump_enabled_p ()) | |

dump_printf_loc (MSG_NOTE, vect_location, | |

"created pattern stmt: %G", pattern_stmt); | |

/* Convert back to the original signedness, if OP_TYPE is different | |

from NEW_TYPE. */ | |

if (op_type != new_type) | |

pattern_stmt = vect_convert_output (vinfo, last_stmt_info, new_type, | |

pattern_stmt, op_vectype); | |

/* Promote the result to the original type. */ | |

pattern_stmt = vect_convert_output (vinfo, last_stmt_info, type, | |

pattern_stmt, new_vectype); | |

return pattern_stmt; | |

} | |

/* Recognize the following patterns: | |

ATYPE a; // narrower than TYPE | |

BTYPE b; // narrower than TYPE | |

1) Multiply high with scaling | |

TYPE res = ((TYPE) a * (TYPE) b) >> c; | |

Here, c is bitsize (TYPE) / 2 - 1. | |

2) ... or also with rounding | |

TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1; | |

Here, d is bitsize (TYPE) / 2 - 2. | |

3) Normal multiply high | |

TYPE res = ((TYPE) a * (TYPE) b) >> e; | |

Here, e is bitsize (TYPE) / 2. | |

where only the bottom half of res is used. */ | |

static gimple * | |

vect_recog_mulhs_pattern (vec_info *vinfo, | |

stmt_vec_info last_stmt_info, tree *type_out) | |

{ | |

/* Check for a right shift. */ | |

gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt); | |

if (!last_stmt | |

|| gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR) | |

return NULL; | |

/* Check that the shift result is wider than the users of the | |

result need (i.e. that narrowing would be a natural choice). */ | |

tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt)); | |

unsigned int target_precision | |

= vect_element_precision (last_stmt_info->min_output_precision); | |

if (!INTEGRAL_TYPE_P (lhs_type) | |

|| target_precision >= TYPE_PRECISION (lhs_type)) | |

return NULL; | |

/* Look through any change in sign on the outer shift input. */ | |

vect_unpromoted_value unprom_rshift_input; | |

tree rshift_input = vect_look_through_possible_promotion | |

(vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input); | |

if (!rshift_input | |

|| TYPE_PRECISION (TREE_TYPE (rshift_input)) | |

!= TYPE_PRECISION (lhs_type)) | |

return NULL; | |

/* Get the definition of the shift input. */ | |

stmt_vec_info rshift_input_stmt_info | |

= vect_get_internal_def (vinfo, rshift_input); | |

if (!rshift_input_stmt_info) | |

return NULL; | |

gassign *rshift_input_stmt | |

= dyn_cast <gassign *> (rshift_input_stmt_info->stmt); | |

if (!rshift_input_stmt) | |

return NULL; | |

stmt_vec_info mulh_stmt_info; | |

tree scale_term; | |

bool rounding_p = false; | |

/* Check for the presence of the rounding term. */ | |

if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR) | |

{ | |

/* Check that the outer shift was by 1. */ | |

if (!integer_onep (gimple_assign_rhs2 (last_stmt))) | |

return NULL; | |

/* Check that the second operand of the PLUS_EXPR is 1. */ | |

if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt))) | |

return NULL; | |

/* Look through any change in sign on the addition input. */ | |

vect_unpromoted_value unprom_plus_input; | |

tree plus_input = vect_look_through_possible_promotion | |

(vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input); | |

if (!plus_input | |

|| TYPE_PRECISION (TREE_TYPE (plus_input)) | |

!= TYPE_PRECISION (TREE_TYPE (rshift_input))) | |

return NULL; | |

/* Get the definition of the multiply-high-scale part. */ | |

stmt_vec_info plus_input_stmt_info | |

= vect_get_internal_def (vinfo, plus_input); | |

if (!plus_input_stmt_info) | |

return NULL; | |

gassign *plus_input_stmt | |

= dyn_cast <gassign *> (plus_input_stmt_info->stmt); | |

if (!plus_input_stmt | |

|| gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR) | |

return NULL; | |

/* Look through any change in sign on the scaling input. */ | |

vect_unpromoted_value unprom_scale_input; | |

tree scale_input = vect_look_through_possible_promotion | |

(vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input); | |

if (!scale_input | |

|| TYPE_PRECISION (TREE_TYPE (scale_input)) | |

!= TYPE_PRECISION (TREE_TYPE (plus_input))) | |

return NULL; | |

/* Get the definition of the multiply-high part. */ | |

mulh_stmt_info = vect_get_internal_def (vinfo, scale_input); | |

if (!mulh_stmt_info) | |

return NULL; | |

/* Get the scaling term. */ | |

scale_term = gimple_assign_rhs2 (plus_input_stmt); | |

rounding_p = true; | |

} | |

else | |

{ | |

mulh_stmt_info = rshift_input_stmt_info; | |

scale_term = gimple_assign_rhs2 (last_stmt); | |

} | |

/* Check that the scaling factor is constant. */ | |

if (TREE_CODE (scale_term) != INTEGER_CST) | |

return NULL; | |

/* Check whether the scaling input term can be seen as two widened | |

inputs multiplied together. */ | |

vect_unpromoted_value unprom_mult[2]; | |

tree new_type; | |

unsigned int nops | |

= vect_widened_op_tree (vinfo, mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR, | |

false, 2, unprom_mult, &new_type); | |

if (nops != 2) | |

return NULL; | |

/* Adjust output precision. */ | |

if (TYPE_PRECISION (new_type) < target_precision) | |

new_type = build_nonstandard_integer_type | |

(target_precision, TYPE_UNSIGNED (new_type)); | |

unsigned mult_precision = TYPE_PRECISION (new_type); | |

internal_fn ifn; | |

/* Check that the scaling factor is expected. Instead of | |

target_precision, we should use the one that we actually | |

use for internal function. */ | |

if (rounding_p) | |

{ | |

/* Check pattern 2). */ | |

if (wi::to_widest (scale_term) + mult_precision + 2 | |

!= TYPE_PRECISION (lhs_type)) | |

return NULL; | |

ifn = IFN_MULHRS; | |

} | |

else | |

{ | |

/* Check for pattern 1). */ | |

if (wi::to_widest (scale_term) + mult_precision + 1 | |

== TYPE_PRECISION (lhs_type)) | |

ifn = IFN_MULHS; | |

/* Check for pattern 3). */ | |

else if (wi::to_widest (scale_term) + mult_precision | |

== TYPE_PRECISION (lhs_type)) | |

ifn = IFN_MULH; | |

else | |

return NULL; | |

} | |

vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt); | |

/* Check for target support. */ | |

tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); | |

if (!new_vectype | |

|| !direct_internal_fn_supported_p | |

(ifn, new_vectype, OPTIMIZE_FOR_SPEED)) | |

return NULL; | |

/* The IR requires a valid vector type for the cast result, even though | |

it's likely to be discarded. */ | |

*type_out = get_vectype_for_scalar_type (vinfo, lhs_type); | |

if (!*type_out) | |

return NULL; | |

/* Generate the IFN_MULHRS call. */ | |

tree new_var = vect_recog_temp_ssa_var (new_type, NULL); | |

tree new_ops[2]; | |

vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type, | |

unprom_mult, new_vectype); | |

gcall *mulhrs_stmt | |

= gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]); | |

gimple_call_set_lhs (mulhrs_stmt, new_var); | |

gimple_set_location (mulhrs_stmt, gimple_location (last_stmt)); | |

if (dump_enabled_p ()) | |

dump_printf_loc (MSG_NOTE, vect_location, | |

"created pattern stmt: %G", mulhrs_stmt); | |

return vect_convert_output (vinfo, last_stmt_info, lhs_type, | |

mulhrs_stmt, new_vectype); | |

} | |

/* Recognize the patterns: | |

ATYPE a; // narrower than TYPE | |

BTYPE b; // narrower than TYPE | |

(1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1; | |

or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1; | |

where only the bottom half of avg is used. Try to transform them into: | |

(1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b); | |

or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b); | |

followed by: | |

TYPE avg = (TYPE) avg'; | |

where NTYPE is no wider than half of TYPE. Since only the bottom half | |

of avg is used, all or part of the cast of avg' should become redundant. | |

If there is no target support available, generate code to distribute rshift | |

over plus and add a carry. */ | |

static gimple * | |

vect_recog_average_pattern (vec_info *vinfo, | |

stmt_vec_info last_stmt_info, tree *type_out) | |

{ | |

/* Check for a shift right by one bit. */ | |

gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt); | |

if (!last_stmt | |

|| gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR | |

|| !integer_onep (gimple_assign_rhs2 (last_stmt))) | |

return NULL; | |

/* Check that the shift result is wider than the users of the | |

result need (i.e. that narrowing would be a natural choice). */ | |

tree lhs = gimple_assign_lhs (last_stmt); | |

tree type = TREE_TYPE (lhs); | |

unsigned int target_precision | |

= vect_element_precision (last_stmt_info->min_output_precision); | |

if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type)) | |

return NULL; | |

/* Look through any change in sign on the shift input. */ | |

tree rshift_rhs = gimple_assign_rhs1 (last_stmt); | |

vect_unpromoted_value unprom_plus; | |

rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs, | |

&unprom_plus); | |

if (!rshift_rhs | |

|| TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type)) | |

return NULL; | |

/* Get the definition of the shift input. */ | |

stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs); | |

if (!plus_stmt_info) | |

return NULL; | |

/* Check whether the shift input can be seen as a tree of additions on | |

2 or 3 widened inputs. | |

Note that the pattern should be a win even if the result of one or | |

more additions is reused elsewhere: if the pattern matches, we'd be | |

replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */ | |

internal_fn ifn = IFN_AVG_FLOOR; | |

vect_unpromoted_value unprom[3]; | |

tree new_type; | |

unsigned int nops = vect_widened_op_tree (vinfo, plus_stmt_info, PLUS_EXPR, | |

WIDEN_PLUS_EXPR, false, 3, | |

unprom, &new_type); | |

if (nops == 0) | |

return NULL; | |

if (nops == 3) | |

{ | |

/* Check that one operand is 1. */ | |

unsigned int i; | |

for (i = 0; i < 3; ++i) | |

if (integer_onep (unprom[i].op)) | |

break; | |

if (i == 3) | |

return NULL; | |

/* Throw away the 1 operand and keep the other two. */ | |

if (i < 2) | |

unprom[i] = unprom[2]; | |

ifn = IFN_AVG_CEIL; | |

} | |

vect_pattern_detected ("vect_recog_average_pattern", last_stmt); | |

/* We know that: | |

(a) the operation can be viewed as: | |

TYPE widened0 = (TYPE) UNPROM[0]; | |

TYPE widened1 = (TYPE) UNPROM[1]; | |

TYPE tmp1 = widened0 + widened1 {+ 1}; | |

TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO | |

(b) the first two statements are equivalent to: | |

TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0]; | |

TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1]; | |

(c) vect_recog_over_widening_pattern has already tried to narrow TYPE | |

where sensible; | |

(d) all the operations can be performed correctly at twice the width of | |

NEW_TYPE, due to the nature of the average operation; and | |

(e) users of the result of the right shift need only TARGET_PRECISION | |

bits, where TARGET_PRECISION is no more than half of TYPE's | |

precision. | |

Under these circumstances, the only situation in which NEW_TYPE | |

could be narrower than TARGET_PRECISION is if widened0, widened1 | |

and an addition result are all used more than once. Thus we can | |

treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION | |

as "free", whereas widening the result of the average instruction | |

from NEW_TYPE to TARGET_PRECISION would be a new operation. It's | |

therefore better not to go narrower than TARGET_PRECISION. */ | |

if (TYPE_PRECISION (new_type) < target_precision) | |

new_type = build_nonstandard_integer_type (target_precision, | |

TYPE_UNSIGNED (new_type)); | |

/* Check for target support. */ | |

tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); | |

if (!new_vectype) | |

return NULL; | |

bool fallback_p = false; | |

if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED)) | |

; | |

else if (TYPE_UNSIGNED (new_type) | |

&& optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar) | |

&& optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default) | |

&& optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default) | |

&& optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default)) | |

fallback_p = true; | |

else | |

return NULL; | |

/* The IR requires a valid vector type for the cast result, even though | |

it's likely to be discarded. */ | |

*type_out = get_vectype_for_scalar_type (vinfo, type); | |

if (!*type_out) | |

return NULL; | |

tree new_var = vect_recog_temp_ssa_var (new_type, NULL); | |

tree new_ops[2]; | |

vect_convert_inputs (vinfo, last_stmt_info, 2, new_ops, new_type, | |

unprom, new_vectype); | |

if (fallback_p) | |

{ | |

/* As a fallback, generate code for following sequence: | |

shifted_op0 = new_ops[0] >> 1; | |

shifted_op1 = new_ops[1] >> 1; | |

sum_of_shifted = shifted_op0 + shifted_op1; | |

unmasked_carry = new_ops[0] and/or new_ops[1]; | |

carry = unmasked_carry & 1; | |

new_var = sum_of_shifted + carry; | |

*/ | |

tree one_cst = build_one_cst (new_type); | |

gassign *g; | |

tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL); | |

g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst); | |

append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype); | |

tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL); | |

g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst); | |

append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype); | |

tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL); | |

g = gimple_build_assign (sum_of_shifted, PLUS_EXPR, | |

shifted_op0, shifted_op1); | |

append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype); | |

tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL); | |

tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR; | |

g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]); | |

append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype); | |

tree carry = vect_recog_temp_ssa_var (new_type, NULL); | |

g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst); | |

append_pattern_def_seq (vinfo, last_stmt_info, g, new_vectype); | |

g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry); | |

return vect_convert_output (vinfo, last_stmt_info, type, g, new_vectype); | |

} | |

/* Generate the IFN_AVG* call. */ | |

gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0], | |

new_ops[1]); | |

gimple_call_set_lhs (average_stmt, new_var); | |

gimple_set_location (average_stmt, gimple_location (last_stmt)); | |

if (dump_enabled_p ()) | |

dump_printf_loc (MSG_NOTE, vect_location, | |

"created pattern stmt: %G", average_stmt); | |

return vect_convert_output (vinfo, last_stmt_info, | |

type, average_stmt, new_vectype); | |

} | |

/* Recognize cases in which the input to a cast is wider than its | |

output, and the input is fed by a widening operation. Fold this | |

by removing the unnecessary intermediate widening. E.g.: | |

unsigned char a; | |

unsigned int b = (unsigned int) a; | |

unsigned short c = (unsigned short) b; | |

--> | |

unsigned short c = (unsigned short) a; | |

Although this is rare in input IR, it is an expected side-effect | |

of the over-widening pattern above. | |

This is beneficial also for integer-to-float conversions, if the | |

widened integer has more bits than the float, and if the unwidened | |

input doesn't. */ | |

static gimple * | |

vect_recog_cast_forwprop_pattern (vec_info *vinfo, | |

stmt_vec_info last_stmt_info, tree *type_out) | |

{ | |

/* Check for a cast, including an integer-to-float conversion. */ | |

gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt); | |

if (!last_stmt) | |

return NULL; | |

tree_code code = gimple_assign_rhs_code (last_stmt); | |

if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR) | |

return NULL; | |

/* Make sure that the rhs is a scalar with a natural bitsize. */ | |

tree lhs = gimple_assign_lhs (last_stmt); | |

if (!lhs) | |

return NULL; | |

tree lhs_type = TREE_TYPE (lhs); | |

scalar_mode lhs_mode; | |

if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type) | |

|| !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode)) | |

return NULL; | |

/* Check for a narrowing operation (from a vector point of view). */ | |

tree rhs = gimple_assign_rhs1 (last_stmt); | |

tree rhs_type = TREE_TYPE (rhs); | |

if (!INTEGRAL_TYPE_P (rhs_type) | |

|| VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type) | |

|| TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode)) | |

return NULL; | |

/* Try to find an unpromoted input. */ | |

vect_unpromoted_value unprom; | |

if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom) | |

|| TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type)) | |

return NULL; | |

/* If the bits above RHS_TYPE matter, make sure that they're the | |

same when extending from UNPROM as they are when extending from RHS. */ | |

if (!INTEGRAL_TYPE_P (lhs_type) | |

&& TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type)) | |

return NULL; | |

/* We can get the same result by casting UNPROM directly, to avoid | |

the unnecessary widening and narrowing. */ | |

vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt); | |

*type_out = get_vectype_for_scalar_type (vinfo, lhs_type); | |

if (!*type_out) | |

return NULL; | |

tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL); | |

gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op); | |

gimple_set_location (pattern_stmt, gimple_location (last_stmt)); | |

return pattern_stmt; | |

} | |

/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR | |

to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */ | |

static gimple * | |

vect_recog_widen_shift_pattern (vec_info *vinfo, | |

stmt_vec_info last_stmt_info, tree *type_out) | |

{ | |

return vect_recog_widen_op_pattern (vinfo, last_stmt_info, type_out, | |

LSHIFT_EXPR, WIDEN_LSHIFT_EXPR, true, | |

"vect_recog_widen_shift_pattern"); | |

} | |

/* Detect a rotate pattern wouldn't be otherwise vectorized: | |

type a_t, b_t, c_t; | |

S0 a_t = b_t r<< c_t; | |

Input/Output: | |

* STMT_VINFO: The stmt from which the pattern search begins, | |

i.e. the shift/rotate stmt. The original stmt (S0) is replaced | |

with a sequence: | |

S1 d_t = -c_t; | |

S2 e_t = d_t & (B - 1); | |

S3 f_t = b_t << c_t; | |

S4 g_t = b_t >> e_t; | |

S0 a_t = f_t | g_t; | |

where B is element bitsize of type. | |

Output: | |

* TYPE_OUT: The type of the output of this pattern. | |

* Return value: A new stmt that will be used to replace the rotate | |

S0 stmt. */ | |

static gimple * | |

vect_recog_rotate_pattern (vec_info *vinfo, | |

stmt_vec_info stmt_vinfo, tree *type_out) | |

{ | |

gimple *last_stmt = stmt_vinfo->stmt; | |

tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2; | |

gimple *pattern_stmt, *def_stmt; | |

enum tree_code rhs_code; | |

enum vect_def_type dt; | |

optab optab1, optab2; | |

edge ext_def = NULL; | |

bool bswap16_p = false; | |

if (is_gimple_assign (last_stmt)) | |

{ | |

rhs_code = gimple_assign_rhs_code (last_stmt); | |

switch (rhs_code) | |

{ | |

case LROTATE_EXPR: | |

case RROTATE_EXPR: | |

break; | |

default: | |

return NULL; | |

} | |

lhs = gimple_assign_lhs (last_stmt); | |

oprnd0 = gimple_assign_rhs1 (last_stmt); | |

type = TREE_TYPE (oprnd0); | |

oprnd1 = gimple_assign_rhs2 (last_stmt); | |

} | |

else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16)) | |

{ | |

/* __builtin_bswap16 (x) is another form of x r>> 8. | |

The vectorizer has bswap support, but only if the argument isn't | |

promoted. */ | |

lhs = gimple_call_lhs (last_stmt); | |

oprnd0 = gimple_call_arg (last_stmt, 0); | |

type = TREE_TYPE (oprnd0); | |

if (!lhs | |

|| TYPE_PRECISION (TREE_TYPE (lhs)) != 16 | |

|| TYPE_PRECISION (type) <= 16 | |

|| TREE_CODE (oprnd0) != SSA_NAME | |

|| BITS_PER_UNIT != 8 | |

|| !TYPE_UNSIGNED (TREE_TYPE (lhs))) | |

return NULL; | |

stmt_vec_info def_stmt_info; | |

if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt)) | |

return NULL; | |

if (dt != vect_internal_def) | |

return NULL; | |

if (gimple_assign_cast_p (def_stmt)) | |

{ | |

def = gimple_assign_rhs1 (def_stmt); | |

if (INTEGRAL_TYPE_P (TREE_TYPE (def)) | |

&& TYPE_PRECISION (TREE_TYPE (def)) == 16) | |

oprnd0 = def; | |

} | |

type = TREE_TYPE (lhs); | |

vectype = get_vectype_for_scalar_type (vinfo, type); | |

if (vectype == NULL_TREE) | |

return NULL; | |

if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype)) | |

{ | |

/* The encoding uses one stepped pattern for each byte in the | |

16-bit word. */ | |

vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3); | |

for (unsigned i = 0; i < 3; ++i) | |

for (unsigned j = 0; j < 2; ++j) | |

elts.quick_push ((i + 1) * 2 - j - 1); | |

vec_perm_indices indices (elts, 1, | |

TYPE_VECTOR_SUBPARTS (char_vectype)); | |

if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) | |

{ | |

/* vectorizable_bswap can handle the __builtin_bswap16 if we | |

undo the argument promotion. */ | |

if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0))) | |

{ | |

def = vect_recog_temp_ssa_var (type, NULL); | |

def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); | |

append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); | |

oprnd0 = def; | |

} | |

/* Pattern detected. */ | |

vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt); | |

*type_out = vectype; | |

/* Pattern supported. Create a stmt to be used to replace the | |

pattern, with the unpromoted argument. */ | |

var = vect_recog_temp_ssa_var (type, NULL); | |

pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt), | |

1, oprnd0); | |

gimple_call_set_lhs (pattern_stmt, var); | |

gimple_call_set_fntype (as_a <gcall *> (pattern_stmt), | |

gimple_call_fntype (last_stmt)); | |

return pattern_stmt; | |

} | |

} | |

oprnd1 = build_int_cst (integer_type_node, 8); | |

rhs_code = LROTATE_EXPR; | |

bswap16_p = true; | |

} | |

else | |

return NULL; | |

if (TREE_CODE (oprnd0) != SSA_NAME | |

|| TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type) | |

|| !INTEGRAL_TYPE_P (type) | |

|| !TYPE_UNSIGNED (type)) | |

return NULL; | |

stmt_vec_info def_stmt_info; | |

if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt)) | |

return NULL; | |

if (dt != vect_internal_def | |

&& dt != vect_constant_def | |

&& dt != vect_external_def) | |

return NULL; | |

vectype = get_vectype_for_scalar_type (vinfo, type); | |

if (vectype == NULL_TREE) | |

return NULL; | |

/* If vector/vector or vector/scalar rotate is supported by the target, | |

don't do anything here. */ | |

optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector); | |

if (optab1 | |

&& optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing) | |

{ | |

use_rotate: | |

if (bswap16_p) | |

{ | |

if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0))) | |

{ | |

def = vect_recog_temp_ssa_var (type, NULL); | |

def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); | |

append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); | |

oprnd0 = def; | |

} | |

/* Pattern detected. */ | |

vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt); | |

*type_out = vectype; | |

/* Pattern supported. Create a stmt to be used to replace the | |

pattern. */ | |

var = vect_recog_temp_ssa_var (type, NULL); | |

pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0, | |

oprnd1); | |

return pattern_stmt; | |

} | |

return NULL; | |

} | |

if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def) | |

{ | |

optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar); | |

if (optab2 | |

&& optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing) | |

goto use_rotate; | |

} | |

/* If vector/vector or vector/scalar shifts aren't supported by the target, | |

don't do anything here either. */ | |

optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector); | |

optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector); | |

if (!optab1 | |

|| optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing | |

|| !optab2 | |

|| optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing) | |

{ | |

if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def) | |

return NULL; | |

optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar); | |

optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar); | |

if (!optab1 | |

|| optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing | |

|| !optab2 | |

|| optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing) | |

return NULL; | |

} | |

*type_out = vectype; | |

if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0))) | |

{ | |

def = vect_recog_temp_ssa_var (type, NULL); | |

def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); | |

append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); | |

oprnd0 = def; | |

} | |

if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME) | |

ext_def = vect_get_external_def_edge (vinfo, oprnd1); | |

def = NULL_TREE; | |

scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type); | |

if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode) | |

def = oprnd1; | |

else if (def_stmt && gimple_assign_cast_p (def_stmt)) | |

{ | |

tree rhs1 = gimple_assign_rhs1 (def_stmt); | |

if (TYPE_MODE (TREE_TYPE (rhs1)) == mode | |

&& TYPE_PRECISION (TREE_TYPE (rhs1)) | |

== TYPE_PRECISION (type)) | |

def = rhs1; | |

} | |

if (def == NULL_TREE) | |

{ | |

def = vect_recog_temp_ssa_var (type, NULL); | |

def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1); | |

append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); | |

} | |

stype = TREE_TYPE (def); | |

if (TREE_CODE (def) == INTEGER_CST) | |

{ | |

if (!tree_fits_uhwi_p (def) | |

|| tree_to_uhwi (def) >= GET_MODE_PRECISION (mode) | |

|| integer_zerop (def)) | |

return NULL; | |

def2 = build_int_cst (stype, | |

GET_MODE_PRECISION (mode) - tree_to_uhwi (def)); | |

} | |

else | |

{ | |

tree vecstype = get_vectype_for_scalar_type (vinfo, stype); | |

if (vecstype == NULL_TREE) | |

return NULL; | |

def2 = vect_recog_temp_ssa_var (stype, NULL); | |

def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def); | |

if (ext_def) | |

{ | |

basic_block new_bb | |

= gsi_insert_on_edge_immediate (ext_def, def_stmt); | |

gcc_assert (!new_bb); | |

} | |

else | |

append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype); | |

def2 = vect_recog_temp_ssa_var (stype, NULL); | |

tree mask = build_int_cst (stype, GET_MODE_PRECISION (mode) - 1); | |

def_stmt = gimple_build_assign (def2, BIT_AND_EXPR, | |

gimple_assign_lhs (def_stmt), mask); | |

if (ext_def) | |

{ | |

basic_block new_bb | |

= gsi_insert_on_edge_immediate (ext_def, def_stmt); | |

gcc_assert (!new_bb); | |

} | |

else | |

append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype); | |

} | |

var1 = vect_recog_temp_ssa_var (type, NULL); | |

def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR | |

? LSHIFT_EXPR : RSHIFT_EXPR, | |

oprnd0, def); | |

append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); | |

var2 = vect_recog_temp_ssa_var (type, NULL); | |

def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR | |

? RSHIFT_EXPR : LSHIFT_EXPR, | |

oprnd0, def2); | |

append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); | |

/* Pattern detected. */ | |

vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt); | |

/* Pattern supported. Create a stmt to be used to replace the pattern. */ | |

var = vect_recog_temp_ssa_var (type, NULL); | |

pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2); | |

return pattern_stmt; | |

} | |

/* Detect a vector by vector shift pattern that wouldn't be otherwise | |

vectorized: | |

type a_t; | |

TYPE b_T, res_T; | |

S1 a_t = ; | |

S2 b_T = ; | |

S3 res_T = b_T op a_t; | |

where type 'TYPE' is a type with different size than 'type', | |

and op is <<, >> or rotate. | |

Also detect cases: | |

type a_t; | |

TYPE b_T, c_T, res_T; | |

S0 c_T = ; | |

S1 a_t = (type) c_T; | |

S2 b_T = ; | |

S3 res_T = b_T op a_t; | |

Input/Output: | |

* STMT_VINFO: The stmt from which the pattern search begins, | |

i.e. the shift/rotate stmt. The original stmt (S3) is replaced | |

with a shift/rotate which has same type on both operands, in the | |

second case just b_T op c_T, in the first case with added cast | |

from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ. | |

Output: | |

* TYPE_OUT: The type of the output of this pattern. | |

* Return value: A new stmt that will be used to replace the shift/rotate | |

S3 stmt. */ | |

static gimple * | |

vect_recog_vector_vector_shift_pattern (vec_info *vinfo, | |

stmt_vec_info stmt_vinfo, | |

tree *type_out) | |

{ | |

gimple *last_stmt = stmt_vinfo->stmt; | |

tree oprnd0, oprnd1, lhs, var; | |

gimple *pattern_stmt; | |

enum tree_code rhs_code; | |

if (!is_gimple_assign (last_stmt)) | |

return NULL; | |

rhs_code = gimple_assign_rhs_code (last_stmt); | |

switch (rhs_code) | |

{ | |

case LSHIFT_EXPR: | |

case RSHIFT_EXPR: | |

case LROTATE_EXPR: | |

case RROTATE_EXPR: | |

break; | |

default: | |

return NULL; | |

} | |

lhs = gimple_assign_lhs (last_stmt); | |

oprnd0 = gimple_assign_rhs1 (last_stmt); | |

oprnd1 = gimple_assign_rhs2 (last_stmt); | |

if (TREE_CODE (oprnd0) != SSA_NAME | |

|| TREE_CODE (oprnd1) != SSA_NAME | |

|| TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1)) | |

|| !type_has_mode_precision_p (TREE_TYPE (oprnd1)) | |

|| TYPE_PRECISION (TREE_TYPE (lhs)) | |

!= TYPE_PRECISION (TREE_TYPE (oprnd0))) | |

return NULL; | |

stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1); | |

if (!def_vinfo) | |

return NULL; | |

*type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0)); | |

if (*type_out == NULL_TREE) | |

return NULL; | |

tree def = NULL_TREE; | |

gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt); | |

if (def_stmt && gimple_assign_cast_p (def_stmt)) | |

{ | |

tree rhs1 = gimple_assign_rhs1 (def_stmt); | |

if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0)) | |

&& TYPE_PRECISION (TREE_TYPE (rhs1)) | |

== TYPE_PRECISION (TREE_TYPE (oprnd0))) | |

{ | |

if (TYPE_PRECISION (TREE_TYPE (oprnd1)) | |

>= TYPE_PRECISION (TREE_TYPE (rhs1))) | |

def = rhs1; | |

else | |

{ | |

tree mask | |

= build_low_bits_mask (TREE_TYPE (rhs1), | |

TYPE_PRECISION (TREE_TYPE (oprnd1))); | |

def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); | |

def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask); | |

tree vecstype = get_vectype_for_scalar_type (vinfo, | |

TREE_TYPE (rhs1)); | |

append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt, vecstype); | |

} | |

} | |

} | |

if (def == NULL_TREE) | |

{ | |

def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); | |

def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1); | |

append_pattern_def_seq (vinfo, stmt_vinfo, def_stmt); | |

} | |

/* Pattern detected. */ | |

vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt); | |

/* Pattern supported. Create a stmt to be used to replace the pattern. */ | |

var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL); | |

pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def); | |

return pattern_stmt; | |

} | |

/* Return true iff the target has a vector optab implementing the operation | |

CODE on type VECTYPE. */ | |

static bool | |

target_has_vecop_for_code (tree_code code, tree vectype) | |

{ | |

optab voptab = optab_for_tree_code (code, vectype, optab_vector); | |

return voptab | |

&& optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing; | |

} | |

/* Verify that the target has optabs of VECTYPE to perform all the steps | |

needed by the multiplication-by-immediate synthesis algorithm described by | |

ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is | |

present. Return true iff the target supports all the steps. */ | |

static bool | |

target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var, | |

tree vectype, bool synth_shift_p) | |

{ | |

if (alg->op[0] != alg_zero && alg->op[0] != alg_m) | |

return false; | |

bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype); | |

bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype); | |

if (var == negate_variant | |

&& !target_has_vecop_for_code (NEGATE_EXPR, vectype)) | |

return false; | |

/* If we must synthesize shifts with additions make sure that vector | |

addition is available. */ | |

if ((var == add_variant || synth_shift_p) && !supports_vplus) | |

return false; | |

for (int i = 1; i < alg->ops; i++) | |

{ | |

switch (alg->op[i]) | |

{ | |

case alg_shift: | |

break;< |