blob: b2f2f6417b39f1158260c02ff0ad01c550b66331 [file] [log] [blame]
/* Subroutines used for code generation on IBM S/390 and zSeries
Copyright (C) 1999-2021 Free Software Foundation, Inc.
Contributed by Hartmut Penner (hpenner@de.ibm.com) and
Ulrich Weigand (uweigand@de.ibm.com) and
Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "target-globals.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "diagnostic.h"
#include "alias.h"
#include "fold-const.h"
#include "print-tree.h"
#include "stor-layout.h"
#include "varasm.h"
#include "calls.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "dojump.h"
#include "explow.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "debug.h"
#include "langhooks.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "intl.h"
#include "tm-constrs.h"
#include "tree-vrp.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "sched-int.h"
/* This file should be included last. */
#include "target-def.h"
/* Forward declaration so the function can be referenced (e.g. from
macros) before its definition. */
static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
/* Remember the last target of s390_set_current_function. */
static GTY(()) tree s390_previous_fndecl;
/* Define the specific costs for a given cpu. Every member holds the
cost assigned to one instruction or operation. The per-CPU tables
following this definition use positional initializers, so the member
order here must stay in sync with every one of those tables. */
struct processor_costs
{
/* multiplication */
const int m; /* cost of an M instruction. */
const int mghi; /* cost of an MGHI instruction. */
const int mh; /* cost of an MH instruction. */
const int mhi; /* cost of an MHI instruction. */
const int ml; /* cost of an ML instruction. */
const int mr; /* cost of an MR instruction. */
const int ms; /* cost of an MS instruction. */
const int msg; /* cost of an MSG instruction. */
const int msgf; /* cost of an MSGF instruction. */
const int msgfr; /* cost of an MSGFR instruction. */
const int msgr; /* cost of an MSGR instruction. */
const int msr; /* cost of an MSR instruction. */
const int mult_df; /* cost of multiplication in DFmode. */
const int mxbr; /* cost of an MXBR instruction. */
/* square root */
const int sqxbr; /* cost of square root in TFmode. */
const int sqdbr; /* cost of square root in DFmode. */
const int sqebr; /* cost of square root in SFmode. */
/* multiply and add */
const int madbr; /* cost of multiply and add in DFmode. */
const int maebr; /* cost of multiply and add in SFmode. */
/* division */
const int dxbr; /* cost of a DXBR instruction. */
const int ddbr; /* cost of a DDBR instruction. */
const int debr; /* cost of a DEBR instruction. */
const int dlgr; /* cost of a DLGR instruction. */
const int dlr; /* cost of a DLR instruction. */
const int dr; /* cost of a DR instruction. */
const int dsgfr; /* cost of a DSGFR instruction. */
const int dsgr; /* cost of a DSGR instruction. */
};
/* Shortcut for the cost table s390_cost_pointer refers to, viewed as
a read-only struct processor_costs. */
#define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
/* Instruction costs referenced by the "z900" entry of
processor_table. The values are positional: each line initializes
the struct processor_costs member named in its trailing comment. */
static const
struct processor_costs z900_cost =
{
COSTS_N_INSNS (5), /* M */
COSTS_N_INSNS (10), /* MGHI */
COSTS_N_INSNS (5), /* MH */
COSTS_N_INSNS (4), /* MHI */
COSTS_N_INSNS (5), /* ML */
COSTS_N_INSNS (5), /* MR */
COSTS_N_INSNS (4), /* MS */
COSTS_N_INSNS (15), /* MSG */
COSTS_N_INSNS (7), /* MSGF */
COSTS_N_INSNS (7), /* MSGFR */
COSTS_N_INSNS (10), /* MSGR */
COSTS_N_INSNS (4), /* MSR */
COSTS_N_INSNS (7), /* multiplication in DFmode */
COSTS_N_INSNS (13), /* MXBR */
COSTS_N_INSNS (136), /* SQXBR */
COSTS_N_INSNS (44), /* SQDBR */
COSTS_N_INSNS (35), /* SQEBR */
COSTS_N_INSNS (18), /* MADBR */
COSTS_N_INSNS (13), /* MAEBR */
COSTS_N_INSNS (134), /* DXBR */
COSTS_N_INSNS (30), /* DDBR */
COSTS_N_INSNS (27), /* DEBR */
COSTS_N_INSNS (220), /* DLGR */
COSTS_N_INSNS (34), /* DLR */
COSTS_N_INSNS (34), /* DR */
COSTS_N_INSNS (32), /* DSGFR */
COSTS_N_INSNS (32), /* DSGR */
};
/* Instruction costs referenced by the "z990" entry of
processor_table. The values are positional: each line initializes
the struct processor_costs member named in its trailing comment. */
static const
struct processor_costs z990_cost =
{
COSTS_N_INSNS (4), /* M */
COSTS_N_INSNS (2), /* MGHI */
COSTS_N_INSNS (2), /* MH */
COSTS_N_INSNS (2), /* MHI */
COSTS_N_INSNS (4), /* ML */
COSTS_N_INSNS (4), /* MR */
COSTS_N_INSNS (5), /* MS */
COSTS_N_INSNS (6), /* MSG */
COSTS_N_INSNS (4), /* MSGF */
COSTS_N_INSNS (4), /* MSGFR */
COSTS_N_INSNS (4), /* MSGR */
COSTS_N_INSNS (4), /* MSR */
COSTS_N_INSNS (1), /* multiplication in DFmode */
COSTS_N_INSNS (28), /* MXBR */
COSTS_N_INSNS (130), /* SQXBR */
COSTS_N_INSNS (66), /* SQDBR */
COSTS_N_INSNS (38), /* SQEBR */
COSTS_N_INSNS (1), /* MADBR */
COSTS_N_INSNS (1), /* MAEBR */
COSTS_N_INSNS (60), /* DXBR */
COSTS_N_INSNS (40), /* DDBR */
COSTS_N_INSNS (26), /* DEBR */
COSTS_N_INSNS (176), /* DLGR */
COSTS_N_INSNS (31), /* DLR */
COSTS_N_INSNS (31), /* DR */
COSTS_N_INSNS (31), /* DSGFR */
COSTS_N_INSNS (31), /* DSGR */
};
/* Instruction costs referenced by the "z9-109" and "z9-ec" entries
of processor_table. The values are positional: each line initializes
the struct processor_costs member named in its trailing comment. */
static const
struct processor_costs z9_109_cost =
{
COSTS_N_INSNS (4), /* M */
COSTS_N_INSNS (2), /* MGHI */
COSTS_N_INSNS (2), /* MH */
COSTS_N_INSNS (2), /* MHI */
COSTS_N_INSNS (4), /* ML */
COSTS_N_INSNS (4), /* MR */
COSTS_N_INSNS (5), /* MS */
COSTS_N_INSNS (6), /* MSG */
COSTS_N_INSNS (4), /* MSGF */
COSTS_N_INSNS (4), /* MSGFR */
COSTS_N_INSNS (4), /* MSGR */
COSTS_N_INSNS (4), /* MSR */
COSTS_N_INSNS (1), /* multiplication in DFmode */
COSTS_N_INSNS (28), /* MXBR */
COSTS_N_INSNS (130), /* SQXBR */
COSTS_N_INSNS (66), /* SQDBR */
COSTS_N_INSNS (38), /* SQEBR */
COSTS_N_INSNS (1), /* MADBR */
COSTS_N_INSNS (1), /* MAEBR */
COSTS_N_INSNS (60), /* DXBR */
COSTS_N_INSNS (40), /* DDBR */
COSTS_N_INSNS (26), /* DEBR */
COSTS_N_INSNS (30), /* DLGR */
COSTS_N_INSNS (23), /* DLR */
COSTS_N_INSNS (23), /* DR */
COSTS_N_INSNS (24), /* DSGFR */
COSTS_N_INSNS (24), /* DSGR */
};
/* Instruction costs referenced by the "z10" entry of
processor_table. The values are positional: each line initializes
the struct processor_costs member named in its trailing comment. */
static const
struct processor_costs z10_cost =
{
COSTS_N_INSNS (10), /* M */
COSTS_N_INSNS (10), /* MGHI */
COSTS_N_INSNS (10), /* MH */
COSTS_N_INSNS (10), /* MHI */
COSTS_N_INSNS (10), /* ML */
COSTS_N_INSNS (10), /* MR */
COSTS_N_INSNS (10), /* MS */
COSTS_N_INSNS (10), /* MSG */
COSTS_N_INSNS (10), /* MSGF */
COSTS_N_INSNS (10), /* MSGFR */
COSTS_N_INSNS (10), /* MSGR */
COSTS_N_INSNS (10), /* MSR */
COSTS_N_INSNS (1) , /* multiplication in DFmode */
COSTS_N_INSNS (50), /* MXBR */
COSTS_N_INSNS (120), /* SQXBR */
COSTS_N_INSNS (52), /* SQDBR */
COSTS_N_INSNS (38), /* SQEBR */
COSTS_N_INSNS (1), /* MADBR */
COSTS_N_INSNS (1), /* MAEBR */
COSTS_N_INSNS (111), /* DXBR */
COSTS_N_INSNS (39), /* DDBR */
COSTS_N_INSNS (32), /* DEBR */
COSTS_N_INSNS (160), /* DLGR */
COSTS_N_INSNS (71), /* DLR */
COSTS_N_INSNS (71), /* DR */
COSTS_N_INSNS (71), /* DSGFR */
COSTS_N_INSNS (71), /* DSGR */
};
/* Instruction costs referenced by the "z196" entry of
processor_table. The values are positional: each line initializes
the struct processor_costs member named in its trailing comment. */
static const
struct processor_costs z196_cost =
{
COSTS_N_INSNS (7), /* M */
COSTS_N_INSNS (5), /* MGHI */
COSTS_N_INSNS (5), /* MH */
COSTS_N_INSNS (5), /* MHI */
COSTS_N_INSNS (7), /* ML */
COSTS_N_INSNS (7), /* MR */
COSTS_N_INSNS (6), /* MS */
COSTS_N_INSNS (8), /* MSG */
COSTS_N_INSNS (6), /* MSGF */
COSTS_N_INSNS (6), /* MSGFR */
COSTS_N_INSNS (8), /* MSGR */
COSTS_N_INSNS (6), /* MSR */
COSTS_N_INSNS (1) , /* multiplication in DFmode */
COSTS_N_INSNS (40), /* MXBR B+40 */
COSTS_N_INSNS (100), /* SQXBR B+100 */
COSTS_N_INSNS (42), /* SQDBR B+42 */
COSTS_N_INSNS (28), /* SQEBR B+28 */
COSTS_N_INSNS (1), /* MADBR B */
COSTS_N_INSNS (1), /* MAEBR B */
COSTS_N_INSNS (101), /* DXBR B+101 */
COSTS_N_INSNS (29), /* DDBR */
COSTS_N_INSNS (22), /* DEBR */
COSTS_N_INSNS (160), /* DLGR cracked */
COSTS_N_INSNS (160), /* DLR cracked */
COSTS_N_INSNS (160), /* DR expanded */
COSTS_N_INSNS (160), /* DSGFR cracked */
COSTS_N_INSNS (160), /* DSGR cracked */
};
/* Instruction costs referenced by the "zEC12" entry of
processor_table and reused by the "z13", "z14", "z15" and "arch14"
entries. The values are positional: each line initializes the
struct processor_costs member named in its trailing comment. */
static const
struct processor_costs zEC12_cost =
{
COSTS_N_INSNS (7), /* M */
COSTS_N_INSNS (5), /* MGHI */
COSTS_N_INSNS (5), /* MH */
COSTS_N_INSNS (5), /* MHI */
COSTS_N_INSNS (7), /* ML */
COSTS_N_INSNS (7), /* MR */
COSTS_N_INSNS (6), /* MS */
COSTS_N_INSNS (8), /* MSG */
COSTS_N_INSNS (6), /* MSGF */
COSTS_N_INSNS (6), /* MSGFR */
COSTS_N_INSNS (8), /* MSGR */
COSTS_N_INSNS (6), /* MSR */
COSTS_N_INSNS (1) , /* multiplication in DFmode */
COSTS_N_INSNS (40), /* MXBR B+40 */
COSTS_N_INSNS (100), /* SQXBR B+100 */
COSTS_N_INSNS (42), /* SQDBR B+42 */
COSTS_N_INSNS (28), /* SQEBR B+28 */
COSTS_N_INSNS (1), /* MADBR B */
COSTS_N_INSNS (1), /* MAEBR B */
COSTS_N_INSNS (131), /* DXBR B+131 */
COSTS_N_INSNS (29), /* DDBR */
COSTS_N_INSNS (22), /* DEBR */
COSTS_N_INSNS (160), /* DLGR cracked */
COSTS_N_INSNS (160), /* DLR cracked */
COSTS_N_INSNS (160), /* DR expanded */
COSTS_N_INSNS (160), /* DSGFR cracked */
COSTS_N_INSNS (160), /* DSGR cracked */
};
/* Table of supported processors. Each entry lists two name strings,
the PROCESSOR_* enumerator, the cost table to use and a small integer
that increases with each processor generation.
NOTE(review): struct s390_processor is declared elsewhere; the two
strings look like the CPU name and the architecture-level name, and
the integer like the architecture level -- confirm against the struct
declaration. Processors newer than zEC12 reuse zEC12_cost. The
final "native" entry carries no cost table (NULL). */
const struct s390_processor processor_table[] =
{
{ "z900", "z900", PROCESSOR_2064_Z900, &z900_cost, 5 },
{ "z990", "z990", PROCESSOR_2084_Z990, &z990_cost, 6 },
{ "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7 },
{ "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost, 7 },
{ "z10", "z10", PROCESSOR_2097_Z10, &z10_cost, 8 },
{ "z196", "z196", PROCESSOR_2817_Z196, &z196_cost, 9 },
{ "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost, 10 },
{ "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 },
{ "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 },
{ "z15", "arch13", PROCESSOR_8561_Z15, &zEC12_cost, 13 },
{ "arch14", "arch14", PROCESSOR_ARCH14, &zEC12_cost, 14 },
{ "native", "", PROCESSOR_NATIVE, NULL, 0 }
};
/* Defined elsewhere in the compiler. */
extern int reload_completed;
/* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
static rtx_insn *last_scheduled_insn;
/* Dimensions of the per-unit scheduling state below. */
#define NUM_SIDES 2
#define MAX_SCHED_UNITS 4
/* One counter per execution unit and side.
NOTE(review): presumably the number of insns scheduled since the
unit was last used -- confirm against the scheduler hooks. */
static int last_scheduled_unit_distance[MAX_SCHED_UNITS][NUM_SIDES];
/* Estimate of number of cycles a long-running insn occupies an
execution unit. */
static int fxd_longrunning[NUM_SIDES];
static int fpd_longrunning[NUM_SIDES];
/* The maximum score added for an instruction whose unit hasn't been
in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
give instruction mix scheduling more priority over instruction
grouping. */
#define MAX_SCHED_MIX_SCORE 2
/* The maximum distance up to which individual scores will be
calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
Increase this with the OOO window size of the machine. */
#define MAX_SCHED_MIX_DISTANCE 70
/* The max number of insns of backend generated memset/memcpy/memcmp
loops. This value is used in the unroll adjust hook to detect such
loops. Current max is 9 coming from the memcmp loop. */
#define BLOCK_MEM_OPS_LOOP_INSNS 9
/* Structure used to hold the components of a S/390 memory
address. A legitimate address on S/390 is of the general
form
base + index + displacement
where any of the components is optional.
base and index are registers of the class ADDR_REGS,
displacement is an unsigned 12-bit immediate constant. */
struct s390_address
{
rtx base; /* The base component. */
rtx indx; /* The index component. */
rtx disp; /* The displacement component. */
/* NOTE(review): the meaning of the two flags is not visible in this
part of the file; presumably they record whether the address is
known to be a pointer and whether it refers to the literal pool --
confirm against the address decomposition code. */
bool pointer;
bool literal_pool;
};
/* Few accessor macros for struct cfun->machine->s390_frame_layout. */
#define cfun_frame_layout (cfun->machine->frame_layout)
/* Nonzero if the frame layout records high FPRs to be saved. */
#define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
/* Nonzero if any of the low FPR bitmap bits is set: the low four bits
   for TARGET_64BIT, the low two bits otherwise.  */
#define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
				 ? cfun_frame_layout.fpr_bitmap & 0x0f \
				 : cfun_frame_layout.fpr_bitmap & 0x03))
/* Size in bytes of the GPR save slots first_save_gpr_slot through
   last_save_gpr_slot (inclusive).  */
#define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
  cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
/* Set resp. test the fpr_bitmap bit belonging to FPR number REGNO.
   REGNO is parenthesized in the expansion so that arguments built from
   operators binding weaker than '-' (e.g. a conditional expression)
   are converted to a bit index as a whole.  */
#define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
  (1 << ((REGNO) - FPR0_REGNUM)))
#define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
  (1 << ((REGNO) - FPR0_REGNUM))))
/* The save slot recorded for GPR number REGNO.  The expansion is an
   lvalue, so it can be assigned to as well.  */
#define cfun_gpr_save_slot(REGNO) \
  cfun->machine->frame_layout.gpr_save_slots[REGNO]
/* Number of GPRs and FPRs used for argument passing. */
#define GP_ARG_NUM_REG 5
#define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
/* Number of vector registers used for argument passing. */
#define VEC_ARG_NUM_REG 8
/* A couple of shortcuts. Each one checks X against a single
constraint by passing the constraint letter and string to
CONST_OK_FOR_CONSTRAINT_P. */
#define CONST_OK_FOR_J(x) \
CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
#define CONST_OK_FOR_K(x) \
CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
#define CONST_OK_FOR_Os(x) \
CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
#define CONST_OK_FOR_Op(x) \
CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
#define CONST_OK_FOR_On(x) \
CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
/* True if a value of MODE either fits into the single register REGNO
or REGNO is an even register number. */
#define REGNO_PAIR_OK(REGNO, MODE) \
(s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
/* That's the read ahead of the dynamic branch prediction unit in
bytes: 384 on a z10 (or higher) CPU, 2048 otherwise. */
#define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
/* Masks per jump target register indicating which thunks need to be
generated. */
static GTY(()) int indirect_branch_prez10thunk_mask = 0;
static GTY(()) int indirect_branch_z10thunk_mask = 0;
/* Number of enumerators in s390_indirect_branch_option; also the
length of the three tables below, which are laid out in enumerator
order. */
#define INDIRECT_BRANCH_NUM_OPTIONS 4
enum s390_indirect_branch_option
{
s390_opt_indirect_branch_jump = 0,
s390_opt_indirect_branch_call,
s390_opt_function_return_reg,
s390_opt_function_return_mem
};
/* Per-option counter.
NOTE(review): presumably the running number appended to the label
prefix below when emitting table labels -- confirm at the use site. */
static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
/* Per-option label prefix. */
const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
{ "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
/* Per-option name starting with ".s390_"; NOTE(review): these look
like section names -- confirm at the use site. */
const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \
{ ".s390_indirect_jump", ".s390_indirect_call",
".s390_return_reg", ".s390_return_mem" };
/* Return true if the save slot recorded for RETURN_REGNUM in the
   current function's frame layout is SAVE_SLOT_STACK.  */
bool
s390_return_addr_from_memory ()
{
  const bool saved_on_stack_p
    = (cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK);

  return saved_on_stack_p;
}
/* Return nonzero if it's OK to use fused multiply-add for MODE.
   Only TFmode with TARGET_VXE is subject to a switch
   (flag_vx_long_double_fma); everything else is always allowed.  */
bool
s390_fma_allowed_p (machine_mode mode)
{
  if (!(TARGET_VXE && mode == TFmode))
    return true;

  return flag_vx_long_double_fma;
}
/* Indicate which ABI has been used for passing vector args.
   0 - no vector type arguments have been passed where the ABI is relevant
   1 - the old ABI has been used
   2 - a vector type argument has been passed either in a vector register
       or on the stack by value  */
static int s390_vector_abi = 0;

/* Set the vector ABI marker if TYPE is subject to the vector ABI
   switch.  The vector ABI affects only vector data types.  There are
   two aspects of the vector ABI relevant here:

   1. vectors >= 16 bytes have an alignment of 8 bytes with the new
   ABI and natural alignment with the old.

   2. vector <= 16 bytes are passed in VRs or by value on the stack
   with the new ABI but by reference on the stack with the old.

   If ARG_P is true TYPE is used for a function argument or return
   value.  The ABI marker then is set for all vector data types.  If
   ARG_P is false only type 1 vectors are being checked.

   IN_STRUCT_P is true if TYPE was reached through a field of a record
   or union.  Pointer, array, function and aggregate types are walked
   recursively; every type is inspected at most once and the walk stops
   as soon as the marker has been set.  */
static void
s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
{
  static hash_set<const_tree> visited_types_hash;

  /* Nothing left to find out once the marker is set, and nothing to
     learn from missing or erroneous types.  */
  if (s390_vector_abi
      || type == NULL_TREE
      || TREE_CODE (type) == ERROR_MARK)
    return;

  /* Look at each type only once; this also terminates the recursion
     for self-referential types.  */
  if (visited_types_hash.contains (type))
    return;
  visited_types_hash.add (type);

  if (VECTOR_TYPE_P (type))
    {
      int nbytes = int_size_in_bytes (type);
      bool relevant_p;

      if (arg_p)
	/* In arguments vector types > 16 are passed as before (GCC
	   never enforced the bigger alignment for arguments which was
	   required by the old vector ABI).  However, it might still be
	   ABI relevant due to the changed alignment if it is a struct
	   member.  */
	relevant_p = nbytes <= 16 || in_struct_p;
      else
	/* Outside arguments only the alignment is changing and this
	   only happens for vector types >= 16 bytes.  */
	relevant_p = nbytes >= 16;

      if (relevant_p)
	s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
      return;
    }

  if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
    {
      /* ARRAY_TYPE: Since with neither of the ABIs we have more than
	 natural alignment there will never be ABI dependent padding
	 in an array type.  That's why we do not set in_struct_p to
	 true here.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
      return;
    }

  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      /* The return type first, then every parameter type.  */
      s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);

      tree link = TYPE_ARG_TYPES (type);
      while (link)
	{
	  s390_check_type_for_vector_abi (TREE_VALUE (link), true, false);
	  link = TREE_CHAIN (link);
	}
      return;
    }

  if (RECORD_OR_UNION_TYPE_P (type))
    for (tree member = TYPE_FIELDS (type); member;
	 member = DECL_CHAIN (member))
      if (TREE_CODE (member) == FIELD_DECL)
	s390_check_type_for_vector_abi (TREE_TYPE (member), arg_p, true);
}
/* System z builtins. */
#include "s390-builtins.h"
/* The tables below are generated by including s390-builtins.def
several times, each time with B_DEF, OB_DEF and OB_DEF_VAR redefined
so that exactly one of them expands to a single column of its
definition and the other two expand to nothing. Each table has one
extra trailing element. */
/* BFLAGS of every B_DEF entry, in definition order. */
const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
{
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
0
};
/* OPFLAGS of every B_DEF entry, in definition order. */
const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
{
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
0
};
/* BFLAGS of every OB_DEF (overloaded builtin) entry, in definition
order. */
const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
{
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
0
};
/* FLAGS of every OB_DEF_VAR (overloaded builtin variant) entry, in
definition order. */
const unsigned int
bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
{
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
#include "s390-builtins.def"
0
};
/* OPFLAGS of every OB_DEF_VAR entry, in definition order. */
const unsigned int
opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
{
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(...)
#define OB_DEF(...)
#define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
#include "s390-builtins.def"
0
};
/* Type nodes, function type nodes and function decls of the builtins;
filled in by s390_init_builtins. */
tree s390_builtin_types[BT_MAX];
tree s390_builtin_fn_types[BT_FN_MAX];
tree s390_builtin_decls[S390_BUILTIN_MAX +
S390_OVERLOADED_BUILTIN_MAX +
S390_OVERLOADED_BUILTIN_VAR_MAX];
/* Insn code (CODE_FOR_<PATTERN>) of every B_DEF entry, in definition
order, terminated by CODE_FOR_nothing. s390_expand_builtin indexes
this table with the builtin's function code. */
static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
#undef B_DEF
#undef OB_DEF
#undef OB_DEF_VAR
#define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
#define OB_DEF(...)
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
CODE_FOR_nothing
};
/* Create the type nodes, function type nodes and function decls for
all builtins listed in s390-builtin-types.def and s390-builtins.def.
Every table slot is only filled while it is still NULL, so slots
that already hold a value are left untouched. */
static void
s390_init_builtins (void)
{
/* These definitions are being used in s390-builtins.def. */
tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
NULL, NULL);
tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
tree c_uint64_type_node;
/* The uint64_type_node from tree.c is not compatible to the C99
uint64_t data type. What we want is c_uint64_type_node from
c-common.c. But since backend code is not supposed to interface
with the frontend we recreate it here. */
if (TARGET_64BIT)
c_uint64_type_node = long_unsigned_type_node;
else
c_uint64_type_node = long_long_unsigned_type_node;
/* Plain type: NODE itself, or a const-qualified variant of it if
CONST_P is set. */
#undef DEF_TYPE
#define DEF_TYPE(INDEX, NODE, CONST_P) \
if (s390_builtin_types[INDEX] == NULL) \
s390_builtin_types[INDEX] = (!CONST_P) ? \
(NODE) : build_type_variant ((NODE), 1, 0);
/* Pointer to the type stored at INDEX_BASE. */
#undef DEF_POINTER_TYPE
#define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
if (s390_builtin_types[INDEX] == NULL) \
s390_builtin_types[INDEX] = \
build_pointer_type (s390_builtin_types[INDEX_BASE]);
/* Distinct copy of the type stored at INDEX_BASE. */
#undef DEF_DISTINCT_TYPE
#define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
if (s390_builtin_types[INDEX] == NULL) \
s390_builtin_types[INDEX] = \
build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
/* Vector of ELEMENTS elements of the type stored at INDEX_BASE. */
#undef DEF_VECTOR_TYPE
#define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
if (s390_builtin_types[INDEX] == NULL) \
s390_builtin_types[INDEX] = \
build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
/* Same as DEF_VECTOR_TYPE but building an opaque vector type. */
#undef DEF_OPAQUE_VECTOR_TYPE
#define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
if (s390_builtin_types[INDEX] == NULL) \
s390_builtin_types[INDEX] = \
build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
/* Function type built from the given list of type nodes; the macro
appends the terminating NULL_TREE. */
#undef DEF_FN_TYPE
#define DEF_FN_TYPE(INDEX, args...) \
if (s390_builtin_fn_types[INDEX] == NULL) \
s390_builtin_fn_types[INDEX] = \
build_function_type_list (args, NULL_TREE);
/* DEF_OV_TYPE entries are not needed here. */
#undef DEF_OV_TYPE
#define DEF_OV_TYPE(...)
#include "s390-builtin-types.def"
/* Register "__builtin_<NAME>" for every B_DEF entry with function
code S390_BUILTIN_<NAME> and the attribute list ATTRS. */
#undef B_DEF
#define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
s390_builtin_decls[S390_BUILTIN_##NAME] = \
add_builtin_function ("__builtin_" #NAME, \
s390_builtin_fn_types[FNTYPE], \
S390_BUILTIN_##NAME, \
BUILT_IN_MD, \
NULL, \
ATTRS);
/* Register "__builtin_<NAME>" for every OB_DEF (overloaded builtin)
entry. Its function code and its slot in s390_builtin_decls are
offset by S390_BUILTIN_MAX; no attributes are attached. */
#undef OB_DEF
#define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
== NULL) \
s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
add_builtin_function ("__builtin_" #NAME, \
s390_builtin_fn_types[FNTYPE], \
S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
BUILT_IN_MD, \
NULL, \
0);
/* No decls are created for the OB_DEF_VAR entries here. */
#undef OB_DEF_VAR
#define OB_DEF_VAR(...)
#include "s390-builtins.def"
}
/* Return true if ARG is appropriate as argument number ARGNUM of
   builtin DECL.  The operand flags from s390-builtins.def have to be
   passed as OP_FLAGS.

   For unsigned immediate flags ARG must fit into the bit width
   belonging to the flag and, for flags with a bit mask, must not have
   bits set outside of that mask.  For signed immediate flags ARG must
   lie within the two's complement range of the bit width.  An error
   naming ARGNUM and DECL is emitted before returning false.  */
bool
s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
{
  if (O_UIMM_P (op_flags))
    {
      /* Both tables are indexed by the distance of the flag from
	 O_U1.  */
      unsigned HOST_WIDE_INT widths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32, 4 };
      unsigned HOST_WIDE_INT masks[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 12 };
      unsigned HOST_WIDE_INT width = widths[op_flags - O_U1];
      unsigned HOST_WIDE_INT mask = masks[op_flags - O_U1];

      gcc_assert (ARRAY_SIZE (widths) == (O_M12 - O_U1 + 1));
      gcc_assert (ARRAY_SIZE (masks) == (O_M12 - O_U1 + 1));

      const unsigned HOST_WIDE_INT max_val = (HOST_WIDE_INT_1U << width) - 1;
      const bool ok_p = (tree_fits_uhwi_p (arg)
			 && tree_to_uhwi (arg) <= max_val
			 && !(mask && (tree_to_uhwi (arg) & ~mask)));

      if (!ok_p)
	{
	  if (!mask)
	    error ("constant argument %d for builtin %qF is out of range (0..%wu)",
		   argnum, decl, max_val);
	  else
	    {
	      /* List every value which has no bits outside of MASK.  */
	      gcc_assert (mask < 16);
	      char allowed[120] = "";

	      for (unsigned HOST_WIDE_INT v = 0; v <= mask; v++)
		if ((v & ~mask) == 0)
		  {
		    char num[5];
		    int len = snprintf (num, 5, HOST_WIDE_INT_PRINT_UNSIGNED,
					v & mask);
		    gcc_assert (len < 5);
		    strcat (allowed, num);
		    /* MASK itself is the last value in the list.  */
		    if (v < mask)
		      strcat (allowed, ", ");
		  }

	      error ("constant argument %d for builtin %qF is invalid (%s)",
		     argnum, decl, allowed);
	    }
	  return false;
	}
    }

  if (O_SIMM_P (op_flags))
    {
      /* Indexed by the distance of the flag from O_S2.  */
      int widths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
      int width = widths[op_flags - O_S2];
      const HOST_WIDE_INT lowest = -(HOST_WIDE_INT_1 << (width - 1));
      const HOST_WIDE_INT highest = (HOST_WIDE_INT_1 << (width - 1)) - 1;

      if (!tree_fits_shwi_p (arg)
	  || tree_to_shwi (arg) < lowest
	  || tree_to_shwi (arg) > highest)
	{
	  error ("constant argument %d for builtin %qF is out of range "
		 "(%wd..%wd)", argnum, decl, lowest, highest);
	  return false;
	}
    }

  return true;
}
/* Expand an expression EXP that calls a built-in function,
with result going to TARGET if that's convenient
(and in mode MODE if that's convenient).
SUBTARGET may be used as the target for computing one of EXP's operands.
IGNORE is nonzero if the value is to be ignored.
SUBTARGET, MODE and IGNORE are not used by this implementation.
Returns the result rtx for builtins returning a value and const0_rtx
for void builtins. const0_rtx is also returned after an error has
been reported. NULL_RTX is returned if an argument is erroneous,
if there are more than MAX_ARGS arguments, or if the insn generator
did not produce a pattern. */
static rtx
s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
machine_mode mode ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
#define MAX_ARGS 6
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
enum insn_code icode;
rtx op[MAX_ARGS], pat;
int arity;
bool nonvoid;
tree arg;
call_expr_arg_iterator iter;
/* Operand flags of all arguments; O_SHIFT bits are consumed per
argument in the loop below. */
unsigned int all_op_flags = opflags_for_builtin (fcode);
machine_mode last_vec_mode = VOIDmode;
if (TARGET_DEBUG_ARG)
{
fprintf (stderr,
"s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
(int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
bflags_for_builtin (fcode));
}
/* Reject builtins whose required facility is not enabled. */
if (S390_USE_TARGET_ATTRIBUTE)
{
unsigned int bflags;
bflags = bflags_for_builtin (fcode);
if ((bflags & B_HTM) && !TARGET_HTM)
{
error ("builtin %qF is not supported without %<-mhtm%> "
"(default with %<-march=zEC12%> and higher).", fndecl);
return const0_rtx;
}
if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
{
error ("builtin %qF requires %<-mvx%> "
"(default with %<-march=z13%> and higher).", fndecl);
return const0_rtx;
}
if ((bflags & B_VXE) && !TARGET_VXE)
{
error ("Builtin %qF requires z14 or higher.", fndecl);
return const0_rtx;
}
if ((bflags & B_VXE2) && !TARGET_VXE2)
{
error ("Builtin %qF requires z15 or higher.", fndecl);
return const0_rtx;
}
if ((bflags & B_NNPA) && !TARGET_NNPA)
{
error ("Builtin %qF requires arch14 or higher.", fndecl);
return const0_rtx;
}
}
/* Only function codes below S390_OVERLOADED_BUILTIN_OFFSET can be
expanded; they map to an insn code via code_for_builtin. Codes of
overloaded builtins are reported as unresolved and codes of
overloaded builtin variants are not expected here at all. */
if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
&& fcode < S390_ALL_BUILTIN_MAX)
{
gcc_unreachable ();
}
else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
{
icode = code_for_builtin[fcode];
/* Set a flag in the machine specific cfun part in order to support
saving/restoring of FPRs. */
if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
cfun->machine->tbegin_p = true;
}
else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
{
error ("unresolved overloaded builtin");
return const0_rtx;
}
else
internal_error ("bad builtin fcode");
if (icode == 0)
internal_error ("bad builtin icode");
nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
if (nonvoid)
{
/* Operand 0 of the pattern is the result. Use a fresh pseudo
unless TARGET has the right mode and satisfies the predicate. */
machine_mode tmode = insn_data[icode].operand[0].mode;
if (!target
|| GET_MODE (target) != tmode
|| !(*insn_data[icode].operand[0].predicate) (target, tmode))
target = gen_reg_rtx (tmode);
/* There are builtins (e.g. vec_promote) with no vector
arguments but an element selector. So we have to also look
at the vector return type when emitting the modulo
operation. */
if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
last_vec_mode = insn_data[icode].operand[0].mode;
}
arity = 0;
FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
{
rtx tmp_rtx;
const struct insn_operand_data *insn_op;
/* Peel off the operand flags belonging to this argument. */
unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
all_op_flags = all_op_flags >> O_SHIFT;
if (arg == error_mark_node)
return NULL_RTX;
if (arity >= MAX_ARGS)
return NULL_RTX;
if (O_IMM_P (op_flags)
&& TREE_CODE (arg) != INTEGER_CST)
{
error ("constant value required for builtin %qF argument %d",
fndecl, arity + 1);
return const0_rtx;
}
if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
return const0_rtx;
/* For builtins with a result the pattern operands are shifted by
one since operand 0 is the result. */
insn_op = &insn_data[icode].operand[arity + nonvoid];
op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
/* expand_expr truncates constants to the target mode only if it
is "convenient". However, our checks below rely on this
being done. */
if (CONST_INT_P (op[arity])
&& SCALAR_INT_MODE_P (insn_op->mode)
&& GET_MODE (op[arity]) != insn_op->mode)
op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
insn_op->mode));
/* Wrap the expanded RTX for pointer types into a MEM expr with
the proper mode. This allows us to use e.g. (match_operand
"memory_operand"..) in the insn patterns instead of (mem
(match_operand "address_operand)). This is helpful for
patterns not just accepting MEMs. */
if (POINTER_TYPE_P (TREE_TYPE (arg))
&& insn_op->predicate != address_operand)
op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
/* Expand the modulo operation required on element selectors. */
if (op_flags == O_ELEM)
{
gcc_assert (last_vec_mode != VOIDmode);
op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
op[arity],
GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
NULL_RTX, 1, OPTAB_DIRECT);
}
/* Record the vector mode used for an element selector. This assumes:
1. There is no builtin with two different vector modes and an element selector
2. The element selector comes after the vector type it is referring to.
This is currently true for all the builtins but FIXME we
should better check for that. */
if (VECTOR_MODE_P (insn_op->mode))
last_vec_mode = insn_op->mode;
/* Fast path: the operand is already acceptable. */
if (insn_op->predicate (op[arity], insn_op->mode))
{
arity++;
continue;
}
/* A memory operand is rejected by the memory_operand predicate.
Try making the address legal by copying it into a register. */
if (MEM_P (op[arity])
&& insn_op->predicate == memory_operand
&& (GET_MODE (XEXP (op[arity], 0)) == Pmode
|| GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
{
op[arity] = replace_equiv_address (op[arity],
copy_to_mode_reg (Pmode,
XEXP (op[arity], 0)));
}
/* Some of the builtins require different modes/types than the
pattern in order to implement a specific API. Instead of
adding many expanders which do the mode change we do it here.
E.g. s390_vec_add_u128 required to have vector unsigned char
arguments is mapped to addti3. */
else if (insn_op->mode != VOIDmode
&& GET_MODE (op[arity]) != VOIDmode
&& GET_MODE (op[arity]) != insn_op->mode
&& ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
GET_MODE (op[arity]), 0))
!= NULL_RTX))
{
op[arity] = tmp_rtx;
}
/* The predicate rejects the operand although the mode is fine.
Copy the operand to register. */
if (!insn_op->predicate (op[arity], insn_op->mode)
&& (GET_MODE (op[arity]) == insn_op->mode
|| GET_MODE (op[arity]) == VOIDmode
|| (insn_op->predicate == address_operand
&& GET_MODE (op[arity]) == Pmode)))
{
/* An address_operand usually has VOIDmode in the expander
so we cannot use this. */
machine_mode target_mode =
(insn_op->predicate == address_operand
? (machine_mode) Pmode : insn_op->mode);
op[arity] = copy_to_mode_reg (target_mode, op[arity]);
}
/* Give up if none of the fixups above made the operand valid. */
if (!insn_op->predicate (op[arity], insn_op->mode))
{
error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
return const0_rtx;
}
arity++;
}
/* Invoke the insn generator with the collected operands; TARGET is
passed first for builtins returning a value. */
switch (arity)
{
case 0:
pat = GEN_FCN (icode) (target);
break;
case 1:
if (nonvoid)
pat = GEN_FCN (icode) (target, op[0]);
else
pat = GEN_FCN (icode) (op[0]);
break;
case 2:
if (nonvoid)
pat = GEN_FCN (icode) (target, op[0], op[1]);
else
pat = GEN_FCN (icode) (op[0], op[1]);
break;
case 3:
if (nonvoid)
pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
else
pat = GEN_FCN (icode) (op[0], op[1], op[2]);
break;
case 4:
if (nonvoid)
pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
else
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
break;
case 5:
if (nonvoid)
pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
else
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
break;
case 6:
if (nonvoid)
pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
else
pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
break;
default:
gcc_unreachable ();
}
if (!pat)
return NULL_RTX;
emit_insn (pat);
if (nonvoid)
return target;
else
return const0_rtx;
}
/* Largest value accepted for either argument of the hotpatch
attribute (see s390_handle_hotpatch_attribute). */
static const int s390_hotpatch_hw_max = 1000000;
/* NOTE(review): presumably the hotpatch halfword counts before and
after the function label as set by the command line option -- confirm
where these are assigned. */
static int s390_hotpatch_hw_before_label = 0;
static int s390_hotpatch_hw_after_label = 0;
/* Check whether the hotpatch attribute is applied to a function and
   whether its arguments are valid.  Valid means: at least two
   arguments are given and the first two are integer constants of
   integral type not exceeding s390_hotpatch_hw_max when read as
   unsigned values.  A warning resp. error is issued and *NO_ADD_ATTRS
   is set if a check fails.  Always returns NULL_TREE.  */
static tree
s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
				int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  bool valid_p = false;

  if (args != NULL && TREE_CHAIN (args) != NULL)
    {
      tree hw_first = TREE_VALUE (args);
      tree hw_second = TREE_VALUE (TREE_CHAIN (args));

      /* The unsigned comparison also rejects negative constants.  */
      valid_p = (TREE_CODE (hw_first) == INTEGER_CST
		 && INTEGRAL_TYPE_P (TREE_TYPE (hw_first))
		 && !wi::gtu_p (wi::to_wide (hw_first), s390_hotpatch_hw_max)
		 && TREE_CODE (hw_second) == INTEGER_CST
		 && INTEGRAL_TYPE_P (TREE_TYPE (hw_second))
		 && !wi::gtu_p (wi::to_wide (hw_second),
				s390_hotpatch_hw_max));
    }

  if (!valid_p)
    {
      error ("requested %qE attribute is not a comma separated pair of"
	     " non-negative integer constants or too large (max. %d)", name,
	     s390_hotpatch_hw_max);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Expand the s390_vector_bool type attribute.  The innermost type of
   *NODE (below any pointer, function, method and array layers) picks
   the boolean vector type by its mode; if one is found *NODE is
   rebuilt around it.  The attribute itself is never kept.  Always
   returns NULL_TREE.  */
static tree
s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
				  tree args ATTRIBUTE_UNUSED,
				  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree inner = *node;
  tree bool_vec_type = NULL_TREE;

  /* Strip the wrapping layers to get at the element type.  */
  for (;;)
    {
      bool wrapper_p = (POINTER_TYPE_P (inner)
			|| TREE_CODE (inner) == FUNCTION_TYPE
			|| TREE_CODE (inner) == METHOD_TYPE
			|| TREE_CODE (inner) == ARRAY_TYPE);
      if (!wrapper_p)
	break;
      inner = TREE_TYPE (inner);
    }

  machine_mode inner_mode = TYPE_MODE (inner);

  /* A scalar mode and the vector mode with elements of that mode
     select the same boolean vector type.  */
  switch (inner_mode)
    {
    case E_QImode:
    case E_V16QImode:
      bool_vec_type = s390_builtin_types[BT_BV16QI];
      break;

    case E_HImode:
    case E_V8HImode:
      bool_vec_type = s390_builtin_types[BT_BV8HI];
      break;

    case E_SImode:
    case E_V4SImode:
      bool_vec_type = s390_builtin_types[BT_BV4SI];
      break;

    case E_DImode:
    case E_V2DImode:
      bool_vec_type = s390_builtin_types[BT_BV2DI];
      break;

    default:
      break;
    }

  *no_add_attrs = true;  /* No need to hang on to the attribute.  */

  if (bool_vec_type)
    *node = lang_hooks.types.reconstruct_complex_type (*node, bool_vec_type);

  return NULL_TREE;
}
/* Check syntax of function decl attributes having a string type value.

   NODE points to the entity the attribute NAME is applied to and ARGS is
   the attribute's argument list, of which the first value is inspected.
   A warning is issued and *NO_ADD_ATTRS is set when
     - *NODE is not a FUNCTION_DECL,
     - the argument is not a string constant, or
     - the string is not one of the values accepted for NAME:
       (keep|thunk|thunk-extern) for indirect_branch,
       indirect_branch_call, function_return, function_return_reg and
       function_return_mem; (keep|thunk|thunk-inline|thunk-extern) for
       indirect_branch_jump.
   Always returns NULL_TREE.  */
static tree
s390_handle_string_attribute (tree *node, tree name, tree args,
                              int flags ATTRIBUTE_UNUSED,
                              bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
               name);
      *no_add_attrs = true;
    }

  tree cst = TREE_VALUE (args);

  if (TREE_CODE (cst) != STRING_CST)
    {
      warning (OPT_Wattributes,
               "%qE attribute requires a string constant argument",
               name);
      *no_add_attrs = true;
      /* There is no string to validate, and TREE_STRING_POINTER must not
         be applied to a node that is not a STRING_CST.  */
      return NULL_TREE;
    }

  const char *value = TREE_STRING_POINTER (cst);

  if (is_attribute_p ("indirect_branch", name)
      || is_attribute_p ("indirect_branch_call", name)
      || is_attribute_p ("function_return", name)
      || is_attribute_p ("function_return_reg", name)
      || is_attribute_p ("function_return_mem", name))
    {
      if (strcmp (value, "keep") != 0
          && strcmp (value, "thunk") != 0
          && strcmp (value, "thunk-extern") != 0)
        {
          warning (OPT_Wattributes,
                   "argument to %qE attribute is not "
                   "(keep|thunk|thunk-extern)", name);
          *no_add_attrs = true;
        }
    }

  /* indirect_branch_jump additionally accepts "thunk-inline".  */
  if (is_attribute_p ("indirect_branch_jump", name)
      && strcmp (value, "keep") != 0
      && strcmp (value, "thunk") != 0
      && strcmp (value, "thunk-inline") != 0
      && strcmp (value, "thunk-extern") != 0)
    {
      warning (OPT_Wattributes,
               "argument to %qE attribute is not "
               "(keep|thunk|thunk-inline|thunk-extern)", name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Table of the attributes handled in this file, terminated by an entry
whose name is NULL. Each initializer gives the attribute name, two
integers, four boolean flags, the handler function and a trailing NULL.
NOTE(review): per struct attribute_spec (declared outside this file) the
integers are presumably the minimum and maximum argument counts and the
flags decl_required, type_required, function_type_required and
affects_type_identity - confirm against that declaration. */
static const struct attribute_spec s390_attribute_table[] = {
{ "hotpatch", 2, 2, true, false, false, false,
s390_handle_hotpatch_attribute, NULL },
{ "s390_vector_bool", 0, 0, false, true, false, true,
s390_handle_vectorbool_attribute, NULL },
/* The remaining attributes all share s390_handle_string_attribute. */
{ "indirect_branch", 1, 1, true, false, false, false,
s390_handle_string_attribute, NULL },
{ "indirect_branch_jump", 1, 1, true, false, false, false,
s390_handle_string_attribute, NULL },
{ "indirect_branch_call", 1, 1, true, false, false, false,
s390_handle_string_attribute, NULL },
{ "function_return", 1, 1, true, false, false, false,
s390_handle_string_attribute, NULL },
{ "function_return_reg", 1, 1, true, false, false, false,
s390_handle_string_attribute, NULL },
{ "function_return_mem", 1, 1, true, false, false, false,
s390_handle_string_attribute, NULL },
/* End element. */
{ NULL, 0, 0, false, false, false, false, NULL, NULL }
};
/* Return the alignment for LABEL.  The result is 0 when the active insn
   preceding LABEL is a single set whose source is an UNSPEC with number
   UNSPEC_MAIN_BASE (the literal pool base label); in every other case
   it is the -falign-labels value.  */
int
s390_label_align (rtx_insn *label)
{
  rtx_insn *prev = prev_active_insn (label);

  if (prev != NULL_RTX)
    {
      rtx set = single_set (prev);

      if (set != NULL_RTX)
        {
          rtx src = SET_SRC (set);

          /* Don't align literal pool base labels.  */
          if (GET_CODE (src) == UNSPEC
              && XINT (src, 1) == UNSPEC_MAIN_BASE)
            return 0;
        }
    }

  return align_labels.levels[0].log;
}
/* Cached SYMBOL_REF for the GOT; filled in lazily by s390_got_symbol.  */
static GTY(()) rtx got_symbol;

/* Return the SYMBOL_REF for "_GLOBAL_OFFSET_TABLE_".  It is created in
   Pmode and flagged SYMBOL_FLAG_LOCAL on the first call; later calls
   return the same rtx.  */
static rtx
s390_got_symbol (void)
{
  if (got_symbol)
    return got_symbol;

  got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
  SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
  return got_symbol;
}
/* Return DImode when TARGET_64BIT is set and SImode otherwise.  */
static scalar_int_mode
s390_libgcc_cmp_return_mode (void)
{
  if (TARGET_64BIT)
    return DImode;

  return SImode;
}
/* Return DImode when TARGET_64BIT is set and SImode otherwise.  */
static scalar_int_mode
s390_libgcc_shift_count_mode (void)
{
  if (TARGET_64BIT)
    return DImode;

  return SImode;
}
/* Return DImode when TARGET_64BIT is set and SImode otherwise.  */
static scalar_int_mode
s390_unwind_word_mode (void)
{
  if (TARGET_64BIT)
    return DImode;

  return SImode;
}
/* Return true if the back end supports scalar mode MODE.  TImode is
   refused when TARGET_ZARCH is set without TARGET_64BIT; decimal float
   modes defer to default_decimal_float_supported_p and everything else
   to default_scalar_mode_supported_p.  */
static bool
s390_scalar_mode_supported_p (scalar_mode mode)
{
  /* In contrast to the default implementation reject TImode constants
     on 31bit TARGET_ZARCH for ABI compliance.  */
  if (mode == TImode && TARGET_ZARCH && !TARGET_64BIT)
    return false;

  return (DECIMAL_FLOAT_MODE_P (mode)
          ? default_decimal_float_supported_p ()
          : default_scalar_mode_supported_p (mode));
}
/* Return true if the back end supports vector mode MODE.  That requires
   TARGET_VX, a vector mode of at most 16 bytes, and an element mode
   that is one of QI, HI, SI, DI, TI, SF, DF or TF.  */
static bool
s390_vector_mode_supported_p (machine_mode mode)
{
  if (!VECTOR_MODE_P (mode))
    return false;
  if (!TARGET_VX)
    return false;
  if (GET_MODE_SIZE (mode) > 16)
    return false;

  machine_mode elt_mode = GET_MODE_INNER (mode);

  switch (elt_mode)
    {
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode:
    case E_SFmode:
    case E_DFmode:
    case E_TFmode:
      return true;

    default:
      break;
    }

  return false;
}
/* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
void
s390_set_has_landing_pad_p (bool value)
{
cfun->machine->has_landing_pad_p = value;
}
/* If two condition code modes are compatible, return a condition code
   mode which is compatible with both.  Otherwise, return VOIDmode.

   Identical modes are compatible with themselves.  Apart from that,
   CCZmode pairs with CCSmode, CCUmode, CCTmode, CCSRmode, CCURmode and
   CCZ1mode, and the result is always the non-CCZmode partner.  */
static machine_mode
s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
{
  machine_mode partner;

  if (m1 == m2)
    return m1;

  /* One side has to be CCZmode; remember the other one.  */
  if (m1 == CCZmode)
    partner = m2;
  else if (m2 == CCZmode)
    partner = m1;
  else
    return VOIDmode;

  switch (partner)
    {
    case E_CCSmode:
    case E_CCUmode:
    case E_CCTmode:
    case E_CCSRmode:
    case E_CCURmode:
    case E_CCZ1mode:
      return partner;

    default:
      return VOIDmode;
    }
}
/* Return true if SET either doesn't set the CC register, or else
   the source and destination have matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.

   SET must be a SET rtx, and REQ_MODE must not be one of CCVIALLmode,
   CCVIANYmode, CCVFALLmode or CCVFANYmode.  */
static bool
s390_match_ccmode_set (rtx set, machine_mode req_mode)
{
  gcc_assert (GET_CODE (set) == SET);

  /* These modes are supposed to be used only in CC consumer
     patterns.  */
  gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
              && req_mode != CCVFALLmode && req_mode != CCVFANYmode);

  rtx dest = SET_DEST (set);

  /* A set of anything but the CC register always matches.  */
  if (!REG_P (dest) || !CC_REGNO_P (REGNO (dest)))
    return true;

  machine_mode dest_mode = GET_MODE (dest);
  bool acceptable = false;

  switch (dest_mode)
    {
    /* These destination modes only satisfy an identical request.  */
    case E_CCZ1mode:
    case E_CCSmode:
    case E_CCSRmode:
    case E_CCSFPSmode:
    case E_CCUmode:
    case E_CCURmode:
    case E_CCOmode:
    case E_CCLmode:
    case E_CCL1mode:
    case E_CCL2mode:
    case E_CCL3mode:
    case E_CCT1mode:
    case E_CCT2mode:
    case E_CCT3mode:
    case E_CCVEQmode:
    case E_CCVIHmode:
    case E_CCVIHUmode:
    case E_CCVFHmode:
    case E_CCVFHEmode:
      acceptable = (req_mode == dest_mode);
      break;

    case E_CCZmode:
      acceptable = (req_mode == CCSmode || req_mode == CCUmode
                    || req_mode == CCTmode || req_mode == CCSRmode
                    || req_mode == CCURmode || req_mode == CCZ1mode);
      break;

    case E_CCAPmode:
    case E_CCANmode:
      acceptable = (req_mode == CCAmode);
      break;

    default:
      gcc_unreachable ();
    }

  if (!acceptable)
    return false;

  /* Finally the source has to carry the same mode as the destination.  */
  return GET_MODE (SET_SRC (set)) == dest_mode;
}
/* Return true if every SET in INSN that sets the CC register
   has source and destination with matching CC modes and that
   CC mode is at least as constrained as REQ_MODE.
   If REQ_MODE is VOIDmode, always return false.

   Only a pattern that is itself a SET, or the SET elements of a
   PARALLEL, are examined; any other pattern yields true.  */
bool
s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
{
  /* s390_tm_ccmode returns VOIDmode to indicate failure.  */
  if (req_mode == VOIDmode)
    return false;

  rtx pat = PATTERN (insn);

  switch (GET_CODE (pat))
    {
    case SET:
      return s390_match_ccmode_set (pat, req_mode);

    case PARALLEL:
      for (int i = 0; i < XVECLEN (pat, 0); i++)
        {
          rtx elt = XVECEXP (pat, 0, i);

          if (GET_CODE (elt) != SET)
            continue;
          if (!s390_match_ccmode_set (elt, req_mode))
            return false;
        }
      break;

    default:
      break;
    }

  return true;
}
/* If a test-under-mask instruction can be used to implement
   (compare (and ... OP1) OP2), return the CC mode required
   to do that.  Otherwise, return VOIDmode.
   MIXED is true if the instruction can distinguish between
   CC1 and CC2 for mixed selected bits (TMxx), it is false
   if the instruction cannot (TM).

   Both OP1 (the mask) and OP2 (the value compared against) have to be
   CONST_INTs.  */
machine_mode
s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
{
  /* ??? Fixme: should work on CONST_WIDE_INT as well.  */
  if (!CONST_INT_P (op1) || !CONST_INT_P (op2))
    return VOIDmode;

  const HOST_WIDE_INT mask = INTVAL (op1);
  const HOST_WIDE_INT wanted = INTVAL (op2);

  /* Selected bits all zero: CC0.
     e.g.: int a; if ((a & (16 + 128)) == 0)  */
  if (wanted == 0)
    return CCTmode;

  /* Selected bits all one: CC3.
     e.g.: int a; if ((a & (16 + 128)) == 16 + 128)  */
  if (wanted == mask)
    return CCT3mode;

  if (!mixed)
    return VOIDmode;

  /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2.  e.g.:
       int a;
       if ((a & (16 + 128)) == 16)   -> CCT1
       if ((a & (16 + 128)) == 128)  -> CCT2
     Both the value and the remaining mask bits must be a single bit.  */
  int one_bit = exact_log2 (wanted);
  int zero_bit = exact_log2 (mask ^ wanted);

  if (zero_bit == -1 || one_bit == -1)
    return VOIDmode;

  return zero_bit > one_bit ? CCT1mode : CCT2mode;
}
/* Given a comparison code CODE (EQ, NE, etc.) and the operands
OP0 and OP1 of a COMPARE, return the CC mode to be used for the
comparison. The selection depends on CODE and on the shape of the
operands (e.g. NEG/ABS/PLUS/MINUS/AND expressions, extensions and
constants). Codes not listed in the switch are not expected and hit
gcc_unreachable. */
machine_mode
s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
{
switch (code)
{
case EQ:
case NE:
if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
&& GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
return CCAPmode;
if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
&& CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
return CCAPmode;
/* NOTE(review): the last alternative tests OP1 for NEG while the
neighbouring tests look at OP0 - confirm this is intended. */
if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
|| GET_CODE (op1) == NEG)
&& GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
return CCLmode;
if (GET_CODE (op0) == AND)
{
/* Check whether we can potentially do it via TM. */
machine_mode ccmode;
ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
if (ccmode != VOIDmode)
{
/* Relax CCTmode to CCZmode to allow fall-back to AND
if that turns out to be beneficial. */
return ccmode == CCTmode ? CCZmode : ccmode;
}
}
/* Comparing a HImode or QImode register against all ones (given
either as -1 or as the unsigned maximum) selects CCT3mode. */
if (register_operand (op0, HImode)
&& GET_CODE (op1) == CONST_INT
&& (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
return CCT3mode;
if (register_operand (op0, QImode)
&& GET_CODE (op1) == CONST_INT
&& (INTVAL (op1) == -1 || INTVAL (op1) == 255))
return CCT3mode;
return CCZmode;
case LE:
case LT:
case GE:
case GT:
/* The only overflow condition of NEG and ABS happens when the most
negative value (INT_MIN) is used as parameter, which stays negative.
So we have an overflow from a positive value to a negative.
Using CCAP mode the resulting cc can be used for comparisons. */
if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
&& GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
return CCAPmode;
/* If constants are involved in an add instruction it is possible to use
the resulting cc for comparisons with zero. Knowing the sign of the
constant the overflow behavior gets predictable. e.g.:
int a, b; if ((b = a + c) > 0)
with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
&& (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
/* Avoid INT32_MIN on 32 bit.
NOTE(review): as written INT32_MIN is only excluded when
TARGET_ZARCH is set - confirm this matches the "32 bit" wording. */
&& (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
{
if (INTVAL (XEXP((op0), 1)) < 0)
return CCANmode;
else
return CCAPmode;
}
/* Fall through. */
case LTGT:
/* Reached for LTGT and for the signed orderings above when no
special case applied. */
if (HONOR_NANS (op0) || HONOR_NANS (op1))
return CCSFPSmode;
/* Fall through. */
case UNORDERED:
case ORDERED:
case UNEQ:
case UNLE:
case UNLT:
case UNGE:
case UNGT:
/* An extended first operand compared against a non-constant uses the
swapped-operand mode CCSRmode; everything else uses CCSmode. */
if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
&& GET_CODE (op1) != CONST_INT)
return CCSRmode;
return CCSmode;
case LTU:
case GEU:
if (GET_CODE (op0) == PLUS
&& GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
return CCL1mode;
if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
&& GET_CODE (op1) != CONST_INT)
return CCURmode;
return CCUmode;
case LEU:
case GTU:
if (GET_CODE (op0) == MINUS
&& GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
return CCL2mode;
if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
&& GET_CODE (op1) != CONST_INT)
return CCURmode;
return CCUmode;
default:
gcc_unreachable ();
}
}
/* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
that we can implement more efficiently. *CODE, *OP0 and *OP1 are
updated in place. Nothing is changed when OP0_PRESERVE_VALUE is set.
The rewrites below are applied one after the other, so a later step
may act on the result of an earlier one. */
static void
s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
bool op0_preserve_value)
{
if (op0_preserve_value)
return;
/* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
if ((*code == EQ || *code == NE)
&& *op1 == const0_rtx
&& GET_CODE (*op0) == ZERO_EXTRACT
&& GET_CODE (XEXP (*op0, 1)) == CONST_INT
&& GET_CODE (XEXP (*op0, 2)) == CONST_INT
&& SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
{
rtx inner = XEXP (*op0, 0);
HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
if (len > 0 && len < modesize
&& pos >= 0 && pos + len <= modesize
&& modesize <= HOST_BITS_PER_WIDE_INT)
{
/* BLOCK is LEN one-bits shifted left by MODESIZE - POS - LEN. */
unsigned HOST_WIDE_INT block;
block = (HOST_WIDE_INT_1U << len) - 1;
block <<= modesize - pos - len;
*op0 = gen_rtx_AND (GET_MODE (inner), inner,
gen_int_mode (block, GET_MODE (inner)));
}
}
/* Narrow AND of memory against immediate to enable TM. */
if ((*code == EQ || *code == NE)
&& *op1 == const0_rtx
&& GET_CODE (*op0) == AND
&& GET_CODE (XEXP (*op0, 1)) == CONST_INT
&& SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
{
rtx inner = XEXP (*op0, 0);
rtx mask = XEXP (*op0, 1);
/* Ignore paradoxical SUBREGs if all extra bits are masked out. */
if (GET_CODE (inner) == SUBREG
&& SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
&& (GET_MODE_SIZE (GET_MODE (inner))
>= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
&& ((INTVAL (mask)
& GET_MODE_MASK (GET_MODE (inner))
& ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
== 0))
inner = SUBREG_REG (inner);
/* Do not change volatile MEMs. */
if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
{
/* PART is >= 0 only when exactly one QImode part of the mask is
nonzero; the AND is then redone in QImode on that part. */
int part = s390_single_part (XEXP (*op0, 1),
GET_MODE (inner), QImode, 0);
if (part >= 0)
{
mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
inner = adjust_address_nv (inner, QImode, part);
*op0 = gen_rtx_AND (QImode, inner, mask);
}
}
}
/* Narrow comparisons against 0xffff to HImode if possible. */
if ((*code == EQ || *code == NE)
&& GET_CODE (*op1) == CONST_INT
&& INTVAL (*op1) == 0xffff
&& SCALAR_INT_MODE_P (GET_MODE (*op0))
&& (nonzero_bits (*op0, GET_MODE (*op0))
& ~HOST_WIDE_INT_UC (0xffff)) == 0)
{
*op0 = gen_lowpart (HImode, *op0);
*op1 = constm1_rtx;
}
/* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
if (GET_CODE (*op0) == UNSPEC
&& XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
&& XVECLEN (*op0, 0) == 1
&& GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
&& GET_CODE (XVECEXP (*op0, 0, 0)) == REG
&& REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
&& *op1 == const0_rtx)
{
/* EQ and NE are kept; each signed ordering is replaced by the
opposite unsigned one (LT -> GTU, GT -> LTU, LE -> GEU, GE -> LEU).
Other codes leave the comparison untouched. */
enum rtx_code new_code = UNKNOWN;
switch (*code)
{
case EQ: new_code = EQ; break;
case NE: new_code = NE; break;
case LT: new_code = GTU; break;
case GT: new_code = LTU; break;
case LE: new_code = GEU; break;
case GE: new_code = LEU; break;
default: break;
}
if (new_code != UNKNOWN)
{
*op0 = XVECEXP (*op0, 0, 0);
*code = new_code;
}
}
/* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
if (GET_CODE (*op0) == UNSPEC
&& XINT (*op0, 1) == UNSPEC_CC_TO_INT
&& XVECLEN (*op0, 0) == 1
&& GET_CODE (XVECEXP (*op0, 0, 0)) == REG
&& REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
&& CONST_INT_P (*op1))
{
/* Only EQ and NE on a CCZmode or CCRAWmode register qualify. */
enum rtx_code new_code = UNKNOWN;
switch (GET_MODE (XVECEXP (*op0, 0, 0)))
{
case E_CCZmode:
case E_CCRAWmode:
switch (*code)
{
case EQ: new_code = EQ; break;
case NE: new_code = NE; break;
default: break;
}
break;
default: break;
}
if (new_code != UNKNOWN)
{
/* For CCRAWmode put the required cc mask into the second
operand. A value 0..3 becomes the single-bit mask
1 << (3 - value). */
if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
&& INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
*op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
*op0 = XVECEXP (*op0, 0, 0);
*code = new_code;
}
}
/* Simplify cascaded EQ, NE with const0_rtx. */
if ((*code == NE || *code == EQ)
&& (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
&& GET_MODE (*op0) == SImode
&& GET_MODE (XEXP (*op0, 0)) == CCZ1mode
&& REG_P (XEXP (*op0, 0))
&& XEXP (*op0, 1) == const0_rtx
&& *op1 == const0_rtx)
{
/* Outer and inner code differ -> EQ, otherwise -> NE. */
if ((*code == EQ && GET_CODE (*op0) == NE)
|| (*code == NE && GET_CODE (*op0) == EQ))
*code = EQ;
else
*code = NE;
*op0 = XEXP (*op0, 0);
}
/* Prefer register over memory as first operand. */
if (MEM_P (*op0) && REG_P (*op1))
{
rtx tem = *op0; *op0 = *op1; *op1 = tem;
*code = (int)swap_condition ((enum rtx_code)*code);
}
/* A comparison result is compared against zero. Replace it with
the (perhaps inverted) original comparison.
This probably should be done by simplify_relational_operation. */
if ((*code == EQ || *code == NE)
&& *op1 == const0_rtx
&& COMPARISON_P (*op0)
&& CC_REG_P (XEXP (*op0, 0)))
{
enum rtx_code new_code;
if (*code == EQ)
new_code = reversed_comparison_code_parts (GET_CODE (*op0),
XEXP (*op0, 0),
XEXP (*op0, 1), NULL);
else
new_code = GET_CODE (*op0);
/* UNKNOWN means the comparison could not be reversed; leave it. */
if (new_code != UNKNOWN)
{
*code = new_code;
*op1 = XEXP (*op0, 1);
*op0 = XEXP (*op0, 0);
}
}
/* ~a==b -> ~(a^b)==0 ~a!=b -> ~(a^b)!=0 */
if (TARGET_Z15
&& (*code == EQ || *code == NE)
&& (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
&& GET_CODE (*op0) == NOT)
{
machine_mode mode = GET_MODE (*op0);
*op0 = gen_rtx_XOR (mode, XEXP (*op0, 0), *op1);
*op0 = gen_rtx_NOT (mode, *op0);
*op1 = const0_rtx;
}
/* a&b == -1 -> ~a|~b == 0 a|b == -1 -> ~a&~b == 0 */
if (TARGET_Z15
&& (*code == EQ || *code == NE)
&& (GET_CODE (*op0) == AND || GET_CODE (*op0) == IOR)
&& (GET_MODE (*op0) == DImode || GET_MODE (*op0) == SImode)
&& CONST_INT_P (*op1)
&& *op1 == constm1_rtx)
{
machine_mode mode = GET_MODE (*op0);
rtx op00 = gen_rtx_NOT (mode, XEXP (*op0, 0));
rtx op01 = gen_rtx_NOT (mode, XEXP (*op0, 1));
if (GET_CODE (*op0) == AND)
*op0 = gen_rtx_IOR (mode, op00, op01);
else
*op0 = gen_rtx_AND (mode, op00, op01);
*op1 = const0_rtx;
}
}
/* Emit a compare instruction suitable to implement the comparison
   OP0 CODE OP1.  Return the correct condition RTL to be placed in
   the IF_THEN_ELSE of the conditional branch testing the result.

   When OP0 already has a MODE_CC mode, no instruction is emitted and
   OP0 itself is used as the CC operand of the returned condition.  */
rtx
s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  machine_mode ccmode = s390_select_ccmode (code, op0, op1);
  rtx cc_reg;

  /* Force OP1 into register in order to satisfy VXE TFmode patterns.  */
  if (TARGET_VXE && GET_MODE (op1) == TFmode)
    op1 = force_reg (TFmode, op1);

  if (GET_MODE_CLASS (GET_MODE (op0)) != MODE_CC)
    {
      cc_reg = gen_rtx_REG (ccmode, CC_REGNUM);
      emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (ccmode, op0, op1)));
    }
  else
    {
      /* Do not output a redundant compare instruction if a
         compare_and_swap pattern already computed the result and the
         machine modes are compatible.  */
      gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), ccmode)
                  == GET_MODE (op0));
      cc_reg = op0;
    }

  return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
}
/* If MEM is not a legitimate compare-and-swap memory operand, return a new
   MEM, whose address is a pseudo containing the original MEM's address.
   The test used here is whether MEM contains a symbol reference; a MEM
   without one is returned unchanged.  */
static rtx
s390_legitimize_cs_operand (rtx mem)
{
  if (contains_symbol_ref_p (mem))
    {
      rtx addr_reg = gen_reg_rtx (Pmode);

      emit_move_insn (addr_reg, copy_rtx (XEXP (mem, 0)));
      mem = change_address (mem, VOIDmode, addr_reg);
    }

  return mem;
}
/* Emit a compare and swap instruction setting MEM to NEW_RTX if OLD
   matches CMP.  MEM must have SImode, DImode or TImode; it is first
   passed through s390_legitimize_cs_operand.  The CC register is used
   in mode CCMODE.
   Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
   conditional branch testing the result.  */
static rtx
s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
                            rtx cmp, rtx new_rtx, machine_mode ccmode)
{
  rtx pat;

  mem = s390_legitimize_cs_operand (mem);
  rtx cc = gen_rtx_REG (ccmode, CC_REGNUM);

  switch (GET_MODE (mem))
    {
    case E_SImode:
      pat = gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
                                                    new_rtx, cc);
      break;

    case E_DImode:
      pat = gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
                                                    new_rtx, cc);
      break;

    case E_TImode:
      pat = gen_atomic_compare_and_swapti_internal (old, mem, cmp,
                                                    new_rtx, cc);
      break;

    /* QImode, HImode and everything else are not supported here.  */
    case E_QImode:
    case E_HImode:
    default:
      gcc_unreachable ();
    }

  emit_insn (pat);
  return s390_emit_compare (code, cc, const0_rtx);
}
/* Emit a jump instruction to TARGET and return it.  If COND is
   NULL_RTX, emit an unconditional jump, else a conditional jump under
   condition COND (falling through to the next insn when COND does
   not hold).  */
rtx_insn *
s390_emit_jump (rtx target, rtx cond)
{
  rtx label_ref = gen_rtx_LABEL_REF (VOIDmode, target);
  rtx new_pc = (cond
                ? gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label_ref, pc_rtx)
                : label_ref);

  return emit_jump_insn (gen_rtx_SET (pc_rtx, new_pc));
}
/* Return branch condition mask to implement a branch
specified by CODE. Return -1 for invalid comparisons.
CODE must be a comparison whose first operand is the CC register and
whose second operand is const0_rtx (or, for CCRAWmode, any CONST_INT).
In the returned 4-bit mask CC0 is the most significant bit (value 8)
and CC3 the least significant one (value 1). */
int
s390_branch_condition_mask (rtx code)
{
const int CC0 = 1 << 3;
const int CC1 = 1 << 2;
const int CC2 = 1 << 1;
const int CC3 = 1 << 0;
gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
gcc_assert (XEXP (code, 1) == const0_rtx
|| (GET_MODE (XEXP (code, 0)) == CCRAWmode
&& CONST_INT_P (XEXP (code, 1))));
/* Dispatch on the mode of the CC register first, then on the
comparison code. Every inner switch returns on all paths. */
switch (GET_MODE (XEXP (code, 0)))
{
case E_CCZmode:
case E_CCZ1mode:
switch (GET_CODE (code))
{
case EQ: return CC0;
case NE: return CC1 | CC2 | CC3;
default: return -1;
}
break;
case E_CCT1mode:
switch (GET_CODE (code))
{
case EQ: return CC1;
case NE: return CC0 | CC2 | CC3;
default: return -1;
}
break;
case E_CCT2mode:
switch (GET_CODE (code))
{
case EQ: return CC2;
case NE: return CC0 | CC1 | CC3;
default: return -1;
}
break;
case E_CCT3mode:
switch (GET_CODE (code))
{
case EQ: return CC3;
case NE: return CC0 | CC1 | CC2;
default: return -1;
}
break;
case E_CCLmode:
switch (GET_CODE (code))
{
case EQ: return CC0 | CC2;
case NE: return CC1 | CC3;
default: return -1;
}
break;
case E_CCL1mode:
switch (GET_CODE (code))
{
case LTU: return CC2 | CC3; /* carry */
case GEU: return CC0 | CC1; /* no carry */
default: return -1;
}
break;
case E_CCL2mode:
switch (GET_CODE (code))
{
case GTU: return CC0 | CC1; /* borrow */
case LEU: return CC2 | CC3; /* no borrow */
default: return -1;
}
break;
case E_CCL3mode:
switch (GET_CODE (code))
{
case EQ: return CC0 | CC2;
case NE: return CC1 | CC3;
case LTU: return CC1;
case GTU: return CC3;
case LEU: return CC1 | CC2;
case GEU: return CC2 | CC3;
default: return -1;
}
/* No break needed: the switch above always returns. */
case E_CCUmode:
switch (GET_CODE (code))
{
case EQ: return CC0;
case NE: return CC1 | CC2 | CC3;
case LTU: return CC1;
case GTU: return CC2;
case LEU: return CC0 | CC1;
case GEU: return CC0 | CC2;
default: return -1;
}
break;
/* Same as CCUmode with the roles of CC1 and CC2 exchanged. */
case E_CCURmode:
switch (GET_CODE (code))
{
case EQ: return CC0;
case NE: return CC2 | CC1 | CC3;
case LTU: return CC2;
case GTU: return CC1;
case LEU: return CC0 | CC2;
case GEU: return CC0 | CC1;
default: return -1;
}
break;
case E_CCAPmode:
switch (GET_CODE (code))
{
case EQ: return CC0;
case NE: return CC1 | CC2 | CC3;
case LT: return CC1 | CC3;
case GT: return CC2;
case LE: return CC0 | CC1 | CC3;
case GE: return CC0 | CC2;
default: return -1;
}
break;
case E_CCANmode:
switch (GET_CODE (code))
{
case EQ: return CC0;
case NE: return CC1 | CC2 | CC3;
case LT: return CC1;
case GT: return CC2 | CC3;
case LE: return CC0 | CC1;
case GE: return CC0 | CC2 | CC3;
default: return -1;
}
break;
case E_CCOmode:
switch (GET_CODE (code))
{
case EQ: return CC0 | CC1 | CC2;
case NE: return CC3;
default: return -1;
}
break;
case E_CCSmode:
case E_CCSFPSmode:
switch (GET_CODE (code))
{
case EQ: return CC0;
case NE: return CC1 | CC2 | CC3;
case LT: return CC1;
case GT: return CC2;
case LE: return CC0 | CC1;
case GE: return CC0 | CC2;
case UNORDERED: return CC3;
case ORDERED: return CC0 | CC1 | CC2;
case UNEQ: return CC0 | CC3;
case UNLT: return CC1 | CC3;
case UNGT: return CC2 | CC3;
case UNLE: return CC0 | CC1 | CC3;
case UNGE: return CC0 | CC2 | CC3;
case LTGT: return CC1 | CC2;
default: return -1;
}
break;
/* Same as CCSmode with the roles of CC1 and CC2 exchanged. */
case E_CCSRmode:
switch (GET_CODE (code))
{
case EQ: return CC0;
case NE: return CC2 | CC1 | CC3;
case LT: return CC2;
case GT: return CC1;
case LE: return CC0 | CC2;
case GE: return CC0 | CC1;
case UNORDERED: return CC3;
case ORDERED: return CC0 | CC2 | CC1;
case UNEQ: return CC0 | CC3;
case UNLT: return CC2 | CC3;
case UNGT: return CC1 | CC3;
case UNLE: return CC0 | CC2 | CC3;
case UNGE: return CC0 | CC1 | CC3;
case LTGT: return CC2 | CC1;
default: return -1;
}
break;
/* Vector comparison modes. */
/* CC2 will never be set. It however is part of the negated
masks. */
case E_CCVIALLmode:
switch (GET_CODE (code))
{
case EQ:
case GTU:
case GT:
case GE: return CC0;
/* The inverted modes are in fact *any* modes. */
case NE:
case LEU:
case LE:
case LT: return CC3 | CC1 | CC2;
default: return -1;
}
/* No break needed here or below: each vector-mode switch always
returns. */
case E_CCVIANYmode:
switch (GET_CODE (code))
{
case EQ:
case GTU:
case GT:
case GE: return CC0 | CC1;
/* The inverted modes are in fact *all* modes. */
case NE:
case LEU:
case LE:
case LT: return CC3 | CC2;
default: return -1;
}
case E_CCVFALLmode:
switch (GET_CODE (code))
{
case EQ:
case GT:
case GE: return CC0;
/* The inverted modes are in fact *any* modes. */
case NE:
case UNLE:
case UNLT: return CC3 | CC1 | CC2;
default: return -1;
}
case E_CCVFANYmode:
switch (GET_CODE (code))
{
case EQ:
case GT:
case GE: return CC0 | CC1;
/* The inverted modes are in fact *all* modes. */
case NE:
case UNLE:
case UNLT: return CC3 | CC2;
default: return -1;
}
/* For CCRAWmode the second operand already is the mask: EQ returns it
unchanged, NE returns its 4-bit complement. Other codes are not
expected. */
case E_CCRAWmode:
switch (GET_CODE (code))
{
case EQ:
return INTVAL (XEXP (code, 1));
case NE:
return (INTVAL (XEXP (code, 1))) ^ 0xf;
default:
gcc_unreachable ();
}
default:
return -1;
}
}
/* Return branch condition mask to implement a compare and branch
   specified by CODE.  Signed and unsigned orderings map to the same
   mask.  Only EQ, NE and the eight signed/unsigned ordering codes are
   expected; anything else hits gcc_unreachable.  */
int
s390_compare_and_branch_condition_mask (rtx code)
{
  const int CC0 = 1 << 3;
  const int CC1 = 1 << 2;
  const int CC2 = 1 << 1;
  int mask = -1;

  switch (GET_CODE (code))
    {
    case EQ:
      mask = CC0;
      break;

    case NE:
      mask = CC1 | CC2;
      break;

    case LT:
    case LTU:
      mask = CC1;
      break;

    case GT:
    case GTU:
      mask = CC2;
      break;

    case LE:
    case LEU:
      mask = CC0 | CC1;
      break;

    case GE:
    case GEU:
      mask = CC0 | CC2;
      break;

    default:
      gcc_unreachable ();
    }

  return mask;
}
/* If INV is false, return assembler mnemonic string to implement
   a branch specified by CODE.  If INV is true, return mnemonic
   for the corresponding inverted branch.

   A comparison of the CC register against const0_rtx (or, in CCRAWmode,
   against any CONST_INT) is translated by s390_branch_condition_mask;
   every other comparison by s390_compare_and_branch_condition_mask.
   The resulting mask must lie in the range 1..14.  */
static const char *
s390_branch_condition_mnemonic (rtx code, int inv)
{
  /* Indexed by the 4-bit condition mask; 0 and 15 have no mnemonic.  */
  static const char *const mnemonic[16] =
    {
      NULL, "o", "h", "nle",
      "l", "nhe", "lh", "ne",
      "e", "nlh", "he", "nl",
      "le", "nh", "no", NULL
    };

  rtx lhs = XEXP (code, 0);
  rtx rhs = XEXP (code, 1);
  bool tests_cc_reg = (GET_CODE (lhs) == REG
                       && REGNO (lhs) == CC_REGNUM
                       && (rhs == const0_rtx
                           || (GET_MODE (lhs) == CCRAWmode
                               && CONST_INT_P (rhs))));

  int mask = (tests_cc_reg
              ? s390_branch_condition_mask (code)
              : s390_compare_and_branch_condition_mask (code));
  gcc_assert (mask >= 0);

  if (inv)
    mask ^= 15;
  gcc_assert (mask >= 1 && mask <= 14);

  return mnemonic[mask];
}
/* Return the part of OP which has a value different from DEF.
   The size of the part is determined by MODE.  Parts are examined
   starting with the least significant one, and the first differing
   part is returned.
   Use this function only if you already know that OP really
   contains such a part.  */
unsigned HOST_WIDE_INT
s390_extract_part (rtx op, machine_mode mode, int def)
{
  const int part_bits = GET_MODE_BITSIZE (mode);
  const int max_parts = HOST_BITS_PER_WIDE_INT / part_bits;
  const unsigned HOST_WIDE_INT part_mask
    = (HOST_WIDE_INT_1U << part_bits) - 1;
  const unsigned HOST_WIDE_INT def_part = def & part_mask;
  unsigned HOST_WIDE_INT value = 0;

  for (int i = 0; i < max_parts; i++)
    {
      /* Load the constant once, then move on to the next higher part.  */
      value = (i == 0) ? UINTVAL (op) : value >> part_bits;

      unsigned HOST_WIDE_INT piece = value & part_mask;
      if (piece != def_part)
        return piece;
    }

  gcc_unreachable ();
}
/* If OP is an integer constant of mode MODE with exactly one
   part of mode PART_MODE unequal to DEF, return the number of that
   part.  Otherwise, return -1.  The returned number counts parts
   from the most significant end, i.e. the least significant part has
   the highest number.  */
int
s390_single_part (rtx op,
                  machine_mode mode,
                  machine_mode part_mode,
                  int def)
{
  const int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
  const int part_bits = GET_MODE_BITSIZE (part_mode);
  const unsigned HOST_WIDE_INT part_mask
    = (HOST_WIDE_INT_1U << part_bits) - 1;
  unsigned HOST_WIDE_INT value = 0;
  int found = -1;

  if (!CONST_INT_P (op))
    return -1;

  for (int i = 0; i < n_parts; i++)
    {
      value = (i == 0) ? UINTVAL (op) : value >> part_bits;

      if ((value & part_mask) == (def & part_mask))
        continue;

      /* A second differing part disqualifies OP.  */
      if (found != -1)
        return -1;
      found = i;
    }

  if (found == -1)
    return -1;

  return n_parts - 1 - found;
}
/* Return true if IN contains a contiguous bitfield in the lower SIZE
bits and no other bits are set in (the lower SIZE bits of) IN.
PSTART and PEND can be used to obtain the start and end
position (inclusive) of the bitfield relative to 64
bits. *PSTART / *PEND gives the position of the first/last bit
of the bitfield counting from the highest order bit starting
with zero. */
bool
s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
int *pstart, int *pend)
{
int start;
int end = -1;
int lowbit = HOST_BITS_PER_WIDE_INT - 1;
int highbit = HOST_BITS_PER_WIDE_INT - size;
unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
gcc_assert (!!pstart == !!pend);
for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
if (end == -1)
{
/* Look for the rightmost bit of a contiguous range of ones. */
if (bitmask & in)
/* Found it. */
end = start;
}
else
{
/* Look for the firt zero bit after the range of ones. */
if (! (bitmask & in))
/* Found it. */
break;
}
/* We're one past the last one-bit. */
start++;
if (end == -1)
/* No one bits found. */
return false;
if (start > highbit)
{
unsigned HOST_WIDE_INT mask;
/* Calculate a mask for all bits beyond the contiguous bits. */
mask = ((~HOST_WIDE_INT_0U >> highbit)
& (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
if (mask & in)
/* There are more bits set beyond the first range of one bits. */
return false;
}
if (pstart)
{
*pstart = start;
*pend = end;
}
return true;
}
/* Same as s390_contiguous_bitmask_nowrap_p but, if WRAP_P is true,
   also returns true if ~IN contains a contiguous bitfield, i.e. if IN
   is a bitmask that wraps around.

   When a wraparound occurs *START is greater than *END (in
   non-null pointers), and the uppermost (64 - SIZE) bits are thus
   part of the range.  If WRAP_P is false, no wraparound is
   tested.  A value whose lower SIZE bits are all zero is never
   accepted.  START and END must either both be null or both be
   non-null.  */
bool
s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
                           int size, int *start, int *end)
{
  const int bs = HOST_BITS_PER_WIDE_INT;

  gcc_assert (!!start == !!end);

  /* This cannot be expressed as a contiguous bitmask.  Exit early because
     the second call of s390_contiguous_bitmask_nowrap_p would accept this
     as a valid bitmask.  */
  if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
    return false;

  if (s390_contiguous_bitmask_nowrap_p (in, size, start, end))
    return true;

  if (!wrap_p)
    return false;

  if (!s390_contiguous_bitmask_nowrap_p (~in, size, start, end))
    return false;

  if (start)
    {
      /* The range found describes the zero bits of IN; the one bits
         start right after it and end right before it, modulo BS.  */
      const int zeros_first = *start;
      const int zeros_last = *end;

      gcc_assert (zeros_first >= 1);
      *start = ((zeros_last + 1) & (bs - 1));
      *end = ((zeros_first - 1 + bs) & (bs - 1));
    }

  return true;
}
/* Return true if OP contains the same contiguous bitfield in *all*
   its elements.  OP is first viewed in the related integer vector
   mode; it then has to be a duplicate of a single CONST_INT element
   of at most 64 bits which s390_contiguous_bitmask_p accepts with
   wraparound allowed.

   START and END can be used to obtain the start and end position of
   the bitfield; they must either both be null or both be non-null.
   The positions delivered by s390_contiguous_bitmask_p are reduced by
   HOST_BITS_PER_WIDE_INT minus the element size, which makes them
   relative to the element rather than to 64 bits.  */
bool
s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
{
  rtx elt;

  /* Handle floats by bitcasting them to ints.  */
  op = gen_lowpart (related_int_vector_mode (GET_MODE (op)).require (), op);

  gcc_assert (!!start == !!end);

  if (!const_vec_duplicate_p (op, &elt)
      || !CONST_INT_P (elt))
    return false;

  const int size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));

  /* We cannot deal with V1TI/V1TF.  This would require a vgmq.  */
  if (size > 64)
    return false;

  if (!s390_contiguous_bitmask_p (UINTVAL (elt), true, size, start, end))
    return false;

  if (start)
    {
      const int unused_bits = HOST_BITS_PER_WIDE_INT - size;

      *start -= unused_bits;
      *end -= unused_bits;
    }

  return true;
}
/* Return true if OP is a CONST_VECTOR of CONST_INT elements consisting
   only of byte chunks being either 0 or 0xff.  If MASK is !=NULL a
   byte mask is generated which is appropriate for the vector generate
   byte mask instruction: byte J (counting from the least significant
   byte) of element I sets mask bit (NUNIT - 1 - I) * UNIT_SIZE + J
   when it is 0xff.  */
bool
s390_bytemask_vector_p (rtx op, unsigned *mask)
{
  if (!VECTOR_MODE_P (GET_MODE (op))
      || GET_CODE (op) != CONST_VECTOR
      || !CONST_INT_P (XVECEXP (op, 0, 0)))
    return false;

  const int nunit = GET_MODE_NUNITS (GET_MODE (op));
  const int unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
  unsigned result = 0;

  for (int i = 0; i < nunit; i++)
    {
      rtx elt = XVECEXP (op, 0, i);

      if (!CONST_INT_P (elt))
        return false;

      unsigned HOST_WIDE_INT bits = UINTVAL (elt);

      for (int j = 0; j < unit_size; j++, bits >>= BITS_PER_UNIT)
        {
          unsigned HOST_WIDE_INT byte = bits & 0xff;

          if (byte != 0 && byte != 0xff)
            return false;

          /* The byte is all zeros or all ones, so its lowest bit tells
             which.  */
          result |= (bits & 1) << ((nunit - 1 - i) * unit_size + j);
        }
    }

  if (mask != NULL)
    *mask = result;

  return true;
}
/* Return true if rotating by ROTL and then ANDing with CONTIG yields
   the same result as shifting and then ANDing, i.e. if CONTIG does
   not overlap the (rotated) bit 0/bit 63 gap.  A negative ROTL
   denotes a rotate to the right.  CONTIG must be a contiguous,
   non-wrapping bitmask within BITSIZE bits; this is asserted.  */
bool
s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
{
  int start, end;

  /* Keep the call outside of gcc_assert so that START and END are
     always computed.  */
  bool ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
  gcc_assert (ok);

  if (rotl < 0)
    {
      /* Express the rotate right by -ROTL in BITSIZE mode as a
	 rotate left in DImode.  */
      int di_rotl = -rotl + (64 - bitsize);

      return start >= di_rotl;
    }

  return 64 - end >= rotl;
}
/* Return true if a double-word move in mode MODE from SRC to DST can
   (and should) be split into two single-word moves, with subword
   FIRST_SUBWORD being moved first.  */
bool
s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
{
  /* Floating point registers cannot be split ...  */
  if (FP_REG_P (src) || FP_REG_P (dst))
    return false;

  /* ... and neither can vector registers.  */
  if (VECTOR_REG_P (src) || VECTOR_REG_P (dst))
    return false;

  /* A memory operand has to be offsettable in order to be split.  */
  if (GET_CODE (src) == MEM && !offsettable_memref_p (src))
    return false;
  if (GET_CODE (dst) == MEM && !offsettable_memref_p (dst))
    return false;

  /* Only a register destination can clobber something the second
     move still needs.  */
  if (!register_operand (dst, mode))
    return true;

  /* The subword written first must not overlap anything mentioned in
     SRC, otherwise the second move would read a clobbered value.  */
  rtx first_dst = operand_subword (dst, first_subword, 0, mode);
  return !reg_overlap_mentioned_p (first_dst, src);
}
/* Return true if the SIZE byte memory blocks starting at MEM1 and
   MEM2 have to be treated as overlapping and false otherwise.

   True is returned if MEM1 or MEM2 is not a MEM, or if the
   difference of the two addresses folds to a constant which is zero
   or whose magnitude is smaller than SIZE.  False is returned if
   SIZE is zero or if the address difference cannot be folded to a
   constant.  */
bool
s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
{
  if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
    return true;

  if (size == 0)
    return false;

  rtx addr1 = XEXP (mem1, 0);
  rtx addr2 = XEXP (mem2, 0);
  rtx addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);

  /* This overlapping check is used by peepholes merging memory block
     operations.  Overlapping operations would otherwise be recognized
     by the S/390 hardware and would fall back to a slower
     implementation.  Allowing overlapping operations would lead to
     slow code but not to wrong code.  Therefore we are somewhat
     optimistic if we cannot prove that the memory blocks are
     overlapping.  That's why we return false here although this may
     accept operations on overlapping memory areas.  */
  if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
    return false;

  HOST_WIDE_INT delta = INTVAL (addr_delta);
  if (delta == 0)
    return true;

  /* Compute the distance between the two addresses in unsigned
     arithmetic.  Negating DELTA as a signed value would overflow,
     and hence be undefined, for the most negative HOST_WIDE_INT.  */
  unsigned HOST_WIDE_INT distance
    = (delta < 0
       ? -(unsigned HOST_WIDE_INT) delta
       : (unsigned HOST_WIDE_INT) delta);

  /* A non-zero distance can only be smaller than a positive SIZE.
     Checking the sign first keeps a negative SIZE from turning into
     a huge unsigned value.  */
  return size > 0 && distance < (unsigned HOST_WIDE_INT) size;
}
/* Return true if it can be proven that the address of memory
   reference MEM2 is exactly the address of memory reference MEM1
   plus DELTA.  Return false if this cannot be shown, which includes
   the case that MEM1 or MEM2 is not a MEM.  */
bool
s390_offset_p (rtx mem1, rtx mem2, rtx delta)
{
  if (GET_CODE (mem1) == MEM && GET_CODE (mem2) == MEM)
    {
      /* Try to fold ADDR2 - ADDR1; a null result means the
	 difference could not be simplified.  */
      rtx diff = simplify_binary_operation (MINUS, Pmode,
					    XEXP (mem2, 0), XEXP (mem1, 0));

      return diff && rtx_equal_p (diff, delta);
    }

  return false;
}
/* Expand logical operator CODE in mode MODE with operands OPERANDS. */
void
s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
rtx *operands)
{
machine_mode wmode = mode;
rtx dst = operands[0<