blob: f2525d51a5d3c547b525244c4624045f22738c5e [file] [log] [blame]
/* Subroutines used for code generation on the Tilera TILEPro.
Copyright (C) 2011-2021 Free Software Foundation, Inc.
Contributed by Walter Lee (walt@tilera.com)
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your
option) any later version.
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "df.h"
#include "memmodel.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic.h"
#include "output.h"
#include "insn-attr.h"
#include "alias.h"
#include "explow.h"
#include "calls.h"
#include "varasm.h"
#include "expr.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "tm-constrs.h"
#include "dwarf2.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimplify.h"
#include "tilepro-builtins.h"
#include "tilepro-multiply.h"
#include "builtins.h"
/* This file should be included last. */
#include "target-def.h"
/* Cached SYMBOL_REF for the GOT.  Starts out NULL; tilepro_got_symbol
   creates it on first use.  */
static GTY(()) rtx g_got_symbol = NULL;
/* Flag communicated from TARGET_PRINT_OPERAND to
   TARGET_PRINT_OPERAND_ADDRESS: reports whether we're printing out
   the first address fragment of a POST_INC or POST_DEC memory
   reference.  */
static bool output_memory_autoinc_first;
/* Option handling */
/* Implement TARGET_OPTION_OVERRIDE.  The only adjustment made here is
   to turn on flag_resched_modulo_sched whenever modulo scheduling is
   requested, because the regular scheduler is still relied upon for
   bundling.  */
static void
tilepro_option_override (void)
{
  /* Nothing to adjust unless modulo scheduling was asked for.  */
  if (!flag_modulo_sched)
    return;

  flag_resched_modulo_sched = 1;
}
/* Implement TARGET_SCALAR_MODE_SUPPORTED_P.  Return true for the
   integer modes QImode, HImode, SImode and DImode and for the
   floating-point modes SFmode and DFmode; false for every other
   scalar MODE.  */
static bool
tilepro_scalar_mode_supported_p (scalar_mode mode)
{
  switch (mode)
    {
    /* Integer modes.  */
    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    /* Floating-point modes.  */
    case E_SFmode:
    case E_DFmode:
      return true;

    default:
      return false;
    }
}
/* Implement TARGET_VECTOR_MODE_SUPPORTED_P.  Only V4QImode and
   V2HImode are accepted.  */
static bool
tile_vector_mode_supported_p (machine_mode mode)
{
  if (mode == V4QImode)
    return true;

  return mode == V2HImode;
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  Both MODE and X are
   ignored: the answer is unconditionally true.  */
static bool
tilepro_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED,
				rtx x ATTRIBUTE_UNUSED)
{
  const bool cannot_force = true;

  return cannot_force;
}
/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL.  A sibcall is accepted
   exactly when DECL is non-null; EXP is not examined.
   NOTE(review): a null DECL presumably denotes an indirect call --
   confirm against the hook documentation.  */
static bool
tilepro_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  if (decl == NULL)
    return false;

  return true;
}
/* Implement TARGET_PASS_BY_REFERENCE.  An argument is passed by
   reference when it has a type whose TYPE_SIZE is present but is not
   an INTEGER_CST, i.e. variable sized types.  Arguments without a
   type, or whose type has no TYPE_SIZE, are not passed by
   reference.  */
static bool
tilepro_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  if (!arg.type)
    return false;

  tree size = TYPE_SIZE (arg.type);
  if (!size)
    return false;

  return TREE_CODE (size) != INTEGER_CST;
}
/* Implement TARGET_RETURN_IN_MEMORY.  A value of type TYPE is
   returned in memory unless its size in bytes lies in the range
   [0, TILEPRO_NUM_RETURN_REGS * UNITS_PER_WORD].  FNDECL is
   ignored.  */
static bool
tilepro_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
  bool fits_in_return_regs
    = IN_RANGE (int_size_in_bytes (type),
		0, TILEPRO_NUM_RETURN_REGS * UNITS_PER_WORD);

  return !fits_in_return_regs;
}
/* Implement TARGET_FUNCTION_ARG_BOUNDARY.  Start from the alignment
   of TYPE when one is given, otherwise from the alignment of MODE,
   then raise the result to at least PARM_BOUNDARY and cap it at
   STACK_BOUNDARY.  */
static unsigned int
tilepro_function_arg_boundary (machine_mode mode, const_tree type)
{
  unsigned int align;

  if (type)
    align = TYPE_ALIGN (type);
  else
    align = GET_MODE_ALIGNMENT (mode);

  /* Apply the lower bound first, then the upper bound.  */
  align = align < PARM_BOUNDARY ? PARM_BOUNDARY : align;
  align = align > STACK_BOUNDARY ? STACK_BOUNDARY : align;

  return align;
}
/* Implement TARGET_FUNCTION_ARG.  Return the register in which ARG is
   passed, or NULL_RTX when it does not go in registers.  CUM_V holds
   the number of the next free argument register; it is only read
   here, never updated.  */
static rtx
tilepro_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS next_reg = *get_cumulative_args (cum_v);
  int nbytes = arg.promoted_size_in_bytes ();

  /* All argument registers are already taken.  */
  if (next_reg >= TILEPRO_NUM_ARG_REGS)
    return NULL_RTX;

  /* An argument with doubleword alignment starts at an even
     register number.  */
  if (tilepro_function_arg_boundary (arg.mode, arg.type) > BITS_PER_WORD)
    next_reg += next_reg & 1;

  /* The ABI does not allow parameters to be passed partially in reg
     and partially in stack.  */
  int nwords = (nbytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  if (next_reg + nwords > TILEPRO_NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, next_reg);
}
/* Implement TARGET_FUNCTION_ARG_ADVANCE.  Update the register counter
   behind CUM_V so that it points past ARG.  */
static void
tilepro_function_arg_advance (cumulative_args_t cum_v,
			      const function_arg_info &arg)
{
  CUMULATIVE_ARGS *cum_p = get_cumulative_args (cum_v);
  int nbytes = arg.promoted_size_in_bytes ();
  int nwords = (nbytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
  CUMULATIVE_ARGS pos = *cum_p;

  /* An argument with doubleword alignment starts at an even
     position.  */
  if (tilepro_function_arg_boundary (arg.mode, arg.type) > BITS_PER_WORD)
    pos += pos & 1;

  /* If the argument would straddle the end of the argument
     registers, skip the remaining registers entirely.  */
  if (pos < TILEPRO_NUM_ARG_REGS && pos + nwords > TILEPRO_NUM_ARG_REGS)
    pos = TILEPRO_NUM_ARG_REGS;

  *cum_p = pos + nwords;
}
/* Implement TARGET_FUNCTION_VALUE.  The value of type VALTYPE is
   returned in register 0, in the mode that promote_function_mode
   selects for a return value of FN_DECL_OR_TYPE.  OUTGOING is
   ignored.  */
static rtx
tilepro_function_value (const_tree valtype, const_tree fn_decl_or_type,
			bool outgoing ATTRIBUTE_UNUSED)
{
  int unsignedp = TYPE_UNSIGNED (valtype);
  machine_mode promoted
    = promote_function_mode (valtype, TYPE_MODE (valtype), &unsignedp,
			     fn_decl_or_type, 1);

  return gen_rtx_REG (promoted, 0);
}
/* Implement TARGET_LIBCALL_VALUE.  A libcall result of mode MODE
   lives in register 0; FUN is ignored.  */
static rtx
tilepro_libcall_value (machine_mode mode,
		       const_rtx fun ATTRIBUTE_UNUSED)
{
  rtx value_reg = gen_rtx_REG (mode, 0);

  return value_reg;
}
/* Implement FUNCTION_VALUE_REGNO_P.  Return true when REGNO is below
   TILEPRO_NUM_RETURN_REGS.  */
static bool
tilepro_function_value_regno_p (const unsigned int regno)
{
  if (regno >= TILEPRO_NUM_RETURN_REGS)
    return false;

  return true;
}
/* Implement TARGET_BUILD_BUILTIN_VA_LIST.  Build and lay out a record
   type named "__va_list_tag" with two pointer fields, "__args"
   followed by "__skip", and return that record.
   NOTE(review): an earlier comment here stated that the correct type
   is an array type of one element, but the bare record is what gets
   returned -- confirm which is intended.  */
static tree
tilepro_build_builtin_va_list (void)
{
  tree record = lang_hooks.types.make_type (RECORD_TYPE);
  tree type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
			       get_identifier ("__va_list_tag"), record);
  tree args_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
				get_identifier ("__args"), ptr_type_node);
  tree skip_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
				get_identifier ("__skip"), ptr_type_node);

  /* Both fields belong to the record.  */
  DECL_FIELD_CONTEXT (args_field) = record;
  DECL_FIELD_CONTEXT (skip_field) = record;

  /* Name the record and chain its fields: __args, then __skip.  */
  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = args_field;
  TREE_CHAIN (args_field) = skip_field;

  /* We know this type is being padded and we want it to be.  It is
     an internal type, so hide the padding warning from the user while
     laying it out.  */
  bool saved_warn_padded = warn_padded;
  warn_padded = false;
  layout_type (record);
  warn_padded = saved_warn_padded;

  return record;
}
/* Implement TARGET_EXPAND_BUILTIN_VA_START.  Initialize the two
   fields of VALIST: __args is set from the incoming-argument pointer
   offset by the number of words already consumed by named arguments,
   and __skip is set to the incoming-argument pointer minus
   STACK_POINTER_OFFSET.  NEXTARG is ignored.  */
static void
tilepro_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
{
  tree args_field = TYPE_FIELDS (TREE_TYPE (valist));
  tree skip_field = TREE_CHAIN (args_field);
  tree args_ref = build3 (COMPONENT_REF, TREE_TYPE (args_field), valist,
			  args_field, NULL_TREE);
  tree skip_ref = build3 (COMPONENT_REF, TREE_TYPE (skip_field), valist,
			  skip_field, NULL_TREE);

  /* Find the __args area.  */
  tree args_val = make_tree (TREE_TYPE (args_ref), virtual_incoming_args_rtx);
  args_val
    = fold_build_pointer_plus_hwi (args_val,
				   UNITS_PER_WORD *
				   (crtl->args.info - TILEPRO_NUM_ARG_REGS));
  if (crtl->args.pretend_args_size > 0)
    args_val = fold_build_pointer_plus_hwi (args_val, -STACK_POINTER_OFFSET);

  tree args_store = build2 (MODIFY_EXPR, TREE_TYPE (args_ref), args_ref,
			    args_val);
  TREE_SIDE_EFFECTS (args_store) = 1;
  expand_expr (args_store, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the __skip area.  */
  tree skip_val = make_tree (TREE_TYPE (skip_ref), virtual_incoming_args_rtx);
  skip_val = fold_build_pointer_plus_hwi (skip_val, -STACK_POINTER_OFFSET);

  tree skip_store = build2 (MODIFY_EXPR, TREE_TYPE (skip_ref), skip_ref,
			    skip_val);
  TREE_SIDE_EFFECTS (skip_store) = 1;
  expand_expr (skip_store, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
/* Implement TARGET_SETUP_INCOMING_VARARGS.  Store in *PRETEND_ARGS
   the number of bytes occupied by the argument registers left over
   after the named arguments (0 if none are left), and, unless NO_RTL
   is set, emit code that dumps those registers to memory just below
   the incoming-argument pointer.  */
static void
tilepro_setup_incoming_varargs (cumulative_args_t cum,
				const function_arg_info &arg,
				int *pretend_args, int no_rtl)
{
  CUMULATIVE_ARGS local_cum = *get_cumulative_args (cum);

  /* The caller has advanced CUM up to, but not beyond, the last named
     argument.  Advance a local copy of CUM past the last "real" named
     argument, to find out how many registers are left over.  */
  targetm.calls.function_arg_advance (pack_cumulative_args (&local_cum), arg);

  int first_reg = local_cum;

  /* No argument registers are left over.  */
  if (local_cum >= TILEPRO_NUM_ARG_REGS)
    {
      *pretend_args = 0;
      return;
    }

  *pretend_args = UNITS_PER_WORD * (TILEPRO_NUM_ARG_REGS - first_reg);
  if (no_rtl)
    return;

  /* Spill the remaining argument registers into a block that ends
     STACK_POINTER_OFFSET bytes below the incoming-argument
     pointer.  */
  alias_set_type set = get_varargs_alias_set ();
  rtx save_area
    = gen_rtx_MEM (BLKmode,
		   plus_constant (Pmode, virtual_incoming_args_rtx,
				  -STACK_POINTER_OFFSET -
				  UNITS_PER_WORD *
				  (TILEPRO_NUM_ARG_REGS - first_reg)));
  MEM_NOTRAP_P (save_area) = 1;
  set_mem_alias_set (save_area, set);
  move_block_from_reg (first_reg, save_area,
		       TILEPRO_NUM_ARG_REGS - first_reg);
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  Gimplify va_arg by updating
   the va_list structure VALIST as required to retrieve an argument of
   type TYPE, and returning that argument.  Statements are appended to
   *PRE_P; POST_P is unused.

     ret = va_arg(VALIST, TYPE);

   generates code equivalent to:

     paddedsize = sizeof(TYPE) rounded up to a multiple of
                  UNITS_PER_WORD;
     if (alignment of TYPE > PARM_BOUNDARY)
       VALIST.__args += VALIST.__args & (PARM_BOUNDARY / 8);
     if ((VALIST.__args + paddedsize > VALIST.__skip)
         & (VALIST.__args <= VALIST.__skip))
       addr = VALIST.__skip + STACK_POINTER_OFFSET;
     else
       addr = VALIST.__args;
     VALIST.__args = addr + paddedsize;
     ret = *(TYPE *)addr;

   When TYPE is passed by reference, TYPE above stands for a pointer
   to the original type and the result is dereferenced once more.  */
static tree
tilepro_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
gimple_seq * post_p ATTRIBUTE_UNUSED)
{
tree f_args, f_skip;
tree args, skip;
HOST_WIDE_INT size, rsize;
tree addr, tmp;
bool pass_by_reference_p;
/* Build references to the __args and __skip fields of VALIST; __args
   is the first field of the va_list record and __skip follows it.  */
f_args = TYPE_FIELDS (va_list_type_node);
f_skip = TREE_CHAIN (f_args);
args =
build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
skip =
build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
addr = create_tmp_var (ptr_type_node, "va_arg");
/* If an object is dynamically sized, a pointer to it is passed
   instead of the object itself.  */
pass_by_reference_p = pass_va_arg_by_reference (type);
if (pass_by_reference_p)
type = build_pointer_type (type);
/* RSIZE is SIZE rounded up to a whole number of words.  */
size = int_size_in_bytes (type);
rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
/* If the alignment of the type is greater than the default for a
   parameter, align to STACK_BOUNDARY.  */
if (TYPE_ALIGN (type) > PARM_BOUNDARY)
{
/* Assert the only case we generate code for: when
   stack boundary = 2 * parm boundary.  */
gcc_assert (STACK_BOUNDARY == PARM_BOUNDARY * 2);
/* __args += __args & (PARM_BOUNDARY / 8).  */
tmp = build2 (BIT_AND_EXPR, sizetype,
fold_convert (sizetype, unshare_expr (args)),
size_int (PARM_BOUNDARY / 8));
tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node,
unshare_expr (args), tmp);
gimplify_assign (unshare_expr (args), tmp, pre_p);
}
/* Build conditional expression to calculate addr.  The expression
   will be gimplified later.  The condition is true when the padded
   argument starting at __args would extend past __skip while __args
   itself is not beyond __skip; in that case the argument is fetched
   from __skip + STACK_POINTER_OFFSET instead of from __args.  */
tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
unshare_expr (skip)));
tmp = build3 (COND_EXPR, ptr_type_node, tmp,
build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
size_int (STACK_POINTER_OFFSET)),
unshare_expr (args));
gimplify_assign (addr, tmp, pre_p);
/* Update VALIST.__args.  */
tmp = fold_build_pointer_plus_hwi (addr, rsize);
gimplify_assign (unshare_expr (args), tmp, pre_p);
/* Dereference ADDR as a pointer to TYPE; a by-reference argument
   needs one extra dereference to reach the object itself.  */
addr = fold_convert (build_pointer_type (type), addr);
if (pass_by_reference_p)
addr = build_va_arg_indirect_ref (addr);
return build_va_arg_indirect_ref (addr);
}
/* Implement TARGET_RTX_COSTS.  Store in *TOTAL a cost for the rtx X,
   which appears as operand OPNO of an expression whose code is
   OUTER_CODE.  MODE and SPEED are only passed on to rtx_cost when
   operands are costed here.

   Return true when *TOTAL is the final cost of X.  Return false when
   the generic code should go on to cost X (and its operands); for
   MULT, extensions and division/modulus *TOTAL has been set before
   returning false.  */
static bool
tilepro_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno,
		   int *total, bool speed)
{
  int code = GET_CODE (x);

  switch (code)
    {
    case CONST_INT:
      /* A constant satisfying constraint I is free.  A valid add
	 operand used directly under a PLUS costs 1.  Anything else
	 costs one insn if a single move can materialize it, and two
	 insns otherwise.  */
      if (satisfies_constraint_I (x))
	*total = 0;
      else if (outer_code == PLUS && add_operand (x, VOIDmode))
	/* Slightly penalize large constants even though we can add
	   them in one instruction, because it forces the use of
	   2-wide bundling mode.  */
	*total = 1;
      else if (move_operand (x, SImode))
	/* We can materialize in one move.  */
	*total = COSTS_N_INSNS (1);
      else
	/* We can materialize in two moves.  */
	*total = COSTS_N_INSNS (2);
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (2);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (4);
      return true;

    case HIGH:
      *total = 0;
      return true;

    case MEM:
      /* If outer-code was a sign or zero extension, a cost of
	 COSTS_N_INSNS (1) was already added in, so account for
	 that.  */
      if (outer_code == ZERO_EXTEND || outer_code == SIGN_EXTEND)
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (2);
      return true;

    case PLUS:
      /* Convey that s[123]a are efficient: (plus (mult A C) B), with
	 C accepted by cint_248_operand, costs one insn on top of the
	 costs of A and B.  */
      if (GET_CODE (XEXP (x, 0)) == MULT
	  && cint_248_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
	{
	  *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
			      (enum rtx_code) outer_code, opno, speed)
		    + rtx_cost (XEXP (x, 1), mode,
				(enum rtx_code) outer_code, opno, speed)
		    + COSTS_N_INSNS (1));
	  return true;
	}
      return false;

    case MULT:
      *total = COSTS_N_INSNS (2);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* An extension directly under a MULT is free.  */
      if (outer_code == MULT)
	*total = 0;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      /* These are handled by software and are very expensive.  */
      *total = COSTS_N_INSNS (100);
      return false;

    case UNSPEC:
    case UNSPEC_VOLATILE:
      {
	/* The unspec number selects the cost: first by the latency
	   ranges delimited by the TILEPRO_LAST_LATENCY_* constants,
	   then individually for the numbers in between.  */
	int num = XINT (x, 1);

	if (num <= TILEPRO_LAST_LATENCY_1_INSN)
	  *total = COSTS_N_INSNS (1);
	else if (num <= TILEPRO_LAST_LATENCY_2_INSN)
	  *total = COSTS_N_INSNS (2);
	else if (num > TILEPRO_LAST_LATENCY_INSN)
	  {
	    if (outer_code == PLUS)
	      *total = 0;
	    else
	      *total = COSTS_N_INSNS (1);
	  }
	else
	  {
	    switch (num)
	      {
	      case UNSPEC_BLOCKAGE:
	      case UNSPEC_NETWORK_BARRIER:
		*total = 0;
		break;

	      case UNSPEC_LNK_AND_LABEL:
	      case UNSPEC_MF:
	      case UNSPEC_NETWORK_RECEIVE:
	      case UNSPEC_NETWORK_SEND:
	      case UNSPEC_TLS_GD_ADD:
		*total = COSTS_N_INSNS (1);
		break;

	      case UNSPEC_TLS_IE_LOAD:
		*total = COSTS_N_INSNS (2);
		break;

	      case UNSPEC_SP_SET:
		*total = COSTS_N_INSNS (3);
		break;

	      case UNSPEC_SP_TEST:
		*total = COSTS_N_INSNS (4);
		break;

	      case UNSPEC_LATENCY_L2:
		*total = COSTS_N_INSNS (8);
		break;

	      case UNSPEC_TLS_GD_CALL:
		*total = COSTS_N_INSNS (30);
		break;

	      case UNSPEC_LATENCY_MISS:
		*total = COSTS_N_INSNS (80);
		break;

	      default:
		*total = COSTS_N_INSNS (1);
		break;
	      }
	  }
	return true;
      }

    default:
      return false;
    }
}
/* Return an SImode integer rtx for VAL, built with gen_int_mode.  */
static rtx
gen_int_si (HOST_WIDE_INT val)
{
  rtx si_const = gen_int_mode (val, SImode);

  return si_const;
}
/* Return a register to hold a partial result, to enable CSE: a fresh
   pseudo of mode MODE when pseudos can still be created, otherwise
   DEFAULT_REG.  */
static rtx
create_temp_reg_if_possible (machine_mode mode, rtx default_reg)
{
  if (can_create_pseudo_p ())
    return gen_reg_rtx (mode);

  return default_reg;
}
/* Allocate the machine-specific per-function data: a zero-initialized,
   garbage-collected machine_function.  Installed as
   init_machine_status by tilepro_init_expanders.  */
static struct machine_function *
tilepro_init_machine_status (void)
{
  struct machine_function *machine = ggc_cleared_alloc<machine_function> ();

  return machine;
}
/* Do anything needed before RTL is emitted for each function.
   Installs the machine-status allocator and, when compiling PIC code
   for a function that already has machine data, sets up that
   function's text-label symbol, text-label register and GOT
   register.  */
void
tilepro_init_expanders (void)
{
  /* Counter used to number the internal "L_PICLNK" labels.  */
  static int label_num = 0;
  char label_buf[32];
  struct machine_function *machine;

  /* Arrange to initialize and mark the machine per-function
     status.  */
  init_machine_status = tilepro_init_machine_status;

  if (!cfun || !cfun->machine || !flag_pic)
    return;

  machine = cfun->machine;

  ASM_GENERATE_INTERNAL_LABEL (label_buf, "L_PICLNK", label_num++);

  machine->text_label_symbol
    = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (label_buf));
  machine->text_label_rtx
    = gen_rtx_REG (Pmode, TILEPRO_PIC_TEXT_LABEL_REGNUM);
  machine->got_rtx = gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM);
  machine->calls_tls_get_addr = false;
}
/* Return true if X contains a thread-local symbol.  X is examined
   directly, or, when it has the form (const (plus A ...)), its first
   PLUS operand A is examined instead.  */
static bool
tilepro_tls_referenced_p (rtx x)
{
  if (GET_CODE (x) == CONST)
    {
      rtx inner = XEXP (x, 0);

      if (GET_CODE (inner) == PLUS)
	x = XEXP (inner, 0);
    }

  /* That's all we handle in tilepro_legitimize_tls_address for
     now.  */
  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  if (SYMBOL_REF_TLS_MODEL (x))
    return true;

  return false;
}
/* Return true if X requires a scratch register, which is the case
   when X has the form (const (plus BASE OFFSET)) with BASE a
   SYMBOL_REF or LABEL_REF and OFFSET a CONST_INT.  It is given that
   flag_pic is on and that X satisfies CONSTANT_P.  */
static int
tilepro_pic_address_needs_scratch (rtx x)
{
  if (GET_CODE (x) != CONST)
    return false;

  rtx sum = XEXP (x, 0);
  if (GET_CODE (sum) != PLUS)
    return false;

  rtx base = XEXP (sum, 0);
  if (GET_CODE (base) != SYMBOL_REF && GET_CODE (base) != LABEL_REF)
    return false;

  if (!CONST_INT_P (XEXP (sum, 1)))
    return false;

  return true;
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.  This is all constants for
   which we are willing to load the value into a register via a move
   pattern.  TLS cannot be treated as a constant because it can
   include a function call, so a CONST or SYMBOL_REF is rejected when
   it references a thread-local symbol; everything else is accepted.
   MODE is ignored.  */
static bool
tilepro_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  enum rtx_code code = GET_CODE (x);

  if (code == CONST || code == SYMBOL_REF)
    return !tilepro_tls_referenced_p (x);

  return true;
}
/* Return true if the constant value X is a legitimate general operand
   when generating PIC code: it must neither need a scratch register
   nor reference a thread-local symbol.  It is given that flag_pic is
   on and that X satisfies CONSTANT_P.  */
bool
tilepro_legitimate_pic_operand_p (rtx x)
{
  return !(tilepro_pic_address_needs_scratch (x)
	   || tilepro_tls_referenced_p (x));
}
/* Return true if the rtx X can be used as an address operand.  After
   looking through one SUBREG, X must be a REG, or a POST_INC,
   POST_DEC or POST_MODIFY of a REG whose own mode is no wider than a
   word.  A POST_MODIFY must additionally have the form
   (post_modify R (plus R C)) with C satisfying constraint I.  When
   STRICT, the register must also pass REGNO_OK_FOR_BASE_P.  */
static bool
tilepro_legitimate_address_p (machine_mode ARG_UNUSED (mode), rtx x,
			      bool strict)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);

  const enum rtx_code code = GET_CODE (x);
  const bool post_inc_dec = code == POST_INC || code == POST_DEC;

  if (post_inc_dec || code == POST_MODIFY)
    {
      if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
	return false;

      if (code == POST_MODIFY)
	{
	  rtx step = XEXP (x, 1);

	  if (GET_CODE (step) != PLUS)
	    return false;
	  if (!rtx_equal_p (XEXP (x, 0), XEXP (step, 0)))
	    return false;
	  if (!satisfies_constraint_I (XEXP (step, 1)))
	    return false;
	}

      /* Continue with the register being modified.  */
      x = XEXP (x, 0);
    }
  else if (code != REG)
    return false;

  /* Check if x is a valid reg.  */
  if (!REG_P (x))
    return false;

  if (!strict)
    return true;

  return REGNO_OK_FOR_BASE_P (REGNO (x));
}
/* Return the current function's text-label SYMBOL_REF, as stored in
   its machine data.  */
static rtx
tilepro_text_label_symbol (void)
{
  struct machine_function *machine = cfun->machine;

  return machine->text_label_symbol;
}
/* Return the register storing the value of the text label, as stored
   in the current function's machine data.  */
static rtx
tilepro_text_label_rtx (void)
{
  struct machine_function *machine = cfun->machine;

  return machine->text_label_rtx;
}
/* Return the register storing the value of the global offset table,
   as stored in the current function's machine data.  */
static rtx
tilepro_got_rtx (void)
{
  struct machine_function *machine = cfun->machine;

  return machine->got_rtx;
}
/* Return the SYMBOL_REF for _GLOBAL_OFFSET_TABLE_.  It is created on
   the first call and cached in g_got_symbol afterwards.  */
static rtx
tilepro_got_symbol (void)
{
  if (g_got_symbol != NULL)
    return g_got_symbol;

  g_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
  return g_got_symbol;
}
/* Return a reference to the got to be used by tls references.  For
   PIC code this is the function's GOT register, and the function is
   marked as using the PIC offset table.  Otherwise the GOT symbol is
   loaded into a fresh pseudo, which is returned.  */
static rtx
tilepro_tls_got (void)
{
  if (!flag_pic)
    {
      rtx got_reg = gen_reg_rtx (Pmode);

      emit_move_insn (got_reg, tilepro_got_symbol ());
      return got_reg;
    }

  crtl->uses_pic_offset_table = 1;
  return tilepro_got_rtx ();
}
/* ADDR contains a thread-local SYMBOL_REF.  Generate code to compute
   this (thread-local) address and return an rtx holding it.

   ADDR must be either a SYMBOL_REF, handled according to its TLS
   model, or a CONST wrapping (plus BASE OFFSET), in which case BASE
   is legitimized recursively and OFFSET is added to the result.  Any
   other form aborts.  New pseudos are created, so this must only be
   called while can_create_pseudo_p () holds.  */
static rtx
tilepro_legitimize_tls_address (rtx addr)
{
rtx ret;
gcc_assert (can_create_pseudo_p ());
if (GET_CODE (addr) == SYMBOL_REF)
switch (SYMBOL_REF_TLS_MODEL (addr))
{
case TLS_MODEL_GLOBAL_DYNAMIC:
case TLS_MODEL_LOCAL_DYNAMIC:
{
/* Both dynamic models use the same sequence: add the high and low
   parts to the GOT reference, pass the result in register 0 to the
   tls_gd_call insn, copy register 0 back out and finish with
   tls_gd_add.  */
rtx r0, temp1, temp2, temp3, got;
rtx_insn *last;
ret = gen_reg_rtx (Pmode);
r0 = gen_rtx_REG (Pmode, 0);
temp1 = gen_reg_rtx (Pmode);
temp2 = gen_reg_rtx (Pmode);
temp3 = gen_reg_rtx (Pmode);
got = tilepro_tls_got ();
emit_insn (gen_tls_gd_addhi (temp1, got, addr));
emit_insn (gen_tls_gd_addlo (temp2, temp1, addr));
emit_move_insn (r0, temp2);
emit_insn (gen_tls_gd_call (addr));
emit_move_insn (temp3, r0);
last = emit_insn (gen_tls_gd_add (ret, temp3, addr));
/* Record that RET equals ADDR.  */
set_unique_reg_note (last, REG_EQUAL, copy_rtx (addr));
break;
}
case TLS_MODEL_INITIAL_EXEC:
{
/* Add the high and low parts to the GOT reference, load through the
   result with tls_ie_load, and add the loaded value to the thread
   pointer register.  */
rtx temp1, temp2, temp3, got;
rtx_insn *last;
ret = gen_reg_rtx (Pmode);
temp1 = gen_reg_rtx (Pmode);
temp2 = gen_reg_rtx (Pmode);
temp3 = gen_reg_rtx (Pmode);
got = tilepro_tls_got ();
emit_insn (gen_tls_ie_addhi (temp1, got, addr));
emit_insn (gen_tls_ie_addlo (temp2, temp1, addr));
emit_insn (gen_tls_ie_load (temp3, temp2, addr));
last =
emit_move_insn(ret,
gen_rtx_PLUS (Pmode,
gen_rtx_REG (Pmode,
THREAD_POINTER_REGNUM),
temp3));
set_unique_reg_note (last, REG_EQUAL, copy_rtx (addr));
break;
}
case TLS_MODEL_LOCAL_EXEC:
{
/* Add the high and low parts directly to the thread pointer
   register; no GOT reference is needed.  */
rtx temp1;
rtx_insn *last;
ret = gen_reg_rtx (Pmode);
temp1 = gen_reg_rtx (Pmode);
emit_insn (gen_tls_le_addhi (temp1,
gen_rtx_REG (Pmode,
THREAD_POINTER_REGNUM),
addr));
last = emit_insn (gen_tls_le_addlo (ret, temp1, addr));
set_unique_reg_note (last, REG_EQUAL, copy_rtx (addr));
break;
}
default:
gcc_unreachable ();
}
else if (GET_CODE (addr) == CONST)
{
/* (const (plus BASE OFFSET)): legitimize BASE recursively, then add
   OFFSET and force the sum into a register.  */
rtx base, offset;
gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
base = tilepro_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
offset = XEXP (XEXP (addr, 0), 1);
base = force_operand (base, NULL_RTX);
ret = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, offset));
}
else
gcc_unreachable ();
return ret;
}
/* Legitimize PIC addresses.  If the address is already
   position-independent, we return ORIG.  Newly generated
   position-independent addresses go into a reg.  This is REG if
   nonzero, otherwise we allocate register(s) as necessary.

   Three forms of ORIG are rewritten: a SYMBOL_REF (pc-relative when
   the symbol is local, otherwise loaded through the GOT), a CONST
   (its PLUS operands are legitimized recursively and re-added), and a
   LABEL_REF (pc-relative).  Anything else is returned unchanged.
   MODE is unused.  */
static rtx
tilepro_legitimize_pic_address (rtx orig,
machine_mode mode ATTRIBUTE_UNUSED,
rtx reg)
{
if (GET_CODE (orig) == SYMBOL_REF)
{
rtx address, pic_ref;
if (reg == 0)
{
gcc_assert (can_create_pseudo_p ());
reg = gen_reg_rtx (Pmode);
}
if (SYMBOL_REF_LOCAL_P (orig))
{
/* If not during reload, allocate another temp reg here for
   loading in the address, so that these instructions can be
   optimized properly.  */
rtx temp_reg = create_temp_reg_if_possible (Pmode, reg);
rtx text_label_symbol = tilepro_text_label_symbol ();
rtx text_label_rtx = tilepro_text_label_rtx ();
/* Compute the address relative to the text label with an
   addli_pcrel / auli_pcrel pair.  */
emit_insn (gen_addli_pcrel (temp_reg, text_label_rtx, orig,
text_label_symbol));
emit_insn (gen_auli_pcrel (temp_reg, temp_reg, orig,
text_label_symbol));
/* Note: this is conservative.  We use the text_label but we
   don't use the pic_offset_table.  However, in some cases
   we may need the pic_offset_table (see
   tilepro_fixup_pcrel_references).  */
crtl->uses_pic_offset_table = 1;
address = temp_reg;
emit_move_insn (reg, address);
return reg;
}
else
{
/* If not during reload, allocate another temp reg here for
   loading in the address, so that these instructions can be
   optimized properly.  */
rtx temp_reg = create_temp_reg_if_possible (Pmode, reg);
gcc_assert (flag_pic);
/* flag_pic == 1 uses the single add_got16 insn; any other nonzero
   value uses the addhi_got32 / addlo_got32 pair.  */
if (flag_pic == 1)
{
emit_insn (gen_add_got16 (temp_reg,
tilepro_got_rtx (), orig));
}
else
{
rtx temp_reg2 = create_temp_reg_if_possible (Pmode, reg);
emit_insn (gen_addhi_got32 (temp_reg2,
tilepro_got_rtx (), orig));
emit_insn (gen_addlo_got32 (temp_reg, temp_reg2, orig));
}
/* TEMP_REG now addresses the GOT slot; load the symbol's address
   from it.  */
address = temp_reg;
pic_ref = gen_const_mem (Pmode, address);
crtl->uses_pic_offset_table = 1;
emit_move_insn (reg, pic_ref);
/* The following put a REG_EQUAL note on this insn, so that
   it can be optimized by loop.  But it causes the label to
   be optimized away.  */
/* set_unique_reg_note (insn, REG_EQUAL, orig); */
return reg;
}
}
else if (GET_CODE (orig) == CONST)
{
rtx base, offset;
/* A sum whose first operand is the GOT register is returned
   unchanged.  */
if (GET_CODE (XEXP (orig, 0)) == PLUS
&& XEXP (XEXP (orig, 0), 0) == tilepro_got_rtx ())
return orig;
if (reg == 0)
{
gcc_assert (can_create_pseudo_p ());
reg = gen_reg_rtx (Pmode);
}
gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
/* Legitimize both operands of the PLUS.  REG is offered to the second
   call only if the first did not return it.  */
base = tilepro_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode,
reg);
offset =
tilepro_legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
base == reg ? 0 : reg);
if (CONST_INT_P (offset))
{
if (can_create_pseudo_p ())
offset = force_reg (Pmode, offset);
else
/* If we reach here, then something is seriously
   wrong.  */
gcc_unreachable ();
}
if (can_create_pseudo_p ())
return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, offset));
else
gcc_unreachable ();
}
else if (GET_CODE (orig) == LABEL_REF)
{
rtx address, temp_reg;
rtx text_label_symbol;
rtx text_label_rtx;
if (reg == 0)
{
gcc_assert (can_create_pseudo_p ());
reg = gen_reg_rtx (Pmode);
}
/* If not during reload, allocate another temp reg here for
   loading in the address, so that these instructions can be
   optimized properly.  */
temp_reg = create_temp_reg_if_possible (Pmode, reg);
text_label_symbol = tilepro_text_label_symbol ();
text_label_rtx = tilepro_text_label_rtx ();
/* Same pc-relative sequence as for a local SYMBOL_REF.  */
emit_insn (gen_addli_pcrel (temp_reg, text_label_rtx, orig,
text_label_symbol));
emit_insn (gen_auli_pcrel (temp_reg, temp_reg, orig,
text_label_symbol));
/* Note: this is conservative.  We use the text_label but we
   don't use the pic_offset_table.  */
crtl->uses_pic_offset_table = 1;
address = temp_reg;
emit_move_insn (reg, address);
return reg;
}
return orig;
}
/* Implement TARGET_LEGITIMIZE_ADDRESS.  A symbolic operand X that
   references a thread-local symbol, accessed in a MODE no wider than
   a word, is handled by tilepro_legitimize_tls_address.  Otherwise X
   goes through tilepro_legitimize_pic_address when generating PIC
   code, and is returned unchanged when not.  OLDX is ignored.  */
static rtx
tilepro_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			    machine_mode mode)
{
  bool word_sized_tls_ref = (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
			     && symbolic_operand (x, Pmode)
			     && tilepro_tls_referenced_p (x));

  if (word_sized_tls_ref)
    return tilepro_legitimize_tls_address (x);

  if (!flag_pic)
    return x;

  return tilepro_legitimize_pic_address (x, mode, 0);
}
/* Implement TARGET_DELEGITIMIZE_ADDRESS.  After applying
   delegitimize_mem_from_attrs, strip a (const (unspec [SYM ...] N))
   wrapper and return SYM when N is one of the pc-relative, GOT or
   TLS GD/IE unspecs listed below.  Anything else is returned as
   is.  */
static rtx
tilepro_delegitimize_address (rtx x)
{
  x = delegitimize_mem_from_attrs (x);

  if (GET_CODE (x) != CONST)
    return x;

  rtx inner = XEXP (x, 0);
  if (GET_CODE (inner) != UNSPEC)
    return x;

  switch (XINT (inner, 1))
    {
    case UNSPEC_PCREL_SYM:
    case UNSPEC_GOT16_SYM:
    case UNSPEC_GOT32_SYM:
    case UNSPEC_TLS_GD:
    case UNSPEC_TLS_IE:
      /* The wrapped symbol is the first element of the unspec.  */
      return XVECEXP (inner, 0, 0);

    default:
      return x;
    }
}
/* Emit code to load the PIC register. */
static void
load_pic_register (bool delay_pic_helper ATTRIBUTE_UNUSED)
{
int orig_flag_pic = flag_pic;
rtx got_symbol = tilepro_got_symbol ();
rtx text_label_symbol = tilepro_text_label_symbol ();
rtx text_label_rtx = tilepro_text_label_rtx ();
flag_pic = 0;
emit_insn (gen_insn_lnk_and_label (text_label_rtx, text_label_symbol));
emit_insn (gen_addli_pcrel (tilepro_got_rtx (),
text_label_rtx, got_symbol, text_label_symbol));
emit_insn (gen_auli_pcrel (tilepro_got_rtx (),
tilepro_got_rtx (),
got_symbol, text_label_symbol));
flag_pic = orig_flag_pic;
/* Need to emit this whether or not we obey regdecls, since
setjmp/longjmp can cause life info to screw up. ??? In the case
where we don't obey regdecls, this is not sufficient since we may
not fall out the bottom. */
emit_use (tilepro_got_rtx ());
}
/* Return the simd variant of the constant NUM of mode MODE, by
   replicating it to fill an integer of mode SImode.  NUM is first
   truncated to fit in MODE.  NUM must be a CONST_INT.  For QImode the
   low byte is replicated four times and for HImode the low halfword
   twice; SImode and DImode values are passed to gen_int_si as they
   are.  Any other MODE aborts.  */
rtx
tilepro_simd_int (rtx num, machine_mode mode)
{
  gcc_assert (CONST_INT_P (num));

  HOST_WIDE_INT value = INTVAL (num);

  switch (mode)
    {
    case E_QImode:
      value = 0x01010101 * (value & 0x000000FF);
      break;

    case E_HImode:
      value = 0x00010001 * (value & 0x0000FFFF);
      break;

    case E_SImode:
    case E_DImode:
      /* Nothing to replicate.  */
      break;

    default:
      gcc_unreachable ();
    }

  return gen_int_si (value);
}
/* Split one or more DImode RTL references into pairs of SImode
   references.  The RTL can be REG, offsettable MEM, integer constant,
   or CONST_DOUBLE.  OPERANDS is an array of NUM DImode rtxes to
   split; LO_HALF and HI_HALF are output arrays that parallel
   OPERANDS, receiving the SImode pieces at byte offsets 0 and 4
   respectively.  */
void
split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
{
  while (num--)
    {
      rtx op = operands[num];

      /* simplify_subreg refuses to split volatile memory addresses,
	 but we still have to handle them, so MEMs are split with
	 adjust_address instead.  */
      if (MEM_P (op))
	{
	  lo_half[num] = adjust_address (op, SImode, 0);
	  hi_half[num] = adjust_address (op, SImode, 4);
	  continue;
	}

      /* An operand without a mode of its own is treated as
	 DImode.  */
      machine_mode outer_mode = GET_MODE (op);
      if (outer_mode == VOIDmode)
	outer_mode = DImode;

      lo_half[num] = simplify_gen_subreg (SImode, op, outer_mode, 0);
      hi_half[num] = simplify_gen_subreg (SImode, op, outer_mode, 4);
    }
}
/* Return true iff VAL can be moved into a register in one
   instruction, and if it can, emit the code to move the constant
   into DEST_REG.  VAL is first truncated to SImode.

   If THREE_WIDE_ONLY is true, this insists on an instruction that
   works in a bundle containing three instructions: only values that
   survive truncation to QImode are accepted.  Otherwise values
   satisfying one of the constraints J, K, N or P are accepted as
   well.  */
static bool
expand_set_cint32_one_inst (rtx dest_reg,
			    HOST_WIDE_INT val, bool three_wide_only)
{
  rtx imm;

  val = trunc_int_for_mode (val, SImode);

  if (val != trunc_int_for_mode (val, QImode))
    {
      /* Not a QImode-sized value; the wider immediate forms are only
	 allowed when the caller does not insist on three-wide.  */
      if (three_wide_only)
	return false;

      imm = GEN_INT (val);
      if (!satisfies_constraint_J (imm)
	  && !satisfies_constraint_K (imm)
	  && !satisfies_constraint_N (imm)
	  && !satisfies_constraint_P (imm))
	return false;
    }
  else
    imm = GEN_INT (val);

  /* Success!  */
  emit_move_insn (dest_reg, imm);
  return true;
}
/* Implement SImode rotatert: rotate the low 32 bits of N right by
   COUNT and return the 32-bit result.  A COUNT of zero returns the
   low 32 bits of N unchanged.  */
static HOST_WIDE_INT
rotate_right (HOST_WIDE_INT n, int count)
{
  const unsigned HOST_WIDE_INT low32_mask = 0xFFFFFFFF;
  unsigned HOST_WIDE_INT bits = n & low32_mask;

  /* A zero count is handled separately so that no shift by 32 is
     performed.  */
  if (count != 0)
    bits = ((bits >> count) | (bits << (32 - count))) & low32_mask;

  return bits;
}
/* Return true iff N contains exactly one contiguous sequence of 1
   bits, possibly wrapping around from high bits to low bits.  Only
   the low 32 bits of N are examined (via rotate_right).  On success,
   the position of the first bit of the run is stored in *FIRST_BIT
   and the position of its last bit in *LAST_BIT; either pointer may
   be NULL.  */
bool
tilepro_bitfield_operand_p (HOST_WIDE_INT n, int *first_bit, int *last_bit)
{
  if (n == 0)
    return false;

  for (int shift = 0; shift < 32; shift++)
    {
      unsigned HOST_WIDE_INT rot = rotate_right (n, shift);

      /* We need bit 0 set and the value to be a power of two minus
	 one, i.e. only consecutive 1 bits starting from bit 0.  */
      if ((rot & 1) == 0 || (rot & (rot + 1)) != 0)
	continue;

      if (first_bit != NULL)
	*first_bit = shift;
      if (last_bit != NULL)
	*last_bit = (shift + exact_log2 (rot ^ (rot >> 1))) & 31;
      return true;
    }

  return false;
}
/* Create code to move the CONST_INT value in SRC_VAL to DEST_REG.
   The value is truncated to SImode.  Strategies are tried in this
   order: a single instruction; a one-instruction constant followed by
   a left shift, a logical right shift or a rotate (first restricted
   to three-wide instructions, then allowing two-wide ones); and
   finally a move of the upper 16 bits followed by an add of the lower
   16 bits.  */
static void
expand_set_cint32 (rtx dest_reg, rtx src_val)
{
HOST_WIDE_INT val;
int leading_zeroes, trailing_zeroes;
int lower, upper;
int three_wide_only;
rtx temp;
gcc_assert (CONST_INT_P (src_val));
val = trunc_int_for_mode (INTVAL (src_val), SImode);
/* See if we can generate the constant in one instruction.  */
if (expand_set_cint32_one_inst (dest_reg, val, false))
return;
/* Create a temporary variable to hold a partial result, to enable
   CSE.  */
temp = create_temp_reg_if_possible (SImode, dest_reg);
/* Zero bit counts of the 32-bit value, used as shift amounts
   below.  */
leading_zeroes = 31 - floor_log2 (val & 0xFFFFFFFF);
trailing_zeroes = exact_log2 (val & -val);
/* Split VAL into a sign-extended low halfword LOWER and the matching
   high halfword UPPER for the fallback sequence.  */
lower = trunc_int_for_mode (val, HImode);
upper = trunc_int_for_mode ((val - lower) >> 16, HImode);
/* First try all three-wide instructions that generate a constant
   (i.e. movei) followed by various shifts and rotates.  If none of
   those work, try various two-wide ways of generating a constant
   followed by various shifts and rotates.  */
for (three_wide_only = 1; three_wide_only >= 0; three_wide_only--)
{
int count;
if (expand_set_cint32_one_inst (temp, val >> trailing_zeroes,
three_wide_only))
{
/* 0xFFFFA500 becomes:
   movei temp, 0xFFFFFFA5
   shli dest, temp, 8 */
emit_move_insn (dest_reg,
gen_rtx_ASHIFT (SImode, temp,
GEN_INT (trailing_zeroes)));
return;
}
if (expand_set_cint32_one_inst (temp, val << leading_zeroes,
three_wide_only))
{
/* 0x7FFFFFFF becomes:
   movei temp, -2
   shri dest, temp, 1 */
emit_move_insn (dest_reg,
gen_rtx_LSHIFTRT (SImode, temp,
GEN_INT (leading_zeroes)));
return;
}
/* Try rotating a one-instruction immediate, since rotate is
   3-wide.  */
for (count = 1; count < 32; count++)
{
HOST_WIDE_INT r = rotate_right (val, count);
if (expand_set_cint32_one_inst (temp, r, three_wide_only))
{
/* 0xFFA5FFFF becomes:
   movei temp, 0xFFFFFFA5
   rli dest, temp, 16 */
emit_move_insn (dest_reg,
gen_rtx_ROTATE (SImode, temp, GEN_INT (count)));
return;
}
}
if (lower == trunc_int_for_mode (lower, QImode))
{
/* We failed to use two 3-wide instructions, but the low 16
   bits are a small number so just use a 2-wide + 3-wide
   auli + addi pair rather than anything more exotic.
   0x12340056 becomes:
   auli temp, zero, 0x1234
   addi dest, temp, 0x56 */
break;
}
}
/* Fallback case: use a auli + addli/addi pair.  */
emit_move_insn (temp, GEN_INT (upper << 16));
emit_move_insn (dest_reg, (gen_rtx_PLUS (SImode, temp, GEN_INT (lower))));
}
/* Emit code that loads the 32-bit constant OP1 into the register
   OP0.  The move expander guarantees that the constant cannot be
   loaded with a single insn by the time we get here.  */
void
tilepro_expand_set_const32 (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  rtx high_part;

  if (CONST_INT_P (op1))
    {
      /* TODO: I don't know if we want to split large constants now,
         or wait until later (with a define_split).

         Does splitting early help CSE?  Does it harm other
         optimizations that might fold loads?  */
      expand_set_cint32 (op0, op1);
      return;
    }

  /* Not an integer, so it is a symbol: emit it in the traditional
     way, as a HIGH part followed by a LO_SUM.  */
  high_part = create_temp_reg_if_possible (mode, op0);
  emit_move_insn (high_part, gen_rtx_HIGH (mode, op1));
  emit_move_insn (op0, gen_rtx_LO_SUM (mode, high_part, op1));
}
/* Expand a move instruction. Return true if all work is done.
MODE is the mode of the move; OPERANDS[0] is the destination and
OPERANDS[1] the source. When false is returned, OPERANDS may have
been rewritten in place (validated memory destination, source forced
into a register, legitimized TLS or PIC address). */
bool
tilepro_expand_mov (machine_mode mode, rtx *operands)
{
/* Handle sets of MEM first. */
if (MEM_P (operands[0]))
{
if (can_create_pseudo_p ())
operands[0] = validize_mem (operands[0]);
/* A register or zero source can be stored as is. */
if (reg_or_0_operand (operands[1], mode))
return false;
if (!reload_in_progress)
operands[1] = force_reg (mode, operands[1]);
}
/* Fixup TLS cases. */
if (CONSTANT_P (operands[1]) && tilepro_tls_referenced_p (operands[1]))
{
operands[1] = tilepro_legitimize_tls_address (operands[1]);
return false;
}
/* Fixup PIC cases. */
if (flag_pic && CONSTANT_P (operands[1]))
{
if (tilepro_pic_address_needs_scratch (operands[1]))
operands[1] = tilepro_legitimize_pic_address (operands[1], mode, 0);
if (symbolic_operand (operands[1], mode))
{
/* During reload the destination itself is passed as the third
argument; otherwise NULL_RTX is passed. */
operands[1] = tilepro_legitimize_pic_address (operands[1],
mode,
(reload_in_progress ?
operands[0] :
NULL_RTX));
return false;
}
}
/* Fixup for UNSPEC addresses. */
/* A source of the form (high (const (unspec ...))) is expanded here
into the dedicated insn for that unspec kind, with const0_rtx as
the base operand. */
if (flag_pic
&& GET_CODE (operands[1]) == HIGH
&& GET_CODE (XEXP (operands[1], 0)) == CONST
&& GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == UNSPEC)
{
rtx unspec = XEXP (XEXP (operands[1], 0), 0);
int unspec_num = XINT (unspec, 1);
if (unspec_num == UNSPEC_PCREL_SYM)
{
emit_insn (gen_auli_pcrel (operands[0], const0_rtx,
XVECEXP (unspec, 0, 0),
XVECEXP (unspec, 0, 1)));
return true;
}
else if (flag_pic == 2 && unspec_num == UNSPEC_GOT32_SYM)
{
emit_insn (gen_addhi_got32 (operands[0], const0_rtx,
XVECEXP (unspec, 0, 0)));
return true;
}
else if (HAVE_AS_TLS && unspec_num == UNSPEC_TLS_GD)
{
emit_insn (gen_tls_gd_addhi (operands[0], const0_rtx,
XVECEXP (unspec, 0, 0)));
return true;
}
else if (HAVE_AS_TLS && unspec_num == UNSPEC_TLS_IE)
{
emit_insn (gen_tls_ie_addhi (operands[0], const0_rtx,
XVECEXP (unspec, 0, 0)));
return true;
}
else if (HAVE_AS_TLS && unspec_num == UNSPEC_TLS_LE)
{
emit_insn (gen_tls_le_addhi (operands[0], const0_rtx,
XVECEXP (unspec, 0, 0)));
return true;
}
}
/* Accept non-constants and valid constants unmodified. */
if (!CONSTANT_P (operands[1])
|| GET_CODE (operands[1]) == HIGH || move_operand (operands[1], mode))
return false;
/* Split large integers. */
/* Only modes of at most 4 bytes are split here; for wider modes the
move is left to the caller. */
if (GET_MODE_SIZE (mode) <= 4)
{
tilepro_expand_set_const32 (operands[0], operands[1]);
return true;
}
return false;
}
/* Expand the "insv" pattern.  OPERANDS[0] is the destination,
   OPERANDS[1] the field width, OPERANDS[2] the first bit of the
   field, and OPERANDS[3] the value to insert.  The width and first
   bit are read with INTVAL, so they must be CONST_INTs.  */
void
tilepro_expand_insv (rtx operands[4])
{
  rtx start_rtx = operands[2];
  rtx value = operands[3];
  HOST_WIDE_INT start = INTVAL (start_rtx);
  HOST_WIDE_INT width = INTVAL (operands[1]);
  HOST_WIDE_INT end = start + width - 1;

  /* Move the bits to be inserted up to the position of the field.  */
  if (start != 0)
    {
      if (!CONST_INT_P (value))
        {
          /* A run-time value needs an explicit shift insn.  */
          rtx shifted = gen_reg_rtx (SImode);
          emit_insn (gen_ashlsi3 (shifted, value, start_rtx));
          value = shifted;
        }
      else
        {
          /* A constant can be shifted into mm position right here.  */
          value = gen_int_si (INTVAL (value) << start);
        }
    }

  /* An 'mm' insn merges the positioned bits into the destination.  */
  emit_insn (gen_insn_mm (operands[0], value, operands[0], start_rtx,
                          GEN_INT (end)));
}
/* Expand unaligned loads.
DEST_REG receives the loaded value, MEM is the memory reference,
BITSIZE is the number of bits to load and BIT_OFFSET is the offset
of the field from MEM, in bits. SIGN selects a sign-extending
result (true) or a zero-extending one (false).
A byte-aligned two-byte field is assembled from two single-byte
loads. Anything else is loaded as the two aligned words that cover
the field, combined with a dword_align insn and, unless BITSIZE is
32, followed by a bit-field extraction. */
void
tilepro_expand_unaligned_load (rtx dest_reg, rtx mem, HOST_WIDE_INT bitsize,
HOST_WIDE_INT bit_offset, bool sign)
{
machine_mode mode;
rtx addr_lo, addr_hi;
rtx mem_lo, mem_hi, hi;
rtx mema, wide_result;
int last_byte_offset;
HOST_WIDE_INT byte_offset = bit_offset / BITS_PER_UNIT;
mode = GET_MODE (dest_reg);
hi = gen_reg_rtx (mode);
if (bitsize == 2 * BITS_PER_UNIT && (bit_offset % BITS_PER_UNIT) == 0)
{
rtx lo;
/* When just loading a two byte value, we can load the two bytes
individually and combine them efficiently. */
/* The byte at BYTE_OFFSET supplies the low 8 bits of the result
and the byte after it supplies the next 8 bits. */
mem_lo = adjust_address (mem, QImode, byte_offset);
mem_hi = adjust_address (mem, QImode, byte_offset + 1);
lo = gen_reg_rtx (mode);
emit_insn (gen_zero_extendqisi2 (lo, mem_lo));
if (sign)
{
rtx tmp = gen_reg_rtx (mode);
/* Do a signed load of the second byte then shift and OR it
in. */
emit_insn (gen_extendqisi2 (gen_lowpart (SImode, hi), mem_hi));
emit_insn (gen_ashlsi3 (gen_lowpart (SImode, tmp),
gen_lowpart (SImode, hi), GEN_INT (8)));
emit_insn (gen_iorsi3 (gen_lowpart (SImode, dest_reg),
gen_lowpart (SImode, lo),
gen_lowpart (SImode, tmp)));
}
else
{
/* Do two unsigned loads and use intlb to interleave
them. */
emit_insn (gen_zero_extendqisi2 (gen_lowpart (SImode, hi), mem_hi));
emit_insn (gen_insn_intlb (gen_lowpart (SImode, dest_reg),
gen_lowpart (SImode, hi),
gen_lowpart (SImode, lo)));
}
return;
}
mema = XEXP (mem, 0);
/* AND addresses cannot be in any alias set, since they may
implicitly alias surrounding code. Ideally we'd have some alias
set that covered all types except those with alignment 8 or
higher. */
/* The low word is read from the address of the first byte of the
field, rounded down to a multiple of 4 by the AND with -4. */
addr_lo = force_reg (Pmode, plus_constant (Pmode, mema, byte_offset));
mem_lo = change_address (mem, mode,
gen_rtx_AND (Pmode, addr_lo, GEN_INT (-4)));
set_mem_alias_set (mem_lo, 0);
/* Load the high word at an address that will not fault if the low
address is aligned and at the very end of a page. */
/* That address is the one of the last byte of the field, rounded
down to a multiple of 4. */
last_byte_offset = (bit_offset + bitsize - 1) / BITS_PER_UNIT;
addr_hi = force_reg (Pmode, plus_constant (Pmode, mema, last_byte_offset));
mem_hi = change_address (mem, mode,
gen_rtx_AND (Pmode, addr_hi, GEN_INT (-4)));
set_mem_alias_set (mem_hi, 0);
if (bitsize == 32)
{
/* A full word is combined directly in DEST_REG. ADDR_LO is still
needed by the dword_align insn after DEST_REG has been written,
so it is made safe from DEST_REG first. */
addr_lo = make_safe_from (addr_lo, dest_reg);
wide_result = dest_reg;
}
else
{
wide_result = gen_reg_rtx (mode);
}
/* Load hi first in case dest_reg is used in mema. */
emit_move_insn (hi, mem_hi);
emit_move_insn (wide_result, mem_lo);
emit_insn (gen_insn_dword_align (gen_lowpart (SImode, wide_result),
gen_lowpart (SImode, wide_result),
gen_lowpart (SImode, hi), addr_lo));
if (bitsize != 32)
{
/* Pick the requested field out of the combined word. !SIGN is
passed as the unsigned flag of extract_bit_field. */
rtx extracted =
extract_bit_field (gen_lowpart (SImode, wide_result),
bitsize, bit_offset % BITS_PER_UNIT,
!sign, gen_lowpart (SImode, dest_reg),
SImode, SImode, false, NULL);
if (extracted != dest_reg)
emit_move_insn (dest_reg, gen_lowpart (SImode, extracted));
}
}
/* Expand unaligned stores.  SRC is written to MEM one byte at a
   time, starting BIT_OFFSET bits into MEM and covering BITSIZE bits.
   Byte number N of the destination receives bits 8*N .. 8*N+7 of
   SRC.  */
static void
tilepro_expand_unaligned_store (rtx mem, rtx src, HOST_WIDE_INT bitsize,
                                HOST_WIDE_INT bit_offset)
{
  const HOST_WIDE_INT first_byte = bit_offset / BITS_PER_UNIT;
  const HOST_WIDE_INT num_bytes = bitsize / BITS_PER_UNIT;
  HOST_WIDE_INT n;

  for (n = 0; n < num_bytes; n++)
    {
      const HOST_WIDE_INT shift = n * BITS_PER_UNIT;
      rtx byte_mem = adjust_address (mem, QImode, first_byte + n);
      rtx byte_val;

      if (shift == 0)
        {
          /* The lowest byte needs no shifting.  */
          byte_val = gen_lowpart (QImode, src);
        }
      else
        {
          /* Bring the wanted byte down to the bottom of the word
             with a logical right shift.  */
          rtx shifted = expand_simple_binop (SImode, LSHIFTRT,
                                             gen_lowpart (SImode, src),
                                             GEN_INT (shift), NULL, 1,
                                             OPTAB_LIB_WIDEN);
          byte_val = gen_lowpart (QImode, shifted);
        }

      emit_move_insn (byte_mem, byte_val);
    }
}
/* Implement the movmisalign patterns.  One of OPERANDS[0] (the
   destination) and OPERANDS[1] (the source) is a memory of mode MODE
   that is not naturally aligned; emit the instructions that load
   from it or store to it.  It is an internal error if neither
   operand is a MEM.  */
void
tilepro_expand_movmisalign (machine_mode mode, rtx *operands)
{
  if (MEM_P (operands[1]))
    {
      /* Misaligned load.  Load straight into the destination when it
         is a register, otherwise go through a fresh pseudo.  */
      rtx dest = (register_operand (operands[0], mode)
                  ? operands[0]
                  : gen_reg_rtx (mode));

      tilepro_expand_unaligned_load (dest, operands[1],
                                     GET_MODE_BITSIZE (mode), 0, true);

      if (dest != operands[0])
        emit_move_insn (operands[0], dest);
      return;
    }

  if (MEM_P (operands[0]))
    {
      /* Misaligned store.  The value stored must be a register or
         zero.  */
      if (!reg_or_0_operand (operands[1], mode))
        operands[1] = force_reg (mode, operands[1]);

      tilepro_expand_unaligned_store (operands[0], operands[1],
                                      GET_MODE_BITSIZE (mode), 0);
      return;
    }

  gcc_unreachable ();
}
/* Implement the addsi3 pattern: OP0 = OP1 + OP2.  Return true if the
   addition was expanded here as two adds, false if nothing was
   emitted and the caller should handle the pattern itself.  */
bool
tilepro_expand_addsi (rtx op0, rtx op1, rtx op2)
{
  rtx partial;
  HOST_WIDE_INT addend;
  HOST_WIDE_INT upper;
  HOST_WIDE_INT lower;

  /* Anything that only takes one instruction is skipped.  Beyond
     that we can only optimize integer constants (it should be
     impossible to get here with any other type, but it is harmless
     to check).  */
  if (add_operand (op2, SImode) || !CONST_INT_P (op2))
    return false;

  partial = create_temp_reg_if_possible (SImode, op0);

  /* UPPER is the addend rounded to the nearest multiple of 0x10000,
     so that LOWER, the remainder, fits in a signed 16-bit value.  */
  addend = INTVAL (op2);
  upper = (addend + (addend & 0x8000)) & ~0xffff;
  lower = addend - upper;

  emit_move_insn (partial, gen_rtx_PLUS (SImode, op1, gen_int_si (upper)));
  emit_move_insn (op0, gen_rtx_PLUS (SImode, partial, gen_int_si (lower)));

  return true;
}
/* Implement the allocate_stack pattern (alloca).
OP1 is the number of bytes to allocate and OP0 receives the address
of the newly allocated space. The word stored at the stack pointer
plus UNITS_PER_WORD is read before the stack pointer is lowered and
is written back at the same offset from the new stack pointer. */
void
tilepro_allocate_stack (rtx op0, rtx op1)
{
/* Technically the correct way to initialize chain_loc is with
* gen_frame_mem() instead of gen_rtx_MEM(), but gen_frame_mem()
* sets the alias_set to that of a frame reference. Some of our
* tests rely on some unsafe assumption about when the chaining
* update is done, we need to be conservative about reordering the
* chaining instructions.
*
* NOTE(review): the code below does call gen_frame_mem(), and there
* is no variable named chain_loc (the locations are held in fp_loc),
* so this comment appears to describe an earlier version of the
* code -- confirm which of the two is intended.
*/
rtx fp_addr = gen_reg_rtx (Pmode);
rtx fp_value = gen_reg_rtx (Pmode);
rtx fp_loc;
/* Read the word at sp + UNITS_PER_WORD into FP_VALUE. */
emit_move_insn (fp_addr, gen_rtx_PLUS (Pmode, stack_pointer_rtx,
GEN_INT (UNITS_PER_WORD)));
fp_loc = gen_frame_mem (Pmode, fp_addr);
emit_move_insn (fp_value, fp_loc);
/* Lower the stack pointer by OP1. */
op1 = force_reg (Pmode, op1);
emit_move_insn (stack_pointer_rtx,
gen_rtx_MINUS (Pmode, stack_pointer_rtx, op1));
/* Store the saved word at the same offset from the new stack
pointer. */
emit_move_insn (fp_addr, gen_rtx_PLUS (Pmode, stack_pointer_rtx,
GEN_INT (UNITS_PER_WORD)));
fp_loc = gen_frame_mem (Pmode, fp_addr);
emit_move_insn (fp_loc, fp_value);
/* Hand the address of the dynamically allocated area back in OP0. */
emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
/* Multiplies */
/* Return the insn_code that ENTRY stands for.  ENTRY only stores a
   compressed opcode, which is used as an index into the
   tilepro_multiply_insn_seq_decode_opcode table.  */
static enum insn_code
tilepro_multiply_get_opcode (const struct tilepro_multiply_insn_seq_entry
                             *entry)
{
  const unsigned int slot = entry->compressed_opcode;

  return tilepro_multiply_insn_seq_decode_opcode[slot];
}
/* Return the number of valid entries in the 'op' array of SEQ.  */
static int
tilepro_multiply_get_num_ops (const struct tilepro_multiply_insn_seq *seq)
{
  /* The array either uses all of its allocated slots or is
     terminated by a bogus opcode.  Either way, its length is the
     index of the last valid opcode plus one, so scan backwards for
     the last entry that decodes to a real insn.  */
  int len = tilepro_multiply_insn_seq_MAX_OPERATIONS;

  while (len > 0)
    {
      if (tilepro_multiply_get_opcode (&seq->op[len - 1]) != CODE_FOR_nothing)
        return len;
      len--;
    }

  /* An empty array is not allowed.  */
  gcc_unreachable ();
}
/* We precompute a number of expression trees for multiplying by
   constants.  This generates code for such an expression tree by
   walking through the nodes in the tree (which are conveniently
   pre-linearized) and emitting an instruction for each one.

   RESULT receives the product, SRC is the value being multiplied and
   SEQ is the precomputed instruction sequence.  Every intermediate
   value is placed in a fresh SImode pseudo; only the last
   instruction of the sequence writes RESULT.  */
static void
tilepro_expand_constant_multiply_given_sequence (rtx result, rtx src,
                                                 const struct
                                                 tilepro_multiply_insn_seq
                                                 *seq)
{
  int i;
  int num_ops;

  /* Keep track of the subexpressions computed so far, so later
     instructions can refer to them.  We seed the array with zero and
     the value being multiplied.  */
  int num_subexprs = 2;
  rtx subexprs[tilepro_multiply_insn_seq_MAX_OPERATIONS + 2];
  subexprs[0] = const0_rtx;
  subexprs[1] = src;

  /* Determine how many instructions we are going to generate.  */
  num_ops = tilepro_multiply_get_num_ops (seq);
  gcc_assert (num_ops > 0
              && num_ops <= tilepro_multiply_insn_seq_MAX_OPERATIONS);

  for (i = 0; i < num_ops; i++)
    {
      const struct tilepro_multiply_insn_seq_entry *entry = &seq->op[i];

      /* Figure out where to store the output of this instruction.  */
      const bool is_last_op = (i + 1 == num_ops);
      rtx out = is_last_op ? result : gen_reg_rtx (SImode);

      enum insn_code opcode = tilepro_multiply_get_opcode (entry);

      /* The left operand is an index into SUBEXPRS for every kind of
         instruction, so make sure it refers to a previously computed
         subexpression before it is used below.  */
      gcc_assert (entry->lhs < num_subexprs);

      if (opcode == CODE_FOR_ashlsi3)
        {
          /* Handle shift by immediate.  This is a special case
             because the meaning of the second operand is a constant
             shift count rather than an operand index.  */

          /* Make sure the shift count is in range.  Zero should not
             happen.  */
          const int shift_count = entry->rhs;
          gcc_assert (shift_count > 0 && shift_count < 32);

          /* Emit the actual instruction.  */
          emit_insn (GEN_FCN (opcode)
                     (out, subexprs[entry->lhs],
                      gen_rtx_CONST_INT (SImode, shift_count)));
        }
      else
        {
          /* Handle a normal two-operand instruction, such as add or
             s1a.  */

          /* Make sure we are referring to a previously computed
             subexpression.  */
          gcc_assert (entry->rhs < num_subexprs);

          /* Emit the actual instruction.  */
          emit_insn (GEN_FCN (opcode)
                     (out, subexprs[entry->lhs], subexprs[entry->rhs]));
        }

      /* Record this subexpression for use by later expressions.  */
      subexprs[num_subexprs++] = out;
    }
}
/* bsearch helper function. */
static int
tilepro_compare_multipliers (const void *key, const void *t)
{
return *(const int *) key -
((const struct tilepro_multiply_insn_seq *) t)->multiplier;
}
/* Look MULTIPLIER up in tilepro_multiply_insn_seq_table with a
   binary search.  Return the matching tilepro_multiply_insn_seq, or
   NULL if none exists.  */
static const struct tilepro_multiply_insn_seq *
tilepro_find_multiply_insn_seq_for_constant (int multiplier)
{
  const void *hit = bsearch (&multiplier, tilepro_multiply_insn_seq_table,
                             tilepro_multiply_insn_seq_table_size,
                             sizeof tilepro_multiply_insn_seq_table[0],
                             tilepro_compare_multipliers);

  return (const struct tilepro_multiply_insn_seq *) hit;
}
/* Try to expand a constant multiply in SImode by looking it up in a
   precompiled table.  OP0 is the result operand, OP1 is the source
   operand, and MULTIPLIER is the value of the constant.  Return true
   if it succeeds; nothing is emitted on failure.  */
static bool
tilepro_expand_const_mulsi (rtx op0, rtx op1, int multiplier)
{
  /* See if we have precomputed an efficient way to multiply by this
     constant.  */
  const struct tilepro_multiply_insn_seq *seq
    = tilepro_find_multiply_insn_seq_for_constant (multiplier);

  if (seq == NULL)
    return false;

  tilepro_expand_constant_multiply_given_sequence (op0, op1, seq);
  return true;
}
/* Expand the mulsi pattern: OP0 = OP1 * OP2.  Only a CONST_INT OP2
   is handled here, via the precompiled multiply table.  Return true
   if code was emitted, false if the caller must expand the multiply
   some other way.  */
bool
tilepro_expand_mulsi (rtx op0, rtx op1, rtx op2)
{
  if (!CONST_INT_P (op2))
    return false;

  return tilepro_expand_const_mulsi (op0, op1,
                                     trunc_int_for_mode (INTVAL (op2),
                                                         SImode));
}
/* Expand a high multiply pattern in SImode. RESULT, OP1, OP2 are the
operands, and SIGN is true if it's a signed multiply, and false if
it's an unsigned multiply.
The product is assembled from four partial multiplies. RESULT is
the sum of the fourth partial product, the upper 16 bits of the first
two partial products, and the two carries produced while adding up
the low word. */
static void
tilepro_expand_high_multiply (rtx result, rtx op1, rtx op2, bool sign)
{
rtx tmp0 = gen_reg_rtx (SImode);
rtx tmp1 = gen_reg_rtx (SImode);
rtx tmp2 = gen_reg_rtx (SImode);
rtx tmp3 = gen_reg_rtx (SImode);
rtx tmp4 = gen_reg_rtx (SImode);
rtx tmp5 = gen_reg_rtx (SImode);
rtx tmp6 = gen_reg_rtx (SImode);
rtx tmp7 = gen_reg_rtx (SImode);
rtx tmp8 = gen_reg_rtx (SImode);
rtx tmp9 = gen_reg_rtx (SImode);
rtx tmp10 = gen_reg_rtx (SImode);
rtx tmp11 = gen_reg_rtx (SImode);
rtx tmp12 = gen_reg_rtx (SImode);
rtx tmp13 = gen_reg_rtx (SImode);
rtx result_lo = gen_reg_rtx (SImode);
/* Partial products. NOTE(review): judging by the generator names,
tmp0 and tmp1 are presumably the two high-half times low-half
cross products, tmp2 the low times low product and tmp3 the high
times high product -- confirm against the machine description. */
if (sign)
{
emit_insn (gen_insn_mulhl_su (tmp0, op1, op2));
emit_insn (gen_insn_mulhl_su (tmp1, op2, op1));
emit_insn (gen_insn_mulll_uu (tmp2, op1, op2));
emit_insn (gen_insn_mulhh_ss (tmp3, op1, op2));
}
else
{
emit_insn (gen_insn_mulhl_uu (tmp0, op1, op2));
emit_insn (gen_insn_mulhl_uu (tmp1, op2, op1));
emit_insn (gen_insn_mulll_uu (tmp2, op1, op2));
emit_insn (gen_insn_mulhh_uu (tmp3, op1, op2));
}
/* Add the low 16 bits of tmp0 and tmp1, moved up by 16, to tmp2.
RESULT_LO is only needed to work out the carries. */
emit_move_insn (tmp4, (gen_rtx_ASHIFT (SImode, tmp0, GEN_INT (16))));
emit_move_insn (tmp5, (gen_rtx_ASHIFT (SImode, tmp1, GEN_INT (16))));
emit_move_insn (tmp6, (gen_rtx_PLUS (SImode, tmp4, tmp5)));
emit_move_insn (result_lo, (gen_rtx_PLUS (SImode, tmp2, tmp6)));
/* Each unsigned "sum < addend" test yields the carry out of the
corresponding addition above. */
emit_move_insn (tmp7, gen_rtx_LTU (SImode, tmp6, tmp4));
emit_move_insn (tmp8, gen_rtx_LTU (SImode, result_lo, tmp2));
/* Take the upper 16 bits of tmp0 and tmp1, using an arithmetic shift
for a signed multiply and a logical shift for an unsigned one. */
if (sign)
{
emit_move_insn (tmp9, (gen_rtx_ASHIFTRT (SImode, tmp0, GEN_INT (16))));
emit_move_insn (tmp10, (gen_rtx_ASHIFTRT (SImode, tmp1, GEN_INT (16))));
}
else
{
emit_move_insn (tmp9, (gen_rtx_LSHIFTRT (SImode, tmp0, GEN_INT (16))));
emit_move_insn (tmp10, (gen_rtx_LSHIFTRT (SImode, tmp1, GEN_INT (16))));
}
/* RESULT = tmp3 + tmp7 + tmp8 + tmp9 + tmp10. */
emit_move_insn (tmp11, (gen_rtx_PLUS (SImode, tmp3, tmp7)));
emit_move_insn (tmp12, (gen_rtx_PLUS (SImode, tmp8, tmp9)));
emit_move_insn (tmp13, (gen_rtx_PLUS (SImode, tmp11, tmp12)));
emit_move_insn (result, (gen_rtx_PLUS (SImode, tmp13, tmp10)));
}
/* Implement smulsi3_highpart: the signed variant of the SImode high
   multiply, with result OP0 and operands OP1 and OP2.  */
void
tilepro_expand_smulsi3_highpart (rtx op0, rtx op1, rtx op2)
{
  const bool is_signed = true;

  tilepro_expand_high_multiply (op0, op1, op2, is_signed);
}
/* Implement umulsi3_highpart: the unsigned variant of the SImode
   high multiply, with result OP0 and operands OP1 and OP2.  */
void
tilepro_expand_umulsi3_highpart (rtx op0, rtx op1, rtx op2)
{
  const bool is_signed = false;

  tilepro_expand_high_multiply (op0, op1, op2, is_signed);
}
/* Compare and branches */
/* Helper function to handle DImode for tilepro_emit_setcc_internal.
Emit insns that set RES to the result of comparing the DImode
values OP0 and OP1 with comparison CODE. GE, GT, GEU and GTU are
handled by swapping the operands; any code other than the ten
listed below is an internal error. Always returns true. */
static bool
tilepro_emit_setcc_internal_di (rtx res, enum rtx_code code, rtx op0, rtx op1)
{
rtx operands[2], lo_half[2], hi_half[2];
rtx tmp, tmp0, tmp1, tmp2;
bool swap = false;
/* Reduce the number of cases we need to handle by reversing the
operands. */
switch (code)
{
case EQ:
case NE:
case LE:
case LT:
case LEU:
case LTU:
/* We handle these compares directly. */
break;
case GE:
case GT:
case GEU:
case GTU:
/* Reverse the operands. */
swap = true;
break;
default:
/* We should not have called this with any other code. */
gcc_unreachable ();
}
if (swap)
{
code = swap_condition (code);
tmp = op0, op0 = op1, op1 = tmp;
}
/* Split both operands into SImode halves. The first operand's halves
must be registers or zero; the second operand's halves may also be
CONST_INTs. */
operands[0] = op0;
operands[1] = op1;
split_di (operands, 2, lo_half, hi_half);
if (!reg_or_0_operand (lo_half[0], SImode))
lo_half[0] = force_reg (SImode, lo_half[0]);
if (!reg_or_0_operand (hi_half[0], SImode))
hi_half[0] = force_reg (SImode, hi_half[0]);
if (!CONST_INT_P (lo_half[1]) && !register_operand (lo_half[1], SImode))
lo_half[1] = force_reg (SImode, lo_half[1]);
if (!CONST_INT_P (hi_half[1]) && !register_operand (hi_half[1], SImode))
hi_half[1] = force_reg (SImode, hi_half[1]);
tmp0 = gen_reg_rtx (SImode);
tmp1 = gen_reg_rtx (SImode);
tmp2 = gen_reg_rtx (SImode);
/* For the ordered comparisons below, tmp0 compares the high halves,
tmp1 tests the high halves for equality and tmp2 compares the low
halves unsigned. NOTE(review): the mvnz insn presumably selects
tmp2 when tmp1 is nonzero and tmp0 otherwise -- confirm against
the machine description. */
switch (code)
{
case EQ:
/* Equal iff both halves are equal. */
emit_insn (gen_insn_seq (tmp0, lo_half[0], lo_half[1]));
emit_insn (gen_insn_seq (tmp1, hi_half[0], hi_half[1]));
emit_insn (gen_andsi3 (res, tmp0, tmp1));
return true;
case NE:
/* Different iff either half differs. */
emit_insn (gen_insn_sne (tmp0, lo_half[0], lo_half[1]));
emit_insn (gen_insn_sne (tmp1, hi_half[0], hi_half[1]));
emit_insn (gen_iorsi3 (res, tmp0, tmp1));
return true;
case LE:
emit_insn (gen_insn_slte (tmp0, hi_half[0], hi_half[1]));
emit_insn (gen_insn_seq (tmp1, hi_half[0], hi_half[1]));
emit_insn (gen_insn_slte_u (tmp2, lo_half[0], lo_half[1]));
emit_insn (gen_insn_mvnz (res, tmp0, tmp1, tmp2));
return true;
case LT:
if (operands[1] == const0_rtx)
{
/* A signed compare against zero only needs the sign bit, which is
bit 31 of the high half. */
emit_insn (gen_lshrsi3 (res, hi_half[0], GEN_INT (31)));
return true;
}
else
{
emit_insn (gen_insn_slt (tmp0, hi_half[0], hi_half[1]));
emit_insn (gen_insn_seq (tmp1, hi_half[0], hi_half[1]));
emit_insn (gen_insn_slt_u (tmp2, lo_half[0], lo_half[1]));
emit_insn (gen_insn_mvnz (res, tmp0, tmp1, tmp2));
}
return true;
case LEU:
emit_insn (gen_insn_slte_u (tmp0, hi_half[0], hi_half[1]));
emit_insn (gen_insn_seq (tmp1, hi_half[0], hi_half[1]));
emit_insn (gen_insn_slte_u (tmp2, lo_half[0], lo_half[1]));
emit_insn (gen_insn_mvnz (res, tmp0, tmp1, tmp2));
return true;
case LTU:
emit_insn (gen_insn_slt_u (tmp0, hi_half[0], hi_half[1]));
emit_insn (gen_insn_seq (tmp1, hi_half[0], hi_half[1]));
emit_insn (gen_insn_slt_u (tmp2, lo_half[0], lo_half[1]));
emit_insn (gen_insn_mvnz (res, tmp0, tmp1, tmp2));
return true;
default:
gcc_unreachable ();
}
/* Not reached: every case above returns or aborts. */
return false;
}
/* Emit insns that set RES to the result of comparing OP0 and OP1
   with comparison CODE in mode CMP_MODE.  Certain simplifications
   are done to make invalid setcc operations valid: comparisons we
   do not have are obtained by swapping the operands, and operands
   are forced into registers where needed.  DImode compares are
   handed to tilepro_emit_setcc_internal_di.  Returns true; an
   unsupported CODE is an internal error.  */
static bool
tilepro_emit_setcc_internal (rtx res, enum rtx_code code, rtx op0, rtx op1,
                             machine_mode cmp_mode)
{
  if (cmp_mode == DImode)
    return tilepro_emit_setcc_internal_di (res, code, op0, op1);

  /* The general case: fold the comparison code to the types of
     compares that we have.  */
  switch (code)
    {
    case EQ:
    case NE:
    case LE:
    case LT:
    case LEU:
    case LTU:
      /* We have these compares.  */
      break;

    case GE:
    case GT:
    case GEU:
    case GTU:
      {
        /* We do not have these compares, so swap the operands and
           use the mirrored condition.  */
        rtx swapped = op0;

        code = swap_condition (code);
        op0 = op1;
        op1 = swapped;
      }
      break;

    default:
      /* We should not have called this with any other code.  */
      gcc_unreachable ();
    }

  /* The first operand must be a register or zero; the second may
     additionally be a CONST_INT.  */
  if (!reg_or_0_operand (op0, SImode))
    op0 = force_reg (SImode, op0);

  if (!(CONST_INT_P (op1) || register_operand (op1, SImode)))
    op1 = force_reg (SImode, op1);

  /* Emit the setcc comparison itself.  */
  emit_insn (gen_rtx_SET (res, gen_rtx_fmt_ee (code, SImode, op0, op1)));

  return true;
}
/* Implement cstore patterns.  OPERANDS[0] is the destination,
   OPERANDS[1] the comparison whose code is used, and OPERANDS[2]
   and OPERANDS[3] are the values compared in mode CMP_MODE.  Returns
   whatever tilepro_emit_setcc_internal returns.  */
bool
tilepro_emit_setcc (rtx operands[], machine_mode cmp_mode)
{
  rtx dest = operands[0];
  enum rtx_code code = GET_CODE (operands[1]);
  rtx lhs = operands[2];
  rtx rhs = operands[3];

  return tilepro_emit_setcc_internal (dest, code, lhs, rhs, cmp_mode);
}
/* Return whether CODE is a signed comparison. */
static bool
signed_compare_p (enum rtx_code code)
{
return (code == EQ || code == NE || code == LT || code == LE
|| code == GT || code == GE);
}
/* Generate the comparison for an SImode conditional branch.
CODE is the comparison, OP0 and OP1 are its operands and CMP_MODE is
the mode they are compared in. When EQ_NE_ONLY is true, only EQ and
NE compares against zero are used directly; otherwise the signed
compares against zero are used directly as well.
Any insns needed to compute the condition are emitted here. The
value returned is a VOIDmode comparison of a register against
const0_rtx. */
static rtx
tilepro_emit_cc_test (enum rtx_code code, rtx op0, rtx op1,
machine_mode cmp_mode, bool eq_ne_only)
{
enum rtx_code branch_code;
rtx temp;
/* Check for a compare against zero using a comparison we can do
directly. */
if (cmp_mode != DImode
&& op1 == const0_rtx
&& (code == EQ || code == NE
|| (!eq_ne_only && signed_compare_p (code))))
{
op0 = force_reg (SImode, op0);
return gen_rtx_fmt_ee (code, VOIDmode, op0, const0_rtx);
}
/* The general case: fold the comparison code to the types of
compares that we have, choosing the branch as necessary. */
/* BRANCH_CODE is how the computed flag is finally tested: NE means
"branch if the flag is set", EQ means "branch if it is clear". */
switch (code)
{
case EQ:
case LE:
case LT:
case LEU:
case LTU:
/* We have these compares. */
branch_code = NE;
break;
case NE:
case GE:
case GT:
case GEU:
case GTU:
/* These must be reversed (except NE, but let's
canonicalize). */
code = reverse_condition (code);
branch_code = EQ;
break;
default:
gcc_unreachable ();
}
/* Special cases for constants that do not satisfy constraint I, and
for every constant LEU compare: try to avoid materializing the
constant in a register. */
if (cmp_mode != DImode
&& CONST_INT_P (op1) && (!satisfies_constraint_I (op1) || code == LEU))
{
HOST_WIDE_INT n = trunc_int_for_mode (INTVAL (op1), SImode);
switch (code)
{
case EQ:
/* Subtract off the value we want to compare against and see
if we get zero. This is cheaper than creating a constant
in a register. Except that subtracting -128 is more
expensive than seqi to -128, so we leave that alone. */
/* ??? Don't do this when comparing against symbols,
otherwise we'll reduce (&x == 0x1234) to (&x-0x1234 ==
0), which will be declared false out of hand (at least
for non-weak). */
if (!(symbolic_operand (op0, VOIDmode)
|| (REG_P (op0) && REG_POINTER (op0))))
{
/* To compare against MIN_INT, we add MIN_INT and check
for 0. */
HOST_WIDE_INT add;
if (n != -2147483647 - 1)
add = -n;
else
add = n;
op0 = force_reg (SImode, op0);
temp = gen_reg_rtx (SImode);
emit_insn (gen_addsi3 (temp, op0, gen_int_si (add)));
/* TEMP is zero exactly when the operands are equal, so the
sense of the final test is the opposite of BRANCH_CODE. */
return gen_rtx_fmt_ee (reverse_condition (branch_code),
VOIDmode, temp, const0_rtx);
}
break;
case LEU:
/* For n == -1 the n + 1 below would be zero, so that case is
left to the general code. */
if (n == -1)
break;
/* FALLTHRU */
case LTU:
/* Change ((unsigned)x < 0x1000) into !((unsigned)x >> 12),
etc. */
{
/* FIRST is the shift count; exact_log2 yields -1 when the
bound is not a power of two. */
int first = exact_log2 (code == LTU ? n : n + 1);
if (first != -1)
{
op0 = force_reg (SImode, op0);
temp = gen_reg_rtx (SImode);
emit_move_insn (temp,
gen_rtx_LSHIFTRT (SImode, op0,
gen_int_si (first)));
return gen_rtx_fmt_ee (reverse_condition (branch_code),
VOIDmode, temp, const0_rtx);
}
}
break;
default:
break;
}
}
/* Compute a flag saying whether we should branch. */
temp = gen_reg_rtx (SImode);
tilepro_emit_setcc_internal (temp, code, op0, op1, cmp_mode);
/* Return the branch comparison. */
return gen_rtx_fmt_ee (branch_code, VOIDmode, temp, const0_rtx);
}
/* Generate the comparison for a conditional branch and emit the
   jump.  OPERANDS[0] is the comparison whose code is used,
   OPERANDS[1] and OPERANDS[2] are the values compared in mode
   CMP_MODE, and OPERANDS[3] is the label to branch to when the
   comparison holds.  */
void
tilepro_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
{
  rtx condition = tilepro_emit_cc_test (GET_CODE (operands[0]),
                                        operands[1], operands[2],
                                        cmp_mode, false);
  rtx target = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
  rtx new_pc = gen_rtx_IF_THEN_ELSE (VOIDmode, condition, target, pc_rtx);

  emit_jump_insn (gen_rtx_SET (pc_rtx, new_pc));
}
/* Implement the movsicc pattern.  CMP is the comparison controlling
   the move; its operands are compared in the mode of the first one.
   Returns the condition rtx produced by tilepro_emit_cc_test, which
   is called with EQ_NE_ONLY set.  */
rtx
tilepro_emit_conditional_move (rtx cmp)
{
  rtx lhs = XEXP (cmp, 0);
  rtx rhs = XEXP (cmp, 1);

  return tilepro_emit_cc_test (GET_CODE (cmp), lhs, rhs,
                               GET_MODE (lhs), true);
}
/* Return true if INSN is annotated with a REG_BR_PROB note that
   indicates it's a branch that's predicted taken, i.e. the recorded
   probability is at least even.  An insn without such a note is not
   considered predicted taken.  */
static bool
cbranch_predicted_p (rtx_insn *insn)
{
  rtx note = find_reg_note (insn, REG_BR_PROB, 0);

  if (!note)
    return false;

  return (profile_probability::from_reg_br_prob_note (XINT (note, 0))
          >= profile_probability::even ());
}
/* Build the assembly template for a specific branch instruction,
   appending the branch prediction flag ("t") to OPCODE if
   appropriate.  REGOP is the number of the operand being tested and
   NETREG_P selects the 'N' operand modifier instead of 'r'.
   REVERSE_PREDICTED inverts the sense of the prediction taken from
   INSN.  The template lives in a static buffer that is overwritten
   by the next call.  */
static const char *
tilepro_output_simple_cbranch_with_opcode (rtx_insn *insn, const char *opcode,
                                           int regop, bool netreg_p,
                                           bool reverse_predicted)
{
  static char buf[64];
  const bool hint_taken = cbranch_predicted_p (insn) != reverse_predicted;
  const char *suffix = hint_taken ? "t" : "";
  const char modifier = netreg_p ? 'N' : 'r';

  sprintf (buf, "%s%s\t%%%c%d, %%l0", opcode, suffix, modifier, regop);

  return buf;
}
/* Output assembly code for a specific branch instruction, appending
   the branch prediction flag to the opcode if appropriate.

   OPERANDS[0] must be the target label.  When the target is known to
   be within reach, the template for a plain conditional branch using
   OPCODE is returned.  Otherwise a branch using REV_OPCODE around a
   direct jump is written out immediately and the empty string is
   returned; OPERANDS[0] is overwritten in that case.  REGOP and
   NETREG_P are passed on to
   tilepro_output_simple_cbranch_with_opcode.  */
const char *
tilepro_output_cbranch_with_opcode (rtx_insn *insn, rtx *operands,
                                    const char *opcode,
                                    const char *rev_opcode,
                                    int regop, bool netreg_p)
{
  rtx target, skip_label;
  const char *skip_branch;
  bool in_range = true;

  gcc_assert (LABEL_P (operands[0]));

  /* Without insn addresses the branch is assumed to reach.  */
  if (INSN_ADDRESSES_SET_P ())
    {
      int here = INSN_ADDRESSES (INSN_UID (insn));
      int there = INSN_ADDRESSES (INSN_UID (operands[0]));
      int distance = there - here;

      in_range = IN_RANGE (distance, -524288, 524280);
    }

  /* Just a simple conditional branch.  */
  if (in_range)
    return tilepro_output_simple_cbranch_with_opcode (insn, opcode, regop,
                                                      netreg_p, false);

  /* Generate a reversed branch around a direct jump.  This fallback
     does not use branch-likely instructions.  */
  skip_label = gen_label_rtx ();
  target = operands[0];

  /* The reversed branch goes to SKIP_LABEL.  */
  operands[0] = skip_label;
  skip_branch = tilepro_output_simple_cbranch_with_opcode (insn, rev_opcode,
                                                           regop, netreg_p,
                                                           true);
  output_asm_insn (skip_branch, operands);

  /* The direct jump goes to the original target.  */
  output_asm_insn ("j\t%l0", &target);

  /* Finally place SKIP_LABEL right after the jump.  */
  targetm.asm_out.internal_label (asm_out_file, "L",
                                  CODE_LABEL_NUMBER (skip_label));

  return "";
}
/* Output assembly code for a conditional branch instruction.  The
   condition is the code of OPERANDS[1], reversed first when REVERSED
   is true.  Only NE, EQ, GE, GT, LE and LT are supported; anything
   else is an internal error.  The register tested is operand 2.  */
const char *
tilepro_output_cbranch (rtx_insn *insn, rtx *operands, bool reversed)
{
  /* For each supported condition, the branch mnemonic and the
     mnemonic of the opposite branch.  */
  static const struct
  {
    enum rtx_code code;
    const char *opcode;
    const char *rev_opcode;
  } branch_ops[] = {
    { NE, "bnz", "bz" },
    { EQ, "bz", "bnz" },
    { GE, "bgez", "blz" },
    { GT, "bgz", "blez" },
    { LE, "blez", "bgz" },
    { LT, "blz", "bgez" },
  };
  enum rtx_code code = GET_CODE (operands[1]);
  unsigned int i;

  if (reversed)
    code = reverse_condition (code);

  for (i = 0; i < sizeof branch_ops / sizeof branch_ops[0]; i++)
    if (branch_ops[i].code == code)
      return tilepro_output_cbranch_with_opcode (insn, operands,
                                                 branch_ops[i].opcode,
                                                 branch_ops[i].rev_opcode,
                                                 2, false);

  gcc_unreachable ();
}
/* Implement the tablejump pattern.  OP0 is the value fetched from
   the jump table and OP1 is the label of the table.  Without PIC,
   OP0 is jumped to directly.  With PIC, the pc-relative address of
   the table is computed first and added to OP0.  */
void
tilepro_expand_tablejump (rtx op0, rtx op1)
{
  rtx table, target, text_label_symbol, text_label_rtx;

  if (!flag_pic)
    {
      emit_jump_insn (gen_tablejump_aux (op0, op1));
      return;
    }

  table = gen_rtx_LABEL_REF (Pmode, op1);
  target = gen_reg_rtx (Pmode);
  text_label_symbol = tilepro_text_label_symbol ();
  text_label_rtx = tilepro_text_label_rtx ();

  /* Form the address of the table relative to the text label with
     an addli/auli pair.  */
  emit_insn (gen_addli_pcrel (target, text_label_rtx,
                              table, text_label_symbol));
  emit_insn (gen_auli_pcrel (target, target, table, text_label_symbol));

  /* Add the table entry to the table address.  */
  emit_move_insn (target,
                  gen_rtx_PLUS (Pmode,
                                convert_to_mode (Pmode, op0, false),
                                target));

  emit_jump_insn (gen_tablejump_aux (target, op1));
}
/* Expand a builtin vector binary op by calling the gen function GEN
   with operands in the proper modes.  DEST is converted to
   DEST_MODE, and SRC0 and SRC1 (the latter only if DO_SRC1 is true)
   are converted to SRC_MODE.  A source equal to const0_rtx is
   replaced by the zero constant of SRC_MODE instead of being
   converted with gen_lowpart.  */
void
tilepro_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx),
                                     machine_mode dest_mode,
                                     rtx dest,
                                     machine_mode src_mode,
                                     rtx src0, rtx src1, bool do_src1)
{
  dest = gen_lowpart (dest_mode, dest);

  src0 = (src0 == const0_rtx
          ? CONST0_RTX (src_mode)
          : gen_lowpart (src_mode, src0));

  /* When DO_SRC1 is false, SRC1 is handed to GEN untouched.  */
  if (do_src1)
    src1 = (src1 == const0_rtx
            ? CONST0_RTX (src_mode)
            : gen_lowpart (src_mode, src1));

  emit_insn (gen (dest, src0, src1));
}
/* Intrinsics */
/* Per-builtin bookkeeping; tilepro_builtin_info holds one of these for
each builtin. */
struct tile_builtin_info
{
/* Insn code of the pattern implementing the builtin; the table below
initializes it from a CODE_FOR_* value. */
enum insn_code icode;
/* Starts out as NULL in the table below. NOTE(review): presumably set
to the builtin's function declaration once it has been created --
confirm where it is assigned. */
tree fndecl;
};
/* Per-builtin expansion data.  The array has TILEPRO_BUILTIN_max
   slots; each initializer pairs the insn code used for one builtin
   with a fndecl slot that starts out NULL.  The trailing comment on
   each entry names the builtin the slot belongs to.  Entries are
   positional, so their order presumably has to match the values of
   enum tilepro_builtin -- confirm before reordering or inserting.
   Several builtins share an insn code with another entry: the
   immediate shift forms (shlib, shlih, shrib, shrih, sraib, sraih)
   use the same codes as shlb, shlh, shrb, shrh, srab and srah.  */
static struct tile_builtin_info tilepro_builtin_info[TILEPRO_BUILTIN_max] = {
{ CODE_FOR_addsi3, NULL }, /* add */
{ CODE_FOR_insn_addb, NULL }, /* addb */
{ CODE_FOR_insn_addbs_u, NULL }, /* addbs_u */
{ CODE_FOR_insn_addh, NULL }, /* addh */
{ CODE_FOR_insn_addhs, NULL }, /* addhs */
{ CODE_FOR_insn_addib, NULL }, /* addib */
{ CODE_FOR_insn_addih, NULL }, /* addih */
{ CODE_FOR_insn_addlis, NULL }, /* addlis */
{ CODE_FOR_ssaddsi3, NULL }, /* adds */
{ CODE_FOR_insn_adiffb_u, NULL }, /* adiffb_u */
{ CODE_FOR_insn_adiffh, NULL }, /* adiffh */
{ CODE_FOR_andsi3, NULL }, /* and */
{ CODE_FOR_insn_auli, NULL }, /* auli */
{ CODE_FOR_insn_avgb_u, NULL }, /* avgb_u */
{ CODE_FOR_insn_avgh, NULL }, /* avgh */
{ CODE_FOR_insn_bitx, NULL }, /* bitx */
{ CODE_FOR_bswapsi2, NULL }, /* bytex */
{ CODE_FOR_clzsi2, NULL }, /* clz */
{ CODE_FOR_insn_crc32_32, NULL }, /* crc32_32 */
{ CODE_FOR_insn_crc32_8, NULL }, /* crc32_8 */
{ CODE_FOR_ctzsi2, NULL }, /* ctz */
{ CODE_FOR_insn_drain, NULL }, /* drain */
{ CODE_FOR_insn_dtlbpr, NULL }, /* dtlbpr */
{ CODE_FOR_insn_dword_align, NULL }, /* dword_align */
{ CODE_FOR_insn_finv, NULL }, /* finv */
{ CODE_FOR_insn_flush, NULL }, /* flush */
{ CODE_FOR_insn_fnop, NULL }, /* fnop */
{ CODE_FOR_insn_icoh, NULL }, /* icoh */
{ CODE_FOR_insn_ill, NULL }, /* ill */
{ CODE_FOR_insn_info, NULL }, /* info */
{ CODE_FOR_insn_infol, NULL }, /* infol */
{ CODE_FOR_insn_inthb, NULL }, /* inthb */
{ CODE_FOR_insn_inthh, NULL }, /* inthh */
{ CODE_FOR_insn_intlb, NULL }, /* intlb */
{ CODE_FOR_insn_intlh, NULL }, /* intlh */
{ CODE_FOR_insn_inv, NULL }, /* inv */
{ CODE_FOR_insn_lb, NULL }, /* lb */
{ CODE_FOR_insn_lb_u, NULL }, /* lb_u */
{ CODE_FOR_insn_lh, NULL }, /* lh */
{ CODE_FOR_insn_lh_u, NULL }, /* lh_u */
{ CODE_FOR_insn_lnk, NULL }, /* lnk */
{ CODE_FOR_insn_lw, NULL }, /* lw */
{ CODE_FOR_insn_lw_na, NULL }, /* lw_na */
{ CODE_FOR_insn_lb_L2, NULL }, /* lb_L2 */
{ CODE_FOR_insn_lb_u_L2, NULL }, /* lb_u_L2 */
{ CODE_FOR_insn_lh_L2, NULL }, /* lh_L2 */
{ CODE_FOR_insn_lh_u_L2, NULL }, /* lh_u_L2 */
{ CODE_FOR_insn_lw_L2, NULL }, /* lw_L2 */
{ CODE_FOR_insn_lw_na_L2, NULL }, /* lw_na_L2 */
{ CODE_FOR_insn_lb_miss, NULL }, /* lb_miss */
{ CODE_FOR_insn_lb_u_miss, NULL }, /* lb_u_miss */
{ CODE_FOR_insn_lh_miss, NULL }, /* lh_miss */
{ CODE_FOR_insn_lh_u_miss, NULL }, /* lh_u_miss */
{ CODE_FOR_insn_lw_miss, NULL }, /* lw_miss */
{ CODE_FOR_insn_lw_na_miss, NULL }, /* lw_na_miss */
{ CODE_FOR_insn_maxb_u, NULL }, /* maxb_u */
{ CODE_FOR_insn_maxh, NULL }, /* maxh */
{ CODE_FOR_insn_maxib_u, NULL }, /* maxib_u */
{ CODE_FOR_insn_maxih, NULL }, /* maxih */
{ CODE_FOR_memory_barrier, NULL }, /* mf */
{ CODE_FOR_insn_mfspr, NULL }, /* mfspr */
{ CODE_FOR_insn_minb_u, NULL }, /* minb_u */
{ CODE_FOR_insn_minh, NULL }, /* minh */
{ CODE_FOR_insn_minib_u, NULL }, /* minib_u */
{ CODE_FOR_insn_minih, NULL }, /* minih */
{ CODE_FOR_insn_mm, NULL }, /* mm */
{ CODE_FOR_insn_mnz, NULL }, /* mnz */
{ CODE_FOR_insn_mnzb, NULL }, /* mnzb */
{ CODE_FOR_insn_mnzh, NULL }, /* mnzh */
{ CODE_FOR_movsi, NULL }, /* move */
{ CODE_FOR_insn_movelis, NULL }, /* movelis */
{ CODE_FOR_insn_mtspr, NULL }, /* mtspr */
{ CODE_FOR_insn_mulhh_ss, NULL }, /* mulhh_ss */
{ CODE_FOR_insn_mulhh_su, NULL }, /* mulhh_su */
{ CODE_FOR_insn_mulhh_uu, NULL }, /* mulhh_uu */
{ CODE_FOR_insn_mulhha_ss, NULL }, /* mulhha_ss */
{ CODE_FOR_insn_mulhha_su, NULL }, /* mulhha_su */
{ CODE_FOR_insn_mulhha_uu, NULL }, /* mulhha_uu */
{ CODE_FOR_insn_mulhhsa_uu, NULL }, /* mulhhsa_uu */
{ CODE_FOR_insn_mulhl_ss, NULL }, /* mulhl_ss */
{ CODE_FOR_insn_mulhl_su, NULL }, /* mulhl_su */
{ CODE_FOR_insn_mulhl_us, NULL }, /* mulhl_us */
{ CODE_FOR_insn_mulhl_uu, NULL }, /* mulhl_uu */
{ CODE_FOR_insn_mulhla_ss, NULL }, /* mulhla_ss */
{ CODE_FOR_insn_mulhla_su, NULL }, /* mulhla_su */
{ CODE_FOR_insn_mulhla_us, NULL }, /* mulhla_us */
{ CODE_FOR_insn_mulhla_uu, NULL }, /* mulhla_uu */
{ CODE_FOR_insn_mulhlsa_uu, NULL }, /* mulhlsa_uu */
{ CODE_FOR_insn_mulll_ss, NULL }, /* mulll_ss */
{ CODE_FOR_insn_mulll_su, NULL }, /* mulll_su */
{ CODE_FOR_insn_mulll_uu, NULL }, /* mulll_uu */
{ CODE_FOR_insn_mullla_ss, NULL }, /* mullla_ss */
{ CODE_FOR_insn_mullla_su, NULL }, /* mullla_su */
{ CODE_FOR_insn_mullla_uu, NULL }, /* mullla_uu */
{ CODE_FOR_insn_mulllsa_uu, NULL }, /* mulllsa_uu */
{ CODE_FOR_insn_mvnz, NULL }, /* mvnz */
{ CODE_FOR_insn_mvz, NULL }, /* mvz */
{ CODE_FOR_insn_mz, NULL }, /* mz */
{ CODE_FOR_insn_mzb, NULL }, /* mzb */
{ CODE_FOR_insn_mzh, NULL }, /* mzh */
{ CODE_FOR_insn_nap, NULL }, /* nap */
{ CODE_FOR_nop, NULL }, /* nop */
{ CODE_FOR_insn_nor, NULL }, /* nor */
{ CODE_FOR_iorsi3, NULL }, /* or */
{ CODE_FOR_insn_packbs_u, NULL }, /* packbs_u */
{ CODE_FOR_insn_packhb, NULL }, /* packhb */
{ CODE_FOR_insn_packhs, NULL }, /* packhs */
{ CODE_FOR_insn_packlb, NULL }, /* packlb */
{ CODE_FOR_popcountsi2, NULL }, /* pcnt */
{ CODE_FOR_insn_prefetch, NULL }, /* prefetch */
{ CODE_FOR_insn_prefetch_L1, NULL }, /* prefetch_L1 */
{ CODE_FOR_rotlsi3, NULL }, /* rl */
{ CODE_FOR_insn_s1a, NULL }, /* s1a */
{ CODE_FOR_insn_s2a, NULL }, /* s2a */
{ CODE_FOR_insn_s3a, NULL }, /* s3a */
{ CODE_FOR_insn_sadab_u, NULL }, /* sadab_u */
{ CODE_FOR_insn_sadah, NULL }, /* sadah */
{ CODE_FOR_insn_sadah_u, NULL }, /* sadah_u */
{ CODE_FOR_insn_sadb_u, NULL }, /* sadb_u */
{ CODE_FOR_insn_sadh, NULL }, /* sadh */
{ CODE_FOR_insn_sadh_u, NULL }, /* sadh_u */
{ CODE_FOR_insn_sb, NULL }, /* sb */
{ CODE_FOR_insn_seq, NULL }, /* seq */
{ CODE_FOR_insn_seqb, NULL }, /* seqb */
{ CODE_FOR_insn_seqh, NULL }, /* seqh */
{ CODE_FOR_insn_seqib, NULL }, /* seqib */
{ CODE_FOR_insn_seqih, NULL }, /* seqih */
{ CODE_FOR_insn_sh, NULL }, /* sh */
{ CODE_FOR_ashlsi3, NULL }, /* shl */
{ CODE_FOR_insn_shlb, NULL }, /* shlb */
{ CODE_FOR_insn_shlh, NULL }, /* shlh */
{ CODE_FOR_insn_shlb, NULL }, /* shlib */
{ CODE_FOR_insn_shlh, NULL }, /* shlih */
{ CODE_FOR_lshrsi3, NULL }, /* shr */
{ CODE_FOR_insn_shrb, NULL }, /* shrb */
{ CODE_FOR_insn_shrh, NULL }, /* shrh */
{ CODE_FOR_insn_shrb, NULL }, /* shrib */
{ CODE_FOR_insn_shrh, NULL }, /* shrih */
{ CODE_FOR_insn_slt, NULL }, /* slt */
{ CODE_FOR_insn_slt_u, NULL }, /* slt_u */
{ CODE_FOR_insn_sltb, NULL }, /* sltb */
{ CODE_FOR_insn_sltb_u, NULL }, /* sltb_u */
{ CODE_FOR_insn_slte, NULL }, /* slte */
{ CODE_FOR_insn_slte_u, NULL }, /* slte_u */
{ CODE_FOR_insn_slteb, NULL }, /* slteb */
{ CODE_FOR_insn_slteb_u, NULL }, /* slteb_u */
{ CODE_FOR_insn_slteh, NULL }, /* slteh */
{ CODE_FOR_insn_slteh_u, NULL }, /* slteh_u */
{ CODE_FOR_insn_slth, NULL }, /* slth */
{ CODE_FOR_insn_slth_u, NULL }, /* slth_u */
{ CODE_FOR_insn_sltib, NULL }, /* sltib */
{ CODE_FOR_insn_sltib_u, NULL }, /* sltib_u */
{ CODE_FOR_insn_sltih, NULL }, /* sltih */
{ CODE_FOR_insn_sltih_u, NULL }, /* sltih_u */
{ CODE_FOR_insn_sne, NULL }, /* sne */
{ CODE_FOR_insn_sneb, NULL }, /* sneb */
{ CODE_FOR_insn_sneh, NULL }, /* sneh */
{ CODE_FOR_ashrsi3, NULL }, /* sra */
{ CODE_FOR_insn_srab, NULL }, /* srab */
{ CODE_FOR_insn_srah, NULL }, /* srah */
{ CODE_FOR_insn_srab, NULL }, /* sraib */
{ CODE_FOR_insn_srah, NULL }, /* sraih */
{ CODE_FOR_subsi3, NULL }, /* sub */
{ CODE_FOR_insn_subb, NULL }, /* subb */
{ CODE_FOR_insn_subbs_u, NULL }, /* subbs_u */
{ CODE_FOR_insn_subh, NULL }, /* subh */
{ CODE_FOR_insn_subhs, NULL }, /* subhs */
{ CODE_FOR_sssubsi3, NULL }, /* subs */
{ CODE_FOR_insn_sw, NULL }, /* sw */
{ CODE_FOR_insn_tblidxb0, NULL }, /* tblidxb0 */
{ CODE_FOR_insn_tblidxb1, NULL }, /* tblidxb1 */
{ CODE_FOR_insn_tblidxb2, NULL }, /* tblidxb2 */
{ CODE_FOR_insn_tblidxb3, NULL }, /* tblidxb3 */
{ CODE_FOR_insn_tns, NULL }, /* tns */
{ CODE_FOR_insn_wh64, NULL }, /* wh64 */
{ CODE_FOR_xorsi3, NULL }, /* xor */
{ CODE_FOR_tilepro_network_barrier, NULL }, /* network_barrier */
{ CODE_FOR_tilepro_idn0_receive, NULL }, /* idn0_receive */
{ CODE_FOR_tilepro_idn1_receive, NULL }, /* idn1_receive */
{ CODE_FOR_tilepro_idn_send, NULL }, /* idn_send */
{ CODE_FOR_tilepro_sn_receive, NULL }, /* sn_receive */
{ CODE_FOR_tilepro_sn_send, NULL }, /* sn_send */
{ CODE_FOR_tilepro_udn0_receive, NULL }, /* udn0_receive */
{ CODE_FOR_tilepro_udn1_receive, NULL }, /* udn1_receive */
{ CODE_FOR_tilepro_udn2_receive, NULL }, /* udn2_receive */
{ CODE_FOR_tilepro_udn3_receive, NULL }, /* udn3_receive */
{ CODE_FOR_tilepro_udn_send, NULL }, /* udn_send */
};
/* Description of one user-visible builtin name, as listed in the
   tilepro_builtins table.  */
struct tilepro_builtin_def
{
/* Builtin function name, e.g. "__insn_add".  */
const char *name;
/* Builtin code this name maps to.  Several names may share one code
   (e.g. "__insn_add", "__insn_addi" and "__insn_addli" all use
   TILEPRO_INSN_ADD).  */
enum tilepro_builtin code;
/* Per-builtin flag; presumably marks the builtin as free of side
   effects so calls can be treated as const -- confirm where the
   table is consumed.  */
bool is_const;
/* The first character is the return type. Subsequent characters
are the argument types. See char_to_type. */
const char *type;
};
static const struct tilepro_builtin_def tilepro_builtins[] = {
{ "__insn_add", TILEPRO_INSN_ADD, true, "lll" },
{ "__insn_addb", TILEPRO_INSN_ADDB, true, "lll" },
{ "__insn_addbs_u", TILEPRO_INSN_ADDBS_U, false, "lll" },
{ "__insn_addh", TILEPRO_INSN_ADDH, true, "lll" },
{ "__insn_addhs", TILEPRO_INSN_ADDHS, false, "lll" },
{ "__insn_addi", TILEPRO_INSN_ADD, true, "lll" },
{ "__insn_addib", TILEPRO_INSN_ADDIB, true, "lll" },
{ "__insn_addih", TILEPRO_INSN_ADDIH, true, "lll" },
{ "__insn_addli", TILEPRO_INSN_ADD, true, "lll" },
{ "__insn_addlis", TILEPRO_INSN_ADDLIS, false, "lll" },
{ "__insn_adds", TILEPRO_INSN_ADDS, false, "lll" },
{ "__insn_adiffb_u", TILEPRO_INSN_ADIFFB_U, true, "lll" },
{ "__insn_adiffh", TILEPRO_INSN_ADIFFH, true, "lll" },
{ "__insn_and", TILEPRO_INSN_AND, true, "lll" },
{ "__insn_andi", TILEPRO_INSN_AND, true, "lll" },
{ "__insn_auli", TILEPRO_INSN_AULI, true, "lll" },
{ "__insn_avgb_u", TILEPRO_INSN_AVGB_U, true, "lll" },
{ "__insn_avgh", TILEPRO_INSN_AVGH, true, "lll" },
{ "__insn_bitx", TILEPRO_INSN_BITX, true, "ll" },
{ "__insn_bytex", TILEPRO_INSN_BYTEX, true, "ll" },
{ "__insn_clz", TILEPRO_INSN_CLZ, true, "ll" },
{ "__insn_crc32_32", TILEPRO_INSN_CRC32_32, true, "lll" },
{ "__insn_crc32_8", TILEPRO_INSN_CRC32_8, true, "lll" },
{ "__insn_ctz", TILEPRO_INSN_CTZ, true, "ll" },
{ "__insn_drain", TILEPRO_INSN_DRAIN, false, "v" },
{ "__insn_dtlbpr", TILEPRO_INSN_DTLBPR, false, "vl" },
{ "__insn_dword_align", TILEPRO_INSN_DWORD_ALIGN, true, "lllk" },
{ "__insn_finv", TILEPRO_INSN_FINV, false, "vk" },
{ "__insn_flush", TILEPRO_INSN_FLUSH, false, "vk" },
{ "__insn_fnop", TILEPRO_INSN_FNOP, false, "v" },
{ "__insn_icoh", TILEPRO_INSN_ICOH, false, "vk" },
{ "__insn_ill", TILEPRO_INSN_ILL, false, "v" },
{ "__insn_info", TILEPRO_INSN_INFO, false, "vl" },
{ "__insn_infol", TILEPRO_INSN_INFOL, false, "vl" },
{ "__insn_inthb", TILEPRO_INSN_INTHB, true, "lll" },
{ "__insn_inthh", TILEPRO_INSN_INTHH, true, "lll" },
{