blob: 403c274a27340e303a2b4c54085cd9baa29eacfe [file] [log] [blame]
/* Output routines for GCC for Renesas / SuperH SH.
Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
2003, 2004 Free Software Foundation, Inc.
Contributed by Steve Chamberlain (sac@cygnus.com).
Improved by Jim Wilson (wilson@cygnus.com).
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-config.h"
#include "rtl.h"
#include "tree.h"
#include "flags.h"
#include "expr.h"
#include "optabs.h"
#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
#include "toplev.h"
#include "recog.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "real.h"
#include "langhooks.h"
#include "basic-block.h"
#include "ra.h"
#include "cfglayout.h"
#include "intl.h"
#include "ggc.h"
/* Makes the insn code of the indirect_jump_scratch pattern available as a
   variable.  NOTE(review): presumably for code that cannot see the
   CODE_FOR_* enumeration directly -- confirm against this variable's users. */
int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* Word offset (0 or 1) of the most / least significant word of a two-word
   value.  The two swap places depending on TARGET_LITTLE_ENDIAN. */
#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
/* These are some macros to abstract register modes. */
/* Range check for an add immediate: uses the I10 constraint test on
   SHmedia and the I08 constraint test otherwise. */
#define CONST_OK_FOR_ADD(size) \
(TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
/* Move / add / subtract insn generators: the DImode generator when
   TARGET_SHMEDIA64 is set, the SImode generator otherwise. */
#define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
#define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
#define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
/* Set to 1 by expand_prologue() when the function is an interrupt handler. */
int current_function_interrupt;
/* ??? The pragma interrupt support will not work for SH3. */
/* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
output code for the next function appropriate for an interrupt handler. */
int pragma_interrupt;
/* This is set by the trap_exit attribute for functions. It specifies
a trap number to be used in a trapa instruction at function exit
(instead of an rte instruction). A value of zero means no trap exit:
print_operand's '@' modifier only emits trapa when this is nonzero. */
int trap_exit;
/* This is used by the sp_switch attribute for functions. It specifies
a variable holding the address of the stack the interrupt function
should switch to/from at entry/exit. */
rtx sp_switch;
/* This is set by #pragma trapa, and is similar to pragma_interrupt
(NOTE(review): the original wording was "the above" -- confirm), except
that the compiler doesn't emit code to preserve all registers. */
static int pragma_trapa;
/* This is set by #pragma nosave_low_regs. This is useful on the SH3,
which has a separate set of low regs for User and Supervisor modes.
This should only be used for the lowest level of interrupts. Higher levels
of interrupts must save the registers in case they themselves are
interrupted. */
int pragma_nosave_low_regs;
/* This is used for communication between SETUP_INCOMING_VARARGS and
sh_expand_prologue. */
int current_function_anonymous_args;
/* Global variables for machine-dependent things. */
/* Which cpu are we scheduling for. */
enum processor_type sh_cpu;
/* Saved operands from the last compare to use when we generate an scc
or bcc insn. They are read (and may be replaced by register copies) by
prepare_scc_operands and from_compare. */
rtx sh_compare_op0;
rtx sh_compare_op1;
/* Provides the class number of the smallest class containing
reg number. The table is indexed by hard register number and has one
entry per hard register (FIRST_PSEUDO_REGISTER entries). */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
/* Entries 0-63: general registers; entry 0 has its own class R0_REGS. */
R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
/* Entries 64-127: floating-point registers; the first one has its own
class FP0_REGS. */
FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
/* Entries 128-135: branch target registers. */
TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
/* Entries 136-143: registers in class DF_REGS. */
DF_REGS, DF_REGS, DF_REGS, DF_REGS,
DF_REGS, DF_REGS, DF_REGS, DF_REGS,
/* Entries 144-152: the remaining special registers, one class each.
NOTE(review): which hard register each of these entries names is fixed
by the register numbering in the target header -- confirm there. */
NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
GENERAL_REGS,
};
/* Writable table of register names, one fixed-size string slot per hard
register, filled from SH_REGISTER_NAMES_INITIALIZER (defined outside this
file chunk). */
char sh_register_names[FIRST_PSEUDO_REGISTER] \
[MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
/* Companion table of additional register names with ADDREGNAMES_SIZE
slots, filled from SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER. */
char sh_additional_register_names[ADDREGNAMES_SIZE] \
[MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
= SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
/* Provide reg_class from a letter such as appears in the machine
description. The table has one entry per lower-case letter and is
indexed by LETTER - 'a'. A letter whose comment ends in a second '*'
is a target independently reserved letter; those entries are NO_REGS.
reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
enum reg_class reg_class_from_letter[] =
{
/* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
/* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
/* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
/* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
/* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
/* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
/* y */ FPUL_REGS, /* z */ R0_REGS
};
/* Selects the assembler syntax variant.  NOTE(review): presumably the value
behind the ASSEMBLER_DIALECT macro that print_operand tests to choose
between "/s" and ".s" -- confirm in the target header. */
int assembler_dialect;
/* NOTE(review): judging by its name and the shmedia_* helpers declared
below, records whether stack space was reserved for saving target
registers; its users are outside this file chunk -- confirm. */
static bool shmedia_space_reserved_for_target_registers;
/* Forward declarations of file-local functions, so that they can be
referenced (for instance by the target hook macros below) before their
definitions. */
/* Branch, constant-pool and reorg helpers. */
static void split_branches (rtx);
static int branch_dest (rtx);
static void force_into (rtx, rtx);
static void print_slot (rtx);
static rtx add_constant (rtx, enum machine_mode, rtx);
static void dump_table (rtx, rtx);
static int hi_const (rtx);
static int broken_move (rtx);
static int mova_p (rtx);
static rtx find_barrier (int, rtx, rtx);
static int noncall_uses_reg (rtx, rtx, rtx *);
static rtx gen_block_redirect (rtx, int, int);
static void sh_reorg (void);
/* Stack frame and register save helpers. */
static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
static rtx frame_insn (rtx);
static rtx push (int);
static void pop (int);
static void push_regs (HARD_REG_SET *, int);
static int calc_live_regs (HARD_REG_SET *);
static void mark_use (rtx, rtx *);
static HOST_WIDE_INT rounded_frame_size (int);
static rtx mark_constant_pool_use (rtx);
/* Attribute table (installed as TARGET_ATTRIBUTE_TABLE below) and its
handlers. */
const struct attribute_spec sh_attribute_table[];
static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
/* Functions installed as target hooks below, and their helpers. */
static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void sh_insert_attributes (tree, tree *);
static int sh_adjust_cost (rtx, rtx, rtx, int);
static int sh_use_dfa_interface (void);
static int sh_issue_rate (void);
static bool sh_function_ok_for_sibcall (tree, tree);
static bool sh_cannot_modify_jumps_p (void);
static int sh_target_reg_class (void);
static bool sh_optimize_target_register_callee_saved (bool);
static bool sh_ms_bitfield_layout_p (tree);
static void sh_init_builtins (void);
static void sh_media_init_builtins (void);
static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void sh_file_start (void);
static int flow_dependent_p (rtx, rtx);
static void flow_dependent_p_1 (rtx, rtx, void *);
/* Cost helpers. */
static int shiftcosts (rtx);
static int andcosts (rtx);
static int addsubcosts (rtx);
static int multcosts (rtx);
static bool unspec_caller_rtx_p (rtx);
static bool sh_cannot_copy_insn_p (rtx);
static bool sh_rtx_costs (rtx, int, int, int *);
static int sh_address_cost (rtx);
/* SHmedia target register and save scheduling helpers. */
static int shmedia_target_regs_stack_space (HARD_REG_SET *);
static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
static int scavenge_reg (HARD_REG_SET *s);
struct save_schedule_s;
static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
struct save_schedule_s *, int);
/* Calling convention and varargs hooks. */
static rtx sh_struct_value_rtx (tree, int);
static bool sh_return_in_memory (tree, tree);
static rtx sh_builtin_saveregs (void);
static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
static tree sh_build_builtin_va_list (void);
/* Initialize the GCC target structure.
Each hook that this port overrides is first #undef'ed and then redefined
to the SH implementation. The definition of targetm at the end of this
section expands TARGET_INITIALIZER, which picks up these definitions.
NOTE(review): the default values being replaced presumably come from
target-def.h, included above -- confirm. */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sh_attribute_table
/* The next two are used for debug info when compiling with -gdwarf. */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
/* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
/* Assembly output hooks. */
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START sh_file_start
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
/* Scheduler hooks. */
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sh_adjust_cost
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
sh_use_dfa_interface
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sh_issue_rate
/* Jump and branch target register hooks. */
#undef TARGET_CANNOT_MODIFY_JUMPS_P
#define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
#undef TARGET_BRANCH_TARGET_REGISTER_CLASS
#define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
#undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
#define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
sh_optimize_target_register_callee_saved
#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
/* Builtin function hooks. */
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sh_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sh_expand_builtin
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
/* Cost hooks. */
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sh_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST sh_address_cost
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
/* Thread-local storage is only advertised when HAVE_AS_TLS is defined. */
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
/* Calling convention hooks. The three promotion hooks all share
sh_promote_prototypes. */
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sh_return_in_memory
/* Varargs hooks. */
#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
#undef TARGET_PCH_VALID_P
#define TARGET_PCH_VALID_P sh_pch_valid_p
/* The target hook vector itself, built from the definitions above. */
struct gcc_target targetm = TARGET_INITIALIZER;
/* Write the assembler form of the memory address X to STREAM.

   A plain register (or subreg) prints as "@rN", a pre-decrement as "@-rN"
   and a post-increment as "@rN+".  A PLUS prints as "@(disp,rN)" when its
   second operand is a CONST_INT and as "@(r0,rN)" when it is a register;
   any other second operand is dumped with debug_rtx and aborts.  Every
   other address is marked as a constant pool use and printed as an
   address constant.  */
void
print_operand_address (FILE *stream, rtx x)
{
  enum rtx_code kind = GET_CODE (x);

  if (kind == REG || kind == SUBREG)
    {
      fprintf (stream, "@%s", reg_names[true_regnum (x)]);
      return;
    }

  if (kind == PRE_DEC)
    {
      fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
      return;
    }

  if (kind == POST_INC)
    {
      fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
      return;
    }

  if (kind != PLUS)
    {
      x = mark_constant_pool_use (x);
      output_addr_const (stream, x);
      return;
    }

  /* Only base + displacement and base + index remain.  */
  {
    rtx lhs = XEXP (x, 0);
    rtx rhs = XEXP (x, 1);
    enum rtx_code rhs_kind = GET_CODE (rhs);

    if (rhs_kind == CONST_INT)
      fprintf (stream, "@(%d,%s)", (int) INTVAL (rhs),
               reg_names[true_regnum (lhs)]);
    else if (rhs_kind == REG || rhs_kind == SUBREG)
      {
        int lhs_regno = true_regnum (lhs);
        int rhs_regno = true_regnum (rhs);

        /* The r0 half is printed literally; the register named after it
           is whichever of the two has the higher number.  */
        fprintf (stream, "@(r0,%s)",
                 reg_names[MAX (lhs_regno, rhs_regno)]);
      }
    else
      {
        debug_rtx (x);
        abort ();
      }
  }
}
/* Print operand x (an rtx) in assembler syntax to file stream
according to modifier code. A code of 0 (or any code not listed
below) prints the operand in its default form.
'.' print a .s if insn needs delay slot
',' print LOCAL_LABEL_PREFIX
'@' print trap, rte or rts depending upon pragma interruptness
'#' output a nop if there is nothing to put in the delay slot
''' print likelihood suffix (/u for unlikely).
'O' print a constant without the #
'R' print the LSW of a dp value - changes if in little endian
'S' print the MSW of a dp value - changes if in little endian
'T' print the next word of a dp value - same as 'R' in big endian mode.
'M' print an `x' if `m' will print `base,index'.
'N' print 'r63' if the operand is (const_int 0).
'm' print a pair `base,offset' or `base,index', for LD and ST.
'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
'o' output an operator. */
void
print_operand (FILE *stream, rtx x, int code)
{
switch (code)
{
case '.':
/* Only print the suffix when we are inside a delay-slot sequence whose
branch is not annulled and whose slot insn has nonzero length. The
spelling ("/s" or ".s") depends on the assembler dialect. */
if (final_sequence
&& ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
&& get_attr_length (XVECEXP (final_sequence, 0, 1)))
fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
break;
case ',':
fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
break;
case '@':
/* A nonzero trap_exit takes precedence over the interrupt handler
return. */
if (trap_exit)
fprintf (stream, "trapa #%d", trap_exit);
else if (sh_cfun_interrupt_handler_p ())
fprintf (stream, "rte");
else
fprintf (stream, "rts");
break;
case '#':
/* Output a nop if there's nothing in the delay slot. */
if (dbr_sequence_length () == 0)
fprintf (stream, "\n\tnop");
break;
case '\'':
{
/* The branch counts as unlikely when its REG_BR_PROB note is below
half of REG_BR_PROB_BASE. No note means no suffix. */
rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
fputs ("/u", stream);
break;
}
case 'O':
x = mark_constant_pool_use (x);
output_addr_const (stream, x);
break;
case 'R':
/* X is used as a REG here without checking its code. */
fputs (reg_names[REGNO (x) + LSW], (stream));
break;
case 'S':
fputs (reg_names[REGNO (x) + MSW], (stream));
break;
case 'T':
/* Next word of a double. */
switch (GET_CODE (x))
{
case REG:
fputs (reg_names[REGNO (x) + 1], (stream));
break;
case MEM:
/* Auto-modified addresses are printed unchanged; any other address
is advanced by 4 bytes first. */
if (GET_CODE (XEXP (x, 0)) != PRE_DEC
&& GET_CODE (XEXP (x, 0)) != POST_INC)
x = adjust_address (x, SImode, 4);
print_operand_address (stream, XEXP (x, 0));
break;
default:
break;
}
break;
case 'o':
/* Print a mnemonic fragment for the operator X. LT/LE/LTU/LEU print
the same text as GT/GE/GTU/GEU. NOTE(review): presumably the insn
patterns swap the operands for those codes -- confirm in the md file.
Unknown operators print nothing. */
switch (GET_CODE (x))
{
case PLUS: fputs ("add", stream); break;
case MINUS: fputs ("sub", stream); break;
case MULT: fputs ("mul", stream); break;
case DIV: fputs ("div", stream); break;
case EQ: fputs ("eq", stream); break;
case NE: fputs ("ne", stream); break;
case GT: case LT: fputs ("gt", stream); break;
case GE: case LE: fputs ("ge", stream); break;
case GTU: case LTU: fputs ("gtu", stream); break;
case GEU: case LEU: fputs ("geu", stream); break;
default:
break;
}
break;
case 'M':
/* Print 'x' exactly when X is a MEM whose address is a PLUS with a
register (or subreg) as second operand. */
if (GET_CODE (x) == MEM
&& GET_CODE (XEXP (x, 0)) == PLUS
&& (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
|| GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
fputc ('x', stream);
break;
case 'm':
/* X must be a MEM; anything else is an internal error. */
if (GET_CODE (x) != MEM)
abort ();
x = XEXP (x, 0);
switch (GET_CODE (x))
{
case REG:
case SUBREG:
/* A bare base register is printed with an explicit zero offset. */
print_operand (stream, x, 0);
fputs (", 0", stream);
break;
case PLUS:
/* Both halves are printed in default form via a recursive call. */
print_operand (stream, XEXP (x, 0), 0);
fputs (", ", stream);
print_operand (stream, XEXP (x, 1), 0);
break;
default:
abort ();
}
break;
case 'N':
/* The zero constant of X's mode prints as r63; everything else is
handled by the default output code below. */
if (x == CONST0_RTX (GET_MODE (x)))
{
fprintf ((stream), "r63");
break;
}
goto default_output;
case 'u':
if (GET_CODE (x) == CONST_INT)
{
fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
break;
}
/* Fall through. */
default_output:
default:
switch (GET_CODE (x))
{
/* FIXME: We need this on SHmedia32 because reload generates
some sign-extended HI or QI loads into DImode registers
but, because Pmode is SImode, the address ends up with a
subreg:SI of the DImode register. Maybe reload should be
fixed so as to apply alter_subreg to such loads? */
case SUBREG:
/* Only a subreg at byte offset 0 of a hard or pseudo REG is accepted;
it is then printed as the inner register. */
if (SUBREG_BYTE (x) != 0
|| GET_CODE (SUBREG_REG (x)) != REG)
abort ();
x = SUBREG_REG (x);
/* Fall through. */
case REG:
/* Vector and wide floating-point registers get a different prefix:
the first two characters (or, for the "d" form, the first character)
of the normal register name are skipped and replaced.
NOTE(review): the V2SFmode test checks GET_CODE (x) == REG, which
always holds here, instead of FP_REGISTER_P like its neighbours --
confirm whether that is intentional. */
if (FP_REGISTER_P (REGNO (x))
&& GET_MODE (x) == V16SFmode)
fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
else if (FP_REGISTER_P (REGNO (x))
&& GET_MODE (x) == V4SFmode)
fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
else if (GET_CODE (x) == REG
&& GET_MODE (x) == V2SFmode)
fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
else if (FP_REGISTER_P (REGNO (x))
&& GET_MODE_SIZE (GET_MODE (x)) > 4)
fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
else
fputs (reg_names[REGNO (x)], (stream));
break;
case MEM:
output_address (XEXP (x, 0));
break;
case CONST:
/* On SHmedia, (const (sign_extend:DI (truncate:HI VAL))) is printed as
an assembler expression "(VAL & 65535)"; when VAL is an ASHIFTRT it
is printed as "((A >> B) & 65535)". Inner CONSTs get their own
parentheses. Any other CONST falls through to the generic case. */
if (TARGET_SHMEDIA
&& GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
&& GET_MODE (XEXP (x, 0)) == DImode
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
&& GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
{
rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
fputc ('(', stream);
if (GET_CODE (val) == ASHIFTRT)
{
fputc ('(', stream);
if (GET_CODE (XEXP (val, 0)) == CONST)
fputc ('(', stream);
output_addr_const (stream, XEXP (val, 0));
if (GET_CODE (XEXP (val, 0)) == CONST)
fputc (')', stream);
fputs (" >> ", stream);
output_addr_const (stream, XEXP (val, 1));
fputc (')', stream);
}
else
{
if (GET_CODE (val) == CONST)
fputc ('(', stream);
output_addr_const (stream, val);
if (GET_CODE (val) == CONST)
fputc (')', stream);
}
fputs (" & 65535)", stream);
break;
}
/* Fall through. */
default:
/* Immediate operand: prefixed with '#' only when TARGET_SH1 is set. */
if (TARGET_SH1)
fputc ('#', stream);
output_addr_const (stream, x);
break;
}
break;
}
}
/* Compute VALUE with force_operand, suggesting TARGET as the destination,
   and emit an explicit move into TARGET whenever the result did not
   already end up there.  */
static void
force_into (rtx value, rtx target)
{
  rtx forced = force_operand (value, target);

  if (rtx_equal_p (forced, target))
    return;

  emit_insn (gen_move_insn (target, forced));
}
/* Common setup for a call to the block-move library routine NAME for the
   move described by OPERANDS: load the routine's address into a new Pmode
   register, then force the destination address into r4 and the source
   address into r5.  Returns the register holding the routine's address.  */
static rtx
block_move_call_setup (rtx *operands, const char *name)
{
  /* NAME goes through get_identifier first; presumably so that the string
     the symbol refers to does not live in a caller's stack buffer.  */
  tree entry_name = get_identifier (name);
  rtx sym = function_symbol (IDENTIFIER_POINTER (entry_name));
  rtx func_addr_rtx = copy_to_mode_reg (Pmode, sym);

  force_into (XEXP (operands[0], 0), gen_rtx_REG (SImode, 4));
  force_into (XEXP (operands[1], 0), gen_rtx_REG (SImode, 5));
  return func_addr_rtx;
}

/* Emit code to perform a block move.  Choose the best method.

   OPERANDS[0] is the destination.
   OPERANDS[1] is the source.
   OPERANDS[2] is the size.
   OPERANDS[3] is the alignment safe to use.

   Returns 1 if a call to one of the block-move library routines was
   emitted, and 0 if nothing was emitted and the caller has to expand the
   move some other way.  */
int
expand_block_move (rtx *operands)
{
  int align = INTVAL (operands[3]);
  int constp = (GET_CODE (operands[2]) == CONST_INT);
  int bytes = (constp ? INTVAL (operands[2]) : 0);
  rtx func_addr_rtx;

  /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
     alignment, or if it isn't a multiple of 4 bytes, then fail.  */
  if (! constp || align < 4 || (bytes % 4 != 0))
    return 0;

  if (TARGET_HARD_SH4)
    {
      int dwords;

      if (bytes < 12)
        return 0;

      if (bytes == 12)
        {
          func_addr_rtx = block_move_call_setup (operands, "__movstrSI12_i4");
          emit_insn (gen_block_move_real_i4 (func_addr_rtx));
          return 1;
        }

      if (TARGET_SMALLCODE)
        return 0;

      /* Two entry points exist, selected by whether the byte count has
         bit 2 set, i.e. whether a single 4-byte word is left over after
         the 8-byte units.  */
      func_addr_rtx = block_move_call_setup (operands,
                                             bytes & 4
                                             ? "__movstr_i4_odd"
                                             : "__movstr_i4_even");

      /* r6 is loaded with the number of 8-byte units minus one.  */
      dwords = bytes >> 3;
      emit_insn (gen_move_insn (gen_rtx_REG (SImode, 6),
                                GEN_INT (dwords - 1)));
      emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
      return 1;
    }

  if (bytes < 64)
    {
      /* One library routine per size; the size is part of its name.  */
      char entry[30];

      sprintf (entry, "__movstrSI%d", bytes);
      func_addr_rtx = block_move_call_setup (operands, entry);
      emit_insn (gen_block_move_real (func_addr_rtx));
      return 1;
    }

  /* This is the same number of bytes as a memcpy call, but to a different
     less common function name, so this will occasionally use more space.  */
  if (! TARGET_SMALLCODE)
    {
      int final_switch, while_loop;

      func_addr_rtx = block_move_call_setup (operands, "__movstr");

      /* r6 controls the size of the move.  16 is decremented from it
         for each 64 bytes moved.  Then the negative bit left over is used
         as an index into a list of move instructions.  e.g., a 72 byte move
         would be set up with size(r6) = 14, for one iteration through the
         big while loop, and a switch of -2 for the last part.  */
      final_switch = 16 - ((bytes / 4) % 16);
      while_loop = ((bytes / 4) / 16 - 1) * 16;
      emit_insn (gen_move_insn (gen_rtx_REG (SImode, 6),
                                GEN_INT (while_loop + final_switch)));
      emit_insn (gen_block_lump_real (func_addr_rtx));
      return 1;
    }

  return 0;
}
/* Prepare operands for a move define_expand; specifically, one of the
operands must be in a register.
OPERANDS[0] is the destination and OPERANDS[1] the source of a move in
mode MODE. The function may emit insns and may replace either operand
in place. It performs three steps, in this order:
1. with flag_pic, legitimize a symbolic source (except a TLS symbol in
a pointer mode, which step 3 handles);
2. before reload, make sure the move has at least one register operand
and a valid destination address;
3. in a pointer mode, expand a TLS symbol source according to its TLS
model.
Always returns 0. */
int
prepare_move_operands (rtx operands[], enum machine_mode mode)
{
/* Step 1: PIC handling of symbolic constants. */
if ((mode == SImode || mode == DImode)
&& flag_pic
&& ! ((mode == Pmode || mode == ptr_mode)
&& tls_symbolic_operand (operands[1], Pmode) != 0))
{
rtx temp;
if (SYMBOLIC_CONST_P (operands[1]))
{
/* A store of a symbol goes through a register. */
if (GET_CODE (operands[0]) == MEM)
operands[1] = force_reg (Pmode, operands[1]);
else if (TARGET_SHMEDIA
&& GET_CODE (operands[1]) == LABEL_REF
&& target_reg_operand (operands[0], mode))
/* It's ok. */;
else
{
/* When no new pseudos may be created, the destination itself serves
as the scratch register. */
temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
operands[1] = legitimize_pic_address (operands[1], mode, temp);
}
}
/* (const (plus SYMBOL OFFSET)): legitimize the symbol, then add the
offset with a separate add. */
else if (GET_CODE (operands[1]) == CONST
&& GET_CODE (XEXP (operands[1], 0)) == PLUS
&& SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
{
temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
mode, temp);
operands[1] = expand_binop (mode, add_optab, temp,
XEXP (XEXP (operands[1], 0), 1),
no_new_pseudos ? temp
: gen_reg_rtx (Pmode),
0, OPTAB_LIB_WIDEN);
}
}
/* Step 2: only done before reload. */
if (! reload_in_progress && ! reload_completed)
{
/* Copy the source to a register if both operands aren't registers. */
if (! register_operand (operands[0], mode)
&& ! sh_register_operand (operands[1], mode))
operands[1] = copy_to_mode_reg (mode, operands[1]);
if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
{
/* This is like change_address_1 (operands[0], mode, 0, 1) ,
except that we can't use that function because it is static. */
rtx new = change_address (operands[0], mode, 0);
MEM_COPY_ATTRIBUTES (new, operands[0]);
operands[0] = new;
}
/* This case can happen while generating code to move the result
of a library call to the target. Reject `st r0,@(rX,rY)' because
reload will fail to find a spill register for rX, since r0 is already
being used for the source. */
else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
&& GET_CODE (operands[0]) == MEM
&& GET_CODE (XEXP (operands[0], 0)) == PLUS
&& GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
operands[1] = copy_to_mode_reg (mode, operands[1]);
}
/* Step 3: thread-local symbols, for pointer-mode moves only. */
if (mode == Pmode || mode == ptr_mode)
{
rtx op0, op1;
enum tls_model tls_kind;
op0 = operands[0];
op1 = operands[1];
/* tls_symbolic_operand's result is used as the TLS model of OP1; a zero
result skips this step. */
if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
{
rtx tga_op1, tga_ret, tmp, tmp2;
switch (tls_kind)
{
case TLS_MODEL_GLOBAL_DYNAMIC:
/* The call pattern leaves its result in r0, which becomes the new
source. */
tga_ret = gen_rtx_REG (Pmode, R0_REG);
emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
op1 = tga_ret;
break;
case TLS_MODEL_LOCAL_DYNAMIC:
/* The call result in r0 is copied to a pseudo, then combined with the
symbol by the symDTPOFF2reg pattern. The destination is used for
the result when it is a register. */
tga_ret = gen_rtx_REG (Pmode, R0_REG);
emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
tmp = gen_reg_rtx (Pmode);
emit_move_insn (tmp, tga_ret);
if (register_operand (op0, Pmode))
tmp2 = op0;
else
tmp2 = gen_reg_rtx (Pmode);
emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
op1 = tmp2;
break;
case TLS_MODEL_INITIAL_EXEC:
/* Without flag_pic the GOTaddr2picreg pattern is emitted first.
NOTE(review): presumably this sets up the PIC register, which PIC
code would already have -- confirm. */
if (! flag_pic)
emit_insn (gen_GOTaddr2picreg ());
tga_op1 = gen_reg_rtx (Pmode);
tmp = gen_sym2GOTTPOFF (op1);
emit_insn (gen_tls_initial_exec (tga_op1, tmp));
op1 = tga_op1;
break;
case TLS_MODEL_LOCAL_EXEC:
/* Result = value loaded by load_gbr + value produced by symTPOFF2reg.
The sum goes straight into the destination when it is a register. */
tmp2 = gen_reg_rtx (Pmode);
emit_insn (gen_load_gbr (tmp2));
tmp = gen_reg_rtx (Pmode);
emit_insn (gen_symTPOFF2reg (tmp, op1));
RTX_UNCHANGING_P (tmp) = 1;
if (register_operand (op0, Pmode))
op1 = op0;
else
op1 = gen_reg_rtx (Pmode);
emit_insn (gen_addsi3 (op1, tmp, tmp2));
break;
default:
abort ();
}
operands[1] = op1;
}
}
return 0;
}
/* Prepare the operands for an scc instruction; make sure that the
compare has been done. */
rtx
prepare_scc_operands (enum rtx_code code)
{
rtx t_reg = gen_rtx_REG (SImode, T_REG);
enum rtx_code oldcode = code;
enum machine_mode mode;
/* First need a compare insn. */
switch (code)
{
case NE:
/* It isn't possible to handle this case. */
abort ();
case LT:
code = GT;
break;
case LE:
code = GE;
break;
case LTU:
code = GTU;
break;
case LEU:
code = GEU;
break;
default:
break;
}
if (code != oldcode)
{
rtx tmp = sh_compare_op0;
sh_compare_op0 = sh_compare_op1;
sh_compare_op1 = tmp;
}
mode = GET_MODE (sh_compare_op0);
if (mode == VOIDmode)
mode = GET_MODE (sh_compare_op1);
sh_compare_op0 = force_reg (mode, sh_compare_op0);
if ((code != EQ && code != NE
&& (sh_compare_op1 != const0_rtx
|| code == GTU || code == GEU || code == LTU || code == LEU))
|| (mode == DImode && sh_compare_op1 != const0_rtx)
|| (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
sh_compare_op1 = force_reg (mode, sh_compare_op1);
if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
(mode == SFmode ? emit_sf_insn : emit_df_insn)
(gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (code, SImode,
sh_compare_op0, sh_compare_op1)),
gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
else
emit_insn (gen_rtx (SET, VOIDmode, t_reg,
gen_rtx (code, SImode, sh_compare_op0,
sh_compare_op1)));
return t_reg;
}
/* Called from the md file.  Emit the insn that sets the T register to the
   result of comparing sh_compare_op0 with sh_compare_op1 using CODE.  The
   saved operands are forced into registers where that is required.

   For a floating-point GE with TARGET_SH2E the function first emits a GT
   compare by calling itself and then emits the ieee_ccmpeqsf_t pattern.
   OPERANDS is only passed on to that recursive call.  */
void
from_compare (rtx *operands, int code)
{
  enum machine_mode mode = GET_MODE (sh_compare_op0);
  int float_p;
  int sh2e_float_p;
  rtx insn;

  if (mode == VOIDmode)
    mode = GET_MODE (sh_compare_op1);

  float_p = (GET_MODE_CLASS (mode) == MODE_FLOAT);
  sh2e_float_p = (TARGET_SH2E && float_p);

  if (code != EQ || mode == DImode || sh2e_float_p)
    {
      /* Force args into regs, since we can't use constants here.  */
      sh_compare_op0 = force_reg (mode, sh_compare_op0);
      if (sh2e_float_p
          || sh_compare_op1 != const0_rtx
          || code == GTU
          || code == GEU)
        sh_compare_op1 = force_reg (mode, sh_compare_op1);
    }

  if (sh2e_float_p && code == GE)
    {
      from_compare (operands, GT);
      insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
    }
  else
    {
      rtx t_reg = gen_rtx_REG (SImode, T_REG);
      rtx test = gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1);

      insn = gen_rtx_SET (VOIDmode, t_reg, test);
    }

  if (! (TARGET_SH4 && float_p))
    {
      emit_insn (insn);
      return;
    }

  /* Floating-point compares on SH4 also carry a USE of FPSCR.  */
  insn = gen_rtx (PARALLEL, VOIDmode,
                  gen_rtvec (2, insn,
                             gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
  if (mode == SFmode)
    emit_sf_insn (insn);
  else
    emit_df_insn (insn);
}
/* Functions to output assembly code. */
/* Return the assembler template for a two-word (DI or DF) move from
   OPERANDS[1] to OPERANDS[0] in mode MODE.  INSN is unused.

   The SH cannot move a DI or DF in one instruction, so the move is done
   as two word moves.  Their order is chosen so that an overlapping source
   register, or the address register of a load, is not overwritten before
   it has been used.  For a CONST_INT source the instruction that sets the
   most significant word is output directly and only the template for the
   least significant word is returned.  */
const char *
output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
                   enum machine_mode mode)
{
  rtx dest = operands[0];
  rtx source = operands[1];
  rtx addr;
  int dest_regno;
  int base_regno = -1;

  /* Push: the second word is stored first.  */
  if (GET_CODE (dest) == MEM && GET_CODE (XEXP (dest, 0)) == PRE_DEC)
    return "mov.l %T1,%0\n\tmov.l %1,%0";

  if (register_operand (dest, mode) && register_operand (source, mode))
    {
      if (REGNO (source) == MACH_REG)
        return "sts mach,%S0\n\tsts macl,%R0";

      /* When mov.d r1,r2 do r2->r3 then r1->r2;
         when mov.d r1,r0 do r1->r0 then r2->r1.  */
      return (REGNO (source) + 1 == REGNO (dest)
              ? "mov %T1,%T0\n\tmov %1,%0"
              : "mov %1,%0\n\tmov %T1,%T0");
    }

  if (GET_CODE (source) == CONST_INT)
    {
      /* The most significant word is the sign extension of the constant.  */
      output_asm_insn (INTVAL (source) < 0 ? "mov #-1,%S0" : "mov #0,%S0",
                       operands);
      return "mov %1,%R0";
    }

  if (GET_CODE (source) != MEM)
    return "mov.l %1,%0\n\tmov.l %T1,%T0";

  /* A load: find the register the address is based on.  */
  dest_regno = REGNO (dest);
  addr = XEXP (source, 0);
  switch (GET_CODE (addr))
    {
    case REG:
      base_regno = REGNO (addr);
      break;

    case SUBREG:
      base_regno = subreg_regno (addr);
      break;

    case PLUS:
      base_regno = REGNO (XEXP (addr, 0));
      /* ??? A r0+REG address shouldn't be possible here, because it isn't
         an offsettable address.  Unfortunately, offsettable addresses use
         QImode to check the offset, and a QImode offsettable address
         requires r0 for the other operand, which is not currently
         supported, so we can't use the 'o' constraint.
         Thus we must check for and handle r0+REG addresses here.
         We punt for now, since this is likely very rare.  */
      if (GET_CODE (XEXP (addr, 1)) == REG)
        abort ();
      break;

    case LABEL_REF:
      return "mov.l %1,%0\n\tmov.l %1+4,%T0";

    case POST_INC:
      return "mov.l %1,%0\n\tmov.l %1,%T0";

    default:
      abort ();
    }

  /* Work out the safe way to copy.  Copy into the second half first when
     the first destination register is also the address register.  */
  if (dest_regno == base_regno)
    return "mov.l %T1,%T0\n\tmov.l %1,%0";

  return "mov.l %1,%0\n\tmov.l %T1,%T0";
}
/* INSN is a delay-slot sequence.  Output its slot instruction (element 1
   of the sequence) right away and mark it deleted.  This is used when the
   instruction owning the slot expands into a sequence where putting the
   slot insn at the end wouldn't work.  */
static void
print_slot (rtx insn)
{
  rtx slot_insn = XVECEXP (insn, 0, 1);

  final_scan_insn (slot_insn, asm_out_file, optimize, 0, 1, NULL);

  /* The element is read again here, after the call, as the original code
     did.  */
  INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
/* Output assembler code for the jump INSN to the target OP, going through
   a register.  The address (or displacement) of OP is emitted as a literal
   right behind the jump sequence and loaded into a register first.  All
   output is done here with output_asm_insn, so the returned template is
   always the empty string.  */
const char *
output_far_jump (rtx insn, rtx op)
{
/* The address of this.lab is handed to output_asm_insn as the operand
   array, so lab is operand 0, reg is operand 1 and op is operand 2 of the
   templates below.  */
struct { rtx lab, reg, op; } this;
rtx braf_base_lab = NULL_RTX;
const char *jump;
int far;
/* Distance in bytes from INSN to its branch destination.  */
int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
rtx prev;
this.lab = gen_label_rtx ();
/* On SH2 a destination within reach of a 16 bit displacement is handled
   with a .word literal and braf; everything else needs a .long literal.  */
if (TARGET_SH2
&& offset >= -32764
&& offset - get_attr_length (insn) <= 32766)
{
far = 0;
jump = "mov.w %O0,%1; braf %1";
}
else
{
far = 1;
if (flag_pic)
{
if (TARGET_SH2)
jump = "mov.l %O0,%1; braf %1";
else
jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
}
else
jump = "mov.l %O0,%1; jmp @%1";
}
/* If we have a scratch register available, use it. */
if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
&& INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
{
this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
/* The PIC sequence for non-SH2 uses r0 for mova; when the scratch register
   itself is r0, use a variant that saves and restores r1 instead.  */
if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
output_asm_insn (jump, &this.lab);
/* Output the delay slot insn if there is one, otherwise a nop.  */
if (dbr_sequence_length ())
print_slot (final_sequence);
else
output_asm_insn ("nop", 0);
}
else
{
/* Output the delay slot insn first if any. */
if (dbr_sequence_length ())
print_slot (final_sequence);
/* No scratch register was provided: use r13, saving it before the jump
   sequence and restoring it right after the jump instruction.  */
this.reg = gen_rtx_REG (SImode, 13);
/* We must keep the stack aligned to 8-byte boundaries on SH5.
Fortunately, MACL is fixed and call-clobbered, and we never
need its value across jumps, so save r13 in it instead of in
the stack. */
if (TARGET_SH5)
output_asm_insn ("lds r13, macl", 0);
else
output_asm_insn ("mov.l r13,@-r15", 0);
output_asm_insn (jump, &this.lab);
if (TARGET_SH5)
output_asm_insn ("sts macl, r13", 0);
else
output_asm_insn ("mov.l @r15+,r13", 0);
}
/* For a far PIC braf, emit an extra label right behind the jump sequence;
   the literal below is expressed relative to this label.  */
if (far && flag_pic && TARGET_SH2)
{
braf_base_lab = gen_label_rtx ();
(*targetm.asm_out.internal_label) (asm_out_file, "L",
CODE_LABEL_NUMBER (braf_base_lab));
}
/* A .long literal is aligned with .align 2 first.  */
if (far)
output_asm_insn (".align 2", 0);
/* The label of the literal that the load in the jump sequence refers to.  */
(*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
this.op = op;
if (far && flag_pic)
{
/* On SH2 operand 0 becomes the braf base label, so the literal is
   OP - braf_base_lab; otherwise it is OP minus the literal's own label.  */
if (TARGET_SH2)
this.lab = braf_base_lab;
output_asm_insn (".long %O2-%O0", &this.lab);
}
else
output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
return "";
}
/* Local label counter, used for constants in the pool and inside
pattern branches.  output_branch takes a fresh number from it for each
"LF" label it emits.  The numbering starts at 100.  */
static int lf = 100;
/* Output code for ordinary branches.
   LOGIC is nonzero for a branch that is taken when T is set (bt) and zero
   for one that is taken when T is clear (bf).  INSN is the branch insn; its
   length attribute selects the form that is output.  OPERANDS[0] is the
   branch target.  Returns the template still to be output, or "" when
   everything has already been printed here.  */
const char *
output_branch (int logic, rtx insn, rtx *operands)
{
switch (get_attr_length (insn))
{
case 6:
/* This can happen if filling the delay slot has caused a forward
branch to exceed its range (we could reverse it, but only
when we know we won't overextend other branches; this should
best be handled by relaxation).
It can also happen when other condbranches hoist delay slot insn
from their destination, thus leading to code size increase.
But the branch will still be in the range -4092..+4098 bytes. */
if (! TARGET_RELAX)
{
/* Emit a conditional branch with the inverted condition around an
   unconditional bra to the real target.  */
int label = lf++;
/* The call to print_slot will clobber the operands. */
rtx op0 = operands[0];
/* If the instruction in the delay slot is annulled (true), then
there is no delay slot where we can put it now. The only safe
place for it is after the label. final will do that by default. */
if (final_sequence
&& ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
&& get_attr_length (XVECEXP (final_sequence, 0, 1)))
{
asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
ASSEMBLER_DIALECT ? "/" : ".", label);
print_slot (final_sequence);
}
else
asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
output_asm_insn ("bra\t%l0", &op0);
fprintf (asm_out_file, "\tnop\n");
(*targetm.asm_out.internal_label)(asm_out_file, "LF", label);
return "";
}
/* When relaxing, handle this like a short branch. The linker
will fix it up if it still doesn't fit after relaxation. */
/* Fall through.  */
case 2:
return logic ? "bt%.\t%l0" : "bf%.\t%l0";
/* These are for SH2e, in which we have to account for the
extra nop because of the hardware bug in annulled branches. */
case 8:
if (! TARGET_RELAX)
{
/* Same inverted-branch-around-bra scheme as for length 6, but with an
   explicit nop in the delay slot of the conditional branch.  An annulled
   delay slot is not expected here.  */
int label = lf++;
if (final_sequence
&& INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
abort ();
asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
logic ? "f" : "t",
ASSEMBLER_DIALECT ? "/" : ".", label);
fprintf (asm_out_file, "\tnop\n");
output_asm_insn ("bra\t%l0", operands);
fprintf (asm_out_file, "\tnop\n");
(*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
return "";
}
/* When relaxing, fall through. */
case 4:
{
/* Delayed conditional branch straight to the target, followed by a nop.
   The longest string built here is 8 characters plus the terminator.  */
char buffer[10];
sprintf (buffer, "b%s%ss\t%%l0",
logic ? "t" : "f",
ASSEMBLER_DIALECT ? "/" : ".");
output_asm_insn (buffer, &operands[0]);
return "nop";
}
default:
/* There should be no longer branches now - that would
indicate that something has destroyed the branches set
up in machine_dependent_reorg. */
abort ();
}
}
/* Set up OPERANDS[9] as the label operand for TEMPLATE, which is returned
   unchanged in every case.
   When INSN is followed by a conditional jump whose IF_THEN_ELSE condition
   code differs from CODE, a new label is placed after that jump.  When the
   following conditional jump is otherwise close enough, its own target is
   reused.  In all remaining cases a new label is placed directly after
   INSN.  Addresses of newly created labels are recorded with
   INSN_ADDRESSES_NEW.  */
const char *
output_branchy_insn (enum rtx_code code, const char *template,
                     rtx insn, rtx *operands)
{
  rtx follower = NEXT_INSN (insn);
  /* The insn after which a fresh label goes if we need one.  */
  rtx anchor = insn;

  if (follower
      && GET_CODE (follower) == JUMP_INSN
      && condjump_p (follower))
    {
      rtx jump_src = SET_SRC (PATTERN (follower));

      if (GET_CODE (jump_src) == IF_THEN_ELSE
          && GET_CODE (XEXP (jump_src, 0)) != code)
        /* Following branch not taken: the label goes behind it.  */
        anchor = follower;
      else
        {
          int distance = (branch_dest (follower)
                          - INSN_ADDRESSES (INSN_UID (follower)) + 4);

          if (distance >= -252 && distance <= 258)
            {
              /* For a conditional jump pick the branch_true arm.  */
              if (GET_CODE (jump_src) == IF_THEN_ELSE)
                operands[9] = XEXP (jump_src, 1);
              else
                operands[9] = jump_src;
              return template;
            }
        }
    }

  operands[9] = gen_label_rtx ();
  emit_label_after (operands[9], anchor);
  INSN_ADDRESSES_NEW (operands[9],
                      INSN_ADDRESSES (INSN_UID (anchor))
                      + get_attr_length (anchor));
  return template;
}
/* Return the output template for INSN: a bt to label operand 9 followed by
   fcmp/eq.  Operand 9 is set up by output_branchy_insn, called with
   code NE.  */
const char *
output_ieee_ccmpeq (rtx insn, rtx *operands)
{
  static const char ccmpeq_template[] = "bt\t%l9\\;fcmp/eq\t%1,%0";

  return output_branchy_insn (NE, ccmpeq_template, insn, operands);
}
/* Output the start of the assembler file: the generic file prologue, the
   initial section, the endianness directive and, for non-ELF targets,
   the SHcompact / SHmedia mode directives.  */
static void
sh_file_start (void)
{
  default_file_start ();

  if (! TARGET_ELF)
    {
      /* Switch to the data section so that the coffsem symbol
         isn't in the text section.  */
      data_section ();
    }
  else
    {
      /* We need to show the text section with the proper
         attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
         emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
         will complain.  We can teach GAS specifically about the
         default attributes for our choice of text section, but
         then we would have to change GAS again if/when we change
         the text section name.  */
      fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
    }

  if (TARGET_LITTLE_ENDIAN)
    fputs ("\t.little\n", asm_out_file);

  /* The mode directives are only emitted for non-ELF output.  */
  if (TARGET_ELF)
    return;

  if (TARGET_SHCOMPACT)
    fputs ("\t.mode\tSHcompact\n", asm_out_file);
  else if (TARGET_SHMEDIA)
    fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
             TARGET_SHMEDIA64 ? 64 : 32);
}
/* Return true if PAT contains an UNSPEC_CALLER unspec.  Only CONST
   wrappers and the operands of PLUS / MINUS are looked through; any
   other rtx code yields false.  */
static bool
unspec_caller_rtx_p (rtx pat)
{
  enum rtx_code pat_code = GET_CODE (pat);

  if (pat_code == CONST)
    return unspec_caller_rtx_p (XEXP (pat, 0));

  if (pat_code == PLUS || pat_code == MINUS)
    return (unspec_caller_rtx_p (XEXP (pat, 0))
            || unspec_caller_rtx_p (XEXP (pat, 1)));

  if (pat_code == UNSPEC)
    return XINT (pat, 1) == UNSPEC_CALLER;

  return false;
}
/* Indicate that INSN cannot be duplicated.  This is true for an insn
   that generates a unique label.
   Returns true only after reload, when generating PIC code, for a plain
   (non-asm) INSN whose pattern is a single SET with a source that contains
   an UNSPEC_CALLER unspec; returns false in every other case.  */
static bool
sh_cannot_copy_insn_p (rtx insn)
{
  rtx pat;

  if (!reload_completed || !flag_pic)
    return false;

  if (GET_CODE (insn) != INSN)
    return false;

  pat = PATTERN (insn);

  /* asm_noperands examines an insn body, so it must be given the pattern
     rather than the insn itself; otherwise it can never recognize an asm.  */
  if (asm_noperands (pat) >= 0)
    return false;

  if (GET_CODE (pat) != SET)
    return false;

  return unspec_caller_rtx_p (SET_SRC (pat));
}
/* Actual number of instructions used to make a shift by N.
   All the *_insns tables below have 32 entries and are indexed by the
   shift count N (0..31).  This one is for arithmetic right shifts.
   NOTE(review): the entries of 8 presumably stand for the out-of-line
   helper sequence used by expand_ashiftrt -- confirm.  */
static const char ashiftrt_insns[] =
{ 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
/* Left shift and logical right shift are the same. */
static const char shift_insns[] =
{ 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
/* Individual shift amounts needed to get the above length sequences.
One bit right shifts clobber the T bit, so when possible, put one bit
shifts in the middle of the sequence, so the ends are eligible for
branch delay slots.
Row N holds the shift_insns[N] component shifts for a shift by N; a
negative amount is a shift in the opposite direction (see gen_ashift). */
static const short shift_amounts[32][5] = {
{0}, {1}, {2}, {2, 1},
{2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
{8}, {8, 1}, {8, 2}, {8, 1, 2},
{8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
{16}, {16, 1}, {16, 2}, {16, 1, 2},
{16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
{16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
{16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
/* Likewise, but for shift amounts < 16, up to three highmost bits
might be clobbered. This is typically used when combined with some
kind of sign or zero extension. */
static const char ext_shift_insns[] =
{ 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
/* Component shift amounts for ext_shift_insns, laid out like
shift_amounts: row N holds ext_shift_insns[N] entries. */
static const short ext_shift_amounts[32][4] = {
{0}, {1}, {2}, {2, 1},
{2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
{8}, {8, 1}, {8, 2}, {8, 1, 2},
{8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
{16}, {16, 1}, {16, 2}, {16, 1, 2},
{16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
{16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
{16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
/* Assuming we have a value that has been sign-extended by at least one bit,
can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
to shift it by N without data loss, and quicker than by other means?
For N in the range 0..31 the test below holds exactly for N == 7 and
N == 15. */
#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
/* This is used in length attributes in sh.md to help compute the length
   of arbitrary constant shift instructions.
   INSN's pattern is a vector whose element 0 is a SET with a shift by a
   constant as its source.  Return the number of instructions needed for
   that shift.  The shift count is reduced modulo 32 before it is used, so
   that an out-of-range count cannot index past the end of the 32-entry
   tables.  Aborts for anything but ASHIFT, LSHIFTRT and ASHIFTRT.  */
int
shift_insns_rtx (rtx insn)
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int shift_count = INTVAL (XEXP (set_src, 1)) & 31;
  enum rtx_code shift_code = GET_CODE (set_src);

  switch (shift_code)
    {
    case ASHIFTRT:
      return ashiftrt_insns[shift_count];
    case LSHIFTRT:
    case ASHIFT:
      return shift_insns[shift_count];
    default:
      abort ();
    }
}
/* Return the cost of a shift.  X is the shift rtx; operand 1 of X is the
   shift count.  The result is a number of instructions, except for the
   value 10000, which marks shifts wider than a word for which there is
   no pattern.  */
static inline int
shiftcosts (rtx x)
{
  int value;

  if (TARGET_SHMEDIA)
    return 1;

  if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
    {
      if (GET_MODE (x) == DImode
          && GET_CODE (XEXP (x, 1)) == CONST_INT
          && INTVAL (XEXP (x, 1)) == 1)
        return 2;

      /* Everything else is invalid, because there is no pattern for it.  */
      return 10000;
    }

  /* If shift by a non constant, then this will be expensive.  */
  if (GET_CODE (XEXP (x, 1)) != CONST_INT)
    return SH_DYNAMIC_SHIFT_COST;

  /* Reduce the count modulo 32: the cost tables only have 32 entries, and
     an out-of-range constant count must not index past their end.  */
  value = INTVAL (XEXP (x, 1)) & 31;

  /* Otherwise, return the true cost in instructions.  */
  if (GET_CODE (x) == ASHIFTRT)
    {
      int cost = ashiftrt_insns[value];

      /* If SH3, then we put the constant in a reg and use shad.  */
      if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
        cost = 1 + SH_DYNAMIC_SHIFT_COST;
      return cost;
    }
  else
    return shift_insns[value];
}
/* Return the cost of the AND operation X, looking only at its second
   operand.  */
static inline int
andcosts (rtx x)
{
  rtx mask_op = XEXP (x, 1);
  int mask_val;

  /* Anding with a register is a single cycle and instruction.  */
  if (GET_CODE (mask_op) != CONST_INT)
    return 1;

  mask_val = INTVAL (mask_op);

  if (TARGET_SHMEDIA)
    {
      /* MASK_OP is known to be a CONST_INT at this point.  */
      if (CONST_OK_FOR_I16 (INTVAL (mask_op))
          || EXTRA_CONSTRAINT_C16 (mask_op))
        return 1;
      return 2;
    }

  /* These constants are single cycle extu.[bw] instructions.  */
  if (mask_val == 0xff || mask_val == 0xffff)
    return 1;

  /* Either a constant usable in an and immediate instruction, which takes
     a single cycle but requires r0 and so is made a little more expensive,
     or a constant that can be loaded with a mov immediate followed by an
     and.  The latter case is probably unnecessary.  */
  if (CONST_OK_FOR_K08 (mask_val) || CONST_OK_FOR_I08 (mask_val))
    return 2;

  /* Any other constants requires a 2 cycle pc-relative load plus an and.
     This case is probably unnecessary.  */
  return 3;
}
/* Return the cost of the addition or subtraction X, looking only at its
   second operand.  */
static inline int
addsubcosts (rtx x)
{
  rtx addend = XEXP (x, 1);
  enum rtx_code addend_code = GET_CODE (addend);

  /* Adding a register is a single cycle insn.  */
  if (addend_code == REG || addend_code == SUBREG)
    return 1;

  /* Likewise for small constants.  */
  if (addend_code == CONST_INT && CONST_OK_FOR_ADD (INTVAL (addend)))
    return 1;

  /* Outside SHmedia, any other constant requires a 2 cycle pc-relative
     load plus an addition.  */
  if (! TARGET_SHMEDIA)
    return 3;

  switch (addend_code)
    {
    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      return TARGET_SHMEDIA64 ? 5 : 3;

    case CONST_INT:
      {
        HOST_WIDE_INT val = INTVAL (addend);

        /* The cost grows with the number of 16 bit pieces needed.  */
        if (CONST_OK_FOR_I16 (val))
          return 2;
        if (CONST_OK_FOR_I16 (val >> 16))
          return 3;
        if (CONST_OK_FOR_I16 ((val >> 16) >> 16))
          return 4;
        return 5;
      }

    default:
      return 5;
    }
}
/* Return the cost of a multiply.  The operand X is not examined; the
   result depends only on the target flags.  */
static inline int
multcosts (rtx x ATTRIBUTE_UNUSED)
{
  if (TARGET_SHMEDIA)
    return 3;

  /* We have a mul insn, so we can never take more than the mul and the
     read of the mac reg, but count more because of the latency and extra
     reg usage.  */
  if (TARGET_SH2)
    return TARGET_SMALLCODE ? 2 : 3;

  /* Without a mul insn: if we're aiming at small code, just count the
     number of insns in a multiply call sequence; otherwise count all the
     insns in the routine we'd be calling too.  */
  return TARGET_SMALLCODE ? 5 : 20;
}
/* Compute a (partial) cost for rtx X, whose code is CODE and which appears
   inside an rtx with code OUTER_CODE.  Return true if the complete cost
   has been computed and stored in *TOTAL, and false if subexpressions
   should be scanned; *TOTAL is left untouched in the latter case.  */
static bool
sh_rtx_costs (rtx x, int code, int outer_code, int *total)
{
  switch (code)
    {
    case CONST_INT:
      {
        HOST_WIDE_INT ival = INTVAL (x);

        if (! TARGET_SHMEDIA)
          {
            if (CONST_OK_FOR_I08 (ival))
              *total = 0;
            else if ((outer_code == AND || outer_code == IOR
                      || outer_code == XOR)
                     && CONST_OK_FOR_K08 (ival))
              *total = 1;
            else
              *total = 8;
            return true;
          }

        /* SHmedia: constants that fit the instruction's immediate field
           are free; others are costed by the number of 16 bit pieces.  */
        if (ival == 0
            || (outer_code == AND && and_operand ((x), DImode))
            || ((outer_code == IOR || outer_code == XOR
                 || outer_code == PLUS)
                && CONST_OK_FOR_I10 (ival)))
          *total = 0;
        else if (CONST_OK_FOR_I16 (ival))
          *total = COSTS_N_INSNS (outer_code != SET);
        else if (CONST_OK_FOR_I16 (ival >> 16))
          *total = COSTS_N_INSNS (2);
        else if (CONST_OK_FOR_I16 ((ival >> 16) >> 16))
          *total = COSTS_N_INSNS (3);
        else
          *total = COSTS_N_INSNS (4);
        return true;
      }

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (TARGET_SHMEDIA64)
        *total = COSTS_N_INSNS (4);
      else if (TARGET_SHMEDIA32)
        *total = COSTS_N_INSNS (2);
      else
        *total = 5;
      return true;

    case CONST_DOUBLE:
      *total = TARGET_SHMEDIA ? COSTS_N_INSNS (4) : 10;
      return true;

    case PLUS:
      *total = COSTS_N_INSNS (addsubcosts (x));
      return true;

    case AND:
      *total = COSTS_N_INSNS (andcosts (x));
      return true;

    case MULT:
      *total = COSTS_N_INSNS (multcosts (x));
      return true;

    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (shiftcosts (x));
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    case FLOAT:
    case FIX:
      *total = 100;
      return true;

    default:
      break;
    }

  return false;
}
/* Compute the cost of the address X.  For the SH, all valid addresses
   cost the same, except that a PLUS with a non-constant second operand
   (reg + reg addressing) is made slightly more expensive outside SHmedia,
   since it increases pressure on r0.  */
static int
sh_address_cost (rtx X)
{
  if (GET_CODE (X) != PLUS)
    return 0;
  if (CONSTANT_P (XEXP (X, 1)))
    return 0;
  if (TARGET_SHMEDIA)
    return 0;
  return 1;
}
/* Emit one SImode shift instruction that shifts REG in place by N bits.
   TYPE is ASHIFT, LSHIFTRT or ASHIFTRT; nothing is emitted for any other
   value.  A negative N asks for a shift by -N in the opposite direction:
   ASHIFT turns into LSHIFTRT and everything else into ASHIFT.  */
void
gen_ashift (int type, int n, rtx reg)
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      type = (type == ASHIFT) ? LSHIFTRT : ASHIFT;
      n = -n;
    }

  if (type == ASHIFTRT)
    emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
  else if (type == LSHIFTRT)
    {
      /* A single bit logical right shift has a pattern of its own.  */
      if (n != 1)
        emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
      else
        emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
    }
  else if (type == ASHIFT)
    emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
}
/* Like gen_ashift, but for an HImode REG.  Left shifts use the HImode
   shift pattern; right shifts are done on an SImode SUBREG of REG through
   gen_ashift.  Nothing is emitted for any other TYPE.  */
void
gen_ashift_hi (int type, int n, rtx reg)
{
  /* Negative values here come from the shift_amounts array.  */
  if (n < 0)
    {
      type = (type == ASHIFT) ? LSHIFTRT : ASHIFT;
      n = -n;
    }

  if (type == ASHIFT)
    {
      emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
      return;
    }

  if (type == ASHIFTRT || type == LSHIFTRT)
    {
      /* We don't have HImode right shift operations because using the
         ordinary 32 bit shift instructions for that doesn't generate proper
         zero/sign extension.
         gen_ashift_hi is only called in contexts where we know that the
         sign extension works out correctly.  */
      int byte_offset = 0;
      rtx inner = reg;

      /* Strip an existing SUBREG, keeping its byte offset for the new
         SImode SUBREG.  */
      if (GET_CODE (reg) == SUBREG)
        {
          byte_offset = SUBREG_BYTE (reg);
          inner = SUBREG_REG (reg);
        }
      gen_ashift (type, n, gen_rtx_SUBREG (SImode, inner, byte_offset));
    }
}
/* Output RTL to split a constant shift into its component SH constant
   shift instructions.  CODE is the shift code, OPERANDS[0] the register
   that is shifted in place and OPERANDS[2] the constant shift count.  */
void
gen_shifty_op (int code, rtx *operands)
{
  rtx target = operands[0];
  /* Truncate the shift count in case it is out of bounds.  */
  int count = INTVAL (operands[2]) & 0x1f;
  int nsteps, step;

  if (count == 0)
    {
      /* This can happen when not optimizing.  We must output something here
         to prevent the compiler from aborting in final.c after the try_split
         call.  */
      emit_insn (gen_nop ());
      return;
    }

  if (count == 31 && code == LSHIFTRT)
    {
      emit_insn (gen_rotlsi3_1 (target, target));
      emit_insn (gen_movt (target));
      return;
    }

  /* There is a two instruction sequence for 31 bit left shifts,
     but it requires r0.  */
  if (count == 31 && code == ASHIFT
      && GET_CODE (target) == REG && REGNO (target) == 0)
    {
      emit_insn (gen_andsi3 (target, target, const1_rtx));
      emit_insn (gen_rotlsi3_31 (target, target));
      return;
    }

  /* General case: emit the component shifts listed in shift_amounts.  */
  nsteps = shift_insns[count];
  for (step = 0; step < nsteps; step++)
    gen_ashift (code, shift_amounts[count][step], target);
}
/* Like gen_shifty_op, but optimized for values where the topmost bits
   don't matter: the component shifts come from ext_shift_amounts.
   OPERANDS[0] may be an HImode or an SImode register.  */
void
gen_shifty_hi_op (int code, rtx *operands)
{
  /* This operation is used by and_shl for SImode values with a few
     high bits known to be cleared.  */
  int count = INTVAL (operands[2]) & 31;
  int nsteps, step;
  void (*emit_one) (int, int, rtx);

  if (count == 0)
    {
      emit_insn (gen_nop ());
      return;
    }

  if (GET_MODE (operands[0]) == HImode)
    emit_one = gen_ashift_hi;
  else
    emit_one = gen_ashift;

  nsteps = ext_shift_insns[count];

  if (code != ASHIFT)
    {
      /* When shifting right, emit the shifts in reverse order, so that
         solitary negative values come first.  */
      for (step = nsteps - 1; step >= 0; step--)
        emit_one (code, ext_shift_amounts[count][step], operands[0]);
      return;
    }

  for (step = 0; step < nsteps; step++)
    emit_one (code, ext_shift_amounts[count][step], operands[0]);
}
/* Output RTL for an arithmetic right shift of OPERANDS[1] by OPERANDS[2]
   into OPERANDS[0].  Return 1 if the shift has been emitted, and 0 if it
   was not handled here (a non-constant count without TARGET_SH3).  */
/* ??? Rewrite to use super-optimizer sequences.  */
int
expand_ashiftrt (rtx *operands)
{
  rtx sym;
  rtx wrk;
  char func[18];
  tree func_name;
  int value;
  int const_count_p = GET_CODE (operands[2]) == CONST_INT;

  if (TARGET_SH3)
    {
      /* Dynamic shift: the negated count goes into a register.  */
      if (! const_count_p)
        {
          rtx count = copy_to_mode_reg (SImode, operands[2]);

          emit_insn (gen_negsi2 (count, count));
          emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
          return 1;
        }

      if (ashiftrt_insns[INTVAL (operands[2]) & 31]
          > 1 + SH_DYNAMIC_SHIFT_COST)
        {
          rtx count
            = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));

          emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
          return 1;
        }
    }

  if (! const_count_p)
    return 0;

  value = INTVAL (operands[2]) & 31;

  if (value == 31)
    {
      emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
      return 1;
    }

  /* Expand a short sequence inline: an optional shift by 16 followed by
     up to five (up to three after the shift by 16) single bit shifts.  */
  if (value <= 5 || (value >= 16 && value <= 19))
    {
      int remaining = value;

      wrk = gen_reg_rtx (SImode);
      if (value >= 16)
        {
          emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
          remaining -= 16;
        }
      else
        emit_move_insn (wrk, operands[1]);

      for (; remaining > 0; remaining--)
        gen_ashift (ASHIFTRT, 1, wrk);

      emit_move_insn (operands[0], wrk);
      return 1;
    }

  /* For longer sequences call a magic routine.  */
  wrk = gen_reg_rtx (Pmode);

  /* Load the value into an arg reg and call a helper.  */
  emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
  sprintf (func, "__ashiftrt_r4_%d", value);
  func_name = get_identifier (func);
  sym = function_symbol (IDENTIFIER_POINTER (func_name));
  emit_move_insn (wrk, sym);
  emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
  emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
  return 1;
}
/* Return nonzero if a left or logical right shift by the constant COUNT
   takes more instructions as a sequence of constant shifts than loading
   the count and doing a dynamic shift would cost.  COUNT is reduced
   modulo 32 so that an out-of-range count cannot index past the end of
   shift_insns.  */
int
sh_dynamicalize_shift_p (rtx count)
{
  return shift_insns[INTVAL (count) & 31] > 1 + SH_DYNAMIC_SHIFT_COST;
}
/* Try to find a good way to implement the combiner pattern
[(set (match_operand:SI 0 "register_operand" "r")
(and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "const_int_operand" "n"))
(match_operand:SI 3 "const_int_operand" "n"))) .
LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
return 0 for simple right / left or left/right shift combination.
return 1 for a combination of shifts with zero_extend.
return 2 for a combination of shifts with an AND that needs r0.
return 3 for a combination of shifts with an AND that needs an extra
scratch register, when the three highmost bits of the AND mask are clear.
return 4 for a combination of shifts with an AND that needs an extra
scratch register, when any of the three highmost bits of the AND mask
is set.
0 is also returned when the left shift count is outside 0..31; nothing
is stored through ATTRP in that case.
If ATTRP is set, store an initial right shift width in ATTRP[0],
and the instruction length in ATTRP[1] . These values are not valid
when returning 0.
When ATTRP is set and the zero_extend method is the best one found so far,
ATTRP[2] gets set to the width of the left shift that is to be done before
the zero extend, or to -1 if the zero extend can be done right away. */
int
shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
{
unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
int left = INTVAL (left_rtx), right;
int best = 0;
int cost, best_cost = 10000;
int best_right = 0, best_len = 0;
int i;
int can_ext;
if (left < 0 || left > 31)
return 0;
/* MASK is the AND mask as it applies to the value before the left shift.
   A MASK_RTX that is not a CONST_INT is treated as the all-ones SImode
   mask.  */
if (GET_CODE (mask_rtx) == CONST_INT)
mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
else
mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
/* Can this be expressed as a right shift / left shift pair ? */
/* LSB is the lowest set bit of MASK, RIGHT its bit position.  */
lsb = ((mask ^ (mask - 1)) >> 1) + 1;
right = exact_log2 (lsb);
/* MASK2 is the complement of MASK with its trailing zeroes filled in, and
   LSB2 the lowest set bit of MASK2.  */
mask2 = ~(mask + lsb - 1);
lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
/* mask has no zeroes but trailing zeroes <==> ! mask2 */
if (! mask2)
best_cost = shift_insns[right] + shift_insns[right + left];
/* mask has no trailing zeroes <==> ! right */
else if (! right && mask2 == ~(lsb2 - 1))
{
int late_right = exact_log2 (lsb2);
best_cost = shift_insns[left + late_right] + shift_insns[late_right];
}
/* Try to use zero extend */
/* This is only tried when all bits of MASK2 from LSB2 upwards are set.  */
if (mask2 == ~(lsb2 - 1))
{
int width, first;
/* Try an 8 bit and a 16 bit zero extend.  */
for (width = 8; width <= 16; width += 8)
{
/* Can we zero-extend right away? */
if (lsb2 == (unsigned HOST_WIDE_INT)1 << width)
{
cost
= 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
if (cost < best_cost)
{
best = 1;
best_cost = cost;
best_right = right;
best_len = cost;
if (attrp)
attrp[2] = -1;
}
continue;
}
/* ??? Could try to put zero extend into initial right shift,
or even shift a bit left before the right shift. */
/* Determine value of first part of left shift, to get to the
zero extend cut-off point. */
first = width - exact_log2 (lsb2) + right;
if (first >= 0 && right + left - first >= 0)
{
cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
+ ext_shift_insns[right + left - first];
if (cost < best_cost)
{
best = 1;
best_cost = cost;
best_right = right;
best_len = cost;
if (attrp)
attrp[2] = first;
}
}
}
}
/* Try to use r0 AND pattern */
/* I is the width of an initial right shift: 0, 1 or 2 bits, but never
   more than the number of trailing zeroes of MASK.  */
for (i = 0; i <= 2; i++)
{
if (i > right)
break;
if (! CONST_OK_FOR_K08 (mask >> i))
continue;
cost = (i != 0) + 2 + ext_shift_insns[left + i];
if (cost < best_cost)
{
best = 2;
best_cost = cost;
best_right = i;
best_len = cost - 1;
}
}
/* Try to use a scratch register to hold the AND operand. */
/* CAN_EXT is set when the two highmost bits of the SImode mask are clear;
   the cost is then taken from ext_shift_insns and kind 3 is returned
   instead of kind 4.  */
can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0;
for (i = 0; i <= 2; i++)
{
if (i > right)
break;
cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
+ (can_ext ? ext_shift_insns : shift_insns)[left + i];
if (cost < best_cost)
{
best = 4 - can_ext;
best_cost = cost;
best_right = i;
best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
}
}
if (attrp)
{
attrp[0] = best_right;
attrp[1] = best_len;
}
return best;
}
/* This is used in length attributes of the unnamed instructions
   corresponding to shl_and_kind return values of 1 and 2.  INSN's pattern
   is a vector whose element 0 sets a register to an AND of a shift with a
   mask; the result is the length that shl_and_kind computes for that shift
   count and mask.  */
int
shl_and_length (rtx insn)
{
  int attrs[3];
  rtx and_rtx = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  rtx shift_rtx = XEXP (and_rtx, 0);

  shl_and_kind (XEXP (shift_rtx, 1), XEXP (and_rtx, 1), attrs);
  return attrs[1];
}
/* This is used in length attribute of the and_shl_scratch instruction.
   The source of the SET in element 0 of INSN's pattern contains three
   nested shifts by constants; the result is the sum of their shift_insns
   costs plus one.  Each count is reduced modulo 32 so that it cannot index
   past the end of the 32-entry shift_insns table.  */
int
shl_and_scr_length (rtx insn)
{
  rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  int len = shift_insns[INTVAL (XEXP (set_src, 1)) & 31];
  rtx op = XEXP (set_src, 0);

  len += shift_insns[INTVAL (XEXP (op, 1)) & 31] + 1;
  op = XEXP (XEXP (op, 0), 0);
  return len + shift_insns[INTVAL (XEXP (op, 1)) & 31];
}
/* Generating rtl?  Defined elsewhere.  gen_shl_and and gen_shl_sext test
this flag, together with reload_in_progress and reload_completed, to
choose between emitting one combined pattern and expanding into
individual instructions. */
extern int rtx_equal_function_value_matters;
/* Generate rtl for instructions for which shl_and_kind advised a particular
   method of generating them, i.e. returned nonzero.
   DEST receives (SOURCE << LEFT_RTX) & MASK_RTX.  Return 0 when code has
   been emitted, and -1 when shl_and_kind returned 0, in which case nothing
   is emitted.  */
int
gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
{
  int attributes[3];
  unsigned HOST_WIDE_INT mask;
  int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
  int right, total_shift;
  void (*shift_gen_fun) (int, rtx*) = gen_shifty_hi_op;

  /* RIGHT is the width of the initial right shift; MASK is the AND mask
     as it applies after that right shift.  */
  right = attributes[0];
  total_shift = INTVAL (left_rtx) + right;
  mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
  switch (kind)
    {
    default:
      return -1;
    case 1:
      {
        /* Shifts combined with a zero extend.  FIRST is the part of the
           left shift to do before the zero extend, or negative if the
           zero extend comes first.  */
        int first = attributes[2];
        rtx operands[3];

        if (first < 0)
          {
            emit_insn ((mask << right) <= 0xff
                       ? gen_zero_extendqisi2 (dest,
                                               gen_lowpart (QImode, source))
                       : gen_zero_extendhisi2 (dest,
                                               gen_lowpart (HImode, source)));
            source = dest;
          }
        if (source != dest)
          emit_insn (gen_movsi (dest, source));
        operands[0] = dest;
        if (right)
          {
            operands[2] = GEN_INT (right);
            gen_shifty_hi_op (LSHIFTRT, operands);
          }
        if (first > 0)
          {
            operands[2] = GEN_INT (first);
            gen_shifty_hi_op (ASHIFT, operands);
            total_shift -= first;
            mask <<= first;
          }
        if (first >= 0)
          emit_insn (mask <= 0xff
                     ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
                     : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
        if (total_shift > 0)
          {
            operands[2] = GEN_INT (total_shift);
            gen_shifty_hi_op (ASHIFT, operands);
          }
        break;
      }
    case 4:
      shift_gen_fun = gen_shifty_op;
      /* Fall through.  */
    case 3:
      /* If the topmost bit that matters is set, set the topmost bits
         that don't matter.  This way, we might be able to get a shorter
         signed constant.  The all-ones value is built as an unsigned
         quantity, because left-shifting a negative value is undefined.  */
      if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
        mask |= ~(unsigned HOST_WIDE_INT) 0 << (31 - total_shift);
      /* Fall through.  */
    case 2:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (rtx_equal_function_value_matters
          || reload_in_progress || reload_completed)
        {
          rtx operands[3];

          /* Cases 3 and 4 should be handled by this split
             only while combining  */
          if (kind > 2)
            abort ();
          if (right)
            {
              emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
              source = dest;
            }
          emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
          if (total_shift)
            {
              operands[0] = dest;
              operands[1] = dest;
              operands[2] = GEN_INT (total_shift);
              shift_gen_fun (ASHIFT, operands);
            }
          break;
        }
      else
        {
          /* NEG is an extra amount added to the left shift in the combined
             pattern and passed to it as a separate operand; it is derived
             from the negative entries of ext_shift_amounts.  */
          int neg = 0;

          if (kind != 4 && total_shift < 16)
            {
              neg = -ext_shift_amounts[total_shift][1];
              if (neg > 0)
                neg -= ext_shift_amounts[total_shift][2];
              else
                neg = 0;
            }
          emit_insn (gen_and_shl_scratch (dest, source,
                                          GEN_INT (right),
                                          GEN_INT (mask),
                                          GEN_INT (total_shift + neg),
                                          GEN_INT (neg)));
          emit_insn (gen_movsi (dest, dest));
          break;
        }
    }
  return 0;
}
/* Try to find a good way to implement the combiner pattern
[(set (match_operand:SI 0 "register_operand" "=r")
(sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
(match_operand:SI 2 "const_int_operand" "n")
(match_operand:SI 3 "const_int_operand" "n")
(const_int 0)))
(clobber (reg:SI T_REG))]
LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
return 0 for simple left / right shift combination.
return 1 for left shift / 8 bit sign extend / left shift.
return 2 for left shift / 16 bit sign extend / left shift.
return 3 for left shift / 8 bit sign extend / shift / sign extend.
return 4 for left shift / 16 bit sign extend / shift / sign extend.
return 5 for left shift / 16 bit sign extend / right shift
return 6 for < 8 bit sign extend / left shift.
return 7 for < 8 bit sign extend / left shift / single right shift.
If COSTP is nonzero, assign the calculated cost to *COSTP.
Aborts if SIZE_RTX is not greater than LEFT_RTX. */
int
shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
{
int left, size, insize, ext;
int cost = 0, best_cost;
int kind;
left = INTVAL (left_rtx);
size = INTVAL (size_rtx);
/* INSIZE is the number of bits of the unshifted input that end up in the
   extracted field.  */
insize = size - left;
if (insize <= 0)
abort ();
/* Default to left / right shift. */
kind = 0;
best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
if (size <= 16)
{
/* 16 bit shift / sign extend / 16 bit shift */
cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
/* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
below, by alternative 3 or something even better. */
if (cost < best_cost)
{
kind = 5;
best_cost = cost;
}
}
/* Try a plain sign extend between two shifts. */
/* EXT is the width of the sign extend: 16, then 8, as long as the input
   field fits into it.  EXT / 8 gives kinds 2 and 1, EXT / 8 + 2 gives
   kinds 4 and 3.  */
for (ext = 16; ext >= insize; ext -= 8)
{
if (ext <= size)
{
cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
if (cost < best_cost)
{
kind = ext / (unsigned) 8;
best_cost = cost;
}
}
/* Check if we can do a sloppy shift with a final signed shift
restoring the sign. */
if (EXT_SHIFT_SIGNED (size - ext))
cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
/* If not, maybe it's still cheaper to do the second shift sloppy,
and do a final sign extend? */
else if (size <= 16)
cost = ext_shift_insns[ext - insize] + 1
+ ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
else
continue;
if (cost < best_cost)
{
kind = ext / (unsigned) 8 + 2;
best_cost = cost;
}
}
/* Check if we can sign extend in r0 */
if (insize < 8)
{
cost = 3 + shift_insns[left];
if (cost < best_cost)
{
kind = 6;
best_cost = cost;
}
/* Try the same with a final signed shift. */
if (left < 31)
{
cost = 3 + ext_shift_insns[left + 1] + 1;
if (cost < best_cost)
{
kind = 7;
best_cost = cost;
}
}
}
if (TARGET_SH3)
{
/* Try to use a dynamic shift. */
cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
if (cost < best_cost)
{
kind = 0;
best_cost = cost;
}
}
/* NOTE(review): this stores COST, i.e. the cost of the alternative that
   was computed last, and not BEST_COST, the cost of the alternative that
   KIND stands for -- confirm that this is intended.  */
if (costp)
*costp = cost;
return kind;
}
/* Function to be used in the length attribute of the instructions
   implementing this pattern.  INSN's pattern is a vector whose element 0
   sets a register to a sign_extract of a shift; the result is the cost
   that shl_sext_kind reports for that shift count and field size.  */
int
shl_sext_length (rtx insn)
{
  int length;
  rtx extract_rtx = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
  rtx shift_rtx = XEXP (extract_rtx, 0);

  shl_sext_kind (XEXP (shift_rtx, 1), XEXP (extract_rtx, 1), &length);
  return length;
}
/* Generate rtl for this pattern: DEST receives the low SIZE_RTX bits of
   SOURCE << LEFT_RTX, sign extended.  The method is the one chosen by
   shl_sext_kind.  Return 0 when code has been emitted, and -1 when
   shl_sext_kind returned 0, in which case nothing is emitted.  */
int
gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
{
  int kind;
  int left, size, insize, cost;
  rtx operands[3];

  kind = shl_sext_kind (left_rtx, size_rtx, &cost);
  left = INTVAL (left_rtx);
  size = INTVAL (size_rtx);
  insize = size - left;
  switch (kind)
    {
    case 1:
    case 2:
    case 3:
    case 4:
      {
        /* Odd kinds use an 8 bit sign extend, even kinds a 16 bit one.
           SHIFT2 is the shift that remains to be done after it.  */
        int ext = kind & 1 ? 8 : 16;
        int shift2 = size - ext;

        /* Don't expand fine-grained when combining, because that will
           make the pattern fail.  */
        if (! rtx_equal_function_value_matters
            && ! reload_in_progress && ! reload_completed)
          {
            emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
            emit_insn (gen_movsi (dest, source));
            break;
          }
        if (dest != source)
          emit_insn (gen_movsi (dest, source));
        operands[0] = dest;
        if (ext - insize)
          {
            operands[2] = GEN_INT (ext - insize);
            gen_shifty_hi_op (ASHIFT, operands);
          }
        emit_insn (kind & 1
                   ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
                   : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
        if (kind <= 2)
          {
            if (shift2)
              {
                operands[2] = GEN_INT (shift2);
                gen_shifty_op (ASHIFT, operands);
              }
          }
        else
          {
            if (shift2 > 0)
              {
                if (EXT_SHIFT_SIGNED (shift2))
                  {
                    /* Shift one bit too far to the left, then restore
                       the sign with a one bit arithmetic right shift.  */
                    operands[2] = GEN_INT (shift2 + 1);
                    gen_shifty_op (ASHIFT, operands);
                    operands[2] = GEN_INT (1);
                    gen_shifty_op (ASHIFTRT, operands);
                    break;
                  }
                operands[2] = GEN_INT (shift2);
                gen_shifty_hi_op (ASHIFT, operands);
              }
            else if (shift2)
              {
                operands[2] = GEN_INT (-shift2);
                gen_shifty_hi_op (LSHIFTRT, operands);
              }
            emit_insn (size <= 8
                       ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
                       : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
          }
        break;
      }
    case 5:
      {
        /* Number of single bit arithmetic right shifts at the end.  */
        int i = 16 - size;

        if (! rtx_equal_function_value_matters
            && ! reload_in_progress && ! reload_completed)
          emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
        else
          {
            operands[0] = dest;
            operands[2] = GEN_INT (16 - insize);
            gen_shifty_hi_op (ASHIFT, operands);
            emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
          }
        /* Don't use gen_ashrsi3 because it generates new pseudos.  */
        while (--i >= 0)
          gen_ashift (ASHIFTRT, 1, dest);
        break;
      }
    case 6:
    case 7:
      /* Don't expand fine-grained when combining, because that will
         make the pattern fail.  */
      if (! rtx_equal_function_value_matters
          && ! reload_in_progress && ! reload_completed)
        {
          emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
          emit_insn (gen_movsi (dest, source));
          break;
        }
      /* Sign extend the low INSIZE bits: mask them out, flip the sign bit
         and subtract it again.  The addend is written as the negation of a
         positive shift, because left-shifting -1 is undefined.  */
      emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
      emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
      emit_insn (gen_addsi3 (dest, dest,
                             GEN_INT (-((HOST_WIDE_INT) 1 << (insize - 1)))));
      operands[0] = dest;
      operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
      gen_shifty_op (ASHIFT, operands);
      if (kind == 7)
        emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
      break;
    default:
      return -1;
    }
  return 0;
}
/* Return the "datalabel" form of SYM.  A SYMBOL_REF is handed back
   unchanged here; a LABEL_REF is wrapped in a CONST around an
   UNSPEC_DATALABEL unspec, both in SYM's mode.  Any other rtx code
   aborts.  */
rtx
gen_datalabel_ref (rtx sym)
{
  rtx unspec;

  if (GET_CODE (sym) == SYMBOL_REF)
    return sym;

  if (GET_CODE (sym) != LABEL_REF)
    abort ();

  unspec = gen_rtx_UNSPEC (GET_MODE (sym), gen_rtvec (1, sym),
                           UNSPEC_DATALABEL);
  return gen_rtx_CONST (GET_MODE (sym), unspec);
}
/* The SH cannot load a large constant into a register directly; such
constants have to come from a pc-relative load. The reference of a
pc-relative load instruction must be less than 1k in front of the
instruction. This means that we often have to dump a constant inside a
function, and generate code to branch around it.
It is important to minimize this, since the branches will slow things
down and make things bigger.
Worst case code looks like:
mov.l L1,rn
bra L2
nop
align
L1: .long value
L2:
..
mov.l L3,rn
bra L4
nop
align
L3: .long value
L4:
..
We fix this by performing a scan before scheduling, which notices which
instructions need to have their operands fetched from the constant table
and builds the table.
The algorithm is:
scan, find an instruction which needs a pcrel move. Look forward, find the
last barrier which is within MAX_COUNT bytes of the requirement.
If there isn't one, make one. Process all the instructions between
the find and the barrier.
In the above example, we can tell that L3 is within 1k of L1, so
the first move can be shrunk from the 3 insn+constant sequence into
just 1 insn, and the constant moved to L3 to make:
mov.l L1,rn
..
mov.l L3,rn
bra L4
nop
align
L3:.long value
L4:.long value
Then the second move becomes the target for the shortening process. */
/* One entry of the pending constant pool (see pool_vector).  */
typedef struct
{
rtx value; /* Value in table. */
/* Label of value. add_constant chains further labels for the same entry
through LABEL_REFS, and dump_table emits every label on that chain. */
rtx label;
/* End of window: a chain of LABEL_REFs linked through LABEL_NEXTREF.
dump_table emits one consttable_window_end insn per element after the
entry. */
rtx wend;
enum machine_mode mode; /* Mode of value. */
/* True if this constant is accessed as part of a post-increment
sequence. Note that HImode constants are never accessed in this way. */
bool part_of_sequence_p;
} pool_node;
/* The maximum number of constants that can fit into one pool, since
the pc relative range is 0...1020 bytes and constants are at least 4
bytes long. */
#define MAX_POOL_SIZE (1020/4)
/* The constants collected by add_constant since the last dump_table. */
static pool_node pool_vector[MAX_POOL_SIZE];
/* Number of entries of pool_vector currently in use; reset to zero by
dump_table. */
static int pool_size;
/* The label most recently created by add_constant, or NULL_RTX after
dump_table has run. */
static rtx pool_window_label;
/* Index into pool_vector of the entry most recently added or reused by
add_constant. */
static int pool_window_last;
/* ??? If we need a constant in HImode which is the truncated value of a
constant we need in SImode, we could combine the two entries thus saving
two bytes. Is this common enough to be worth the effort of implementing
it? */
/* ??? This stuff should be done at the same time that we shorten branches.
As it is now, we must assume that all branches are the maximum size, and
this causes us to almost always output constant pools sooner than
necessary. */
/* Add a constant to the pool and return its label. */
static rtx
add_constant (rtx x, enum machine_mode mode, rtx last_value)
{
int i;
rtx lab, new, ref, newref;
/* First see if we've already got it. */
for (i = 0; i < pool_size; i++)
{
if (x->code == pool_vector[i].value->code
&& mode == pool_vector[i].mode)
{
if (x->code == CODE_LABEL)
{
if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
continue;
}
if (rtx_equal_p (x, pool_vector[i].value))
{
lab = new = 0;
if (! last_value
|| ! i
|| ! rtx_equal_p (last_value, pool_vector[i-1].value))
{
new = gen_label_rtx ();
LABEL_REFS (new) = pool_vector[i].label;
pool_vector[i].label = lab = new;
}
if (lab && pool_window_label)
{
newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
ref = pool_vector[pool_window_last].wend;
LABEL_NEXTREF (newref) = ref;
pool_vector[pool_window_last].wend = newref;
}
if (new)
pool_window_label = new;
pool_window_last = i;
return lab;
}
}
}
/* Need a new one. */
pool_vector[pool_size].value = x;
if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
{
lab = 0;
pool_vector[pool_size - 1].part_of_sequence_p = true;
}
else
lab = gen_label_rtx ();
pool_vector[pool_size].mode = mode;
pool_vector[pool_size].label = lab;
pool_vector[pool_size].wend = NULL_RTX;
pool_vector[pool_size].part_of_sequence_p = (lab == 0);
if (lab && pool_window_label)
{
newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
ref = pool_vector[pool_window_last].wend;
LABEL_NEXTREF (newref) = ref;
pool_vector[pool_window_last].wend = newref;
}
if (lab)
pool_window_label = lab;
pool_window_last = pool_size;
pool_size++;
return lab;
}
/* Output the literal table. START, if nonzero, is the first instruction
this table is needed for, and also indicates that there is at least one
casesi_worker_2 instruction; we have to emit the operand3 labels from
these insns at a 4-byte aligned position. BARRIER is the barrier
after which we are to place the table.
Everything is emitted after BARRIER in this order: the HImode entries,
the casesi_worker_2 labels (if START), the remaining entries, a
consttable_end insn and a new barrier. On return the pool is empty and
the window bookkeeping is reset. */
static void
dump_table (rtx start, rtx barrier)
{
/* SCAN is the insertion point; it always names the last insn emitted. */
rtx scan = barrier;
int i;
int need_align = 1;
rtx lab, ref;
int have_df = 0;
/* Do two passes, first time dump out the HI sized constants. */
for (i = 0; i < pool_size; i++)
{
pool_node *p = &pool_vector[i];
if (p->mode == HImode)
{
if (need_align)
{
scan = emit_insn_after (gen_align_2 (), scan);
need_align = 0;
}
/* Emit every label chained onto this entry, then the value, then its
window-end markers. */
for (lab = p->label; lab; lab = LABEL_REFS (lab))
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
scan);
for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
{
lab = XEXP (ref, 0);
scan = emit_insn_after (gen_consttable_window_end (lab), scan);
}
}
else if (p->mode == DFmode)
/* Remember that there is a DFmode entry; this selects the layout
used below. */
have_df = 1;
}
need_align = 1;
if (start)
{
scan = emit_insn_after (gen_align_4 (), scan);
need_align = 0;
/* Place the label found in operand 3 of each casesi_worker_2 insn
between START and BARRIER at this aligned position. */
for (; start != barrier; start = NEXT_INSN (start))
if (GET_CODE (start) == INSN
&& recog_memoized (start) == CODE_FOR_casesi_worker_2)
{
rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
scan = emit_label_after (lab, scan);
}
}
if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
{
/* Layout used when DFmode entries need the stronger alignment
requested with gen_align_log (GEN_INT (3)) -- presumably 8 bytes,
TODO confirm. ALIGN_INSN, when nonzero, is an alignment insn emitted
in front of a DFmode entry; a later 4-byte entry that is not part of
a sequence is placed in front of it and the alignment insn is then
deleted. */
rtx align_insn = NULL_RTX;
scan = emit_label_after (gen_label_rtx (), scan);
scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
need_align = 0;
for (i = 0; i < pool_size; i++)
{
pool_node *p = &pool_vector[i];
switch (p->mode)
{
case HImode:
/* Already emitted by the first pass. */
break;
case SImode:
case SFmode:
if (align_insn && !p->part_of_sequence_p)
{
for (lab = p->label; lab; lab = LABEL_REFS (lab))
emit_label_before (lab, align_insn);
emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
align_insn);
for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
{
lab = XEXP (ref, 0);
emit_insn_before (gen_consttable_window_end (lab),
align_insn);
}
delete_insn (align_insn);
align_insn = NULL_RTX;
/* The window-end markers were emitted above, so skip the common code
after the switch. */
continue;
}
else
{
for (lab = p->label; lab; lab = LABEL_REFS (lab))
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_4 (p->value,
const0_rtx), scan);
/* Each 4-byte entry emitted here toggles the need for alignment. */
need_align = ! need_align;
}
break;
case DFmode:
if (need_align)
{
scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
align_insn = scan;
need_align = 0;
}
/* Fall through: the entry itself is emitted like a DImode one. */
case DImode:
for (lab = p->label; lab; lab = LABEL_REFS (lab))
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
scan);
break;
default:
abort ();
break;
}
if (p->mode != HImode)
{
for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
{
lab = XEXP (ref, 0);
scan = emit_insn_after (gen_consttable_window_end (lab),
scan);
}
}
}
/* Everything has been emitted; emptying the pool makes the loop below
a no-op. */
pool_size = 0;
}
/* Default layout: emit the non-HImode entries in pool order after a
4-byte alignment. */
for (i = 0; i < pool_size; i++)
{
pool_node *p = &pool_vector[i];
switch (p->mode)
{
case HImode:
/* Already emitted by the first pass. */
break;
case SImode:
case SFmode:
if (need_align)
{
need_align = 0;
scan = emit_label_after (gen_label_rtx (), scan);
scan = emit_insn_after (gen_align_4 (), scan);
}
for (lab = p->label; lab; lab = LABEL_REFS (lab))
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
scan);
break;
case DFmode:
case DImode:
if (need_align)
{
need_align = 0;
scan = emit_label_after (gen_label_rtx (), scan);
scan = emit_insn_after (gen_align_4 (), scan);
}
for (lab = p->label; lab; lab = LABEL_REFS (lab))
scan = emit_label_after (lab, scan);
scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
scan);
break;
default:
abort ();
break;
}
if (p->mode != HImode)
{
for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
{
lab = XEXP (ref, 0);
scan = emit_insn_after (gen_consttable_window_end (lab), scan);
}
}
}
/* Terminate the table and reset the pool state for the next one. */
scan = emit_insn_after (gen_consttable_end (), scan);
scan = emit_barrier_after (scan);
pool_size = 0;
pool_window_label = NULL_RTX;
pool_window_last = 0;
}
/* Return nonzero if constant would be an ok source for a
   mov.w instead of a mov.l, i.e. if SRC is a CONST_INT whose value
   lies in the range -32768 .. 32767.  */
static int
hi_const (rtx src)
{
  if (GET_CODE (src) != CONST_INT)
    return 0;

  return INTVAL (src) >= -32768 && INTVAL (src) <= 32767;
}
/* Nonzero if the insn is a move instruction which needs to be fixed,
   i.e. a SET (possibly the first element of a PARALLEL) whose source is
   a constant or a mova_const and which none of the exceptions tested
   below applies to.  */
/* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
   CONST_DOUBLE input value is CONST_OK_FOR_I08.  For a SFmode move, we don't
   need to fix it if the input value is CONST_OK_FOR_I08.  */
static int
broken_move (rtx insn)
{
  rtx pat, src, dest;

  if (GET_CODE (insn) != INSN)
    return 0;

  pat = PATTERN (insn);
  if (GET_CODE (pat) == PARALLEL)
    pat = XVECEXP (pat, 0, 0);
  if (GET_CODE (pat) != SET)
    return 0;

  src = SET_SRC (pat);
  dest = SET_DEST (pat);

  /* We can load any 8 bit value if we don't care what the high
     order bits end up as.  */
  if (GET_MODE (dest) == QImode)
    return 0;

  /* The source must be a constant or match mova_const.  */
  if (! CONSTANT_P (src)
      && ! (GET_CODE (src) == UNSPEC
            && XINT (src, 1) == UNSPEC_MOVA
            && GET_CODE (XVECEXP (src, 0, 0)) == CONST))
    return 0;

  /* Floating point zero and one loaded into an FP register need no
     fixing in the cases below.  */
  if (TARGET_SH2E
      && GET_CODE (src) == CONST_DOUBLE
      && (fp_zero_operand (src) || fp_one_operand (src))
      /* ??? If this is a -m4 or -m4-single compilation, in general
         we don't know the current setting of fpscr, so disable fldi.
         There is an exception if this was a register-register move
         before reload - and hence it was ascertained that we have
         single precision setting - and in a post-reload optimization
         we changed this to do a constant load.  In that case
         we don't have an r0 clobber, hence we must use fldi.  */
      && (! TARGET_SH4
          || TARGET_FMOVD
          || GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0)) == SCRATCH)
      && GET_CODE (dest) == REG
      && FP_REGISTER_P (REGNO (dest)))
    return 0;

  /* Integers accepted by CONST_OK_FOR_I08 need no fixing either.  */
  if (GET_CODE (src) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (src)))
    return 0;

  return 1;
}
/* Return nonzero if INSN is an insn whose pattern is a single SET from
   an UNSPEC_MOVA unspec whose first operand is a LABEL_REF.  */
static int
mova_p (rtx insn)
{
  rtx pat, src;

  if (GET_CODE (insn) != INSN)
    return 0;

  pat = PATTERN (insn);
  if (GET_CODE (pat) != SET)
    return 0;

  src = SET_SRC (pat);
  if (GET_CODE (src) != UNSPEC || XINT (src, 1) != UNSPEC_MOVA)
    return 0;

  /* Don't match mova_const.  */
  return GET_CODE (XVECEXP (src, 0, 0)) == LABEL_REF;
}
/* Fix up a mova from a switch that went out of range.

   Without PIC, the source of MOVA is replaced by the first operand of its
   unspec.  With PIC, the following casesi_worker_1 insn is rewritten into
   a casesi_worker_2 that carries a fresh label, and the source of MOVA
   becomes a CONST around an UNSPEC_PIC of the difference between the
   original operand and that label.  Aborts if a label, a jump or the end
   of the insn chain is reached before a casesi_worker_1 is found.  */
static void
fixup_mova (rtx mova)
{
  rtx insn, new_lab, body, elt0, elt1, src0, offset;

  if (! flag_pic)
    {
      SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
      /* The pattern changed, so reset the cached insn code.  */
      INSN_CODE (mova) = -1;
      return;
    }

  new_lab = gen_label_rtx ();

  /* Search forward from MOVA for the casesi_worker_1 insn.  */
  insn = mova;
  for (;;)
    {
      insn = NEXT_INSN (insn);
      if (! insn
          || GET_CODE (insn) == CODE_LABEL
          || GET_CODE (insn) == JUMP_INSN)
        abort ();
      if (recog_memoized (insn) == CODE_FOR_casesi_worker_1)
        break;
    }

  /* Rebuild the worker as a casesi_worker_2 from the pieces of the old
     pattern plus the new label.  */
  body = PATTERN (insn);
  elt0 = XVECEXP (body, 0, 0);
  elt1 = XVECEXP (body, 0, 1);
  src0 = SET_SRC (elt0);
  PATTERN (insn) = gen_casesi_worker_2 (SET_DEST (elt0),
                                        XVECEXP (src0, 0, 1),
                                        XEXP (XVECEXP (src0, 0, 2), 0),
                                        new_lab,
                                        XEXP (elt1, 0));
  INSN_CODE (insn) = -1;

  /* Make MOVA load the difference to the new label instead.  */
  offset = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
                          gen_rtx_LABEL_REF (Pmode, new_lab));
  offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, offset), UNSPEC_PIC);
  SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, offset);
  INSN_CODE (mova) = -1;
}
/* Find the last barrier from insn FROM which is close enough to hold the
constant pool. If we can't find one, then create one near the end of
the range. */
static rtx
find_barrier (int num_mova, rtx mova, rtx from)
{
int count_si = 0;
int count_hi = 0;
int found_hi = 0;
int found_si = 0;
int found_di = 0;
int hi_align = 2;
int si_align = 2;
int leading_mova = num_mova;
rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
int si_limit;
int hi_limit;
/* For HImode: range is 510, add 4 because pc counts from address of
second instruction after this one, subtract 2 for the jump instruction
that we may need to emit before the table, subtract 2 for the instruction
that fills the jump delay slot (in very rare cases, reorg will take an
instruction from after the constant pool or will leave the delay slot
empty). This gives 510.