| /* Output routines for GCC for Renesas / SuperH SH. |
| Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002, |
| 2003, 2004 Free Software Foundation, Inc. |
| Contributed by Steve Chamberlain (sac@cygnus.com). |
| Improved by Jim Wilson (wilson@cygnus.com). |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING. If not, write to |
| the Free Software Foundation, 59 Temple Place - Suite 330, |
| Boston, MA 02111-1307, USA. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "insn-config.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "flags.h" |
| #include "expr.h" |
| #include "optabs.h" |
| #include "function.h" |
| #include "regs.h" |
| #include "hard-reg-set.h" |
| #include "output.h" |
| #include "insn-attr.h" |
| #include "toplev.h" |
| #include "recog.h" |
| #include "c-pragma.h" |
| #include "integrate.h" |
| #include "tm_p.h" |
| #include "target.h" |
| #include "target-def.h" |
| #include "real.h" |
| #include "langhooks.h" |
| #include "basic-block.h" |
| #include "ra.h" |
| #include "cfglayout.h" |
| #include "intl.h" |
| #include "ggc.h" |
| |
| int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch; |
| |
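| /* MSW and LSW give the word index of the most / least significant |
| word of a two-word value; they swap with the target endianness. */ |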
| #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0) |
| #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1) |
| |
| /* These macros abstract over the register width: the immediate range |
| for add, and SImode vs. DImode moves and adds on SHmedia64. */ |
| #define CONST_OK_FOR_ADD(size) \ |
| (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size)) |
| #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi)) |
| #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3)) |
| #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3)) |
| |
| /* Set to 1 by expand_prologue() when the function is an interrupt handler. */ |
| int current_function_interrupt; |
| |
| /* ??? The pragma interrupt support will not work for SH3. */ |
| /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to |
| output code for the next function appropriate for an interrupt handler. */ |
| int pragma_interrupt; |
| |
| /* This is set by the trap_exit attribute for functions. It specifies |
| a trap number to be used in a trapa instruction at function exit |
| (instead of an rte instruction). */ |
| int trap_exit; |
| |
| /* This is used by the sp_switch attribute for functions. It specifies |
| a variable holding the address of the stack the interrupt function |
| should switch to/from at entry/exit. */ |
| rtx sp_switch; |
| |
| /* This is set by #pragma trapa, and is similar to the above, except that |
| the compiler doesn't emit code to preserve all registers. */ |
| static int pragma_trapa; |
| |
| /* This is set by #pragma nosave_low_regs. This is useful on the SH3, |
| which has a separate set of low regs for User and Supervisor modes. |
| This should only be used for the lowest level of interrupts. Higher levels |
| of interrupts must save the registers in case they themselves are |
| interrupted. */ |
| int pragma_nosave_low_regs; |
| |
| /* This is used for communication between SETUP_INCOMING_VARARGS and |
| sh_expand_prologue. */ |
| int current_function_anonymous_args; |
| |
| /* Global variables for machine-dependent things. */ |
| |
| /* Which cpu are we scheduling for. */ |
| enum processor_type sh_cpu; |
| |
| /* Saved operands from the last compare to use when we generate an scc |
| or bcc insn. */ |
| |
| rtx sh_compare_op0; |
| rtx sh_compare_op1; |
| |
| /* Provides the class number of the smallest class containing |
| reg number. */ |
| |
| enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] = |
| { |
| R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| FP0_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS, |
| TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS, |
| DF_REGS, DF_REGS, DF_REGS, DF_REGS, |
| DF_REGS, DF_REGS, DF_REGS, DF_REGS, |
| NO_REGS, GENERAL_REGS, PR_REGS, T_REGS, |
| MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS, |
| GENERAL_REGS, |
| }; |
| |
| char sh_register_names[FIRST_PSEUDO_REGISTER] \ |
| [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER; |
| |
| char sh_additional_register_names[ADDREGNAMES_SIZE] \ |
| [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1] |
| = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER; |
| |
| /* Provide reg_class from a letter such as appears in the machine |
| description. Letters marked with a double asterisk below are |
| reserved by target-independent code. |
| reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */ |
| |
| enum reg_class reg_class_from_letter[] = |
| { |
| /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS, |
| /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS, |
| /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS, |
| /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS, |
| /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS, |
| /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS, |
| /* y */ FPUL_REGS, /* z */ R0_REGS |
| }; |
| |
| int assembler_dialect; |
| |
| static bool shmedia_space_reserved_for_target_registers; |
| |
| static void split_branches (rtx); |
| static int branch_dest (rtx); |
| static void force_into (rtx, rtx); |
| static void print_slot (rtx); |
| static rtx add_constant (rtx, enum machine_mode, rtx); |
| static void dump_table (rtx, rtx); |
| static int hi_const (rtx); |
| static int broken_move (rtx); |
| static int mova_p (rtx); |
| static rtx find_barrier (int, rtx, rtx); |
| static int noncall_uses_reg (rtx, rtx, rtx *); |
| static rtx gen_block_redirect (rtx, int, int); |
| static void sh_reorg (void); |
| static void output_stack_adjust (int, rtx, int, HARD_REG_SET *); |
| static rtx frame_insn (rtx); |
| static rtx push (int); |
| static void pop (int); |
| static void push_regs (HARD_REG_SET *, int); |
| static int calc_live_regs (HARD_REG_SET *); |
| static void mark_use (rtx, rtx *); |
| static HOST_WIDE_INT rounded_frame_size (int); |
| static rtx mark_constant_pool_use (rtx); |
| const struct attribute_spec sh_attribute_table[]; |
| static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *); |
| static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *); |
| static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *); |
| static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *); |
| static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT); |
| static void sh_insert_attributes (tree, tree *); |
| static int sh_adjust_cost (rtx, rtx, rtx, int); |
| static int sh_use_dfa_interface (void); |
| static int sh_issue_rate (void); |
| static bool sh_function_ok_for_sibcall (tree, tree); |
| |
| static bool sh_cannot_modify_jumps_p (void); |
| static int sh_target_reg_class (void); |
| static bool sh_optimize_target_register_callee_saved (bool); |
| static bool sh_ms_bitfield_layout_p (tree); |
| |
| static void sh_init_builtins (void); |
| static void sh_media_init_builtins (void); |
| static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int); |
| static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); |
| static void sh_file_start (void); |
| static int flow_dependent_p (rtx, rtx); |
| static void flow_dependent_p_1 (rtx, rtx, void *); |
| static int shiftcosts (rtx); |
| static int andcosts (rtx); |
| static int addsubcosts (rtx); |
| static int multcosts (rtx); |
| static bool unspec_caller_rtx_p (rtx); |
| static bool sh_cannot_copy_insn_p (rtx); |
| static bool sh_rtx_costs (rtx, int, int, int *); |
| static int sh_address_cost (rtx); |
| static int shmedia_target_regs_stack_space (HARD_REG_SET *); |
| static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *); |
| static int shmedia_target_regs_stack_adjust (HARD_REG_SET *); |
| static int scavenge_reg (HARD_REG_SET *s); |
| struct save_schedule_s; |
| static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *, |
| struct save_schedule_s *, int); |
| |
| static rtx sh_struct_value_rtx (tree, int); |
| static bool sh_return_in_memory (tree, tree); |
| static rtx sh_builtin_saveregs (void); |
| static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int); |
| static bool sh_strict_argument_naming (CUMULATIVE_ARGS *); |
| static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *); |
| static tree sh_build_builtin_va_list (void); |
| |
| |
| /* Initialize the GCC target structure. */ |
| #undef TARGET_ATTRIBUTE_TABLE |
| #define TARGET_ATTRIBUTE_TABLE sh_attribute_table |
| |
| /* The next two are used for debug info when compiling with -gdwarf. */ |
| #undef TARGET_ASM_UNALIGNED_HI_OP |
| #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t" |
| #undef TARGET_ASM_UNALIGNED_SI_OP |
| #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t" |
| |
| /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */ |
| #undef TARGET_ASM_UNALIGNED_DI_OP |
| #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t" |
| #undef TARGET_ASM_ALIGNED_DI_OP |
| #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" |
| |
| #undef TARGET_ASM_FUNCTION_EPILOGUE |
| #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue |
| |
| #undef TARGET_ASM_OUTPUT_MI_THUNK |
| #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk |
| |
| #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK |
| #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true |
| |
| #undef TARGET_ASM_FILE_START |
| #define TARGET_ASM_FILE_START sh_file_start |
| #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE |
| #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true |
| |
| #undef TARGET_INSERT_ATTRIBUTES |
| #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes |
| |
| #undef TARGET_SCHED_ADJUST_COST |
| #define TARGET_SCHED_ADJUST_COST sh_adjust_cost |
| |
| #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE |
| #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \ |
| sh_use_dfa_interface |
| #undef TARGET_SCHED_ISSUE_RATE |
| #define TARGET_SCHED_ISSUE_RATE sh_issue_rate |
| |
| #undef TARGET_CANNOT_MODIFY_JUMPS_P |
| #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p |
| #undef TARGET_BRANCH_TARGET_REGISTER_CLASS |
| #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class |
| #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED |
| #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \ |
| sh_optimize_target_register_callee_saved |
| |
| #undef TARGET_MS_BITFIELD_LAYOUT_P |
| #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p |
| |
| #undef TARGET_INIT_BUILTINS |
| #define TARGET_INIT_BUILTINS sh_init_builtins |
| #undef TARGET_EXPAND_BUILTIN |
| #define TARGET_EXPAND_BUILTIN sh_expand_builtin |
| |
| #undef TARGET_FUNCTION_OK_FOR_SIBCALL |
| #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall |
| |
| #undef TARGET_CANNOT_COPY_INSN_P |
| #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p |
| #undef TARGET_RTX_COSTS |
| #define TARGET_RTX_COSTS sh_rtx_costs |
| #undef TARGET_ADDRESS_COST |
| #define TARGET_ADDRESS_COST sh_address_cost |
| |
| #undef TARGET_MACHINE_DEPENDENT_REORG |
| #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg |
| |
| #ifdef HAVE_AS_TLS |
| #undef TARGET_HAVE_TLS |
| #define TARGET_HAVE_TLS true |
| #endif |
| |
| #undef TARGET_PROMOTE_PROTOTYPES |
| #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes |
| #undef TARGET_PROMOTE_FUNCTION_ARGS |
| #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes |
| #undef TARGET_PROMOTE_FUNCTION_RETURN |
| #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes |
| |
| #undef TARGET_STRUCT_VALUE_RTX |
| #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx |
| #undef TARGET_RETURN_IN_MEMORY |
| #define TARGET_RETURN_IN_MEMORY sh_return_in_memory |
| |
| #undef TARGET_EXPAND_BUILTIN_SAVEREGS |
| #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs |
| #undef TARGET_SETUP_INCOMING_VARARGS |
| #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs |
| #undef TARGET_STRICT_ARGUMENT_NAMING |
| #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming |
| #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED |
| #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named |
| |
| #undef TARGET_BUILD_BUILTIN_VA_LIST |
| #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list |
| |
| #undef TARGET_PCH_VALID_P |
| #define TARGET_PCH_VALID_P sh_pch_valid_p |
| |
| struct gcc_target targetm = TARGET_INITIALIZER; |
| |
| /* Print the operand address in x to the stream. */ |
| |
| void |
| print_operand_address (FILE *stream, rtx x) |
| { |
| switch (GET_CODE (x)) |
| { |
| case REG: |
| case SUBREG: |
| fprintf (stream, "@%s", reg_names[true_regnum (x)]); |
| break; |
| |
| case PLUS: |
| { |
| rtx base = XEXP (x, 0); |
| rtx index = XEXP (x, 1); |
| |
| switch (GET_CODE (index)) |
| { |
| case CONST_INT: |
| fprintf (stream, "@(%d,%s)", (int) INTVAL (index), |
| reg_names[true_regnum (base)]); |
| break; |
| |
| case REG: |
| case SUBREG: |
| { |
| int base_num = true_regnum (base); |
| int index_num = true_regnum (index); |
| |
| fprintf (stream, "@(r0,%s)", |
| reg_names[MAX (base_num, index_num)]); |
| break; |
| } |
| |
| default: |
| debug_rtx (x); |
| abort (); |
| } |
| } |
| break; |
| |
| case PRE_DEC: |
| fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]); |
| break; |
| |
| case POST_INC: |
| fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]); |
| break; |
| |
| default: |
| x = mark_constant_pool_use (x); |
| output_addr_const (stream, x); |
| break; |
| } |
| } |
| |
| /* Print operand x (an rtx) in assembler syntax to file stream |
| according to modifier code. |
| |
| '.' print a .s if insn needs delay slot |
| ',' print LOCAL_LABEL_PREFIX |
| '@' print trapa, rte or rts depending upon the interrupt handler state |
| '#' output a nop if there is nothing to put in the delay slot |
| ''' print likelihood suffix (/u for unlikely). |
| 'O' print a constant without the # |
| 'R' print the LSW of a dp value - changes if in little endian |
| 'S' print the MSW of a dp value - changes if in little endian |
| 'T' print the next word of a dp value - same as 'R' in big endian mode. |
| 'M' print an `x' if `m' will print `base,index'. |
| 'N' print 'r63' if the operand is (const_int 0). |
| 'm' print a pair `base,offset' or `base,index', for LD and ST. |
| 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value. |
| 'o' output an operator. */ |
| |
| void |
| print_operand (FILE *stream, rtx x, int code) |
| { |
| switch (code) |
| { |
| case '.': |
| if (final_sequence |
| && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)) |
| && get_attr_length (XVECEXP (final_sequence, 0, 1))) |
| fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s"); |
| break; |
| case ',': |
| fprintf (stream, "%s", LOCAL_LABEL_PREFIX); |
| break; |
| case '@': |
| if (trap_exit) |
| fprintf (stream, "trapa #%d", trap_exit); |
| else if (sh_cfun_interrupt_handler_p ()) |
| fprintf (stream, "rte"); |
| else |
| fprintf (stream, "rts"); |
| break; |
| case '#': |
| /* Output a nop if there's nothing in the delay slot. */ |
| if (dbr_sequence_length () == 0) |
| fprintf (stream, "\n\tnop"); |
| break; |
| case '\'': |
| { |
| rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0); |
| |
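| /* A probability below one half of REG_BR_PROB_BASE means the branch |
| is predicted untaken; emit the /u (unlikely) hint. */ |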
| if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE) |
| fputs ("/u", stream); |
| break; |
| } |
| case 'O': |
| x = mark_constant_pool_use (x); |
| output_addr_const (stream, x); |
| break; |
| case 'R': |
| fputs (reg_names[REGNO (x) + LSW], (stream)); |
| break; |
| case 'S': |
| fputs (reg_names[REGNO (x) + MSW], (stream)); |
| break; |
| case 'T': |
| /* Next word of a double. */ |
| switch (GET_CODE (x)) |
| { |
| case REG: |
| fputs (reg_names[REGNO (x) + 1], (stream)); |
| break; |
| case MEM: |
| if (GET_CODE (XEXP (x, 0)) != PRE_DEC |
| && GET_CODE (XEXP (x, 0)) != POST_INC) |
| x = adjust_address (x, SImode, 4); |
| print_operand_address (stream, XEXP (x, 0)); |
| break; |
| default: |
| break; |
| } |
| break; |
| case 'o': |
| switch (GET_CODE (x)) |
| { |
| case PLUS: fputs ("add", stream); break; |
| case MINUS: fputs ("sub", stream); break; |
| case MULT: fputs ("mul", stream); break; |
| case DIV: fputs ("div", stream); break; |
| case EQ: fputs ("eq", stream); break; |
| case NE: fputs ("ne", stream); break; |
| case GT: case LT: fputs ("gt", stream); break; |
| case GE: case LE: fputs ("ge", stream); break; |
| case GTU: case LTU: fputs ("gtu", stream); break; |
| case GEU: case LEU: fputs ("geu", stream); break; |
| default: |
| break; |
| } |
| break; |
| case 'M': |
| if (GET_CODE (x) == MEM |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG |
| || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG)) |
| fputc ('x', stream); |
| break; |
| |
| case 'm': |
| if (GET_CODE (x) != MEM) |
| abort (); |
| x = XEXP (x, 0); |
| switch (GET_CODE (x)) |
| { |
| case REG: |
| case SUBREG: |
| print_operand (stream, x, 0); |
| fputs (", 0", stream); |
| break; |
| |
| case PLUS: |
| print_operand (stream, XEXP (x, 0), 0); |
| fputs (", ", stream); |
| print_operand (stream, XEXP (x, 1), 0); |
| break; |
| |
| default: |
| abort (); |
| } |
| break; |
| |
| case 'N': |
| if (x == CONST0_RTX (GET_MODE (x))) |
| { |
| fprintf ((stream), "r63"); |
| break; |
| } |
| goto default_output; |
| case 'u': |
| if (GET_CODE (x) == CONST_INT) |
| { |
| fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1)); |
| break; |
| } |
| /* Fall through. */ |
| |
| default_output: |
| default: |
| switch (GET_CODE (x)) |
| { |
| /* FIXME: We need this on SHmedia32 because reload generates |
| some sign-extended HI or QI loads into DImode registers |
| but, because Pmode is SImode, the address ends up with a |
| subreg:SI of the DImode register. Maybe reload should be |
| fixed so as to apply alter_subreg to such loads? */ |
| case SUBREG: |
| if (SUBREG_BYTE (x) != 0 |
| || GET_CODE (SUBREG_REG (x)) != REG) |
| abort (); |
| |
| x = SUBREG_REG (x); |
| /* Fall through. */ |
| |
| case REG: |
| if (FP_REGISTER_P (REGNO (x)) |
| && GET_MODE (x) == V16SFmode) |
| fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2); |
| else if (FP_REGISTER_P (REGNO (x)) |
| && GET_MODE (x) == V4SFmode) |
| fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2); |
| else if (GET_CODE (x) == REG |
| && GET_MODE (x) == V2SFmode) |
| fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2); |
| else if (FP_REGISTER_P (REGNO (x)) |
| && GET_MODE_SIZE (GET_MODE (x)) > 4) |
| fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1); |
| else |
| fputs (reg_names[REGNO (x)], (stream)); |
| break; |
| |
| case MEM: |
| output_address (XEXP (x, 0)); |
| break; |
| |
| case CONST: |
| if (TARGET_SHMEDIA |
| && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND |
| && GET_MODE (XEXP (x, 0)) == DImode |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE |
| && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode) |
| { |
| rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0); |
| |
| fputc ('(', stream); |
| if (GET_CODE (val) == ASHIFTRT) |
| { |
| fputc ('(', stream); |
| if (GET_CODE (XEXP (val, 0)) == CONST) |
| fputc ('(', stream); |
| output_addr_const (stream, XEXP (val, 0)); |
| if (GET_CODE (XEXP (val, 0)) == CONST) |
| fputc (')', stream); |
| fputs (" >> ", stream); |
| output_addr_const (stream, XEXP (val, 1)); |
| fputc (')', stream); |
| } |
| else |
| { |
| if (GET_CODE (val) == CONST) |
| fputc ('(', stream); |
| output_addr_const (stream, val); |
| if (GET_CODE (val) == CONST) |
| fputc (')', stream); |
| } |
| fputs (" & 65535)", stream); |
| break; |
| } |
| |
| /* Fall through. */ |
| default: |
| if (TARGET_SH1) |
| fputc ('#', stream); |
| output_addr_const (stream, x); |
| break; |
| } |
| break; |
| } |
| } |
| |
| /* Like force_operand, but guarantees that VALUE ends up in TARGET. */ |
| static void |
| force_into (rtx value, rtx target) |
| { |
| value = force_operand (value, target); |
| if (! rtx_equal_p (value, target)) |
| emit_insn (gen_move_insn (target, value)); |
| } |
| |
| /* Emit code to perform a block move. Choose the best method. |
| |
| OPERANDS[0] is the destination. |
| OPERANDS[1] is the source. |
| OPERANDS[2] is the size. |
| OPERANDS[3] is the alignment safe to use. */ |
| |
| int |
| expand_block_move (rtx *operands) |
| { |
| int align = INTVAL (operands[3]); |
| int constp = (GET_CODE (operands[2]) == CONST_INT); |
| int bytes = (constp ? INTVAL (operands[2]) : 0); |
| |
| /* If it isn't a constant number of bytes, or if it doesn't have 4 byte |
| alignment, or if it isn't a multiple of 4 bytes, then fail. */ |
| if (! constp || align < 4 || (bytes % 4 != 0)) |
| return 0; |
| |
| if (TARGET_HARD_SH4) |
| { |
| if (bytes < 12) |
| return 0; |
| else if (bytes == 12) |
| { |
| tree entry_name; |
| rtx sym; |
| rtx func_addr_rtx; |
| rtx r4 = gen_rtx (REG, SImode, 4); |
| rtx r5 = gen_rtx (REG, SImode, 5); |
| |
| entry_name = get_identifier ("__movstrSI12_i4"); |
| |
| sym = function_symbol (IDENTIFIER_POINTER (entry_name)); |
| func_addr_rtx = copy_to_mode_reg (Pmode, sym); |
| force_into (XEXP (operands[0], 0), r4); |
| force_into (XEXP (operands[1], 0), r5); |
| emit_insn (gen_block_move_real_i4 (func_addr_rtx)); |
| return 1; |
| } |
| else if (! TARGET_SMALLCODE) |
| { |
| tree entry_name; |
| rtx sym; |
| rtx func_addr_rtx; |
| int dwords; |
| rtx r4 = gen_rtx (REG, SImode, 4); |
| rtx r5 = gen_rtx (REG, SImode, 5); |
| rtx r6 = gen_rtx (REG, SImode, 6); |
| |
| entry_name = get_identifier (bytes & 4 |
| ? "__movstr_i4_odd" |
| : "__movstr_i4_even"); |
| sym = function_symbol (IDENTIFIER_POINTER (entry_name)); |
| func_addr_rtx = copy_to_mode_reg (Pmode, sym); |
| force_into (XEXP (operands[0], 0), r4); |
| force_into (XEXP (operands[1], 0), r5); |
| |
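| /* Each iteration of the library routine moves 8 bytes, so r6 gets |
| the 8-byte chunk count minus one; the _odd entry point is selected |
| when a trailing 4 bytes remain. */ |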
| dwords = bytes >> 3; |
| emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1))); |
| emit_insn (gen_block_lump_real_i4 (func_addr_rtx)); |
| return 1; |
| } |
| else |
| return 0; |
| } |
| if (bytes < 64) |
| { |
| char entry[30]; |
| tree entry_name; |
| rtx sym; |
| rtx func_addr_rtx; |
| rtx r4 = gen_rtx_REG (SImode, 4); |
| rtx r5 = gen_rtx_REG (SImode, 5); |
| |
| sprintf (entry, "__movstrSI%d", bytes); |
| entry_name = get_identifier (entry); |
| sym = function_symbol (IDENTIFIER_POINTER (entry_name)); |
| func_addr_rtx = copy_to_mode_reg (Pmode, sym); |
| force_into (XEXP (operands[0], 0), r4); |
| force_into (XEXP (operands[1], 0), r5); |
| emit_insn (gen_block_move_real (func_addr_rtx)); |
| return 1; |
| } |
| |
| /* This is the same number of bytes as a memcpy call, but to a different, |
| less common function name, so this will occasionally use more space. */ |
| if (! TARGET_SMALLCODE) |
| { |
| tree entry_name; |
| rtx sym; |
| rtx func_addr_rtx; |
| int final_switch, while_loop; |
| rtx r4 = gen_rtx_REG (SImode, 4); |
| rtx r5 = gen_rtx_REG (SImode, 5); |
| rtx r6 = gen_rtx_REG (SImode, 6); |
| |
| entry_name = get_identifier ("__movstr"); |
| sym = function_symbol (IDENTIFIER_POINTER (entry_name)); |
| func_addr_rtx = copy_to_mode_reg (Pmode, sym); |
| force_into (XEXP (operands[0], 0), r4); |
| force_into (XEXP (operands[1], 0), r5); |
| |
| /* r6 controls the size of the move. 16 is decremented from it |
| for each 64 bytes moved. Then the negative bit left over is used |
| as an index into a list of move instructions. e.g., a 72 byte move |
| would be set up with size(r6) = 14, for one iteration through the |
| big while loop, and a switch of -2 for the last part. */ |
| |
| final_switch = 16 - ((bytes / 4) % 16); |
| while_loop = ((bytes / 4) / 16 - 1) * 16; |
| emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch))); |
| emit_insn (gen_block_lump_real (func_addr_rtx)); |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| /* Prepare operands for a move define_expand; specifically, one of the |
| operands must be in a register. */ |
| |
| int |
| prepare_move_operands (rtx operands[], enum machine_mode mode) |
| { |
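| /* Under PIC, symbolic operands must be legitimized through the GOT; |
| TLS symbols are handled separately below. */ |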
| if ((mode == SImode || mode == DImode) |
| && flag_pic |
| && ! ((mode == Pmode || mode == ptr_mode) |
| && tls_symbolic_operand (operands[1], Pmode) != 0)) |
| { |
| rtx temp; |
| if (SYMBOLIC_CONST_P (operands[1])) |
| { |
| if (GET_CODE (operands[0]) == MEM) |
| operands[1] = force_reg (Pmode, operands[1]); |
| else if (TARGET_SHMEDIA |
| && GET_CODE (operands[1]) == LABEL_REF |
| && target_reg_operand (operands[0], mode)) |
| /* It's ok. */; |
| else |
| { |
| temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); |
| operands[1] = legitimize_pic_address (operands[1], mode, temp); |
| } |
| } |
| else if (GET_CODE (operands[1]) == CONST |
| && GET_CODE (XEXP (operands[1], 0)) == PLUS |
| && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0))) |
| { |
| temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode); |
| temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0), |
| mode, temp); |
| operands[1] = expand_binop (mode, add_optab, temp, |
| XEXP (XEXP (operands[1], 0), 1), |
| no_new_pseudos ? temp |
| : gen_reg_rtx (Pmode), |
| 0, OPTAB_LIB_WIDEN); |
| } |
| } |
| |
| if (! reload_in_progress && ! reload_completed) |
| { |
| /* Copy the source to a register if both operands aren't registers. */ |
| if (! register_operand (operands[0], mode) |
| && ! sh_register_operand (operands[1], mode)) |
| operands[1] = copy_to_mode_reg (mode, operands[1]); |
| |
| if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode)) |
| { |
| /* This is like change_address_1 (operands[0], mode, 0, 1), |
| except that we can't use that function because it is static. */ |
| rtx new = change_address (operands[0], mode, 0); |
| MEM_COPY_ATTRIBUTES (new, operands[0]); |
| operands[0] = new; |
| } |
| |
| /* This case can happen while generating code to move the result |
| of a library call to the target. Reject `st r0,@(rX,rY)' because |
| reload will fail to find a spill register for rX, since r0 is already |
| being used for the source. */ |
| else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0) |
| && GET_CODE (operands[0]) == MEM |
| && GET_CODE (XEXP (operands[0], 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG) |
| operands[1] = copy_to_mode_reg (mode, operands[1]); |
| } |
| |
| if (mode == Pmode || mode == ptr_mode) |
| { |
| rtx op0, op1; |
| enum tls_model tls_kind; |
| |
| op0 = operands[0]; |
| op1 = operands[1]; |
| if ((tls_kind = tls_symbolic_operand (op1, Pmode))) |
| { |
| rtx tga_op1, tga_ret, tmp, tmp2; |
| |
| |
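| /* Emit the access sequence for the TLS model of op1: the dynamic |
| models emit a call whose result lands in r0, initial-exec loads the |
| thread-pointer offset through the GOT, and local-exec adds a |
| link-time constant offset to GBR. */ |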
| switch (tls_kind) |
| { |
| case TLS_MODEL_GLOBAL_DYNAMIC: |
| tga_ret = gen_rtx_REG (Pmode, R0_REG); |
| emit_call_insn (gen_tls_global_dynamic (tga_ret, op1)); |
| op1 = tga_ret; |
| break; |
| |
| case TLS_MODEL_LOCAL_DYNAMIC: |
| tga_ret = gen_rtx_REG (Pmode, R0_REG); |
| emit_call_insn (gen_tls_local_dynamic (tga_ret, op1)); |
| |
| tmp = gen_reg_rtx (Pmode); |
| emit_move_insn (tmp, tga_ret); |
| |
| if (register_operand (op0, Pmode)) |
| tmp2 = op0; |
| else |
| tmp2 = gen_reg_rtx (Pmode); |
| |
| emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp)); |
| op1 = tmp2; |
| break; |
| |
| case TLS_MODEL_INITIAL_EXEC: |
| if (! flag_pic) |
| emit_insn (gen_GOTaddr2picreg ()); |
| tga_op1 = gen_reg_rtx (Pmode); |
| tmp = gen_sym2GOTTPOFF (op1); |
| emit_insn (gen_tls_initial_exec (tga_op1, tmp)); |
| op1 = tga_op1; |
| break; |
| |
| case TLS_MODEL_LOCAL_EXEC: |
| tmp2 = gen_reg_rtx (Pmode); |
| emit_insn (gen_load_gbr (tmp2)); |
| tmp = gen_reg_rtx (Pmode); |
| emit_insn (gen_symTPOFF2reg (tmp, op1)); |
| RTX_UNCHANGING_P (tmp) = 1; |
| |
| if (register_operand (op0, Pmode)) |
| op1 = op0; |
| else |
| op1 = gen_reg_rtx (Pmode); |
| |
| emit_insn (gen_addsi3 (op1, tmp, tmp2)); |
| break; |
| |
| default: |
| abort (); |
| } |
| operands[1] = op1; |
| } |
| } |
| |
| return 0; |
| } |
| |
| /* Prepare the operands for an scc instruction; make sure that the |
| compare has been done. */ |
| rtx |
| prepare_scc_operands (enum rtx_code code) |
| { |
| rtx t_reg = gen_rtx_REG (SImode, T_REG); |
| enum rtx_code oldcode = code; |
| enum machine_mode mode; |
| |
| /* First need a compare insn. */ |
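| /* The SH has compare instructions only for EQ, GT, GE, GTU and GEU, |
| so LT, LE, LTU and LEU are obtained by swapping the operands. */ |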
| switch (code) |
| { |
| case NE: |
| /* It isn't possible to handle this case. */ |
| abort (); |
| case LT: |
| code = GT; |
| break; |
| case LE: |
| code = GE; |
| break; |
| case LTU: |
| code = GTU; |
| break; |
| case LEU: |
| code = GEU; |
| break; |
| default: |
| break; |
| } |
| if (code != oldcode) |
| { |
| rtx tmp = sh_compare_op0; |
| sh_compare_op0 = sh_compare_op1; |
| sh_compare_op1 = tmp; |
| } |
| |
| mode = GET_MODE (sh_compare_op0); |
| if (mode == VOIDmode) |
| mode = GET_MODE (sh_compare_op1); |
| |
| sh_compare_op0 = force_reg (mode, sh_compare_op0); |
| if ((code != EQ && code != NE |
| && (sh_compare_op1 != const0_rtx |
| || code == GTU || code == GEU || code == LTU || code == LEU)) |
| || (mode == DImode && sh_compare_op1 != const0_rtx) |
| || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) |
| sh_compare_op1 = force_reg (mode, sh_compare_op1); |
| |
| if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) |
| (mode == SFmode ? emit_sf_insn : emit_df_insn) |
| (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2, |
| gen_rtx (SET, VOIDmode, t_reg, |
| gen_rtx (code, SImode, |
| sh_compare_op0, sh_compare_op1)), |
| gen_rtx (USE, VOIDmode, get_fpscr_rtx ())))); |
| else |
| emit_insn (gen_rtx (SET, VOIDmode, t_reg, |
| gen_rtx (code, SImode, sh_compare_op0, |
| sh_compare_op1))); |
| |
| return t_reg; |
| } |
| |
| /* Called from the md file, set up the operands of a compare instruction. */ |
| |
| void |
| from_compare (rtx *operands, int code) |
| { |
| enum machine_mode mode = GET_MODE (sh_compare_op0); |
| rtx insn; |
| if (mode == VOIDmode) |
| mode = GET_MODE (sh_compare_op1); |
| if (code != EQ |
| || mode == DImode |
| || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) |
| { |
| /* Force args into regs, since we can't use constants here. */ |
| sh_compare_op0 = force_reg (mode, sh_compare_op0); |
| if (sh_compare_op1 != const0_rtx |
| || code == GTU || code == GEU |
| || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) |
| sh_compare_op1 = force_reg (mode, sh_compare_op1); |
| } |
| if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE) |
| { |
| from_compare (operands, GT); |
| insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1); |
| } |
| else |
| insn = gen_rtx_SET (VOIDmode, |
| gen_rtx_REG (SImode, T_REG), |
| gen_rtx (code, SImode, sh_compare_op0, |
| sh_compare_op1)); |
| if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| insn = gen_rtx (PARALLEL, VOIDmode, |
| gen_rtvec (2, insn, |
| gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))); |
| (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn); |
| } |
| else |
| emit_insn (insn); |
| } |
| |
| /* Functions to output assembly code. */ |
| |
| /* Return a sequence of instructions to perform DI or DF move. |
| |
| Since the SH cannot move a DI or DF in one instruction, we have |
| to take care when we see overlapping source and dest registers. */ |
| |
| const char * |
| output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[], |
| enum machine_mode mode) |
| { |
| rtx dst = operands[0]; |
| rtx src = operands[1]; |
| |
| if (GET_CODE (dst) == MEM |
| && GET_CODE (XEXP (dst, 0)) == PRE_DEC) |
| return "mov.l %T1,%0\n\tmov.l %1,%0"; |
| |
| if (register_operand (dst, mode) |
| && register_operand (src, mode)) |
| { |
| if (REGNO (src) == MACH_REG) |
| return "sts mach,%S0\n\tsts macl,%R0"; |
| |
| /* When mov.d r1,r2 do r2->r3 then r1->r2; |
| when mov.d r1,r0 do r1->r0 then r2->r1. */ |
| |
| if (REGNO (src) + 1 == REGNO (dst)) |
| return "mov %T1,%T0\n\tmov %1,%0"; |
| else |
| return "mov %1,%0\n\tmov %T1,%T0"; |
| } |
| else if (GET_CODE (src) == CONST_INT) |
| { |
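| /* Fill the most significant word with the sign extension of the |
| constant. */ |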
| if (INTVAL (src) < 0) |
| output_asm_insn ("mov #-1,%S0", operands); |
| else |
| output_asm_insn ("mov #0,%S0", operands); |
| |
| return "mov %1,%R0"; |
| } |
| else if (GET_CODE (src) == MEM) |
| { |
| int ptrreg = -1; |
| int dreg = REGNO (dst); |
| rtx inside = XEXP (src, 0); |
| |
| if (GET_CODE (inside) == REG) |
| ptrreg = REGNO (inside); |
| else if (GET_CODE (inside) == SUBREG) |
| ptrreg = subreg_regno (inside); |
| else if (GET_CODE (inside) == PLUS) |
| { |
| ptrreg = REGNO (XEXP (inside, 0)); |
| /* ??? A r0+REG address shouldn't be possible here, because it isn't |
| an offsettable address. Unfortunately, offsettable addresses use |
| QImode to check the offset, and a QImode offsettable address |
| requires r0 for the other operand, which is not currently |
| supported, so we can't use the 'o' constraint. |
| Thus we must check for and handle r0+REG addresses here. |
| We punt for now, since this is likely very rare. */ |
| if (GET_CODE (XEXP (inside, 1)) == REG) |
| abort (); |
| } |
| else if (GET_CODE (inside) == LABEL_REF) |
| return "mov.l %1,%0\n\tmov.l %1+4,%T0"; |
| else if (GET_CODE (inside) == POST_INC) |
| return "mov.l %1,%0\n\tmov.l %1,%T0"; |
| else |
| abort (); |
| |
| /* Work out the safe way to copy. Copy into the second half first. */ |
| if (dreg == ptrreg) |
| return "mov.l %T1,%T0\n\tmov.l %1,%0"; |
| } |
| |
| return "mov.l %1,%0\n\tmov.l %T1,%T0"; |
| } |
| |
| /* Print an instruction which would have gone into a delay slot after |
| another instruction, but couldn't because the other instruction expanded |
| into a sequence where putting the slot insn at the end wouldn't work. */ |
| |
| static void |
| print_slot (rtx insn) |
| { |
| final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL); |
| |
| INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1; |
| } |
| |
| const char * |
| output_far_jump (rtx insn, rtx op) |
| { |
| struct { rtx lab, reg, op; } this; |
| rtx braf_base_lab = NULL_RTX; |
| const char *jump; |
| int far; |
| int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); |
| rtx prev; |
| |
| this.lab = gen_label_rtx (); |
| |
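| /* On SH2 and up, braf with a mov.w-loaded displacement reaches |
| targets within roughly a signed 16-bit range; the bounds below |
| leave slack for the length of this insn sequence itself. */ |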
| if (TARGET_SH2 |
| && offset >= -32764 |
| && offset - get_attr_length (insn) <= 32766) |
| { |
| far = 0; |
| jump = "mov.w %O0,%1; braf %1"; |
| } |
| else |
| { |
| far = 1; |
| if (flag_pic) |
| { |
| if (TARGET_SH2) |
| jump = "mov.l %O0,%1; braf %1"; |
| else |
| jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1"; |
| } |
| else |
| jump = "mov.l %O0,%1; jmp @%1"; |
| } |
| /* If we have a scratch register available, use it. */ |
| if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN |
| && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) |
| { |
| this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0)); |
| if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2) |
| jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1"; |
| output_asm_insn (jump, &this.lab); |
| if (dbr_sequence_length ()) |
| print_slot (final_sequence); |
| else |
| output_asm_insn ("nop", 0); |
| } |
| else |
| { |
| /* Output the delay slot insn first if any. */ |
| if (dbr_sequence_length ()) |
| print_slot (final_sequence); |
| |
| this.reg = gen_rtx_REG (SImode, 13); |
| /* We must keep the stack aligned to 8-byte boundaries on SH5. |
| Fortunately, MACL is fixed and call-clobbered, and we never |
| need its value across jumps, so save r13 in it instead of in |
| the stack. */ |
| if (TARGET_SH5) |
| output_asm_insn ("lds r13, macl", 0); |
| else |
| output_asm_insn ("mov.l r13,@-r15", 0); |
| output_asm_insn (jump, &this.lab); |
| if (TARGET_SH5) |
| output_asm_insn ("sts macl, r13", 0); |
| else |
| output_asm_insn ("mov.l @r15+,r13", 0); |
| } |
| if (far && flag_pic && TARGET_SH2) |
| { |
| braf_base_lab = gen_label_rtx (); |
| (*targetm.asm_out.internal_label) (asm_out_file, "L", |
| CODE_LABEL_NUMBER (braf_base_lab)); |
| } |
| if (far) |
| output_asm_insn (".align 2", 0); |
| (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab)); |
| this.op = op; |
| if (far && flag_pic) |
| { |
| if (TARGET_SH2) |
| this.lab = braf_base_lab; |
| output_asm_insn (".long %O2-%O0", &this.lab); |
| } |
| else |
| output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab); |
| return ""; |
| } |
| |
| /* Local label counter, used for constants in the pool and inside |
| pattern branches. */ |
| |
| static int lf = 100; |
| |
| /* Output code for ordinary branches. */ |
| |
| const char * |
| output_branch (int logic, rtx insn, rtx *operands) |
| { |
| switch (get_attr_length (insn)) |
| { |
| case 6: |
| /* This can happen if filling the delay slot has caused a forward |
| branch to exceed its range (we could reverse it, but only |
| when we know we won't overextend other branches; this should |
| best be handled by relaxation). |
| It can also happen when other condbranches hoist delay slot insn |
| from their destination, thus leading to code size increase. |
| But the branch will still be in the range -4092..+4098 bytes. */ |
| |
| if (! TARGET_RELAX) |
| { |
| int label = lf++; |
| /* The call to print_slot will clobber the operands. */ |
| rtx op0 = operands[0]; |
| |
| /* If the instruction in the delay slot is annulled (true), then |
| there is no delay slot where we can put it now. The only safe |
| place for it is after the label. final will do that by default. */ |
| |
| if (final_sequence |
| && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)) |
| && get_attr_length (XVECEXP (final_sequence, 0, 1))) |
| { |
| asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", |
| ASSEMBLER_DIALECT ? "/" : ".", label); |
| print_slot (final_sequence); |
| } |
| else |
| asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label); |
| |
| output_asm_insn ("bra\t%l0", &op0); |
| fprintf (asm_out_file, "\tnop\n"); |
| (*targetm.asm_out.internal_label)(asm_out_file, "LF", label); |
| |
| return ""; |
| } |
| /* When relaxing, handle this like a short branch. The linker |
| will fix it up if it still doesn't fit after relaxation. */ |
| case 2: |
| return logic ? "bt%.\t%l0" : "bf%.\t%l0"; |
| |
| /* These are for SH2e, in which we have to account for the |
| extra nop because of the hardware bug in annulled branches. */ |
| case 8: |
| if (! TARGET_RELAX) |
| { |
| int label = lf++; |
| |
| if (final_sequence |
| && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))) |
| abort (); |
| asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n", |
| logic ? "f" : "t", |
| ASSEMBLER_DIALECT ? "/" : ".", label); |
| fprintf (asm_out_file, "\tnop\n"); |
| output_asm_insn ("bra\t%l0", operands); |
| fprintf (asm_out_file, "\tnop\n"); |
| (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); |
| |
| return ""; |
| } |
| /* When relaxing, fall through. */ |
| case 4: |
| { |
| char buffer[10]; |
| |
| sprintf (buffer, "b%s%ss\t%%l0", |
| logic ? "t" : "f", |
| ASSEMBLER_DIALECT ? "/" : "."); |
| output_asm_insn (buffer, &operands[0]); |
| return "nop"; |
| } |
| |
| default: |
| /* There should be no longer branches now - that would |
| indicate that something has destroyed the branches set |
| up in machine_dependent_reorg. */ |
| abort (); |
| } |
| } |
| |
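| /* Output the insn TEMPLATE with operands[9] set to a branch target: |
| the destination of the following conditional branch when it tests the |
| same condition and is in range, a label just past that branch when it |
| tests the opposite condition, or otherwise a fresh label emitted |
| after INSN. */ |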
| const char * |
| output_branchy_insn (enum rtx_code code, const char *template, |
| rtx insn, rtx *operands) |
| { |
| rtx next_insn = NEXT_INSN (insn); |
| |
| if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn)) |
| { |
| rtx src = SET_SRC (PATTERN (next_insn)); |
| if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) |
| { |
| /* Following branch not taken */ |
| operands[9] = gen_label_rtx (); |
| emit_label_after (operands[9], next_insn); |
| INSN_ADDRESSES_NEW (operands[9], |
| INSN_ADDRESSES (INSN_UID (next_insn)) |
| + get_attr_length (next_insn)); |
| return template; |
| } |
| else |
| { |
| int offset = (branch_dest (next_insn) |
| - INSN_ADDRESSES (INSN_UID (next_insn)) + 4); |
| if (offset >= -252 && offset <= 258) |
| { |
| if (GET_CODE (src) == IF_THEN_ELSE) |
| /* branch_true */ |
| src = XEXP (src, 1); |
| operands[9] = src; |
| return template; |
| } |
| } |
| } |
| operands[9] = gen_label_rtx (); |
| emit_label_after (operands[9], insn); |
| INSN_ADDRESSES_NEW (operands[9], |
| INSN_ADDRESSES (INSN_UID (insn)) |
| + get_attr_length (insn)); |
| return template; |
| } |
| |
| const char * |
| output_ieee_ccmpeq (rtx insn, rtx *operands) |
| { |
| return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands); |
| } |
| |
| /* Output the start of the assembler file. */ |
| |
| static void |
| sh_file_start (void) |
| { |
| default_file_start (); |
| |
| if (TARGET_ELF) |
| /* We need to show the text section with the proper |
| attributes as in TEXT_SECTION_ASM_OP, before dwarf2out |
| emits it without attributes in TEXT_SECTION_ASM_OP, else GAS |
| will complain. We can teach GAS specifically about the |
| default attributes for our choice of text section, but |
| then we would have to change GAS again if/when we change |
| the text section name. */ |
| fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP); |
| else |
| /* Switch to the data section so that the coffsem symbol |
| isn't in the text section. */ |
| data_section (); |
| |
| if (TARGET_LITTLE_ENDIAN) |
| fputs ("\t.little\n", asm_out_file); |
| |
| if (!TARGET_ELF) |
| { |
| if (TARGET_SHCOMPACT) |
| fputs ("\t.mode\tSHcompact\n", asm_out_file); |
| else if (TARGET_SHMEDIA) |
| fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n", |
| TARGET_SHMEDIA64 ? 64 : 32); |
| } |
| } |
| |
| /* Check if PAT includes UNSPEC_CALLER unspec pattern. */ |
| |
| static bool |
| unspec_caller_rtx_p (rtx pat) |
| { |
| switch (GET_CODE (pat)) |
| { |
| case CONST: |
| return unspec_caller_rtx_p (XEXP (pat, 0)); |
| case PLUS: |
| case MINUS: |
| if (unspec_caller_rtx_p (XEXP (pat, 0))) |
| return true; |
| return unspec_caller_rtx_p (XEXP (pat, 1)); |
| case UNSPEC: |
| if (XINT (pat, 1) == UNSPEC_CALLER) |
| return true; |
| default: |
| break; |
| } |
| |
| return false; |
| } |
| |
| /* Indicate that INSN cannot be duplicated.  This is true for insns |
| that generate a unique label. */ |
| |
| static bool |
| sh_cannot_copy_insn_p (rtx insn) |
| { |
| rtx pat; |
| |
| if (!reload_completed || !flag_pic) |
| return false; |
| |
| if (GET_CODE (insn) != INSN) |
| return false; |
| if (asm_noperands (insn) >= 0) |
| return false; |
| |
| pat = PATTERN (insn); |
| if (GET_CODE (pat) != SET) |
| return false; |
| pat = SET_SRC (pat); |
| |
| if (unspec_caller_rtx_p (pat)) |
| return true; |
| |
| return false; |
| } |
| |
| /* Actual number of instructions used to make a shift by N. */ |
| static const char ashiftrt_insns[] = |
| { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2}; |
| |
| /* Left shift and logical right shift are the same. */ |
| static const char shift_insns[] = |
| { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; |
| |
| /* Individual shift amounts needed to get the above length sequences. |
| One bit right shifts clobber the T bit, so when possible, put one bit |
| shifts in the middle of the sequence, so the ends are eligible for |
| branch delay slots. */ |
| static const short shift_amounts[32][5] = { |
| {0}, {1}, {2}, {2, 1}, |
| {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2}, |
| {8}, {8, 1}, {8, 2}, {8, 1, 2}, |
| {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8}, |
| {16}, {16, 1}, {16, 2}, {16, 1, 2}, |
| {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, |
| {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, |
| {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; |
| |
| /* Likewise, but for shift amounts < 16, up to three highmost bits |
| might be clobbered. This is typically used when combined with some |
| kind of sign or zero extension. */ |
| |
| static const char ext_shift_insns[] = |
| { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3}; |
| |
| static const short ext_shift_amounts[32][4] = { |
| {0}, {1}, {2}, {2, 1}, |
| {2, 2}, {2, 1, 2}, {8, -2}, {8, -1}, |
| {8}, {8, 1}, {8, 2}, {8, 1, 2}, |
| {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1}, |
| {16}, {16, 1}, {16, 2}, {16, 1, 2}, |
| {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8}, |
| {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2}, |
| {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}}; |
| |
| /* Assuming we have a value that has been sign-extended by at least one bit, |
| can we use the ext_shift_amounts with the last shift turned to an arithmetic |
| shift to shift it by N without data loss, and quicker than by other means? */ |
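| /* The test below is true exactly for N == 7 and N == 15. */ |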
| #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15) |
| |
| /* This is used in length attributes in sh.md to help compute the length |
| of arbitrary constant shift instructions. */ |
| |
| int |
| shift_insns_rtx (rtx insn) |
| { |
| rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); |
| int shift_count = INTVAL (XEXP (set_src, 1)); |
| enum rtx_code shift_code = GET_CODE (set_src); |
| |
| switch (shift_code) |
| { |
| case ASHIFTRT: |
| return ashiftrt_insns[shift_count]; |
| case LSHIFTRT: |
| case ASHIFT: |
| return shift_insns[shift_count]; |
| default: |
| abort (); |
| } |
| } |
| |
| /* Return the cost of a shift. */ |
| |
| static inline int |
| shiftcosts (rtx x) |
| { |
| int value; |
| |
| if (TARGET_SHMEDIA) |
| return 1; |
| |
| if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) |
| { |
| if (GET_MODE (x) == DImode |
| && GET_CODE (XEXP (x, 1)) == CONST_INT |
| && INTVAL (XEXP (x, 1)) == 1) |
| return 2; |
| |
| /* Everything else is invalid, because there is no pattern for it. */ |
| return 10000; |
| } |
| /* If shift by a non-constant, then this will be expensive. */ |
| if (GET_CODE (XEXP (x, 1)) != CONST_INT) |
| return SH_DYNAMIC_SHIFT_COST; |
| |
| value = INTVAL (XEXP (x, 1)); |
| |
| /* Otherwise, return the true cost in instructions. */ |
| if (GET_CODE (x) == ASHIFTRT) |
| { |
| int cost = ashiftrt_insns[value]; |
| /* If SH3, then we put the constant in a reg and use shad. */ |
| if (cost > 1 + SH_DYNAMIC_SHIFT_COST) |
| cost = 1 + SH_DYNAMIC_SHIFT_COST; |
| return cost; |
| } |
| else |
| return shift_insns[value]; |
| } |
| |
| /* Return the cost of an AND operation. */ |
| |
| static inline int |
| andcosts (rtx x) |
| { |
| int i; |
| |
| /* Anding with a register is a single cycle and instruction. */ |
| if (GET_CODE (XEXP (x, 1)) != CONST_INT) |
| return 1; |
| |
| i = INTVAL (XEXP (x, 1)); |
| |
| if (TARGET_SHMEDIA) |
| { |
| if ((GET_CODE (XEXP (x, 1)) == CONST_INT |
| && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)))) |
| || EXTRA_CONSTRAINT_C16 (XEXP (x, 1))) |
| return 1; |
| else |
| return 2; |
| } |
| |
| /* These constants are single cycle extu.[bw] instructions. */ |
| if (i == 0xff || i == 0xffff) |
| return 1; |
| /* Constants that can be used in an and immediate instruction in a single |
| cycle, but this requires r0, so make it a little more expensive. */ |
| if (CONST_OK_FOR_K08 (i)) |
| return 2; |
| /* Constants that can be loaded with a mov immediate and an and. |
| This case is probably unnecessary. */ |
| if (CONST_OK_FOR_I08 (i)) |
| return 2; |
| /* Any other constant requires a 2 cycle pc-relative load plus an and. |
| This case is probably unnecessary. */ |
| return 3; |
| } |
| |
| /* Return the cost of an addition or a subtraction. */ |
| |
| static inline int |
| addsubcosts (rtx x) |
| { |
| /* Adding a register is a single cycle insn. */ |
| if (GET_CODE (XEXP (x, 1)) == REG |
| || GET_CODE (XEXP (x, 1)) == SUBREG) |
| return 1; |
| |
| /* Likewise for small constants. */ |
| if (GET_CODE (XEXP (x, 1)) == CONST_INT |
| && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1)))) |
| return 1; |
| |
| if (TARGET_SHMEDIA) |
| switch (GET_CODE (XEXP (x, 1))) |
| { |
| case CONST: |
| case LABEL_REF: |
| case SYMBOL_REF: |
| return TARGET_SHMEDIA64 ? 5 : 3; |
| |
| case CONST_INT: |
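| /* The cost counts the add itself plus the movi / shori sequence |
| needed to synthesize the constant. */ |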
| if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)))) |
| return 2; |
| else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16)) |
| return 3; |
| else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16)) |
| return 4; |
| |
| /* Fall through. */ |
| default: |
| return 5; |
| } |
| |
| /* Any other constant requires a 2 cycle pc-relative load plus an |
| addition. */ |
| return 3; |
| } |
| |
| /* Return the cost of a multiply. */ |
| static inline int |
| multcosts (rtx x ATTRIBUTE_UNUSED) |
| { |
| if (TARGET_SHMEDIA) |
| return 3; |
| |
| if (TARGET_SH2) |
| { |
| /* We have a mul insn, so we can never take more than the mul and the |
| read of the mac reg, but count more because of the latency and extra |
| reg usage. */ |
| if (TARGET_SMALLCODE) |
| return 2; |
| return 3; |
| } |
| |
| /* If we're aiming at small code, then just count the number of |
| insns in a multiply call sequence. */ |
| if (TARGET_SMALLCODE) |
| return 5; |
| |
| /* Otherwise count all the insns in the routine we'd be calling too. */ |
| return 20; |
| } |
| |
| /* Compute a (partial) cost for rtx X. Return true if the complete |
| cost has been computed, and false if subexpressions should be |
| scanned. In either case, *TOTAL contains the cost result. */ |
| |
| static bool |
| sh_rtx_costs (rtx x, int code, int outer_code, int *total) |
| { |
| switch (code) |
| { |
| case CONST_INT: |
| if (TARGET_SHMEDIA) |
| { |
| if (INTVAL (x) == 0) |
| *total = 0; |
| else if (outer_code == AND && and_operand ((x), DImode)) |
| *total = 0; |
| else if ((outer_code == IOR || outer_code == XOR |
| || outer_code == PLUS) |
| && CONST_OK_FOR_I10 (INTVAL (x))) |
| *total = 0; |
| else if (CONST_OK_FOR_I16 (INTVAL (x))) |
| *total = COSTS_N_INSNS (outer_code != SET); |
| else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16)) |
| *total = COSTS_N_INSNS (2); |
| else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16)) |
| *total = COSTS_N_INSNS (3); |
| else |
| *total = COSTS_N_INSNS (4); |
| return true; |
| } |
| if (CONST_OK_FOR_I08 (INTVAL (x))) |
| *total = 0; |
| else if ((outer_code == AND || outer_code == IOR || outer_code == XOR) |
| && CONST_OK_FOR_K08 (INTVAL (x))) |
| *total = 1; |
| else |
| *total = 8; |
| return true; |
| |
| case CONST: |
| case LABEL_REF: |
| case SYMBOL_REF: |
| if (TARGET_SHMEDIA64) |
| *total = COSTS_N_INSNS (4); |
| else if (TARGET_SHMEDIA32) |
| *total = COSTS_N_INSNS (2); |
| else |
| *total = 5; |
| return true; |
| |
| case CONST_DOUBLE: |
| if (TARGET_SHMEDIA) |
| *total = COSTS_N_INSNS (4); |
| else |
| *total = 10; |
| return true; |
| |
| case PLUS: |
| *total = COSTS_N_INSNS (addsubcosts (x)); |
| return true; |
| |
| case AND: |
| *total = COSTS_N_INSNS (andcosts (x)); |
| return true; |
| |
| case MULT: |
| *total = COSTS_N_INSNS (multcosts (x)); |
| return true; |
| |
| case ASHIFT: |
| case ASHIFTRT: |
| case LSHIFTRT: |
| *total = COSTS_N_INSNS (shiftcosts (x)); |
| return true; |
| |
| case DIV: |
| case UDIV: |
| case MOD: |
| case UMOD: |
| *total = COSTS_N_INSNS (20); |
| return true; |
| |
| case FLOAT: |
| case FIX: |
| *total = 100; |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* Compute the cost of an address. For the SH, all valid addresses are |
| the same cost. Use a slightly higher cost for reg + reg addressing, |
| since it increases pressure on r0. */ |
| |
| static int |
| sh_address_cost (rtx X) |
| { |
| return (GET_CODE (X) == PLUS |
| && ! CONSTANT_P (XEXP (X, 1)) |
| && ! TARGET_SHMEDIA ? 1 : 0); |
| } |
| |
| /* Code to expand a shift. */ |
| |
| void |
| gen_ashift (int type, int n, rtx reg) |
| { |
| /* Negative values here come from the shift_amounts array. */ |
| if (n < 0) |
| { |
| if (type == ASHIFT) |
| type = LSHIFTRT; |
| else |
| type = ASHIFT; |
| n = -n; |
| } |
| |
| switch (type) |
| { |
| case ASHIFTRT: |
| emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n))); |
| break; |
| case LSHIFTRT: |
| if (n == 1) |
| emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n))); |
| else |
| emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n))); |
| break; |
| case ASHIFT: |
| emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n))); |
| break; |
| } |
| } |
| |
| /* Same for HImode */ |
| |
| void |
| gen_ashift_hi (int type, int n, rtx reg) |
| { |
| /* Negative values here come from the shift_amounts array. */ |
| if (n < 0) |
| { |
| if (type == ASHIFT) |
| type = LSHIFTRT; |
| else |
| type = ASHIFT; |
| n = -n; |
| } |
| |
| switch (type) |
| { |
| case ASHIFTRT: |
| case LSHIFTRT: |
| /* We don't have HImode right shift operations because using the |
| ordinary 32 bit shift instructions for that doesn't generate proper |
| zero/sign extension. |
| gen_ashift_hi is only called in contexts where we know that the |
| sign extension works out correctly. */ |
| { |
| int offset = 0; |
| if (GET_CODE (reg) == SUBREG) |
| { |
| offset = SUBREG_BYTE (reg); |
| reg = SUBREG_REG (reg); |
| } |
| gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset)); |
| break; |
| } |
| case ASHIFT: |
| emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n))); |
| break; |
| } |
| } |
| |
| /* Output RTL to split a constant shift into its component SH constant |
| shift instructions. */ |
| |
| void |
| gen_shifty_op (int code, rtx *operands) |
| { |
| int value = INTVAL (operands[2]); |
| int max, i; |
| |
| /* Truncate the shift count in case it is out of bounds. */ |
| value = value & 0x1f; |
| |
| if (value == 31) |
| { |
| if (code == LSHIFTRT) |
| { |
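| /* The rotate moves the original bit 31 into T; movt then yields |
| the value logically shifted right by 31. */ |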
| emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); |
| emit_insn (gen_movt (operands[0])); |
| return; |
| } |
| else if (code == ASHIFT) |
| { |
| /* There is a two instruction sequence for 31 bit left shifts, |
| but it requires r0. */ |
| if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0) |
| { |
| emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); |
| emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); |
| return; |
| } |
| } |
| } |
| else if (value == 0) |
| { |
| /* This can happen when not optimizing. We must output something here |
| to prevent the compiler from aborting in final.c after the try_split |
| call. */ |
| emit_insn (gen_nop ()); |
| return; |
| } |
| |
| max = shift_insns[value]; |
| for (i = 0; i < max; i++) |
| gen_ashift (code, shift_amounts[value][i], operands[0]); |
| } |
| |
| /* Same as above, but optimized for values where the topmost bits don't |
| matter. */ |
| |
| void |
| gen_shifty_hi_op (int code, rtx *operands) |
| { |
| int value = INTVAL (operands[2]); |
| int max, i; |
| void (*gen_fun) (int, int, rtx); |
| |
| /* This operation is used by and_shl for SImode values with a few |
| high bits known to be cleared. */ |
| value &= 31; |
| if (value == 0) |
| { |
| emit_insn (gen_nop ()); |
| return; |
| } |
| |
| gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift; |
| if (code == ASHIFT) |
| { |
| max = ext_shift_insns[value]; |
| for (i = 0; i < max; i++) |
| gen_fun (code, ext_shift_amounts[value][i], operands[0]); |
| } |
| else |
| /* When shifting right, emit the shifts in reverse order, so that |
| solitary negative values come first. */ |
| for (i = ext_shift_insns[value] - 1; i >= 0; i--) |
| gen_fun (code, ext_shift_amounts[value][i], operands[0]); |
| } |
| |
| /* Output RTL for an arithmetic right shift. */ |
| |
| /* ??? Rewrite to use super-optimizer sequences. */ |
| |
| int |
| expand_ashiftrt (rtx *operands) |
| { |
| rtx sym; |
| rtx wrk; |
| char func[18]; |
| tree func_name; |
| int value; |
| |
| if (TARGET_SH3) |
| { |
| if (GET_CODE (operands[2]) != CONST_INT) |
| { |
| rtx count = copy_to_mode_reg (SImode, operands[2]); |
| emit_insn (gen_negsi2 (count, count)); |
| emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); |
| return 1; |
| } |
| else if (ashiftrt_insns[INTVAL (operands[2]) & 31] |
| > 1 + SH_DYNAMIC_SHIFT_COST) |
| { |
| rtx count |
| = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31))); |
| emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); |
| return 1; |
| } |
| } |
| if (GET_CODE (operands[2]) != CONST_INT) |
| return 0; |
| |
| value = INTVAL (operands[2]) & 31; |
| |
| if (value == 31) |
| { |
| emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); |
| return 1; |
| } |
| else if (value >= 16 && value <= 19) |
| { |
| wrk = gen_reg_rtx (SImode); |
| emit_insn (gen_ashrsi2_16 (wrk, operands[1])); |
| value -= 16; |
| while (value--) |
| gen_ashift (ASHIFTRT, 1, wrk); |
| emit_move_insn (operands[0], wrk); |
| return 1; |
| } |
| /* Expand a short sequence inline; for a longer one, call a library
| routine.  */
| else if (value <= 5) |
| { |
| wrk = gen_reg_rtx (SImode); |
| emit_move_insn (wrk, operands[1]); |
| while (value--) |
| gen_ashift (ASHIFTRT, 1, wrk); |
| emit_move_insn (operands[0], wrk); |
| return 1; |
| } |
| |
| wrk = gen_reg_rtx (Pmode); |
| |
| /* Load the value into an arg reg and call a helper. */ |
| emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); |
| sprintf (func, "__ashiftrt_r4_%d", value); |
| func_name = get_identifier (func); |
| sym = function_symbol (IDENTIFIER_POINTER (func_name)); |
| emit_move_insn (wrk, sym); |
| emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk)); |
| emit_move_insn (operands[0], gen_rtx_REG (SImode, 4)); |
| return 1; |
| } |
| |
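| /* Return nonzero if a dynamic shift (one shad/shld insn) would be cheaper
| than the equivalent sequence of constant shifts; SH_DYNAMIC_SHIFT_COST
| accounts for getting the shift count into a register.  */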
| int |
| sh_dynamicalize_shift_p (rtx count) |
| { |
| return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST; |
| } |
| |
| /* Try to find a good way to implement the combiner pattern |
| [(set (match_operand:SI 0 "register_operand" "r") |
| (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") |
| (match_operand:SI 2 "const_int_operand" "n")) |
| (match_operand:SI 3 "const_int_operand" "n"))) . |
| LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3. |
| return 0 for simple right / left or left/right shift combination. |
| return 1 for a combination of shifts with zero_extend. |
| return 2 for a combination of shifts with an AND that needs r0. |
| return 3 for a combination of shifts with an AND that needs an extra |
| scratch register, when the three highmost bits of the AND mask are clear. |
| return 4 for a combination of shifts with an AND that needs an extra |
| scratch register, when any of the three highmost bits of the AND mask |
| is set. |
| If ATTRP is set, store an initial right shift width in ATTRP[0], |
| and the instruction length in ATTRP[1] . These values are not valid |
| when returning 0. |
| When ATTRP is set and returning 1, ATTRP[2] gets set to the index into |
| shift_amounts for the last shift value that is to be used before the |
| sign extend. */ |
| int |
| shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp) |
| { |
| unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2; |
| int left = INTVAL (left_rtx), right; |
| int best = 0; |
| int cost, best_cost = 10000; |
| int best_right = 0, best_len = 0; |
| int i; |
| int can_ext; |
| |
| if (left < 0 || left > 31) |
| return 0; |
| if (GET_CODE (mask_rtx) == CONST_INT) |
| mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left; |
| else |
| mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left; |
| /* Can this be expressed as a right shift / left shift pair?  */
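| /* mask ^ (mask - 1) is a run of ones covering the lowest set bit and
| everything below it; shifting right and adding one isolates the
| lowest set bit itself.  */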
| lsb = ((mask ^ (mask - 1)) >> 1) + 1; |
| right = exact_log2 (lsb); |
| mask2 = ~(mask + lsb - 1); |
| lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1; |
| /* mask has no zeroes except trailing zeroes <==> ! mask2.  */
| if (! mask2) |
| best_cost = shift_insns[right] + shift_insns[right + left]; |
| /* mask has no trailing zeroes <==> ! right */ |
| else if (! right && mask2 == ~(lsb2 - 1)) |
| { |
| int late_right = exact_log2 (lsb2); |
| best_cost = shift_insns[left + late_right] + shift_insns[late_right]; |
| } |
| /* Try to use zero extend */ |
| if (mask2 == ~(lsb2 - 1)) |
| { |
| int width, first; |
| |
| for (width = 8; width <= 16; width += 8) |
| { |
| /* Can we zero-extend right away? */ |
| if (lsb2 == (unsigned HOST_WIDE_INT)1 << width) |
| { |
| cost |
| = 1 + ext_shift_insns[right] + ext_shift_insns[left + right]; |
| if (cost < best_cost) |
| { |
| best = 1; |
| best_cost = cost; |
| best_right = right; |
| best_len = cost; |
| if (attrp) |
| attrp[2] = -1; |
| } |
| continue; |
| } |
| /* ??? Could try to put zero extend into initial right shift, |
| or even shift a bit left before the right shift. */ |
| /* Determine value of first part of left shift, to get to the |
| zero extend cut-off point. */ |
| first = width - exact_log2 (lsb2) + right; |
| if (first >= 0 && right + left - first >= 0) |
| { |
| cost = ext_shift_insns[right] + ext_shift_insns[first] + 1 |
| + ext_shift_insns[right + left - first]; |
| if (cost < best_cost) |
| { |
| best = 1; |
| best_cost = cost; |
| best_right = right; |
| best_len = cost; |
| if (attrp) |
| attrp[2] = first; |
| } |
| } |
| } |
| } |
| /* Try to use r0 AND pattern */ |
| for (i = 0; i <= 2; i++) |
| { |
| if (i > right) |
| break; |
| if (! CONST_OK_FOR_K08 (mask >> i)) |
| continue; |
| cost = (i != 0) + 2 + ext_shift_insns[left + i]; |
| if (cost < best_cost) |
| { |
| best = 2; |
| best_cost = cost; |
| best_right = i; |
| best_len = cost - 1; |
| } |
| } |
| /* Try to use a scratch register to hold the AND operand. */ |
| can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT)3 << 30)) == 0; |
| for (i = 0; i <= 2; i++) |
| { |
| if (i > right) |
| break; |
| cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3) |
| + (can_ext ? ext_shift_insns : shift_insns)[left + i]; |
| if (cost < best_cost) |
| { |
| best = 4 - can_ext; |
| best_cost = cost; |
| best_right = i; |
| best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i); |
| } |
| } |
| |
| if (attrp) |
| { |
| attrp[0] = best_right; |
| attrp[1] = best_len; |
| } |
| return best; |
| } |
| |
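| /* For instance, with LEFT_RTX == 2 and MASK_RTX == 0x3fc, the computed
| value is (x << 2) & 0x3fc == (x & 0xff) << 2, so a zero extend
| followed by one left shift (kind 1) is likely to beat a plain
| right/left shift pair.  (Illustrative walk-through; the actual
| choice depends on the cost tables above.)  */
| 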
| /* This is used in length attributes of the unnamed instructions |
| corresponding to shl_and_kind return values of 1 and 2. */ |
| int |
| shl_and_length (rtx insn) |
| { |
| rtx set_src, left_rtx, mask_rtx; |
| int attributes[3]; |
| |
| set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); |
| left_rtx = XEXP (XEXP (set_src, 0), 1); |
| mask_rtx = XEXP (set_src, 1); |
| shl_and_kind (left_rtx, mask_rtx, attributes); |
| return attributes[1]; |
| } |
| |
| /* This is used in length attribute of the and_shl_scratch instruction. */ |
| |
| int |
| shl_and_scr_length (rtx insn) |
| { |
| rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); |
| int len = shift_insns[INTVAL (XEXP (set_src, 1))]; |
| rtx op = XEXP (set_src, 0); |
| len += shift_insns[INTVAL (XEXP (op, 1))] + 1; |
| op = XEXP (XEXP (op, 0), 0); |
| return len + shift_insns[INTVAL (XEXP (op, 1))]; |
| } |
| |
| /* Generating rtl? */ |
| extern int rtx_equal_function_value_matters; |
| |
| /* Generate rtl for instructions for which shl_and_kind advised a particular
| method of generating them, i.e. returned nonzero.  */
| |
| int |
| gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source) |
| { |
| int attributes[3]; |
| unsigned HOST_WIDE_INT mask; |
| int kind = shl_and_kind (left_rtx, mask_rtx, attributes); |
| int right, total_shift; |
| void (*shift_gen_fun) (int, rtx*) = gen_shifty_hi_op; |
| |
| right = attributes[0]; |
| total_shift = INTVAL (left_rtx) + right; |
| mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift; |
| switch (kind) |
| { |
| default: |
| return -1; |
| case 1: |
| { |
| int first = attributes[2]; |
| rtx operands[3]; |
| |
| if (first < 0) |
| { |
| emit_insn ((mask << right) <= 0xff
| ? gen_zero_extendqisi2 (dest,
| gen_lowpart (QImode, source))
| : gen_zero_extendhisi2 (dest,
| gen_lowpart (HImode, source)));
| source = dest; |
| } |
| if (source != dest) |
| emit_insn (gen_movsi (dest, source)); |
| operands[0] = dest; |
| if (right) |
| { |
| operands[2] = GEN_INT (right); |
| gen_shifty_hi_op (LSHIFTRT, operands); |
| } |
| if (first > 0) |
| { |
| operands[2] = GEN_INT (first); |
| gen_shifty_hi_op (ASHIFT, operands); |
| total_shift -= first; |
| mask <<= first; |
| } |
| if (first >= 0) |
| emit_insn (mask <= 0xff
| ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
| : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
| if (total_shift > 0) |
| { |
| operands[2] = GEN_INT (total_shift); |
| gen_shifty_hi_op (ASHIFT, operands); |
| } |
| break; |
| } |
| case 4: |
| shift_gen_fun = gen_shifty_op; |
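| /* Fall through.  */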
| case 3: |
| /* If the topmost bit that matters is set, set the topmost bits |
| that don't matter. This way, we might be able to get a shorter |
| signed constant. */ |
| if (mask & ((HOST_WIDE_INT)1 << (31 - total_shift))) |
| mask |= (HOST_WIDE_INT)~0 << (31 - total_shift); |
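| /* Fall through.  */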
| case 2: |
| /* Don't expand fine-grained when combining, because that will |
| make the pattern fail. */ |
| if (rtx_equal_function_value_matters |
| || reload_in_progress || reload_completed) |
| { |
| rtx operands[3]; |
| |
| /* Cases 3 and 4 should be handled by this split
| only while combining.  */
| if (kind > 2) |
| abort (); |
| if (right) |
| { |
| emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right))); |
| source = dest; |
| } |
| emit_insn (gen_andsi3 (dest, source, GEN_INT (mask))); |
| if (total_shift) |
| { |
| operands[0] = dest; |
| operands[1] = dest; |
| operands[2] = GEN_INT (total_shift); |
| shift_gen_fun (ASHIFT, operands); |
| } |
| break; |
| } |
| else |
| { |
| int neg = 0; |
| if (kind != 4 && total_shift < 16) |
| { |
| neg = -ext_shift_amounts[total_shift][1]; |
| if (neg > 0) |
| neg -= ext_shift_amounts[total_shift][2]; |
| else |
| neg = 0; |
| } |
| emit_insn (gen_and_shl_scratch (dest, source, |
| GEN_INT (right), |
| GEN_INT (mask), |
| GEN_INT (total_shift + neg), |
| GEN_INT (neg))); |
| emit_insn (gen_movsi (dest, dest)); |
| break; |
| } |
| } |
| return 0; |
| } |
| |
| /* Try to find a good way to implement the combiner pattern |
| [(set (match_operand:SI 0 "register_operand" "=r") |
| (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") |
| (match_operand:SI 2 "const_int_operand" "n") |
| (match_operand:SI 3 "const_int_operand" "n") |
| (const_int 0))) |
| (clobber (reg:SI T_REG))] |
| LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3. |
| return 0 for simple left / right shift combination. |
| return 1 for left shift / 8 bit sign extend / left shift. |
| return 2 for left shift / 16 bit sign extend / left shift. |
| return 3 for left shift / 8 bit sign extend / shift / sign extend. |
| return 4 for left shift / 16 bit sign extend / shift / sign extend. |
| return 5 for left shift / 16 bit sign extend / right shift |
| return 6 for < 8 bit sign extend / left shift. |
| return 7 for < 8 bit sign extend / left shift / single right shift. |
| If COSTP is nonzero, assign the calculated cost to *COSTP. */ |
| |
| int |
| shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp) |
| { |
| int left, size, insize, ext; |
| int cost = 0, best_cost; |
| int kind; |
| |
| left = INTVAL (left_rtx); |
| size = INTVAL (size_rtx); |
| insize = size - left; |
| if (insize <= 0) |
| abort (); |
| /* Default to left / right shift. */ |
| kind = 0; |
| best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size]; |
| if (size <= 16) |
| { |
| /* 16 bit shift / sign extend / 16 bit shift */ |
| cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size]; |
| /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden |
| below, by alternative 3 or something even better. */ |
| if (cost < best_cost) |
| { |
| kind = 5; |
| best_cost = cost; |
| } |
| } |
| /* Try a plain sign extend between two shifts. */ |
| for (ext = 16; ext >= insize; ext -= 8) |
| { |
| if (ext <= size) |
| { |
| cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext]; |
| if (cost < best_cost) |
| { |
| kind = ext / (unsigned) 8; |
| best_cost = cost; |
| } |
| } |
| /* Check if we can do a sloppy shift with a final signed shift |
| restoring the sign. */ |
| if (EXT_SHIFT_SIGNED (size - ext)) |
| cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1; |
| /* If not, maybe it's still cheaper to do the second shift sloppy, |
| and do a final sign extend? */ |
| else if (size <= 16) |
| cost = ext_shift_insns[ext - insize] + 1 |
| + ext_shift_insns[size > ext ? size - ext : ext - size] + 1; |
| else |
| continue; |
| if (cost < best_cost) |
| { |
| kind = ext / (unsigned) 8 + 2; |
| best_cost = cost; |
| } |
| } |
| /* Check if we can sign extend in r0 */ |
| if (insize < 8) |
| { |
| cost = 3 + shift_insns[left]; |
| if (cost < best_cost) |
| { |
| kind = 6; |
| best_cost = cost; |
| } |
| /* Try the same with a final signed shift. */ |
| if (left < 31) |
| { |
| cost = 3 + ext_shift_insns[left + 1] + 1; |
| if (cost < best_cost) |
| { |
| kind = 7; |
| best_cost = cost; |
| } |
| } |
| } |
| if (TARGET_SH3) |
| { |
| /* Try to use a dynamic shift. */ |
| cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST; |
| if (cost < best_cost) |
| { |
| kind = 0; |
| best_cost = cost; |
| } |
| } |
| if (costp) |
| *costp = cost; |
| return kind; |
| } |
| |
| /* Function to be used in the length attribute of the instructions |
| implementing this pattern. */ |
| |
| int |
| shl_sext_length (rtx insn) |
| { |
| rtx set_src, left_rtx, size_rtx; |
| int cost; |
| |
| set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); |
| left_rtx = XEXP (XEXP (set_src, 0), 1); |
| size_rtx = XEXP (set_src, 1); |
| shl_sext_kind (left_rtx, size_rtx, &cost); |
| return cost; |
| } |
| |
| /* Generate rtl for this pattern */ |
| |
| int |
| gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source) |
| { |
| int kind; |
| int left, size, insize, cost; |
| rtx operands[3]; |
| |
| kind = shl_sext_kind (left_rtx, size_rtx, &cost); |
| left = INTVAL (left_rtx); |
| size = INTVAL (size_rtx); |
| insize = size - left; |
| switch (kind) |
| { |
| case 1: |
| case 2: |
| case 3: |
| case 4: |
| { |
| int ext = kind & 1 ? 8 : 16; |
| int shift2 = size - ext; |
| |
| /* Don't expand fine-grained when combining, because that will |
| make the pattern fail. */ |
| if (! rtx_equal_function_value_matters |
| && ! reload_in_progress && ! reload_completed) |
| { |
| emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); |
| emit_insn (gen_movsi (dest, source)); |
| break; |
| } |
| if (dest != source) |
| emit_insn (gen_movsi (dest, source)); |
| operands[0] = dest; |
| if (ext - insize) |
| { |
| operands[2] = GEN_INT (ext - insize); |
| gen_shifty_hi_op (ASHIFT, operands); |
| } |
| emit_insn (kind & 1
| ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
| : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
| if (kind <= 2) |
| { |
| if (shift2) |
| { |
| operands[2] = GEN_INT (shift2); |
| gen_shifty_op (ASHIFT, operands); |
| } |
| } |
| else |
| { |
| if (shift2 > 0) |
| { |
| if (EXT_SHIFT_SIGNED (shift2)) |
| { |
| operands[2] = GEN_INT (shift2 + 1); |
| gen_shifty_op (ASHIFT, operands); |
| operands[2] = GEN_INT (1); |
| gen_shifty_op (ASHIFTRT, operands); |
| break; |
| } |
| operands[2] = GEN_INT (shift2); |
| gen_shifty_hi_op (ASHIFT, operands); |
| } |
| else if (shift2) |
| { |
| operands[2] = GEN_INT (-shift2); |
| gen_shifty_hi_op (LSHIFTRT, operands); |
| } |
| emit_insn (size <= 8 |
| ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) |
| : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); |
| } |
| break; |
| } |
| case 5: |
| { |
| int i = 16 - size; |
| if (! rtx_equal_function_value_matters |
| && ! reload_in_progress && ! reload_completed) |
| emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); |
| else |
| { |
| operands[0] = dest; |
| operands[2] = GEN_INT (16 - insize); |
| gen_shifty_hi_op (ASHIFT, operands); |
| emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); |
| } |
| /* Don't use gen_ashrsi3 because it generates new pseudos. */ |
| while (--i >= 0) |
| gen_ashift (ASHIFTRT, 1, dest); |
| break; |
| } |
| case 6: |
| case 7: |
| /* Don't expand fine-grained when combining, because that will |
| make the pattern fail. */ |
| if (! rtx_equal_function_value_matters |
| && ! reload_in_progress && ! reload_completed) |
| { |
| emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); |
| emit_insn (gen_movsi (dest, source)); |
| break; |
| } |
| emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1))); |
| emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1)))); |
| emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1)))); |
| operands[0] = dest; |
| operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx; |
| gen_shifty_op (ASHIFT, operands); |
| if (kind == 7) |
| emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1))); |
| break; |
| default: |
| return -1; |
| } |
| return 0; |
| } |
| |
| /* Prefix a symbol_ref name with "datalabel". */ |
| |
| rtx |
| gen_datalabel_ref (rtx sym) |
| { |
| if (GET_CODE (sym) == LABEL_REF) |
| return gen_rtx_CONST (GET_MODE (sym), |
| gen_rtx_UNSPEC (GET_MODE (sym), |
| gen_rtvec (1, sym), |
| UNSPEC_DATALABEL)); |
| |
| if (GET_CODE (sym) != SYMBOL_REF) |
| abort (); |
| |
| return sym; |
| } |
| |
| |
| /* The SH cannot load a large constant into a register; constants have to
| come from a pc relative load.  The reference of a pc relative load
| instruction must be less than 1k in front of the instruction.  This
| means that we often have to dump a constant inside a function, and
| generate code to branch around it.  */
| |
| It is important to minimize this, since the branches will slow things |
| down and make things bigger. |
| |
| Worst case code looks like: |
| |
| mov.l L1,rn |
| bra L2 |
| nop |
| align |
| L1: .long value |
| L2: |
| .. |
| |
| mov.l L3,rn |
| bra L4 |
| nop |
| align |
| L3: .long value |
| L4: |
| .. |
| |
| We fix this by performing a scan before scheduling, which notices which |
| instructions need to have their operands fetched from the constant table |
| and builds the table. |
| |
| The algorithm is: |
| |
| scan to find an instruction which needs a pcrel move.  Look forward to
| find the last barrier which is within MAX_COUNT bytes of the
| requirement.  If there isn't one, make one.  Process all the
| instructions between the found instruction and the barrier.
| |
| In the above example, we can tell that L3 is within 1k of L1, so |
| the first move can be shrunk from the 3 insn+constant sequence into |
| just 1 insn, and the constant moved to L3 to make: |
| |
| mov.l L1,rn |
| .. |
| mov.l L3,rn |
| bra L4 |
| nop |
| align |
| L3: .long value
| L4: .long value
| |
| Then the second move becomes the target for the shortening process. */ |
| |
| typedef struct |
| { |
| rtx value; /* Value in table. */ |
| rtx label; /* Label of value. */ |
| rtx wend; /* End of window. */ |
| enum machine_mode mode; /* Mode of value. */ |
| |
| /* True if this constant is accessed as part of a post-increment |
| sequence. Note that HImode constants are never accessed in this way. */ |
| bool part_of_sequence_p; |
| } pool_node; |
| |
| /* The maximum number of constants that can fit into one pool, since |
| the pc relative range is 0...1020 bytes and constants are at least 4 |
| bytes long. */ |
| |
| #define MAX_POOL_SIZE (1020/4) |
| static pool_node pool_vector[MAX_POOL_SIZE]; |
| static int pool_size; |
| static rtx pool_window_label; |
| static int pool_window_last; |
| |
| /* ??? If we need a constant in HImode which is the truncated value of a |
| constant we need in SImode, we could combine the two entries thus saving |
| two bytes. Is this common enough to be worth the effort of implementing |
| it? */ |
| |
| /* ??? This stuff should be done at the same time that we shorten branches. |
| As it is now, we must assume that all branches are the maximum size, and |
| this causes us to almost always output constant pools sooner than |
| necessary. */ |
| |
| /* Add a constant to the pool and return its label. */ |
| |
| static rtx |
| add_constant (rtx x, enum machine_mode mode, rtx last_value) |
| { |
| int i; |
| rtx lab, new, ref, newref; |
| |
| /* First see if we've already got it. */ |
| for (i = 0; i < pool_size; i++) |
| { |
| if (GET_CODE (x) == GET_CODE (pool_vector[i].value)
| && mode == pool_vector[i].mode)
| {
| if (GET_CODE (x) == CODE_LABEL)
| { |
| if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) |
| continue; |
| } |
| if (rtx_equal_p (x, pool_vector[i].value)) |
| { |
| lab = new = 0; |
| if (! last_value |
| || ! i |
| || ! rtx_equal_p (last_value, pool_vector[i-1].value)) |
| { |
| new = gen_label_rtx (); |
| LABEL_REFS (new) = pool_vector[i].label; |
| pool_vector[i].label = lab = new; |
| } |
| if (lab && pool_window_label) |
| { |
| newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label); |
| ref = pool_vector[pool_window_last].wend; |
| LABEL_NEXTREF (newref) = ref; |
| pool_vector[pool_window_last].wend = newref; |
| } |
| if (new) |
| pool_window_label = new; |
| pool_window_last = i; |
| return lab; |
| } |
| } |
| } |
| |
| /* Need a new one. */ |
| pool_vector[pool_size].value = x; |
| if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value)) |
| { |
| lab = 0; |
| pool_vector[pool_size - 1].part_of_sequence_p = true; |
| } |
| else |
| lab = gen_label_rtx (); |
| pool_vector[pool_size].mode = mode; |
| pool_vector[pool_size].label = lab; |
| pool_vector[pool_size].wend = NULL_RTX; |
| pool_vector[pool_size].part_of_sequence_p = (lab == 0); |
| if (lab && pool_window_label) |
| { |
| newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label); |
| ref = pool_vector[pool_window_last].wend; |
| LABEL_NEXTREF (newref) = ref; |
| pool_vector[pool_window_last].wend = newref; |
| } |
| if (lab) |
| pool_window_label = lab; |
| pool_window_last = pool_size; |
| pool_size++; |
| return lab; |
| } |
| |
| /* Output the literal table. START, if nonzero, is the first instruction |
| this table is needed for, and also indicates that there is at least one |
| casesi_worker_2 instruction; we have to emit the operand3 labels from
| these insns at a 4-byte aligned position. BARRIER is the barrier |
| after which we are to place the table. */ |
| |
| static void |
| dump_table (rtx start, rtx barrier) |
| { |
| rtx scan = barrier; |
| int i; |
| int need_align = 1; |
| rtx lab, ref; |
| int have_df = 0; |
| |
| /* Do two passes; the first time, dump out the HI sized constants.  */
| |
| for (i = 0; i < pool_size; i++) |
| { |
| pool_node *p = &pool_vector[i]; |
| |
| if (p->mode == HImode) |
| { |
| if (need_align) |
| { |
| scan = emit_insn_after (gen_align_2 (), scan); |
| need_align = 0; |
| } |
| for (lab = p->label; lab; lab = LABEL_REFS (lab)) |
| scan = emit_label_after (lab, scan); |
| scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx), |
| scan); |
| for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref)) |
| { |
| lab = XEXP (ref, 0); |
| scan = emit_insn_after (gen_consttable_window_end (lab), scan); |
| } |
| } |
| else if (p->mode == DFmode) |
| have_df = 1; |
| } |
| |
| need_align = 1; |
| |
| if (start) |
| { |
| scan = emit_insn_after (gen_align_4 (), scan); |
| need_align = 0; |
| for (; start != barrier; start = NEXT_INSN (start)) |
| if (GET_CODE (start) == INSN |
| && recog_memoized (start) == CODE_FOR_casesi_worker_2) |
| { |
| rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0)); |
| rtx lab = XEXP (XVECEXP (src, 0, 3), 0); |
| |
| scan = emit_label_after (lab, scan); |
| } |
| } |
| if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df) |
| { |
| rtx align_insn = NULL_RTX; |
| |
| scan = emit_label_after (gen_label_rtx (), scan); |
| scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan); |
| need_align = 0; |
| |
| for (i = 0; i < pool_size; i++) |
| { |
| pool_node *p = &pool_vector[i]; |
| |
| switch (p->mode) |
| { |
| case HImode: |
| break; |
| case SImode: |
| case SFmode: |
| if (align_insn && !p->part_of_sequence_p) |
| { |
| for (lab = p->label; lab; lab = LABEL_REFS (lab)) |
| emit_label_before (lab, align_insn); |
| emit_insn_before (gen_consttable_4 (p->value, const0_rtx), |
| align_insn); |
| for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref)) |
| { |
| lab = XEXP (ref, 0); |
| emit_insn_before (gen_consttable_window_end (lab), |
| align_insn); |
| } |
| delete_insn (align_insn); |
| align_insn = NULL_RTX; |
| continue; |
| } |
| else |
| { |
| for (lab = p->label; lab; lab = LABEL_REFS (lab)) |
| scan = emit_label_after (lab, scan); |
| scan = emit_insn_after (gen_consttable_4 (p->value, |
| const0_rtx), scan); |
| need_align = ! need_align; |
| } |
| break; |
| case DFmode: |
| if (need_align) |
| { |
| scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan); |
| align_insn = scan; |
| need_align = 0; |
| } |
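| /* Fall through; DFmode values also use the 8-byte consttable entry.  */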
| case DImode: |
| for (lab = p->label; lab; lab = LABEL_REFS (lab)) |
| scan = emit_label_after (lab, scan); |
| scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx), |
| scan); |
| break; |
| default: |
| abort (); |
| break; |
| } |
| |
| if (p->mode != HImode) |
| { |
| for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref)) |
| { |
| lab = XEXP (ref, 0); |
| scan = emit_insn_after (gen_consttable_window_end (lab), |
| scan); |
| } |
| } |
| } |
| |
| pool_size = 0; |
| } |
| |
| for (i = 0; i < pool_size; i++) |
| { |
| pool_node *p = &pool_vector[i]; |
| |
| switch (p->mode) |
| { |
| case HImode: |
| break; |
| case SImode: |
| case SFmode: |
| if (need_align) |
| { |
| need_align = 0; |
| scan = emit_label_after (gen_label_rtx (), scan); |
| scan = emit_insn_after (gen_align_4 (), scan); |
| } |
| for (lab = p->label; lab; lab = LABEL_REFS (lab)) |
| scan = emit_label_after (lab, scan); |
| scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx), |
| scan); |
| break; |
| case DFmode: |
| case DImode: |
| if (need_align) |
| { |
| need_align = 0; |
| scan = emit_label_after (gen_label_rtx (), scan); |
| scan = emit_insn_after (gen_align_4 (), scan); |
| } |
| for (lab = p->label; lab; lab = LABEL_REFS (lab)) |
| scan = emit_label_after (lab, scan); |
| scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx), |
| scan); |
| break; |
| default: |
| abort (); |
| break; |
| } |
| |
| if (p->mode != HImode) |
| { |
| for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref)) |
| { |
| lab = XEXP (ref, 0); |
| scan = emit_insn_after (gen_consttable_window_end (lab), scan); |
| } |
| } |
| } |
| |
| scan = emit_insn_after (gen_consttable_end (), scan); |
| scan = emit_barrier_after (scan); |
| pool_size = 0; |
| pool_window_label = NULL_RTX; |
| pool_window_last = 0; |
| } |
| |
| /* Return nonzero if the constant would be an ok source for a
| mov.w instead of a mov.l, i.e. it fits in the signed 16-bit range
| that mov.w loads sign-extended.  */
| |
| static int |
| hi_const (rtx src) |
| { |
| return (GET_CODE (src) == CONST_INT |
| && INTVAL (src) >= -32768 |
| && INTVAL (src) <= 32767); |
| } |
| |
| /* Nonzero if the insn is a move instruction which needs to be fixed. */ |
| |
| /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
| CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we
| don't need to fix it if the input value is CONST_OK_FOR_I08.  */
| |
| static int |
| broken_move (rtx insn) |
| { |
| if (GET_CODE (insn) == INSN) |
| { |
| rtx pat = PATTERN (insn); |
| if (GET_CODE (pat) == PARALLEL) |
| pat = XVECEXP (pat, 0, 0); |
| if (GET_CODE (pat) == SET |
| /* We can load any 8 bit value if we don't care what the high |
| order bits end up as. */ |
| && GET_MODE (SET_DEST (pat)) != QImode |
| && (CONSTANT_P (SET_SRC (pat)) |
| /* Match mova_const. */ |
| || (GET_CODE (SET_SRC (pat)) == UNSPEC |
| && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA |
| && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST)) |
| && ! (TARGET_SH2E |
| && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE |
| && (fp_zero_operand (SET_SRC (pat)) |
| || fp_one_operand (SET_SRC (pat))) |
| /* ??? If this is a -m4 or -m4-single compilation, in general |
| we don't know the current setting of fpscr, so disable fldi. |
| There is an exception if this was a register-register move |
| before reload - and hence it was ascertained that we have |
| single precision setting - and in a post-reload optimization |
| we changed this to do a constant load. In that case |
| we don't have an r0 clobber, hence we must use fldi. */ |
| && (! TARGET_SH4 || TARGET_FMOVD |
| || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0)) |
| == SCRATCH)) |
| && GET_CODE (SET_DEST (pat)) == REG |
| && FP_REGISTER_P (REGNO (SET_DEST (pat)))) |
| && (GET_CODE (SET_SRC (pat)) != CONST_INT |
| || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat))))) |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| static int |
| mova_p (rtx insn) |
| { |
| return (GET_CODE (insn) == INSN |
| && GET_CODE (PATTERN (insn)) == SET |
| && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC |
| && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA |
| /* Don't match mova_const. */ |
| && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF); |
| } |
| |
| /* Fix up a mova from a switch that went out of range. */ |
| static void |
| fixup_mova (rtx mova) |
| { |
| if (! flag_pic) |
| { |
| SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); |
| INSN_CODE (mova) = -1; |
| } |
| else |
| { |
| rtx worker = mova; |
| rtx lab = gen_label_rtx (); |
| rtx wpat, wpat0, wpat1, wsrc, diff; |
| |
| do |
| { |
| worker = NEXT_INSN (worker); |
| if (! worker |
| || GET_CODE (worker) == CODE_LABEL |
| || GET_CODE (worker) == JUMP_INSN) |
| abort (); |
| } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1); |
| wpat = PATTERN (worker); |
| wpat0 = XVECEXP (wpat, 0, 0); |
| wpat1 = XVECEXP (wpat, 0, 1); |
| wsrc = SET_SRC (wpat0); |
| PATTERN (worker) = (gen_casesi_worker_2 |
| (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1), |
| XEXP (XVECEXP (wsrc, 0, 2), 0), lab, |
| XEXP (wpat1, 0))); |
| INSN_CODE (worker) = -1; |
| diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0), |
| gen_rtx_LABEL_REF (Pmode, lab)); |
| diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC); |
| SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff); |
| INSN_CODE (mova) = -1; |
| } |
| } |
| |
| /* Find the last barrier from insn FROM which is close enough to hold the |
| constant pool. If we can't find one, then create one near the end of |
| the range. */ |
| |
| static rtx |
| find_barrier (int num_mova, rtx mova, rtx from) |
| { |
| int count_si = 0; |
| int count_hi = 0; |
| int found_hi = 0; |
| int found_si = 0; |
| int found_di = 0; |
| int hi_align = 2; |
| int si_align = 2; |
| int leading_mova = num_mova; |
| rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0; |
| int si_limit; |
| int hi_limit; |
| |
| /* For HImode: range is 510, add 4 because pc counts from address of |
| second instruction after this one, subtract 2 for the jump instruction |
| that we may need to emit before the table, subtract 2 for the instruction |
| that fills the jump delay slot (in very rare cases, reorg will take an |
| instruction from after the constant pool or will leave the delay slot |
| empty). This gives 510. |
| |