| /* Output routines for GCC for Renesas / SuperH SH. |
| Copyright (C) 1993-2021 Free Software Foundation, Inc. |
| Contributed by Steve Chamberlain (sac@cygnus.com). |
| Improved by Jim Wilson (wilson@cygnus.com). |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include <sstream> |
| |
| #define IN_TARGET_CODE 1 |
| |
| #include "config.h" |
| #define INCLUDE_VECTOR |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "gimple.h" |
| #include "cfghooks.h" |
| #include "df.h" |
| #include "memmodel.h" |
| #include "tm_p.h" |
| #include "stringpool.h" |
| #include "attribs.h" |
| #include "optabs.h" |
| #include "emit-rtl.h" |
| #include "recog.h" |
| #include "diagnostic-core.h" |
| #include "alias.h" |
| #include "fold-const.h" |
| #include "stor-layout.h" |
| #include "calls.h" |
| #include "varasm.h" |
| #include "flags.h" |
| #include "explow.h" |
| #include "expr.h" |
| #include "reload.h" |
| #include "output.h" |
| #include "insn-attr.h" |
| #include "dwarf2.h" |
| #include "langhooks.h" |
| #include "cfgrtl.h" |
| #include "intl.h" |
| #include "sched-int.h" |
| #include "gimplify.h" |
| #include "tm-constrs.h" |
| #include "opts.h" |
| #include "tree-pass.h" |
| #include "context.h" |
| #include "builtins.h" |
| #include "rtl-iter.h" |
| #include "regs.h" |
| #include "toplev.h" |
| |
| /* This file should be included last. */ |
| #include "target-def.h" |
| |
| int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch; |
| |
| #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size) |
| #define GEN_MOV (*(gen_movsi)) |
| #define GEN_ADD3 (*(gen_addsi3)) |
| #define GEN_SUB3 (*(gen_subsi3)) |
| |
| /* Used to simplify the logic below. Find the attributes wherever |
| they may be. */ |
| #define SH_ATTRIBUTES(decl) \ |
| (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \ |
| : DECL_ATTRIBUTES (decl) \ |
| ? (DECL_ATTRIBUTES (decl)) \ |
| : TYPE_ATTRIBUTES (TREE_TYPE (decl)) |
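| |
| /* Illustrative example (not from the original sources): for a function |
| declared as |
| void __attribute__ ((interrupt_handler)) isr (void); |
| SH_ATTRIBUTES of its FUNCTION_DECL yields the DECL_ATTRIBUTES list when |
| that is non-NULL, and otherwise falls back to the attributes of the |
| function's type. */ |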
| |
| /* Set to true by expand_prologue() when the function is an |
| interrupt handler. */ |
| bool current_function_interrupt; |
| |
| tree sh_deferred_function_attributes; |
| tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes; |
| |
| /* Global variables for machine-dependent things. */ |
| |
| /* Which CPU we are scheduling for. */ |
| enum processor_type sh_cpu; |
| |
| /* Definitions used in ready queue reordering for first scheduling pass. */ |
| |
| /* Reg weight arrays for modes SFmode and SImode, indexed by insn UID. */ |
| static short *regmode_weight[2]; |
| |
| /* Total SFmode and SImode weights of scheduled insns. */ |
| static int curr_regmode_pressure[2]; |
| |
| /* Number of r0 life regions. */ |
| static int r0_life_regions; |
| |
| /* If true, skip cycles for Q -> R movement. */ |
| static int skip_cycles = 0; |
| |
| /* Cached value of can_issue_more. This is cached in sh_variable_issue hook |
| and returned from sh_reorder2. */ |
| static short cached_can_issue_more; |
| |
| /* Unique number for UNSPEC_BBR pattern. */ |
| static unsigned int unspec_bbr_uid = 1; |
| |
| /* Provides the class number of the smallest class containing |
| each reg number. */ |
| enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] = |
| { |
| R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| FP0_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS, |
| TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS, |
| DF_REGS, DF_REGS, DF_REGS, DF_REGS, |
| DF_REGS, DF_REGS, DF_REGS, DF_REGS, |
| NO_REGS, GENERAL_REGS, PR_REGS, T_REGS, |
| MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS, |
| GENERAL_REGS, GENERAL_REGS, |
| }; |
| |
| char sh_register_names[FIRST_PSEUDO_REGISTER] \ |
| [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER; |
| |
| char sh_additional_register_names[ADDREGNAMES_SIZE] \ |
| [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1] |
| = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER; |
| |
| int assembler_dialect; |
| |
| static void split_branches (rtx_insn *); |
| static int branch_dest (rtx); |
| static void print_slot (rtx_sequence *); |
| static rtx_code_label *add_constant (rtx, machine_mode, rtx); |
| static void dump_table (rtx_insn *, rtx_insn *); |
| static bool broken_move (rtx_insn *); |
| static bool mova_p (rtx_insn *); |
| static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *); |
| static bool noncall_uses_reg (rtx, rtx_insn *, rtx *); |
| static rtx_insn *gen_block_redirect (rtx_insn *, int, int); |
| static void sh_reorg (void); |
| static void sh_option_override (void); |
| static void sh_override_options_after_change (void); |
| static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool); |
| static rtx_insn* emit_frame_insn (rtx); |
| static rtx push (int); |
| static void pop (int); |
| static void push_regs (HARD_REG_SET* mask, bool interrupt_handler); |
| static int calc_live_regs (HARD_REG_SET *); |
| static HOST_WIDE_INT rounded_frame_size (int); |
| static bool sh_frame_pointer_required (void); |
| static void sh_emit_mode_set (int, int, int, HARD_REG_SET); |
| static int sh_mode_needed (int, rtx_insn *); |
| static int sh_mode_after (int, int, rtx_insn *); |
| static int sh_mode_entry (int); |
| static int sh_mode_exit (int); |
| static int sh_mode_priority (int entity, int n); |
| |
| static rtx mark_constant_pool_use (rtx); |
| static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, |
| int, bool *); |
| static tree sh_handle_resbank_handler_attribute (tree *, tree, |
| tree, int, bool *); |
| static tree sh2a_handle_function_vector_handler_attribute (tree *, tree, |
| tree, int, bool *); |
| static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *); |
| static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *); |
| static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *); |
| static void sh_print_operand (FILE *, rtx, int); |
| static void sh_print_operand_address (FILE *, machine_mode, rtx); |
| static bool sh_print_operand_punct_valid_p (unsigned char code); |
| static bool sh_asm_output_addr_const_extra (FILE *file, rtx x); |
| static void sh_output_function_epilogue (FILE *); |
| static void sh_insert_attributes (tree, tree *); |
| static const char *sh_check_pch_target_flags (int); |
| static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t); |
| static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int); |
| static int sh_issue_rate (void); |
| static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p); |
| static short find_set_regmode_weight (rtx, machine_mode); |
| static short find_insn_regmode_weight (rtx, machine_mode); |
| static void find_regmode_weight (basic_block, machine_mode); |
| static int find_r0_life_regions (basic_block); |
| static void sh_md_init_global (FILE *, int, int); |
| static void sh_md_finish_global (FILE *, int); |
| static int rank_for_reorder (const void *, const void *); |
| static void swap_reorder (rtx_insn **, int); |
| static void ready_reorder (rtx_insn **, int); |
| static bool high_pressure (machine_mode); |
| static int sh_reorder (FILE *, int, rtx_insn **, int *, int); |
| static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int); |
| static void sh_md_init (FILE *, int, int); |
| static int sh_variable_issue (FILE *, int, rtx_insn *, int); |
| |
| static bool sh_function_ok_for_sibcall (tree, tree); |
| |
| static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *); |
| static bool sh_ms_bitfield_layout_p (const_tree); |
| |
| static void sh_init_builtins (void); |
| static tree sh_builtin_decl (unsigned, bool); |
| static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int); |
| static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, |
| HOST_WIDE_INT, tree); |
| static void sh_file_start (void); |
| static bool sh_assemble_integer (rtx, unsigned int, int); |
| static bool flow_dependent_p (rtx_insn *, rtx_insn *); |
| static void flow_dependent_p_1 (rtx, const_rtx, void *); |
| static int shiftcosts (rtx); |
| static int and_xor_ior_costs (rtx, int); |
| static int addsubcosts (rtx); |
| static int multcosts (rtx); |
| static bool unspec_caller_rtx_p (rtx); |
| static bool sh_cannot_copy_insn_p (rtx_insn *); |
| static bool sh_cannot_force_const_mem_p (machine_mode, rtx); |
| static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool); |
| static int sh_address_cost (rtx, machine_mode, addr_space_t, bool); |
| static int sh_pr_n_sets (void); |
| static rtx sh_allocate_initial_value (rtx); |
| static reg_class_t sh_preferred_reload_class (rtx, reg_class_t); |
| static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t, |
| machine_mode, |
| struct secondary_reload_info *); |
| static bool sh_legitimate_address_p (machine_mode, rtx, bool); |
| static rtx sh_legitimize_address (rtx, rtx, machine_mode); |
| static rtx sh_delegitimize_address (rtx); |
| static bool sh_cannot_substitute_mem_equiv_p (rtx); |
| static bool sh_legitimize_address_displacement (rtx *, rtx *, |
| poly_int64, machine_mode); |
| static int scavenge_reg (HARD_REG_SET *s); |
| |
| static rtx sh_struct_value_rtx (tree, int); |
| static rtx sh_function_value (const_tree, const_tree, bool); |
| static bool sh_function_value_regno_p (const unsigned int); |
| static rtx sh_libcall_value (machine_mode, const_rtx); |
| static bool sh_return_in_memory (const_tree, const_tree); |
| static rtx sh_builtin_saveregs (void); |
| static void sh_setup_incoming_varargs (cumulative_args_t, |
| const function_arg_info &, int *, int); |
| static bool sh_strict_argument_naming (cumulative_args_t); |
| static bool sh_pretend_outgoing_varargs_named (cumulative_args_t); |
| static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *); |
| static tree sh_build_builtin_va_list (void); |
| static void sh_va_start (tree, rtx); |
| static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); |
| static bool sh_promote_prototypes (const_tree); |
| static machine_mode sh_promote_function_mode (const_tree type, |
| machine_mode, |
| int *punsignedp, |
| const_tree funtype, |
| int for_return); |
| static bool sh_pass_by_reference (cumulative_args_t, |
| const function_arg_info &); |
| static bool sh_callee_copies (cumulative_args_t, const function_arg_info &); |
| static int sh_arg_partial_bytes (cumulative_args_t, const function_arg_info &); |
| static void sh_function_arg_advance (cumulative_args_t, |
| const function_arg_info &); |
| static rtx sh_function_arg (cumulative_args_t, const function_arg_info &); |
| static int sh_dwarf_calling_convention (const_tree); |
| static void sh_encode_section_info (tree, rtx, int); |
| static bool sh2a_function_vector_p (tree); |
| static void sh_trampoline_init (rtx, tree, rtx); |
| static rtx sh_trampoline_adjust_address (rtx); |
| static void sh_conditional_register_usage (void); |
| static bool sh_legitimate_constant_p (machine_mode, rtx); |
| static int mov_insn_size (machine_mode, bool); |
| static int mov_insn_alignment_mask (machine_mode, bool); |
| static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT, |
| unsigned int, |
| enum by_pieces_operation, |
| bool); |
| static bool sequence_insn_p (rtx_insn *); |
| static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool); |
| static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&, |
| machine_mode, bool); |
| static bool sh_legitimate_combined_insn (rtx_insn* insn); |
| |
| static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2); |
| |
| static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED; |
| static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode); |
| static bool sh_hard_regno_mode_ok (unsigned int, machine_mode); |
| static bool sh_modes_tieable_p (machine_mode, machine_mode); |
| static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t); |
| |
| static const struct attribute_spec sh_attribute_table[] = |
| { |
| /* { name, min_len, max_len, decl_req, type_req, fn_type_req, |
| affects_type_identity, handler, exclude } */ |
| { "interrupt_handler", 0, 0, true, false, false, false, |
| sh_handle_interrupt_handler_attribute, NULL }, |
| { "sp_switch", 1, 1, true, false, false, false, |
| sh_handle_sp_switch_attribute, NULL }, |
| { "trap_exit", 1, 1, true, false, false, false, |
| sh_handle_trap_exit_attribute, NULL }, |
| { "renesas", 0, 0, false, true, false, false, |
| sh_handle_renesas_attribute, NULL }, |
| { "trapa_handler", 0, 0, true, false, false, false, |
| sh_handle_interrupt_handler_attribute, NULL }, |
| { "nosave_low_regs", 0, 0, true, false, false, false, |
| sh_handle_interrupt_handler_attribute, NULL }, |
| { "resbank", 0, 0, true, false, false, false, |
| sh_handle_resbank_handler_attribute, NULL }, |
| { "function_vector", 1, 1, true, false, false, false, |
| sh2a_handle_function_vector_handler_attribute, NULL }, |
| { NULL, 0, 0, false, false, false, false, NULL, NULL } |
| }; |
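| |
| /* Illustrative example (assumed user code, not part of this file): the |
| attributes above are attached to declarations in source code, e.g. |
| |
| void __attribute__ ((interrupt_handler)) isr (void); |
| void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"))) |
| isr2 (void); |
| void __attribute__ ((trap_exit (4))) isr3 (void); |
| |
| The handler functions named above validate each use. */ |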
| |
| /* Initialize the GCC target structure. */ |
| #undef TARGET_ATTRIBUTE_TABLE |
| #define TARGET_ATTRIBUTE_TABLE sh_attribute_table |
| |
| /* The next two are used for debug info when compiling with -gdwarf. */ |
| #undef TARGET_ASM_UNALIGNED_HI_OP |
| #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t" |
| #undef TARGET_ASM_UNALIGNED_SI_OP |
| #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t" |
| |
| #undef TARGET_OPTION_OVERRIDE |
| #define TARGET_OPTION_OVERRIDE sh_option_override |
| |
| #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE |
| #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \ |
| sh_override_options_after_change |
| |
| #undef TARGET_PRINT_OPERAND |
| #define TARGET_PRINT_OPERAND sh_print_operand |
| #undef TARGET_PRINT_OPERAND_ADDRESS |
| #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address |
| #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P |
| #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p |
| #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA |
| #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra |
| |
| #undef TARGET_ASM_FUNCTION_EPILOGUE |
| #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue |
| |
| #undef TARGET_ASM_OUTPUT_MI_THUNK |
| #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk |
| |
| #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK |
| #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ |
| hook_bool_const_tree_hwi_hwi_const_tree_true |
| |
| #undef TARGET_ASM_FILE_START |
| #define TARGET_ASM_FILE_START sh_file_start |
| #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE |
| #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true |
| |
| #undef TARGET_ASM_INTEGER |
| #define TARGET_ASM_INTEGER sh_assemble_integer |
| |
| #undef TARGET_REGISTER_MOVE_COST |
| #define TARGET_REGISTER_MOVE_COST sh_register_move_cost |
| |
| #undef TARGET_INSERT_ATTRIBUTES |
| #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes |
| |
| #undef TARGET_SCHED_ADJUST_COST |
| #define TARGET_SCHED_ADJUST_COST sh_adjust_cost |
| |
| #undef TARGET_SCHED_ISSUE_RATE |
| #define TARGET_SCHED_ISSUE_RATE sh_issue_rate |
| |
| /* The next 5 hooks have been implemented for reenabling sched1. With the |
| help of these macros we are limiting the movement of insns in sched1 to |
| reduce the register pressure. The overall idea is to keep count of SImode |
| and SFmode regs required by already scheduled insns. When these counts |
| cross some threshold values, give priority to insns that free registers. |
| The insn that frees registers is most likely to be the insn with lowest |
| LUID (original insn order); but such an insn might be in the stalled |
| queue (Q) instead of the ready queue (R). To solve this, we skip cycles |
| up to a max of 8 cycles so that such insns may move from Q -> R. |
| |
| The hooks are described below: |
| |
| TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic |
| scheduler; it is called inside the sched_init function just after |
| find_insn_reg_weights function call. It is used to calculate the SImode |
| and SFmode weights of insns of basic blocks; much like what |
| find_insn_reg_weights does. |
| TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook. |
| |
| TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is |
| indicated by TARGET_SCHED_REORDER2; doing this may move insns from |
| (Q)->(R). |
| |
| TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is |
| high, reorder the ready queue so that the insn with lowest LUID will be |
| issued next. |
| |
| TARGET_SCHED_REORDER2: If the register pressure is high, indicate to |
| TARGET_SCHED_DFA_NEW_CYCLE to skip cycles. |
| |
| TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it |
| can be returned from TARGET_SCHED_REORDER2. |
| |
| TARGET_SCHED_INIT: Reset the register pressure counting variables. */ |
| |
| #undef TARGET_SCHED_DFA_NEW_CYCLE |
| #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle |
| |
| #undef TARGET_SCHED_INIT_GLOBAL |
| #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global |
| |
| #undef TARGET_SCHED_FINISH_GLOBAL |
| #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global |
| |
| #undef TARGET_SCHED_VARIABLE_ISSUE |
| #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue |
| |
| #undef TARGET_SCHED_REORDER |
| #define TARGET_SCHED_REORDER sh_reorder |
| |
| #undef TARGET_SCHED_REORDER2 |
| #define TARGET_SCHED_REORDER2 sh_reorder2 |
| |
| #undef TARGET_SCHED_INIT |
| #define TARGET_SCHED_INIT sh_md_init |
| |
| #undef TARGET_DELEGITIMIZE_ADDRESS |
| #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address |
| |
| #undef TARGET_LEGITIMIZE_ADDRESS |
| #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address |
| |
| #undef TARGET_CAN_FOLLOW_JUMP |
| #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump |
| |
| #undef TARGET_MS_BITFIELD_LAYOUT_P |
| #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p |
| |
| #undef TARGET_INIT_BUILTINS |
| #define TARGET_INIT_BUILTINS sh_init_builtins |
| #undef TARGET_BUILTIN_DECL |
| #define TARGET_BUILTIN_DECL sh_builtin_decl |
| #undef TARGET_EXPAND_BUILTIN |
| #define TARGET_EXPAND_BUILTIN sh_expand_builtin |
| |
| #undef TARGET_FUNCTION_OK_FOR_SIBCALL |
| #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall |
| |
| #undef TARGET_CANNOT_COPY_INSN_P |
| #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p |
| #undef TARGET_RTX_COSTS |
| #define TARGET_RTX_COSTS sh_rtx_costs |
| #undef TARGET_ADDRESS_COST |
| #define TARGET_ADDRESS_COST sh_address_cost |
| #undef TARGET_ALLOCATE_INITIAL_VALUE |
| #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value |
| |
| #undef TARGET_MACHINE_DEPENDENT_REORG |
| #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg |
| |
| #undef TARGET_DWARF_REGISTER_SPAN |
| #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span |
| |
| #ifdef HAVE_AS_TLS |
| #undef TARGET_HAVE_TLS |
| #define TARGET_HAVE_TLS true |
| #endif |
| |
| #undef TARGET_PROMOTE_PROTOTYPES |
| #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes |
| #undef TARGET_PROMOTE_FUNCTION_MODE |
| #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode |
| |
| #undef TARGET_FUNCTION_VALUE |
| #define TARGET_FUNCTION_VALUE sh_function_value |
| #undef TARGET_FUNCTION_VALUE_REGNO_P |
| #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p |
| #undef TARGET_LIBCALL_VALUE |
| #define TARGET_LIBCALL_VALUE sh_libcall_value |
| #undef TARGET_STRUCT_VALUE_RTX |
| #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx |
| #undef TARGET_RETURN_IN_MEMORY |
| #define TARGET_RETURN_IN_MEMORY sh_return_in_memory |
| |
| #undef TARGET_EXPAND_BUILTIN_SAVEREGS |
| #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs |
| #undef TARGET_SETUP_INCOMING_VARARGS |
| #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs |
| #undef TARGET_STRICT_ARGUMENT_NAMING |
| #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming |
| #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED |
| #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named |
| #undef TARGET_MUST_PASS_IN_STACK |
| #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size |
| #undef TARGET_PASS_BY_REFERENCE |
| #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference |
| #undef TARGET_CALLEE_COPIES |
| #define TARGET_CALLEE_COPIES sh_callee_copies |
| #undef TARGET_ARG_PARTIAL_BYTES |
| #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes |
| #undef TARGET_FUNCTION_ARG |
| #define TARGET_FUNCTION_ARG sh_function_arg |
| #undef TARGET_FUNCTION_ARG_ADVANCE |
| #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance |
| |
| #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV |
| #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv |
| |
| #undef TARGET_BUILD_BUILTIN_VA_LIST |
| #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list |
| #undef TARGET_EXPAND_BUILTIN_VA_START |
| #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start |
| #undef TARGET_GIMPLIFY_VA_ARG_EXPR |
| #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr |
| |
| #undef TARGET_VECTOR_MODE_SUPPORTED_P |
| #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p |
| |
| #undef TARGET_CHECK_PCH_TARGET_FLAGS |
| #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags |
| |
| #undef TARGET_DWARF_CALLING_CONVENTION |
| #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention |
| |
| #undef TARGET_FRAME_POINTER_REQUIRED |
| #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required |
| |
| #undef TARGET_MODE_EMIT |
| #define TARGET_MODE_EMIT sh_emit_mode_set |
| |
| #undef TARGET_MODE_NEEDED |
| #define TARGET_MODE_NEEDED sh_mode_needed |
| |
| #undef TARGET_MODE_AFTER |
| #define TARGET_MODE_AFTER sh_mode_after |
| |
| #undef TARGET_MODE_ENTRY |
| #define TARGET_MODE_ENTRY sh_mode_entry |
| |
| #undef TARGET_MODE_EXIT |
| #define TARGET_MODE_EXIT sh_mode_exit |
| |
| #undef TARGET_MODE_PRIORITY |
| #define TARGET_MODE_PRIORITY sh_mode_priority |
| |
| /* Return regmode weight for insn. */ |
| #define INSN_REGMODE_WEIGHT(INSN, MODE)\ |
| regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)] |
| |
| /* Return current register pressure for regmode. */ |
| #define CURR_REGMODE_PRESSURE(MODE)\ |
| curr_regmode_pressure[((MODE) == SImode) ? 0 : 1] |
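| |
| /* Illustrative sketch (assumed, matching the scheme described above): |
| the scheduling hooks maintain these counters roughly as |
| CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode); |
| and high_pressure () compares the running totals against thresholds. */ |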
| |
| #undef TARGET_ENCODE_SECTION_INFO |
| #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info |
| |
| #undef TARGET_LRA_P |
| #define TARGET_LRA_P sh_lra_p |
| |
| #undef TARGET_SECONDARY_RELOAD |
| #define TARGET_SECONDARY_RELOAD sh_secondary_reload |
| |
| #undef TARGET_PREFERRED_RELOAD_CLASS |
| #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class |
| |
| #undef TARGET_CONDITIONAL_REGISTER_USAGE |
| #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage |
| |
| #undef TARGET_LEGITIMATE_ADDRESS_P |
| #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p |
| |
| #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P |
| #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p |
| |
| #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT |
| #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \ |
| sh_legitimize_address_displacement |
| |
| #undef TARGET_TRAMPOLINE_INIT |
| #define TARGET_TRAMPOLINE_INIT sh_trampoline_init |
| #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS |
| #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address |
| |
| #undef TARGET_LEGITIMATE_CONSTANT_P |
| #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p |
| |
| #undef TARGET_CANONICALIZE_COMPARISON |
| #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison |
| |
| #undef TARGET_LEGITIMATE_COMBINED_INSN |
| #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn |
| |
| #undef TARGET_FIXED_CONDITION_CODE_REGS |
| #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs |
| |
| #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P |
| #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \ |
| sh_use_by_pieces_infrastructure_p |
| |
| /* Machine-specific symbol_ref flags. */ |
| #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0) |
| |
| /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value |
| is used by optabs.c atomic op expansion code as well as in sync.md. */ |
| #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL |
| #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80 |
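| |
| /* Illustrative example (assumed): with this trueval a byte test-and-set |
| can expand to a single |
| tas.b @r4 ! T = (mem == 0); mem |= 0x80 |
| so the "set" value that other observers read back is 0x80, not 1. */ |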
| |
| #undef TARGET_CANNOT_FORCE_CONST_MEM |
| #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p |
| |
| #undef TARGET_HARD_REGNO_NREGS |
| #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs |
| #undef TARGET_HARD_REGNO_MODE_OK |
| #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok |
| |
| #undef TARGET_MODES_TIEABLE_P |
| #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p |
| |
| #undef TARGET_CAN_CHANGE_MODE_CLASS |
| #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class |
| |
| #undef TARGET_CONSTANT_ALIGNMENT |
| #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings |
| |
| #undef TARGET_HAVE_SPECULATION_SAFE_VALUE |
| #define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed |
| |
| struct gcc_target targetm = TARGET_INITIALIZER; |
| |
| |
| /* Information on the currently selected atomic model. |
| This is initialized in sh_option_override. */ |
| static sh_atomic_model selected_atomic_model_; |
| |
| const sh_atomic_model& |
| selected_atomic_model (void) |
| { |
| return selected_atomic_model_; |
| } |
| |
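| /* Illustrative examples (assumed, derived from the parsing code below): |
| accepted option strings look like |
| -matomic-model=soft-gusa |
| -matomic-model=soft-tcb,gbr-offset=4 |
| -matomic-model=hard-llcs,strict |
| where the first token names the model and the remaining comma separated |
| tokens are parameters. */ |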
| static sh_atomic_model |
| parse_validate_atomic_model_option (const char* str) |
| { |
| const char* model_names[sh_atomic_model::num_models]; |
| model_names[sh_atomic_model::none] = "none"; |
| model_names[sh_atomic_model::soft_gusa] = "soft-gusa"; |
| model_names[sh_atomic_model::hard_llcs] = "hard-llcs"; |
| model_names[sh_atomic_model::soft_tcb] = "soft-tcb"; |
| model_names[sh_atomic_model::soft_imask] = "soft-imask"; |
| |
| const char* model_cdef_names[sh_atomic_model::num_models]; |
| model_cdef_names[sh_atomic_model::none] = "NONE"; |
| model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA"; |
| model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS"; |
| model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB"; |
| model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK"; |
| |
| sh_atomic_model ret; |
| ret.type = sh_atomic_model::none; |
| ret.name = model_names[sh_atomic_model::none]; |
| ret.cdef_name = model_cdef_names[sh_atomic_model::none]; |
| ret.strict = false; |
| ret.tcb_gbr_offset = -1; |
| |
| /* Handle empty string as 'none'. */ |
| if (str == NULL || *str == '\0') |
| return ret; |
| |
| #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0) |
| |
| std::vector<std::string> tokens; |
| for (std::stringstream ss (str); ss.good (); ) |
| { |
| tokens.push_back (std::string ()); |
| std::getline (ss, tokens.back (), ','); |
| } |
| |
| if (tokens.empty ()) |
| err_ret ("invalid atomic model option"); |
| |
| /* The first token must be the atomic model name. */ |
| { |
| for (size_t i = 0; i < sh_atomic_model::num_models; ++i) |
| if (tokens.front () == model_names[i]) |
| { |
| ret.type = (sh_atomic_model::enum_type)i; |
| ret.name = model_names[i]; |
| ret.cdef_name = model_cdef_names[i]; |
| goto got_mode_name; |
| } |
| |
| err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ()); |
| got_mode_name:; |
| } |
| |
| /* Go through the remaining tokens. */ |
| for (size_t i = 1; i < tokens.size (); ++i) |
| { |
| if (tokens[i] == "strict") |
| ret.strict = true; |
| else if (!tokens[i].compare (0, strlen ("gbr-offset="), "gbr-offset=")) |
| { |
| std::string offset_str = tokens[i].substr (strlen ("gbr-offset=")); |
| ret.tcb_gbr_offset = integral_argument (offset_str.c_str ()); |
| if (offset_str.empty () || ret.tcb_gbr_offset == -1) |
| err_ret ("could not parse gbr-offset value \"%s\" in atomic model " |
| "option", offset_str.c_str ()); |
| } |
| else |
| err_ret ("unknown parameter \"%s\" in atomic model option", |
| tokens[i].c_str ()); |
| } |
| |
| /* Check that the selection makes sense. */ |
| if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3) |
| err_ret ("atomic model %s is only available on SH3 and SH4 targets", |
| ret.name); |
| |
| if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A) |
| err_ret ("atomic model %s is only available on SH4A targets", ret.name); |
| |
| if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1) |
| err_ret ("atomic model %s requires gbr-offset parameter", ret.name); |
| |
| if (ret.type == sh_atomic_model::soft_tcb |
| && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020 |
| || (ret.tcb_gbr_offset & 3) != 0)) |
| err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be " |
| "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset, |
| ret.name); |
| |
| if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE) |
| err_ret ("cannot use atomic model %s in user mode", ret.name); |
| |
| return ret; |
| |
| #undef err_ret |
| } |
| |
| /* Register SH specific RTL passes. */ |
| extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns, |
| const char* name); |
| extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx, |
| const char* name); |
| static void |
| register_sh_passes (void) |
| { |
| /* Running the sh_treg_combine pass after ce1 generates better code when |
| comparisons are combined and reg-reg moves are introduced, because |
| reg-reg moves will be eliminated afterwards. However, there are quite |
| a few cases where combine will be unable to fold comparison related |
| insns, so for now don't do it. |
| register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"), |
| PASS_POS_INSERT_AFTER, "ce1", 1); |
| */ |
| |
| /* Run sh_treg_combine pass after combine but before register allocation. */ |
| register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"), |
| PASS_POS_INSERT_AFTER, "split1", 1); |
| |
| /* Run sh_treg_combine pass after register allocation and basic block |
| reordering as this sometimes creates new opportunities. */ |
| register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"), |
| PASS_POS_INSERT_AFTER, "split3", 1); |
| |
| /* Optimize sett and clrt insns, by e.g. removing them if the T bit value |
| is known after a conditional branch. |
| This must be done after basic blocks and branch conditions have |
| stabilized and won't be changed by further passes. */ |
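| /* Illustrative sketch (assumed): in a block that can only be reached |
| through a branch taken when T == 1, a subsequent "sett" insn is |
| redundant and can be removed by this pass. */ |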
| register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"), |
| PASS_POS_INSERT_BEFORE, "sched2", 1); |
| } |
| |
| /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override |
| various options, and do some machine dependent initialization. */ |
| static void |
| sh_option_override (void) |
| { |
| int regno; |
| |
| SUBTARGET_OVERRIDE_OPTIONS; |
| |
| sh_cpu = PROCESSOR_SH1; |
| assembler_dialect = 0; |
| if (TARGET_SH2) |
| sh_cpu = PROCESSOR_SH2; |
| if (TARGET_SH2E) |
| sh_cpu = PROCESSOR_SH2E; |
| if (TARGET_SH2A) |
| sh_cpu = PROCESSOR_SH2A; |
| if (TARGET_SH3) |
| sh_cpu = PROCESSOR_SH3; |
| if (TARGET_SH3E) |
| sh_cpu = PROCESSOR_SH3E; |
| if (TARGET_SH4) |
| { |
| assembler_dialect = 1; |
| sh_cpu = PROCESSOR_SH4; |
| } |
| if (TARGET_SH4A) |
| { |
| assembler_dialect = 1; |
| sh_cpu = PROCESSOR_SH4A; |
| } |
| |
| /* User/privileged mode is supported only on SH3* and SH4*. |
| Disable it for everything else. */ |
| if (!TARGET_SH3 && TARGET_USERMODE) |
| TARGET_USERMODE = false; |
| |
| if (! strcmp (sh_div_str, "call-div1")) |
| sh_div_strategy = SH_DIV_CALL_DIV1; |
| else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY) |
| sh_div_strategy = SH_DIV_CALL_FP; |
| else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT) |
| sh_div_strategy = SH_DIV_CALL_TABLE; |
| else |
| { |
| /* Pick one that makes most sense for the target in general. |
| It is of little use to pick different functions depending on -Os, |
| since we would then end up with two different functions when some of |
| the code is compiled for size, and some for speed. */ |
| |
| /* SH4 tends to emphasize speed. */ |
| if (TARGET_HARD_SH4) |
| sh_div_strategy = SH_DIV_CALL_TABLE; |
| /* These have their own way of doing things. */ |
| else if (TARGET_SH2A) |
| sh_div_strategy = SH_DIV_INTRINSIC; |
| /* SH1 .. SH3 cores often go into small-footprint systems, so |
| default to the smallest implementation available. */ |
| else |
| sh_div_strategy = SH_DIV_CALL_DIV1; |
| } |
| |
| if (sh_divsi3_libfunc[0]) |
| ; /* User supplied - leave it alone. */ |
| else if (TARGET_DIVIDE_CALL_FP) |
| sh_divsi3_libfunc = "__sdivsi3_i4"; |
| else if (TARGET_DIVIDE_CALL_TABLE) |
| sh_divsi3_libfunc = "__sdivsi3_i4i"; |
| else |
| sh_divsi3_libfunc = "__sdivsi3"; |
| |
| if (sh_branch_cost == -1) |
| { |
| /* The SH1 does not have delay slots, hence we get a pipeline stall |
| at every branch. The SH4 is superscalar, so the single delay slot |
| is not sufficient to keep both pipelines filled. |
| In any case, set the default branch cost to '2', as it results in |
| slightly smaller code overall and also enables some if conversions |
| that are required for matching special T bit related insns. */ |
| sh_branch_cost = 2; |
| } |
| |
| /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */ |
| if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4) |
| TARGET_ZDCBRANCH = 1; |
| |
| /* FDPIC code is a special form of PIC, and the vast majority of code |
| generation constraints that apply to PIC also apply to FDPIC, so we |
| set flag_pic to avoid the need to check TARGET_FDPIC everywhere |
| flag_pic is checked. */ |
| if (TARGET_FDPIC && !flag_pic) |
| flag_pic = 2; |
| |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) |
| if (! VALID_REGISTER_P (regno)) |
| sh_register_names[regno][0] = '\0'; |
| |
| for (regno = 0; regno < ADDREGNAMES_SIZE; regno++) |
| if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno))) |
| sh_additional_register_names[regno][0] = '\0'; |
| |
| if (flag_pic && ! TARGET_PREFERGOT) |
| flag_no_function_cse = 1; |
| |
| if (targetm.small_register_classes_for_mode_p (VOIDmode)) |
| { |
| /* Never run scheduling before reload, since that can |
| break global alloc, and generates slower code anyway due |
| to the pressure on R0. */ |
| /* Enable sched1 for SH4 if the user explicitly requests it. |
| When sched1 is enabled, the ready queue will be reordered by |
| the target hooks if pressure is high. We cannot do this for |
| PIC, SH3 and lower as they give spill failures for R0. */ |
| if (!TARGET_HARD_SH4 || flag_pic) |
| flag_schedule_insns = 0; |
| /* ??? Current exception handling places basic block boundaries |
| after call_insns. This causes high pressure on R0 and gives |
| spill failures for R0 in reload. See PR 22553 and the thread |
| on gcc-patches |
| <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */ |
| else if (flag_exceptions) |
| { |
| if (flag_schedule_insns && global_options_set.x_flag_schedule_insns) |
| warning (0, "ignoring %<-fschedule-insns%> because of exception " |
| "handling bug"); |
| flag_schedule_insns = 0; |
| } |
| else if (flag_schedule_insns |
| && !global_options_set.x_flag_schedule_insns) |
| flag_schedule_insns = 0; |
| } |
| |
| /* Unwind info is not correct around the CFG unless either a frame |
| pointer is present or M_A_O_A is set. Fixing this requires rewriting |
| unwind info generation to be aware of the CFG and propagating states |
| around edges. */ |
| if ((flag_unwind_tables || flag_asynchronous_unwind_tables |
| || flag_exceptions || flag_non_call_exceptions) |
| && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS) |
| { |
| warning (0, "unwind tables currently require either a frame pointer " |
| "or %<-maccumulate-outgoing-args%> for correctness"); |
| TARGET_ACCUMULATE_OUTGOING_ARGS = 1; |
| } |
| |
| if (flag_unsafe_math_optimizations) |
| { |
| /* Enable fsca insn for SH4A if not otherwise specified by the user. */ |
| if (global_options_set.x_TARGET_FSCA == 0 |
| && (TARGET_SH4A_FP || TARGET_FPU_SH4_300)) |
| TARGET_FSCA = 1; |
| |
| /* Enable fsrra insn for SH4A if not otherwise specified by the user. */ |
| if (global_options_set.x_TARGET_FSRRA == 0 |
| && (TARGET_SH4A_FP || TARGET_FPU_SH4_300)) |
| TARGET_FSRRA = 1; |
| } |
| |
| /* Allow the fsrra insn only if both -funsafe-math-optimizations and |
| -ffinite-math-only are enabled. */ |
| TARGET_FSRRA = TARGET_FSRRA |
| && flag_unsafe_math_optimizations |
| && flag_finite_math_only; |
| |
| /* If the -mieee option was not explicitly set by the user, turn it on |
| unless -ffinite-math-only was specified. See also PR 33135. */ |
| if (! global_options_set.x_TARGET_IEEE) |
| TARGET_IEEE = ! flag_finite_math_only; |
| |
| if (sh_fixed_range_str) |
| sh_fix_range (sh_fixed_range_str); |
| |
| /* This target defaults to strict volatile bitfields. */ |
| if (flag_strict_volatile_bitfields < 0 && abi_version_at_least (2)) |
| flag_strict_volatile_bitfields = 1; |
| |
| sh_override_options_after_change (); |
| |
| /* Parse atomic model option and make sure it is valid for the current |
| target CPU. */ |
| selected_atomic_model_ |
| = parse_validate_atomic_model_option (sh_atomic_model_str); |
| |
| register_sh_passes (); |
| } |
| |
| /* Implement targetm.override_options_after_change. */ |
| |
| static void |
| sh_override_options_after_change (void) |
| { |
| /* Adjust loop, jump and function alignment values (in bytes), if those |
| were not specified by the user using -falign-loops, -falign-jumps |
| and -falign-functions options. |
| 32 bit alignment is better for speed, because instructions can be |
| fetched as a pair from a longword boundary. For size use 16 bit |
| alignment to get more compact code. |
| Aligning all jumps increases the code size, even if it might |
| result in slightly faster code. Thus, it is set to the smallest |
| alignment possible if not specified by the user. */ |
| if (flag_align_loops && !str_align_loops) |
| str_align_loops = optimize_size ? "2" : "4"; |
| |
| /* Parse values so that we can compare for current value. */ |
| parse_alignment_opts (); |
| if (flag_align_jumps && !str_align_jumps) |
| str_align_jumps = "2"; |
| else if (align_jumps.levels[0].get_value () < 2) |
| str_align_jumps = "2"; |
| |
| if (flag_align_functions && !str_align_functions) |
| str_align_functions = optimize_size ? "2" : "4"; |
| |
| /* The linker relaxation code breaks when a function contains |
| alignments that are larger than that at the start of a |
| compilation unit. */ |
| if (TARGET_RELAX) |
| { |
| /* Parse values so that we can compare for current value. */ |
| parse_alignment_opts (); |
| int min_align = MAX (align_loops.levels[0].get_value (), |
| align_jumps.levels[0].get_value ()); |
| |
| /* Also take possible .long constants / mova tables into account. */ |
| if (min_align < 4) |
| min_align = 4; |
| if (align_functions.levels[0].get_value () < min_align) |
| { |
| char *r = XNEWVEC (char, 16); |
| sprintf (r, "%d", min_align); |
| str_align_functions = r; |
| } |
| } |
| } |
| |
| /* Print the operand address in x to the stream. */ |
| static void |
| sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x) |
| { |
| switch (GET_CODE (x)) |
| { |
| case REG: |
| case SUBREG: |
| fprintf (stream, "@%s", reg_names[true_regnum (x)]); |
| break; |
| |
| case PLUS: |
| { |
| rtx base = XEXP (x, 0); |
| rtx index = XEXP (x, 1); |
| |
| switch (GET_CODE (index)) |
| { |
| case CONST_INT: |
| fprintf (stream, "@(%d,%s)", (int) INTVAL (index), |
| reg_names[true_regnum (base)]); |
| break; |
| |
| case REG: |
| case SUBREG: |
| { |
| int base_num = true_regnum (base); |
| int index_num = true_regnum (index); |
| |
| /* If base or index is R0, make sure that it comes first. |
| Usually one of them will be R0, but the order might be wrong. |
| If neither base nor index are R0 it's an error and we just |
| pass it on to the assembler. This avoids silent wrong code |
| bugs. */ |
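| /* E.g. the reg+reg form is written "@(r0,r5)" (illustrative); with the |
| operands the other way around the assembler will reject it. */ |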
| if (base_num == 0 && index_num != 0) |
| std::swap (base_num, index_num); |
| |
| fprintf (stream, "@(%s,%s)", reg_names[index_num], |
| reg_names[base_num]); |
| break; |
| } |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| break; |
| |
| case PRE_DEC: |
| fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]); |
| break; |
| |
| case POST_INC: |
| fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]); |
| break; |
| |
| default: |
| x = mark_constant_pool_use (x); |
| output_addr_const (stream, x); |
| break; |
| } |
| } |
| |
| /* Print operand x (an rtx) in assembler syntax to file stream |
| according to modifier code. |
| |
| '.' print a .s if insn needs delay slot |
| ',' print LOCAL_LABEL_PREFIX |
| '@' print trapa, rte or rts depending on the function's interrupt attributes |
| '#' output a nop if there is nothing to put in the delay slot |
| ''' print likelihood suffix (/u for unlikely). |
| '>' print branch target if -fverbose-asm |
| 'O' print a constant without the # |
| 'R' print the LSW of a dp value - changes if in little endian |
| 'S' print the MSW of a dp value - changes if in little endian |
| 'T' print the next word of a dp value - same as 'R' in big endian mode. |
| 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM. |
| 'N' print 'r63' if the operand is (const_int 0). |
| 'd' print a V2SF reg as dN instead of fpN. |
| 'm' print a pair `base,offset' or `base,index', for LD and ST. |
| 'U' Likewise for {LD,ST}{HI,LO}. |
| 'V' print the position of a single bit set. |
| 'W' print the position of a single bit cleared. |
| 't' print a memory address which is a register. |
| 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value. |
| 'o' output an operator. */ |
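| /* For instance (illustrative, not part of the original list): with an |
| SImode MEM operand 0, "mov%M0" in an output template prints "mov.l". */ |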
| static void |
| sh_print_operand (FILE *stream, rtx x, int code) |
| { |
| int regno; |
| machine_mode mode; |
| |
| switch (code) |
| { |
| tree trapa_attr; |
| |
| case '.': |
| if (final_sequence |
| && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0)) |
| && get_attr_length (final_sequence->insn (1))) |
| fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s"); |
| break; |
| case ',': |
| fprintf (stream, "%s", LOCAL_LABEL_PREFIX); |
| break; |
| case '@': |
| trapa_attr = lookup_attribute ("trap_exit", |
| DECL_ATTRIBUTES (current_function_decl)); |
| if (trapa_attr) |
| fprintf (stream, "trapa #%ld", |
| (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr)))); |
| else if (sh_cfun_interrupt_handler_p ()) |
| { |
| if (sh_cfun_resbank_handler_p ()) |
| fprintf (stream, "resbank\n"); |
| fprintf (stream, "rte"); |
| } |
| else |
| fprintf (stream, "rts"); |
| break; |
| case '#': |
| /* Output a nop if there's nothing in the delay slot. */ |
| if (dbr_sequence_length () == 0) |
| fprintf (stream, "\n\tnop"); |
| break; |
| case '\'': |
| { |
| rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0); |
| |
| if (note |
| && profile_probability::from_reg_br_prob_note (XINT (note, 0)) |
| < profile_probability::even ()) |
| fputs ("/u", stream); |
| break; |
| } |
| case '>': |
| if (flag_verbose_asm && JUMP_LABEL (current_output_insn)) |
| { |
| fputs ("\t! target: ", stream); |
| output_addr_const (stream, JUMP_LABEL (current_output_insn)); |
| } |
| break; |
| case 'O': |
| x = mark_constant_pool_use (x); |
| output_addr_const (stream, x); |
| break; |
| /* N.B.: %R / %S / %T adjust memory addresses by four. |
| While they can be used to access 64 bit parts of a larger value |
| held in general purpose registers, that won't work with memory - |
| neither for fp registers, since the frxx names are used. */ |
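| /* For example (illustrative): a DImode value in r4/r5 on a little endian |
| target has its least significant word in r4, so %R prints "r4" and %S |
| prints "r5"; on big endian the two are swapped. */ |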
| case 'R': |
| if (REG_P (x) || GET_CODE (x) == SUBREG) |
| { |
| regno = true_regnum (x); |
| regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET; |
| fputs (reg_names[regno], (stream)); |
| } |
| else if (MEM_P (x)) |
| { |
| x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET); |
| sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0)); |
| } |
| else |
| { |
| rtx sub = NULL_RTX; |
| |
| mode = GET_MODE (x); |
| if (mode == VOIDmode) |
| mode = DImode; |
| if (GET_MODE_SIZE (mode) >= 8) |
| sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET); |
| if (sub) |
| sh_print_operand (stream, sub, 0); |
| else |
| output_operand_lossage ("invalid operand to %%R"); |
| } |
| break; |
| case 'S': |
| if (REG_P (x) || GET_CODE (x) == SUBREG) |
| { |
| regno = true_regnum (x); |
| regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET; |
| fputs (reg_names[regno], (stream)); |
| } |
| else if (MEM_P (x)) |
| { |
| x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET); |
| sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0)); |
| } |
| else |
| { |
| rtx sub = NULL_RTX; |
| |
| mode = GET_MODE (x); |
| if (mode == VOIDmode) |
| mode = DImode; |
| if (GET_MODE_SIZE (mode) >= 8) |
| sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET); |
| if (sub) |
| sh_print_operand (stream, sub, 0); |
| else |
| output_operand_lossage ("invalid operand to %%S"); |
| } |
| break; |
| case 'T': |
| /* Next word of a double. */ |
| switch (GET_CODE (x)) |
| { |
| case REG: |
| fputs (reg_names[REGNO (x) + 1], (stream)); |
| break; |
| case MEM: |
| { |
| machine_mode mode = GET_MODE (x); |
| if (GET_CODE (XEXP (x, 0)) != PRE_DEC |
| && GET_CODE (XEXP (x, 0)) != POST_INC) |
| x = adjust_address (x, SImode, 4); |
| sh_print_operand_address (stream, mode, XEXP (x, 0)); |
| } |
| break; |
| default: |
| break; |
| } |
| break; |
| |
| case 't': |
| gcc_assert (MEM_P (x)); |
| x = XEXP (x, 0); |
| switch (GET_CODE (x)) |
| { |
| case REG: |
| case SUBREG: |
| sh_print_operand (stream, x, 0); |
| break; |
| default: |
| break; |
| } |
| break; |
| |
| case 'o': |
| switch (GET_CODE (x)) |
| { |
| case PLUS: fputs ("add", stream); break; |
| case MINUS: fputs ("sub", stream); break; |
| case MULT: fputs ("mul", stream); break; |
| case DIV: fputs ("div", stream); break; |
| case EQ: fputs ("eq", stream); break; |
| case NE: fputs ("ne", stream); break; |
| case GT: case LT: fputs ("gt", stream); break; |
| case GE: case LE: fputs ("ge", stream); break; |
| case GTU: case LTU: fputs ("gtu", stream); break; |
| case GEU: case LEU: fputs ("geu", stream); break; |
| default: |
| break; |
| } |
| break; |
| case 'M': |
| if (MEM_P (x)) |
| { |
| switch (GET_MODE (x)) |
| { |
| case E_QImode: fputs (".b", stream); break; |
| case E_HImode: fputs (".w", stream); break; |
| case E_SImode: fputs (".l", stream); break; |
| case E_SFmode: fputs (".s", stream); break; |
| case E_DFmode: fputs (".d", stream); break; |
| default: gcc_unreachable (); |
| } |
| } |
| break; |
| |
| case 'm': |
| gcc_assert (MEM_P (x)); |
| x = XEXP (x, 0); |
| /* Fall through. */ |
| case 'U': |
| switch (GET_CODE (x)) |
| { |
| case REG: |
| case SUBREG: |
| sh_print_operand (stream, x, 0); |
| fputs (", 0", stream); |
| break; |
| |
| case PLUS: |
| sh_print_operand (stream, XEXP (x, 0), 0); |
| fputs (", ", stream); |
| sh_print_operand (stream, XEXP (x, 1), 0); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| break; |
| |
| case 'V': |
| { |
| int num = exact_log2 (INTVAL (x)); |
| gcc_assert (num >= 0); |
| fprintf (stream, "#%d", num); |
| } |
| break; |
| |
| case 'W': |
| { |
| int num = exact_log2 (~INTVAL (x)); |
| gcc_assert (num >= 0); |
| fprintf (stream, "#%d", num); |
| } |
| break; |
| |
| case 'd': |
| gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode); |
| |
| fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1); |
| break; |
| |
| case 'N': |
| if (x == CONST0_RTX (GET_MODE (x))) |
| { |
| fprintf ((stream), "r63"); |
| break; |
| } |
| goto default_output; |
| case 'u': |
| if (CONST_INT_P (x)) |
| { |
| fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1)); |
| break; |
| } |
| /* Fall through. */ |
| |
| default_output: |
| default: |
| regno = 0; |
| mode = GET_MODE (x); |
| |
| switch (GET_CODE (x)) |
| { |
| case TRUNCATE: |
| { |
| rtx inner = XEXP (x, 0); |
| int offset = 0; |
| machine_mode inner_mode; |
| |
| /* We might see SUBREGs with vector mode registers inside. */ |
| if (GET_CODE (inner) == SUBREG |
| && (GET_MODE_SIZE (GET_MODE (inner)) |
| == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner)))) |
| && subreg_lowpart_p (inner)) |
| inner = SUBREG_REG (inner); |
| if (CONST_INT_P (inner)) |
| { |
| x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x))); |
| goto default_output; |
| } |
| inner_mode = GET_MODE (inner); |
| if (GET_CODE (inner) == SUBREG |
| && (GET_MODE_SIZE (GET_MODE (inner)) |
| < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner)))) |
| && REG_P (SUBREG_REG (inner))) |
| { |
| offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)), |
| GET_MODE (SUBREG_REG (inner)), |
| SUBREG_BYTE (inner), |
| GET_MODE (inner)); |
| inner = SUBREG_REG (inner); |
| } |
| if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8) |
| abort (); |
| /* Floating point register pairs are always big endian; |
| general purpose registers are 64 bit wide. */ |
| regno = REGNO (inner); |
| regno = (hard_regno_nregs (regno, inner_mode) |
| - hard_regno_nregs (regno, mode)) |
| + offset; |
| x = inner; |
| goto reg; |
| } |
| case SIGN_EXTEND: |
| x = XEXP (x, 0); |
| goto reg; |
| case SUBREG: |
| gcc_assert (SUBREG_BYTE (x) == 0 |
| && REG_P (SUBREG_REG (x))); |
| |
| x = SUBREG_REG (x); |
| /* Fall through. */ |
| |
| reg: |
| case REG: |
| regno += REGNO (x); |
| if (FP_REGISTER_P (regno) |
| && mode == V16SFmode) |
| fprintf ((stream), "mtrx%s", reg_names[regno] + 2); |
| else if (FP_REGISTER_P (REGNO (x)) |
| && mode == V4SFmode) |
| fprintf ((stream), "fv%s", reg_names[regno] + 2); |
| else if (REG_P (x) |
| && mode == V2SFmode) |
| fprintf ((stream), "fp%s", reg_names[regno] + 2); |
| else if (FP_REGISTER_P (REGNO (x)) |
| && GET_MODE_SIZE (mode) > 4) |
| fprintf ((stream), "d%s", reg_names[regno] + 1); |
| else |
| fputs (reg_names[regno], (stream)); |
| break; |
| |
| case MEM: |
| output_address (GET_MODE (x), XEXP (x, 0)); |
| break; |
| |
| default: |
| fputc ('#', stream); |
| output_addr_const (stream, x); |
| break; |
| } |
| break; |
| } |
| } |
| |
| static bool |
| sh_print_operand_punct_valid_p (unsigned char code) |
| { |
| return (code == '.' || code == '#' || code == '@' || code == ',' |
| || code == '$' || code == '\'' || code == '>'); |
| } |
| |
| /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ |
| static bool |
| sh_asm_output_addr_const_extra (FILE *file, rtx x) |
| { |
| if (GET_CODE (x) == UNSPEC) |
| { |
| switch (XINT (x, 1)) |
| { |
| case UNSPEC_PIC: |
| /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */ |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| break; |
| case UNSPEC_GOT: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("@GOT", file); |
| break; |
| case UNSPEC_GOTOFF: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("@GOTOFF", file); |
| break; |
| case UNSPEC_PLT: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("@PLT", file); |
| break; |
| case UNSPEC_GOTPLT: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("@GOTPLT", file); |
| break; |
| case UNSPEC_PCREL: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("@PCREL", file); |
| break; |
| case UNSPEC_DTPOFF: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("@DTPOFF", file); |
| break; |
| case UNSPEC_GOTTPOFF: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("@GOTTPOFF", file); |
| break; |
| case UNSPEC_TPOFF: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("@TPOFF", file); |
| break; |
| case UNSPEC_CALLER: |
| { |
| char name[32]; |
| /* LPCS stands for Label for PIC Call Site. */ |
| targetm.asm_out.generate_internal_label (name, "LPCS", |
| INTVAL (XVECEXP (x, 0, 0))); |
| assemble_name (file, name); |
| } |
| break; |
| case UNSPEC_SYMOFF: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputc ('-', file); |
| if (GET_CODE (XVECEXP (x, 0, 1)) == CONST) |
| { |
| fputc ('(', file); |
| output_addr_const (file, XVECEXP (x, 0, 1)); |
| fputc (')', file); |
| } |
| else |
| output_addr_const (file, XVECEXP (x, 0, 1)); |
| break; |
| case UNSPEC_PCREL_SYMOFF: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("-(", file); |
| output_addr_const (file, XVECEXP (x, 0, 1)); |
| fputs ("-.)", file); |
| break; |
| case UNSPEC_GOTFUNCDESC: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("@GOTFUNCDESC", file); |
| break; |
| case UNSPEC_GOTOFFFUNCDESC: |
| output_addr_const (file, XVECEXP (x, 0, 0)); |
| fputs ("@GOTOFFFUNCDESC", file); |
| break; |
| default: |
| return false; |
| } |
| return true; |
| } |
| else |
| return false; |
| } |
| |
| /* Encode symbol attributes of a SYMBOL_REF into its |
| SYMBOL_REF_FLAGS. */ |
| static void |
| sh_encode_section_info (tree decl, rtx rtl, int first) |
| { |
| default_encode_section_info (decl, rtl, first); |
| |
| if (TREE_CODE (decl) == FUNCTION_DECL |
| && sh2a_function_vector_p (decl) && TARGET_SH2A) |
| SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION; |
| } |
| |
| /* Prepare operands for a move define_expand; specifically, one of the |
| operands must be in a register. */ |
| void |
| prepare_move_operands (rtx operands[], machine_mode mode) |
| { |
| if ((mode == SImode || mode == DImode) |
| && flag_pic |
| && ! ((mode == Pmode || mode == ptr_mode) |
| && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE)) |
| { |
| rtx temp; |
| if (SYMBOLIC_CONST_P (operands[1])) |
| { |
| if (MEM_P (operands[0])) |
| operands[1] = force_reg (Pmode, operands[1]); |
| else |
| { |
| temp = (!can_create_pseudo_p () |
| ? operands[0] |
| : gen_reg_rtx (Pmode)); |
| operands[1] = legitimize_pic_address (operands[1], mode, temp); |
| } |
| } |
| else if (GET_CODE (operands[1]) == CONST |
| && GET_CODE (XEXP (operands[1], 0)) == PLUS |
| && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0))) |
| { |
| temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode); |
| temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0), |
| mode, temp); |
| operands[1] = expand_binop (mode, add_optab, temp, |
| XEXP (XEXP (operands[1], 0), 1), |
| (!can_create_pseudo_p () |
| ? temp |
| : gen_reg_rtx (Pmode)), |
| 0, OPTAB_LIB_WIDEN); |
| } |
| } |
| |
| if (! reload_in_progress && ! reload_completed) |
| { |
| /* Copy the source to a register if neither operand is a register. */ |
| if (! register_operand (operands[0], mode) |
| && ! register_operand (operands[1], mode)) |
| operands[1] = copy_to_mode_reg (mode, operands[1]); |
| |
| if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode)) |
| { |
| /* This is like change_address_1 (operands[0], mode, 0, 1), |
| except that we can't use that function because it is static. */ |
| rtx new_rtx = change_address (operands[0], mode, 0); |
| MEM_COPY_ATTRIBUTES (new_rtx, operands[0]); |
| operands[0] = new_rtx; |
| } |
| |
| /* This case can happen while generating code to move the result |
| of a library call to the target. Reject a store such as |
| `mov.l r0,@(rX,rY)' because reload will fail to find a spill |
| register for rX, since r0 is already being used for the source. */ |
| else if (refers_to_regno_p (R0_REG, operands[1]) |
| && MEM_P (operands[0]) |
| && GET_CODE (XEXP (operands[0], 0)) == PLUS |
| && REG_P (XEXP (XEXP (operands[0], 0), 1))) |
| operands[1] = copy_to_mode_reg (mode, operands[1]); |
| |
| /* When displacement addressing is used, RA will assign r0 to |
| the pseudo register operand for the QI/HImode load/store. |
| This tends to make a long live range for R0 and might cause |
| anomalous register spills in some cases with LRA. See PR |
| target/55212. |
| We split such a load/store into two move insns via r0 so as to |
| shorten the R0 live range. This makes some code worse but wins |
| on average for LRA. |
| Also, when base+index addressing is used and the index term is |
| a subreg, LRA assumes that more hard registers can be made |
| available in some situations. That isn't the case for SH in the |
| problematic case. We can pre-allocate R0 for that index term |
| to avoid the issue. See PR target/66591. */ |
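| /* For illustration (a sketch, not the exact RTL): a HImode load |
| 'mov.w @(4,r1),r2' would be split below into 'mov.w @(4,r1),r0' |
| followed by 'mov r0,r2', limiting the r0 live range to one insn. */ |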
| else if (sh_lra_p () |
| && ! TARGET_SH2A |
| && ((REG_P (operands[0]) && MEM_P (operands[1])) |
| || (REG_P (operands[1]) && MEM_P (operands[0])))) |
| { |
| bool load_p = REG_P (operands[0]); |
| rtx reg = operands[load_p ? 0 : 1]; |
| rtx adr = XEXP (operands[load_p ? 1 : 0], 0); |
| |
| if ((mode == QImode || mode == HImode) |
| && REGNO (reg) >= FIRST_PSEUDO_REGISTER |
| && GET_CODE (adr) == PLUS |
| && REG_P (XEXP (adr, 0)) |
| && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER) |
| && CONST_INT_P (XEXP (adr, 1)) |
| && INTVAL (XEXP (adr, 1)) != 0 |
| && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true)) |
| { |
| rtx r0_rtx = gen_rtx_REG (mode, R0_REG); |
| emit_move_insn (r0_rtx, operands[1]); |
| operands[1] = r0_rtx; |
| } |
| if (REGNO (reg) >= FIRST_PSEUDO_REGISTER |
| && GET_CODE (adr) == PLUS |
| && REG_P (XEXP (adr, 0)) |
| && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER) |
| && SUBREG_P (XEXP (adr, 1)) |
| && REG_P (SUBREG_REG (XEXP (adr, 1)))) |
| { |
| rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG); |
| emit_move_insn (r0_rtx, XEXP (adr, 1)); |
| XEXP (adr, 1) = r0_rtx; |
| } |
| } |
| } |
| |
| if (mode == Pmode || mode == ptr_mode) |
| { |
| rtx op0 = operands[0]; |
| rtx op1 = operands[1]; |
| rtx opc; |
| if (GET_CODE (op1) == CONST |
| && GET_CODE (XEXP (op1, 0)) == PLUS |
| && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode) |
| != TLS_MODEL_NONE)) |
| { |
| opc = XEXP (XEXP (op1, 0), 1); |
| op1 = XEXP (XEXP (op1, 0), 0); |
| } |
| else |
| opc = NULL_RTX; |
| |
| enum tls_model tls_kind; |
| |
| if (! reload_in_progress && ! reload_completed |
| && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE) |
| { |
| rtx tga_op1, tga_ret, tmp, tmp2; |
| |
| if (! flag_pic |
| && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC |
| || tls_kind == TLS_MODEL_LOCAL_DYNAMIC |
| || tls_kind == TLS_MODEL_INITIAL_EXEC)) |
| { |
| static int got_labelno; |
| /* Don't schedule the insns for getting the GOT address when |
| the first scheduling pass is enabled, to avoid spill |
| failures for R0. */ |
| if (flag_schedule_insns) |
| emit_insn (gen_blockage ()); |
| emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno))); |
| emit_use (gen_rtx_REG (SImode, PIC_REG)); |
| if (flag_schedule_insns) |
| emit_insn (gen_blockage ()); |
| } |
| |
| switch (tls_kind) |
| { |
| case TLS_MODEL_GLOBAL_DYNAMIC: |
| tga_ret = gen_rtx_REG (Pmode, R0_REG); |
| if (TARGET_FDPIC) |
| emit_move_insn (gen_rtx_REG (Pmode, PIC_REG), |
| sh_get_fdpic_reg_initial_val ()); |
| emit_call_insn (gen_tls_global_dynamic (tga_ret, op1)); |
| tmp = gen_reg_rtx (Pmode); |
| emit_move_insn (tmp, tga_ret); |
| op1 = tmp; |
| break; |
| |
| case TLS_MODEL_LOCAL_DYNAMIC: |
| tga_ret = gen_rtx_REG (Pmode, R0_REG); |
| if (TARGET_FDPIC) |
| emit_move_insn (gen_rtx_REG (Pmode, PIC_REG), |
| sh_get_fdpic_reg_initial_val ()); |
| emit_call_insn (gen_tls_local_dynamic (tga_ret, op1)); |
| |
| tmp = gen_reg_rtx (Pmode); |
| emit_move_insn (tmp, tga_ret); |
| |
| if (register_operand (op0, Pmode)) |
| tmp2 = op0; |
| else |
| tmp2 = gen_reg_rtx (Pmode); |
| |
| emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp)); |
| op1 = tmp2; |
| break; |
| |
| case TLS_MODEL_INITIAL_EXEC: |
| tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode); |
| tmp = gen_sym2GOTTPOFF (op1); |
| if (TARGET_FDPIC) |
| emit_move_insn (gen_rtx_REG (Pmode, PIC_REG), |
| sh_get_fdpic_reg_initial_val ()); |
| emit_insn (gen_tls_initial_exec (tga_op1, tmp)); |
| op1 = tga_op1; |
| break; |
| |
| case TLS_MODEL_LOCAL_EXEC: |
| tmp2 = gen_reg_rtx (Pmode); |
| emit_insn (gen_store_gbr (tmp2)); |
| tmp = gen_reg_rtx (Pmode); |
| emit_insn (gen_symTPOFF2reg (tmp, op1)); |
| |
| if (register_operand (op0, Pmode)) |
| op1 = op0; |
| else |
| op1 = gen_reg_rtx (Pmode); |
| |
| emit_insn (gen_addsi3 (op1, tmp, tmp2)); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| if (opc) |
| emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc))); |
| operands[1] = op1; |
| } |
| } |
| |
| if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) |
| { |
| rtx base, offset; |
| split_const (operands[1], &base, &offset); |
| |
| if (GET_CODE (base) == SYMBOL_REF |
| && !offset_within_block_p (base, INTVAL (offset))) |
| { |
| rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0]; |
| emit_move_insn (tmp, base); |
| if (!arith_operand (offset, mode)) |
| offset = force_reg (mode, offset); |
| emit_insn (gen_add3_insn (operands[0], tmp, offset)); |
| } |
| } |
| } |
| |
| /* Implement the canonicalize_comparison target hook for the combine |
| pass. For the target hook this function is invoked via the |
| sh_canonicalize_comparison wrapper below. This function is also re-used to |
| canonicalize comparisons in cbranch pattern expanders. */ |
| static void |
| sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1, |
| machine_mode mode, |
| bool op0_preserve_value) |
| { |
| /* When invoked from within the combine pass the mode is not specified, |
| so try to get it from one of the operands. */ |
| if (mode == VOIDmode) |
| mode = GET_MODE (op0); |
| if (mode == VOIDmode) |
| mode = GET_MODE (op1); |
| |
| // We need to have a mode to do something useful here. |
| if (mode == VOIDmode) |
| return; |
| |
| // Currently, we don't deal with floats here. |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| return; |
| |
| // Make sure that the constant operand is the second operand. |
| if (CONST_INT_P (op0) && !CONST_INT_P (op1)) |
| { |
| if (op0_preserve_value) |
| return; |
| |
| std::swap (op0, op1); |
| cmp = swap_condition (cmp); |
| } |
| |
| if (CONST_INT_P (op1)) |
| { |
| /* Try to adjust the constant operand in such a way that available |
| comparison insns can be utilized better and the constant can be |
| loaded with a 'mov #imm,Rm' insn. This avoids a load from the |
| constant pool. */ |
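| /* For example, 'x >= 128' becomes 'x > 127' below: 127 satisfies |
| I08 and can be loaded with 'mov #127,Rm', whereas 128 would need |
| a constant pool load. */ |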
| const HOST_WIDE_INT val = INTVAL (op1); |
| |
| /* x > -1 --> x >= 0 |
| x > 0xFFFFFF7F --> x >= 0xFFFFFF80 |
| x <= -1 --> x < 0 |
| x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */ |
| if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE)) |
| { |
| cmp = cmp == GT ? GE : LT; |
| op1 = gen_int_mode (val + 1, mode); |
| } |
| |
| /* x >= 1 --> x > 0 |
| x >= 0x80 --> x > 0x7F |
| x < 1 --> x <= 0 |
| x < 0x80 --> x <= 0x7F */ |
| else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT)) |
| { |
| cmp = cmp == GE ? GT : LE; |
| op1 = gen_int_mode (val - 1, mode); |
| } |
| |
| /* unsigned x >= 1 --> x != 0 |
| unsigned x < 1 --> x == 0 */ |
| else if (val == 1 && (cmp == GEU || cmp == LTU)) |
| { |
| cmp = cmp == GEU ? NE : EQ; |
| op1 = CONST0_RTX (mode); |
| } |
| |
| /* unsigned x >= 0x80 --> unsigned x > 0x7F |
| unsigned x < 0x80 --> unsigned x <= 0x7F */ |
| else if (val == 0x80 && (cmp == GEU || cmp == LTU)) |
| { |
| cmp = cmp == GEU ? GTU : LEU; |
| op1 = gen_int_mode (val - 1, mode); |
| } |
| |
| /* unsigned x > 0 --> x != 0 |
| unsigned x <= 0 --> x == 0 */ |
| else if (val == 0 && (cmp == GTU || cmp == LEU)) |
| cmp = cmp == GTU ? NE : EQ; |
| |
| /* unsigned x > 0x7FFFFFFF --> signed x < 0 |
| unsigned x <= 0x7FFFFFFF --> signed x >= 0 */ |
| else if (mode == SImode && (cmp == GTU || cmp == LEU) |
| && val == 0x7FFFFFFF) |
| { |
| cmp = cmp == GTU ? LT : GE; |
| op1 = const0_rtx; |
| } |
| |
| /* unsigned x >= 0x80000000 --> signed x < 0 |
| unsigned x < 0x80000000 --> signed x >= 0 */ |
| else if (mode == SImode && (cmp == GEU || cmp == LTU) |
| && (unsigned HOST_WIDE_INT)val |
| == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1)) |
| { |
| cmp = cmp == GEU ? LT : GE; |
| op1 = const0_rtx; |
| } |
| } |
| } |
| |
| /* This function implements the canonicalize_comparison target hook. |
| This wrapper around the internally used sh_canonicalize_comparison |
| function is needed to do the enum rtx_code <-> int conversion. |
| Target hooks cannot use enum rtx_code in their definitions. */ |
| static void |
| sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1, |
| bool op0_preserve_value) |
| { |
| enum rtx_code tmp_code = (enum rtx_code)*code; |
| sh_canonicalize_comparison (tmp_code, *op0, *op1, |
| VOIDmode, op0_preserve_value); |
| *code = (int)tmp_code; |
| } |
| |
| /* This function implements the legitimate_combined_insn target hook, |
| which the combine pass uses to early reject combined insns, before |
| it tries to recog the insn and determine its cost. */ |
| static bool |
| sh_legitimate_combined_insn (rtx_insn* insn) |
| { |
| /* Reject combinations of memory loads and zero extensions, as these |
| interfere with other combine patterns such as zero extracts and bit |
| tests. The SH2A movu.{b|w} insns are formed later in the |
| 'sh_optimize_extu_exts' pass after combine/split1. */ |
| rtx p = PATTERN (insn); |
| if (GET_CODE (p) == SET |
| && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode |
| && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND |
| && MEM_P (XEXP (XEXP (p, 1), 0))) |
| return false; |
| |
| return true; |
| } |
| |
| bool |
| sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2) |
| { |
| *p1 = T_REG; |
| *p2 = INVALID_REGNUM; |
| return true; |
| } |
| |
| /* Try to calculate the branch distance of a conditional branch in bytes. |
| |
| FIXME: Because of PR 59189 we can't use the CFG here. Instead just |
| walk from this insn into the next (fall-through) basic block and see if |
| we hit the label. */ |
| unsigned int |
| sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist) |
| { |
| rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "sh_cbranch_distance insn = \n"); |
| print_rtl_single (dump_file, cbranch_insn); |
| } |
| |
| unsigned int dist = 0; |
| |
| for (rtx_insn* i = next_nonnote_insn (cbranch_insn); |
| i != NULL && dist < max_dist; i = next_nonnote_insn (i)) |
| { |
| const unsigned int i_len = get_attr_length (i); |
| dist += i_len; |
| |
| if (dump_file) |
| fprintf (dump_file, " insn %d length = %u dist = %u\n", |
| INSN_UID (i), i_len, dist); |
| |
| if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i)) |
| { |
| if (l == cbranch_insn->jump_target ()) |
| { |
| if (dump_file) |
| fprintf (dump_file, " cbranch dist = %u\n", dist); |
| return dist; |
| } |
| break; |
| } |
| } |
| |
| if (dump_file) |
| fprintf (dump_file, " cbranch dist = unknown\n"); |
| |
| return unknown_cbranch_distance; |
| } |
| |
| enum rtx_code |
| prepare_cbranch_operands (rtx *operands, machine_mode mode, |
| enum rtx_code comparison) |
| { |
| gcc_assert (can_create_pseudo_p ()); |
| |
| if (comparison == LAST_AND_UNUSED_RTX_CODE) |
| comparison = GET_CODE (operands[0]); |
| |
| sh_canonicalize_comparison (comparison, operands[1], operands[2], |
| mode, false); |
| |
| rtx op1 = operands[1]; |
| operands[1] = force_reg (mode, op1); |
| |
| /* When we are handling DImode comparisons, we want to keep constants so |
| that we can optimize the component comparisons; however, memory loads |
| are better issued as a whole so that they can be scheduled well. |
| SImode equality comparisons allow I08 constants, but only when they |
| compare r0. Hence, if operands[1] has to be loaded from somewhere else |
| into a register, that register might as well be r0, and we allow the |
| constant. If it is already in a register, this is likely to be |
| allocated to a different hard register, thus we load the constant into |
| a register unless it is zero. */ |
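| /* Note: 'cmp/eq #imm8,r0' is the only SH compare insn that takes an |
| immediate operand; all other compares are register-register. */ |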
| if (!REG_P (operands[2]) |
| && (!CONST_INT_P (operands[2]) |
| || (mode == SImode && operands[2] != CONST0_RTX (SImode) |
| && ((comparison != EQ && comparison != NE) |
| || (REG_P (op1) && REGNO (op1) != R0_REG) |
| || !satisfies_constraint_I08 (operands[2]))))) |
| operands[2] = force_reg (mode, operands[2]); |
| |
| return comparison; |
| } |
| |
| static void |
| expand_cbranchsi4 (rtx *operands, enum rtx_code comparison, |
| profile_probability probability) |
| { |
| rtx (*branch_expander) (rtx) = gen_branch_true; |
| comparison = prepare_cbranch_operands (operands, SImode, comparison); |
| switch (comparison) |
| { |
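| /* SH branches test the T bit, so conditions that have no direct |
| T-setting compare insn are reversed here and emitted via a |
| branch-false instead. */ |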
| case NE: case LT: case LE: case LTU: case LEU: |
| comparison = reverse_condition (comparison); |
| branch_expander = gen_branch_false; |
| default: ; |
| } |
| emit_insn (gen_rtx_SET (get_t_reg_rtx (), |
| gen_rtx_fmt_ee (comparison, SImode, |
| operands[1], operands[2]))); |
| rtx_insn *jump = emit_jump_insn (branch_expander (operands[3])); |
| if (probability.initialized_p ()) |
| add_reg_br_prob_note (jump, probability); |
| } |
| |
| void |
| expand_cbranchsi4 (rtx *operands, enum rtx_code comparison) |
| { |
| expand_cbranchsi4 (operands, comparison, |
| profile_probability::uninitialized ()); |
| } |
| |
| /* ??? How should we distribute probabilities when more than one branch |
| is generated? So far we only have some ad-hoc observations: |
| - If the operands are random, they are likely to differ in both parts. |
| - If comparing items in a hash chain, the operands are random or equal; |
| operation should be EQ or NE. |
| - If items are searched in an ordered tree from the root, we can expect |
| the highpart to be unequal about half of the time; operation should be |
| an inequality comparison, operands non-constant, and overall probability |
| about 50%. Likewise for quicksort. |
| - Range checks will often be made against constants. Even if we assume for |
| simplicity an even distribution of the non-constant operand over a |
| sub-range here, the same probability could be generated with differently |
| wide sub-ranges - as long as the ratio of the part of the subrange that |
| is before the threshold to the part that comes after the threshold stays |
| the same. Thus, we can't really tell anything here; |
| assuming random distribution is at least simple. |
| */ |
| bool |
| expand_cbranchdi4 (rtx *operands, enum rtx_code comparison) |
| { |
| enum rtx_code msw_taken, msw_skip, lsw_taken; |
| rtx_code_label *skip_label = NULL; |
| rtx op1h, op1l, op2h, op2l; |
| int num_branches; |
| profile_probability prob, rev_prob; |
| profile_probability msw_taken_prob = profile_probability::uninitialized (), |
| msw_skip_prob = profile_probability::uninitialized (), |
| lsw_taken_prob = profile_probability::uninitialized (); |
| |
| comparison = prepare_cbranch_operands (operands, DImode, comparison); |
| op1h = gen_highpart_mode (SImode, DImode, operands[1]); |
| op2h = gen_highpart_mode (SImode, DImode, operands[2]); |
| op1l = gen_lowpart (SImode, operands[1]); |
| op2l = gen_lowpart (SImode, operands[2]); |
| msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE; |
| prob = split_branch_probability; |
| rev_prob = prob.invert (); |
| switch (comparison) |
| { |
| case EQ: |
| msw_skip = NE; |
| lsw_taken = EQ; |
| if (prob.initialized_p ()) |
| { |
| /* FIXME: This is not optimal. We do not really know the probability |
| that the values differ in the MSW only, but we should probably |
| distribute the probabilities more evenly. */ |
| msw_skip_prob = rev_prob; |
| lsw_taken_prob = prob > profile_probability::never () |
| ? profile_probability::guessed_always () |
| : profile_probability::guessed_never (); |
| } |
| break; |
| case NE: |
| msw_taken = NE; |
| msw_taken_prob = prob; |
| lsw_taken = NE; |
| lsw_taken_prob = profile_probability::guessed_never (); |
| break; |
| case GTU: case GT: |
| msw_taken = comparison; |
| if (CONST_INT_P (op2l) && INTVAL (op2l) == -1) |
| break; |
| if (comparison != GTU || op2h != CONST0_RTX (SImode)) |
| msw_skip = swap_condition (msw_taken); |
| lsw_taken = GTU; |
| break; |
| case GEU: case GE: |
| if (op2l == CONST0_RTX (SImode)) |
| msw_taken = comparison; |
| else |
| { |
| msw_taken = comparison == GE ? GT : GTU; |
| msw_skip = swap_condition (msw_taken); |
| lsw_taken = GEU; |
| } |
| break; |
| case LTU: case LT: |
| msw_taken = comparison; |
| if (op2l == CONST0_RTX (SImode)) |
| break; |
| msw_skip = swap_condition (msw_taken); |
| lsw_taken = LTU; |
| break; |
| case LEU: case LE: |
| if (CONST_INT_P (op2l) && INTVAL (op2l) == -1) |
| msw_taken = comparison; |
| else |
| { |
| lsw_taken = LEU; |
| if (comparison == LE) |
| msw_taken = LT; |
| else if (op2h != CONST0_RTX (SImode)) |
| msw_taken = LTU; |
| else |
| { |
| msw_skip = swap_condition (LTU); |
| break; |
| } |
| msw_skip = swap_condition (msw_taken); |
| } |
| break; |
| default: return false; |
| } |
| num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE) |
| + (msw_skip != LAST_AND_UNUSED_RTX_CODE) |
| + (lsw_taken != LAST_AND_UNUSED_RTX_CODE)); |
| if (comparison != EQ && comparison != NE && num_branches > 1) |
| { |
| if (!CONSTANT_P (operands[2]) |
| && prob.initialized_p () |
| && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U) |
| && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U)) |
| { |
| msw_taken_prob = prob.apply_scale (1, 2); |
| msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE, |
| rev_prob.to_reg_br_prob_base () |
| + REG_BR_PROB_BASE); |
| lsw_taken_prob = prob; |
| } |
| else |
| { |
| msw_taken_prob = prob; |
| msw_skip_prob = profile_probability::guessed_always (); |
| /* ??? If we have a constant op2h, should we use that when |
| calculating lsw_taken_prob? */ |
| lsw_taken_prob = prob; |
| } |
| } |
| operands[1] = op1h; |
| operands[2] = op2h; |
| |
| if (msw_taken != LAST_AND_UNUSED_RTX_CODE) |
| expand_cbranchsi4 (operands, msw_taken, msw_taken_prob); |
| if (msw_skip != LAST_AND_UNUSED_RTX_CODE) |
| { |
| rtx taken_label = operands[3]; |
| |
| /* Operands were possibly modified, but msw_skip doesn't expect this. |
| Always use the original ones. */ |
| if (msw_taken != LAST_AND_UNUSED_RTX_CODE) |
| { |
| operands[1] = op1h; |
| operands[2] = op2h; |
| } |
| |
| operands[3] = skip_label = gen_label_rtx (); |
| expand_cbranchsi4 (operands, msw_skip, msw_skip_prob); |
| operands[3] = taken_label; |
| } |
| operands[1] = op1l; |
| operands[2] = op2l; |
| if (lsw_taken != LAST_AND_UNUSED_RTX_CODE) |
| expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob); |
| if (msw_skip != LAST_AND_UNUSED_RTX_CODE) |
| emit_label (skip_label); |
| return true; |
| } |
| |
| /* Given an operand, return 1 if the evaluated operand plugged into an |
| if_then_else will result in a branch_true, 0 if branch_false, or |
| -1 if neither applies. The truth table goes like this: |
| |
| op | cmpval | code | result |
| ---------+--------+---------+-------------------- |
| T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1) |
| T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1) |
| T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0) |
| T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0) |
| !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1) |
| !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1) |
| !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0) |
| !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */ |
| int |
| sh_eval_treg_value (rtx op) |
| { |
| if (t_reg_operand (op, GET_MODE (op))) |
| return 1; |
| if (negt_reg_operand (op, GET_MODE (op))) |
| return 0; |
| |
| rtx_code code = GET_CODE (op); |
| if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1))) |
| return -1; |
| |
| int cmpop = code == EQ ? 1 : 0; |
| int cmpval = INTVAL (XEXP (op, 1)); |
| if (cmpval != 0 && cmpval != 1) |
| return -1; |
| |
| int t; |
| if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))) |
| t = 0; |
| else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0)))) |
| t = 1; |
| else |
| return -1; |
| |
| return t ^ (cmpval == cmpop); |
| } |
| |
| /* Emit INSN, possibly in a PARALLEL with a USE/CLOBBER of FPSCR bits in |
| the case of floating-point comparisons. */ |
| static void |
| sh_emit_set_t_insn (rtx insn, machine_mode mode) |
| { |
| if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && GET_CODE (insn) != PARALLEL) |
| { |
| insn = gen_rtx_PARALLEL (VOIDmode, |
| gen_rtvec (3, insn, |
| gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)), |
| gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG)))); |
| } |
| emit_insn (insn); |
| } |
| |
| /* Prepare the operands for an scc instruction; make sure that the |
| compare has been done and the result is in T_REG. */ |
| void |
| sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1) |
| { |
| rtx t_reg = get_t_reg_rtx (); |
| enum rtx_code oldcode = code; |
| |
| /* First need a compare insn. */ |
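| /* SH's register-register compares are cmp/eq, cmp/gt, cmp/ge, |
| cmp/hi and cmp/hs; there is no 'less than' form, so LT/LE/LTU/LEU |
| are handled by swapping operands of the greater-than variant. */ |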
| switch (code) |
| { |
| case NE: |
| /* It isn't possible to handle this case. */ |
| gcc_unreachable (); |
| case LT: |
| code = GT; |
| break; |
| case LE: |
| code = GE; |
| break; |
| case LTU: |
| code = GTU; |
| break; |
| case LEU: |
| code = GEU; |
| break; |
| default: |
| break; |
| } |
| if (code != oldcode) |
| std::swap (op0, op1); |
| |
| machine_mode mode = GET_MODE (op0); |
| if (mode == VOIDmode) |
| mode = GET_MODE (op1); |
| |
| op0 = force_reg (mode, op0); |
| if ((code != EQ && code != NE |
| && (op1 != const0_rtx |
| || code == GTU || code == GEU || code == LTU || code == LEU)) |
| || (mode == DImode && op1 != const0_rtx) |
| || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) |
| op1 = force_reg (mode, op1); |
| |
| sh_emit_set_t_insn (gen_rtx_SET (t_reg, |
| gen_rtx_fmt_ee (code, SImode, op0, op1)), |
| mode); |
| } |
| |
| /* Called from the md file, set up the operands of a compare instruction. */ |
| void |
| sh_emit_compare_and_branch (rtx *operands, machine_mode mode) |
| { |
| enum rtx_code code = GET_CODE (operands[0]); |
| enum rtx_code branch_code; |
| rtx op0 = operands[1]; |
| rtx op1 = operands[2]; |
| rtx insn; |
| bool need_ccmpeq = false; |
| |
| if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| op0 = force_reg (mode, op0); |
| op1 = force_reg (mode, op1); |
| } |
| else |
| { |
| if (code != EQ || mode == DImode) |
| { |
| /* Force args into regs, since we can't use constants here. */ |
| op0 = force_reg (mode, op0); |
| if (op1 != const0_rtx || code == GTU || code == GEU) |
| op1 = force_reg (mode, op1); |
| } |
| } |
| |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| if (code == LT |
| || (code == LE && TARGET_IEEE && TARGET_SH2E) |
| || (code == GE && !(TARGET_IEEE && TARGET_SH2E))) |
| { |
| std::swap (op0, op1); |
| code = swap_condition (code); |
| } |
| |
| /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */ |
| if (code == GE) |
| { |
| gcc_assert (TARGET_IEEE && TARGET_SH2E); |
| need_ccmpeq = true; |
| code = GT; |
| } |
| |
| /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed |
| to EQ/GT respectively. */ |
| gcc_assert (code == EQ || code == GT || code == NE || code == LE); |
| } |
| |
| switch (code) |
| { |
| case EQ: |
| case GT: |
| case GE: |
| case GTU: |
| case GEU: |
| branch_code = code; |
| break; |
| case NE: |
| case LT: |
| case LE: |
| case LTU: |
| case LEU: |
| branch_code = reverse_condition (code); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| insn = gen_rtx_SET (get_t_reg_rtx (), |
| gen_rtx_fmt_ee (branch_code, SImode, op0, op1)); |
| |
| sh_emit_set_t_insn (insn, mode); |
| if (need_ccmpeq) |
| sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode); |
| |
| if (branch_code == code) |
| emit_jump_insn (gen_branch_true (operands[3])); |
| else |
| emit_jump_insn (gen_branch_false (operands[3])); |
| } |
| |
| void |
| sh_emit_compare_and_set (rtx *operands, machine_mode mode) |
| { |
| enum rtx_code code = GET_CODE (operands[1]); |
| rtx op0 = operands[2]; |
| rtx op1 = operands[3]; |
| rtx_code_label *lab = NULL; |
| bool invert = false; |
| |
| op0 = force_reg (mode, op0); |
| if ((code != EQ && code != NE |
| && (op1 != const0_rtx |
| || code == GTU || code == GEU || code == LTU || code == LEU)) |
| || (mode == DImode && op1 != const0_rtx) |
| || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)) |
| op1 = force_reg (mode, op1); |
| |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| if (code == LT || code == LE) |
| { |
| std::swap (op0, op1); |
| code = swap_condition (code); |
| } |
| if (code == GE) |
| { |
| if (TARGET_IEEE) |
| { |
| lab = gen_label_rtx (); |
| sh_emit_scc_to_t (EQ, op0, op1); |
| emit_jump_insn (gen_branch_true (lab)); |
| code = GT; |
| } |
| else |
| { |
| code = LT; |
| invert = true; |
| } |
| } |
| } |
| |
| if (code == NE) |
| { |
| code = EQ; |
| invert = true; |
| } |
| |
| sh_emit_scc_to_t (code, op0, op1); |
| if (lab) |
| emit_label (lab); |
| if (invert) |
| emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ())); |
| else |
| emit_move_insn (operands[0], get_t_reg_rtx ()); |
| } |
| |
| /* Functions to output assembly code. */ |
| |
| /* Return a sequence of instructions to perform a DI or DF move. |
| |
| Since the SH cannot move a DI or DF in one instruction, we have |
| to take care when we see overlapping source and dest registers. */ |
| const char * |
| output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[], |
| machine_mode mode) |
| { |
| rtx dst = operands[0]; |
| rtx src = operands[1]; |
| |
| if (MEM_P (dst) |
| && GET_CODE (XEXP (dst, 0)) == PRE_DEC) |
| return "mov.l %T1,%0" "\n" |
| " mov.l %1,%0"; |
| |
| if (register_operand (dst, mode) |
| && register_operand (src, mode)) |
| { |
| if (REGNO (src) == MACH_REG) |
| return "sts mach,%S0" "\n" |
| " sts macl,%R0"; |
| |
| /* Order matters for overlapping pairs: for a double move |
| r1,r2 -> r2,r3 copy r2->r3 first, then r1->r2; for |
| r1,r2 -> r0,r1 copy r1->r0 first, then r2->r1. */ |
| if (REGNO (src) + 1 == REGNO (dst)) |
| return "mov %T1,%T0" "\n" |
| " mov %1,%0"; |
| else |
| return "mov %1,%0" "\n" |
| " mov %T1,%T0"; |
| } |
| else if (CONST_INT_P (src)) |
| { |
| if (INTVAL (src) < 0) |
| output_asm_insn ("mov #-1,%S0", operands); |
| else |
| output_asm_insn ("mov #0,%S0", operands); |
| |
| return "mov %1,%R0"; |
| } |
| else if (MEM_P (src)) |
| { |
| int ptrreg = -1; |
| int dreg = REGNO (dst); |
| rtx inside = XEXP (src, 0); |
| |
| switch (GET_CODE (inside)) |
| { |
| case REG: |
| ptrreg = REGNO (inside); |
| break; |
| |
| case SUBREG: |
| ptrreg = subreg_regno (inside); |
| break; |
| |
| case PLUS: |
| ptrreg = REGNO (XEXP (inside, 0)); |
| /* ??? An r0+REG address shouldn't be possible here, because it isn't |
| an offsettable address. Unfortunately, offsettable addresses use |
| QImode to check the offset, and a QImode offsettable address |
| requires r0 for the other operand, which is not currently |
| supported, so we can't use the 'o' constraint. |
| Thus we must check for and handle r0+REG addresses here. |
| We punt for now, since this is likely very rare. */ |
| gcc_assert (!REG_P (XEXP (inside, 1))); |
| break; |
| |
| case LABEL_REF: |
| return "mov.l %1,%0" "\n" |
| " mov.l %1+4,%T0"; |
| case POST_INC: |
| return "mov.l %1,%0" "\n" |
| " mov.l %1,%T0"; |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* Work out the safe way to copy. Copy into the second half first. */ |
| if (dreg == ptrreg) |
| return "mov.l %T1,%T0" "\n" |
| " mov.l %1,%0"; |
| } |
| |
| return "mov.l %1,%0" "\n" |
| " mov.l %T1,%T0"; |
| } |
| |
| /* Print an instruction which would have gone into a delay slot after |
| another instruction, but couldn't because the other instruction expanded |
| into a sequence where putting the slot insn at the end wouldn't work. */ |
| static void |
| print_slot (rtx_sequence *seq) |
| { |
| final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL); |
| |
| seq->insn (1)->set_deleted (); |
| } |
| |
| const char * |
| output_far_jump (rtx_insn *insn, rtx op) |
| { |
| struct { rtx lab, reg, op; } this_jmp; |
| rtx_code_label *braf_base_lab = NULL; |
| const char *jump; |
| int far; |
| int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn)); |
| rtx_insn *prev; |
| |
| this_jmp.lab = gen_label_rtx (); |
| |
| if (TARGET_SH2 |
| && offset >= -32764 |
| && offset - get_attr_length (insn) <= 32766 |
| && ! CROSSING_JUMP_P (insn)) |
| { |
| far = 0; |
| jump = "mov.w %O0,%1" "\n" |
| " braf %1"; |
| } |
| else |
| { |
| far = 1; |
| if (flag_pic) |
| { |
| if (TARGET_SH2) |
| jump = "mov.l %O0,%1" "\n" |
| " braf %1"; |
| else |
| jump = "mov.l r0,@-r15" "\n" |
| " mova %O0,r0" "\n" |
| " mov.l @r0,%1" "\n" |
| " add r0,%1" "\n" |
| " mov.l @r15+,r0" "\n" |
| " jmp @%1"; |
| } |
| else |
| jump = "mov.l %O0,%1" "\n" |
| " jmp @%1"; |
| } |
| /* If we have a scratch register available, use it. */ |
| if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn))) |
| && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) |
| { |
| this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0)); |
| if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2) |
| jump = "mov.l r1,@-r15" "\n" |
| " mova %O0,r0" "\n" |
| " mov.l @r0,r1" "\n" |
| " add r1,r0" "\n" |
| " mov.l @r15+,r1" "\n" |
| " jmp @%1"; |
| output_asm_insn (jump, &this_jmp.lab); |
| if (dbr_sequence_length ()) |
| print_slot (final_sequence); |
| else |
| output_asm_insn ("nop", 0); |
| } |
| else |
| { |
| /* Output the delay slot insn first if any. */ |
| if (dbr_sequence_length ()) |
| print_slot (final_sequence); |
| |
| this_jmp.reg = gen_rtx_REG (SImode, 13); |
| output_asm_insn ("mov.l r13,@-r15", 0); |
| output_asm_insn (jump, &this_jmp.lab); |
| output_asm_insn ("mov.l @r15+,r13", 0); |
| } |
| if (far && flag_pic && TARGET_SH2) |
| { |
| braf_base_lab = gen_label_rtx (); |
| (*targetm.asm_out.internal_label) (asm_out_file, "L", |
| CODE_LABEL_NUMBER (braf_base_lab)); |
| } |
| if (far) |
| output_asm_insn (".align 2", 0); |
| (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab)); |
| this_jmp.op = op; |
| if (far && flag_pic) |
| { |
| if (TARGET_SH2) |
| this_jmp.lab = braf_base_lab; |
| output_asm_insn (".long %O2-%O0", &this_jmp.lab); |
| } |
| else |
| output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab); |
| return ""; |
| } |
| |
| /* Local label counter, used for constants in the pool and inside |
| pattern branches. */ |
| static int lf = 100; |
| |
| /* Output code for ordinary branches. */ |
| const char * |
| output_branch (int logic, rtx_insn *insn, rtx *operands) |
| { |
| switch (get_attr_length (insn)) |
| { |
| case 6: |
| /* This can happen if filling the delay slot has caused a forward |
| branch to exceed its range (we could reverse it, but only |
| when we know we won't overextend other branches; this should |
| best be handled by relaxation). |
| It can also happen when other condbranches hoist delay slot insn |
| from their destination, thus leading to code size increase. |
| But the branch will still be in the range -4092..+4098 bytes. */ |
| if (! TARGET_RELAX) |
| { |
| int label = lf++; |
| /* The call to print_slot will clobber the operands. */ |
| rtx op0 = operands[0]; |
| |
| /* If the instruction in the delay slot is annulled (true), then |
| there is no delay slot where we can put it now. The only safe |
| place for it is after the label. final will do that by default. */ |
| |
| if (final_sequence |
| && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0)) |
| && get_attr_length (final_sequence->insn (1))) |
| { |
| asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t", |
| ASSEMBLER_DIALECT ? "/" : ".", label); |
| print_slot (final_sequence); |
| } |
| else |
| asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label); |
| |
| output_asm_insn ("bra\t%l0", &op0); |
| fprintf (asm_out_file, "\tnop\n"); |
| (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); |
| |
| return ""; |
| } |
| /* FALLTHRU */ |
| /* When relaxing, handle this like a short branch. The linker |
| will fix it up if it still doesn't fit after relaxation. */ |
| case 2: |
| return logic ? "bt%.\t%l0" : "bf%.\t%l0"; |
| |
| /* These are for SH2e, in which we have to account for the |
| extra nop because of the hardware bug in annulled branches. */ |
| case 8: |
| if (! TARGET_RELAX) |
| { |
| int label = lf++; |
| |
| gcc_assert (!final_sequence |
| || !(INSN_ANNULLED_BRANCH_P |
| (XVECEXP (final_sequence, 0, 0)))); |
| asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n", |
| logic ? "f" : "t", |
| ASSEMBLER_DIALECT ? "/" : ".", label); |
| fprintf (asm_out_file, "\tnop\n"); |
| output_asm_insn ("bra\t%l0", operands); |
| fprintf (asm_out_file, "\tnop\n"); |
| (*targetm.asm_out.internal_label) (asm_out_file, "LF", label); |
| |
| return ""; |
| } |
| /* FALLTHRU */ |
| case 4: |
| { |
| char buffer[10]; |
| |
| sprintf (buffer, "b%s%ss\t%%l0", |
| logic ? "t" : "f", |
| ASSEMBLER_DIALECT ? "/" : "."); |
| output_asm_insn (buffer, &operands[0]); |
| return "nop"; |
| } |
| |
| default: |
| /* There should be no branches longer than this now - anything |
| longer would indicate that something has destroyed the branches |
| set up in machine_dependent_reorg. */ |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Output a code sequence for INSN using TEMPL with OPERANDS; but first, |
| fill in operand 9 with a label to the successor insn. |
| We try to use jump threading where possible. |
| If CODE matches the comparison in the IF_THEN_ELSE of a following jump, |
| we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means |
| follow jmp and bt, if the address is in range. */ |
| const char * |
| output_branchy_insn (enum rtx_code code, const char *templ, |
| rtx_insn *insn, rtx *operands) |
| { |
| rtx_insn *next_insn = NEXT_INSN (insn); |
| |
| if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn)) |
| { |
| rtx src = SET_SRC (PATTERN (next_insn)); |
| if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code) |
| { |
| /* Following branch not taken */ |
| rtx_code_label *lab = gen_label_rtx (); |
| emit_label_after (lab, next_insn); |
| INSN_ADDRESSES_NEW (lab, |
| INSN_ADDRESSES (INSN_UID (next_insn)) |
| + get_attr_length (next_insn)); |
| operands[9] = lab; |
| return templ; |
| } |
| else |
| { |
| int offset = (branch_dest (next_insn) |
| - INSN_ADDRESSES (INSN_UID (next_insn)) + 4); |
| if (offset >= -252 && offset <= 258) |
| { |
| if (GET_CODE (src) == IF_THEN_ELSE) |
| /* branch_true */ |
| src = XEXP (src, 1); |
| operands[9] = src; |
| return templ; |
| } |
| } |
| } |
| rtx_code_label *lab = gen_label_rtx (); |
| emit_label_after (lab, insn); |
| INSN_ADDRESSES_NEW (lab, |
| INSN_ADDRESSES (INSN_UID (insn)) |
| + get_attr_length (insn)); |
| operands[9] = lab; |
| return templ; |
| } |
| |
| const char * |
| output_ieee_ccmpeq (rtx_insn *insn, rtx *operands) |
| { |
| return output_branchy_insn (NE, "bt %l9" "\n" |
| " fcmp/eq %1,%0", |
| insn, operands); |
| } |
| |
| /* Output the start of the assembler file. */ |
| static void |
| sh_file_start (void) |
| { |
| default_file_start (); |
| |
| if (TARGET_ELF) |
| /* We need to show the text section with the proper |
| attributes as in TEXT_SECTION_ASM_OP, before dwarf2out |
| emits it without attributes, else GAS will complain. We |
| can teach GAS specifically about the default attributes |
| for our choice of text section, but then we would have to |
| change GAS again if/when we change the text section name. */ |
| fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP); |
| else |
| /* Switch to the data section so that the coffsem symbol |
| isn't in the text section. */ |
| switch_to_section (data_section); |
| |
| if (TARGET_LITTLE_ENDIAN) |
| fputs ("\t.little\n", asm_out_file); |
| } |
| |
| /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions |
| need to be output as pointers to function descriptors for |
| FDPIC. */ |
| |
| static bool |
| sh_assemble_integer (rtx value, unsigned int size, int aligned_p) |
| { |
| if (TARGET_FDPIC && size == UNITS_PER_WORD |
| && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value)) |
| { |
| fputs ("\t.long\t", asm_out_file); |
| output_addr_const (asm_out_file, value); |
| fputs ("@FUNCDESC\n", asm_out_file); |
| return true; |
| } |
| return default_assemble_integer (value, size, aligned_p); |
| } |
| |
| /* Check if PAT includes UNSPEC_CALLER unspec pattern. */ |
| static bool |
| unspec_caller_rtx_p (rtx pat) |
| { |
| rtx base, offset; |
| split_const (pat, &base, &offset); |
| |
| if (GET_CODE (base) == UNSPEC) |
| { |
| if (XINT (base, 1) == UNSPEC_CALLER) |
| return true; |
| for (int i = 0; i < XVECLEN (base, 0); i++) |
| if (unspec_caller_rtx_p (XVECEXP (base, 0, i))) |
| return true; |
| } |
| return false; |
| } |
| |
| /* Indicate that INSN cannot be duplicated. This is true for insns |
| that generate a unique label. */ |
| static bool |
| sh_cannot_copy_insn_p (rtx_insn *insn) |
| { |
| if (!reload_completed || !flag_pic) |
| return false; |
| |
| if (!NONJUMP_INSN_P (insn)) |
| return false; |
| if (asm_noperands (insn) >= 0) |
| return false; |
| |
| rtx pat = PATTERN (insn); |
| |
| if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE) |
| return false; |
| |
| if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL) |
| { |
| rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1); |
| if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0))) |
| return true; |
| } |
| |
| if (GET_CODE (pat) != SET) |
| return false; |
| pat = SET_SRC (pat); |
| |
| if (unspec_caller_rtx_p (pat)) |
| return true; |
| |
| return false; |
| } |
| |
| /* Number of instructions used to make an arithmetic right shift by N. */ |
| static const char ashiftrt_insns[] = |
| { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2}; |
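| /* E.g. counts 1..5 take that many 'shar' insns; count 16 has a 2-insn |
| swap.w/exts.w sequence and count 31 a 2-insn shll/subc sequence. The |
| entries of 8 are a rough cost for the remaining counts, which are |
| typically handled by other means (such as a library call) when |
| dynamic shifts are unavailable. */ |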
| |
| /* Description of a logical left or right shift, when expanded to a sequence |
| of 1/2/8/16 shifts. |
| Notice that one bit right shifts clobber the T bit. One bit left shifts |
| are done with an 'add Rn,Rn' insn and thus do not clobber the T bit. */ |
| enum |
| { |
| ASHL_CLOBBERS_T = 1 << 0, |
| LSHR_CLOBBERS_T = 1 << 1 |
| }; |
| |
| struct ashl_lshr_sequence |
| { |
| char insn_count; |
| signed char amount[6]; |
| char clobbers_t; |
| }; |
| |
| static const struct ashl_lshr_sequence ashl_lshr_seq[32] = |
| { |
| { 0, { 0 }, 0 }, // 0 |
| { 1, { 1 }, LSHR_CLOBBERS_T }, |
| { 1, { 2 }, 0 }, |
| { 2, { 2, 1 }, LSHR_CLOBBERS_T }, |
| { 2, { 2, 2 }, 0 }, // 4 |
| { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 3, { 2, 2, 2 }, 0 }, |
| { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 1, { 8 }, 0 }, // 8 |
| { 2, { 8, 1 }, LSHR_CLOBBERS_T }, |
| { 2, { 8, 2 }, 0 }, |
| { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 3, { 8, 2, 2 }, 0 }, // 12 |
| { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 3, { 8, -2, 8 }, 0 }, |
| { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T }, |
| { 1, { 16 }, 0 }, // 16 |
| { 2, { 16, 1 }, LSHR_CLOBBERS_T }, |
| { 2, { 16, 2 }, 0 }, |
| { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 3, { 16, 2, 2 }, 0 }, // 20 |
| { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 3, { 16, -2, 8 }, 0 }, |
| { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, |
| { 2, { 16, 8 }, 0 }, // 24 |
| { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, |
| { 3, { 16, 8, 2 }, 0 }, |
| { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 4, { 16, 8, 2, 2 }, 0 }, // 28 |
| { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, |
| { 3, { 16, -2, 16 }, 0 }, |
| |
| /* For a right shift by 31 a 2 insn shll-movt sequence can be used. |
| For a left shift by 31 a 2 insn and-rotl sequence can be used. |
| However, the shift-and combiner code needs this entry here to be in |
| terms of real shift insns. */ |
| { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T } |
| }; |
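| /* For example, ashl_lshr_seq[20] = { 3, { 16, 2, 2 }, 0 } expands a |
| shift by 20 into shll16/shll2/shll2 (or shlr16/shlr2/shlr2 for the |
| logical right shift). ashl_lshr_seq[5] ends in a one-bit step: the |
| right shift uses 'shlr' and clobbers T, while the left shift uses |
| 'add Rn,Rn' and leaves T intact, as the flags above record. */ |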
| |
| /* Shift sequences for shift amounts < 16 where up to three of the |
| highmost bits might be clobbered. This is typically used when combined |
| with some kind of sign or zero extension. */ |
| static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] = |
| { |
| { 0, { 0 }, 0 }, // 0 |
| { 1, { 1 }, LSHR_CLOBBERS_T }, |
| { 1, { 2 }, 0 }, |
| { 2, { 2, 1 }, LSHR_CLOBBERS_T }, |
| { 2, { 2, 2 }, 0 }, // 4 |
| { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 2, { 8, -2 }, 0 }, |
| { 2, { 8, -1 }, ASHL_CLOBBERS_T }, |
| { 1, { 8 }, 0 }, // 8 |
| { 2, { 8, 1 }, LSHR_CLOBBERS_T }, |
| { 2, { 8, 2 }, 0 }, |
| { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 3, { 8, 2, 2 }, 0 }, // 12 |
| { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T }, |
| { 2, { 16, -2 }, 0 }, |
| { 2, { 16, -1 }, ASHL_CLOBBERS_T }, |
| { 1, { 16 }, 0 }, // 16 |
| { 2, { 16, 1 }, LSHR_CLOBBERS_T }, |
| { 2, { 16, 2 }, 0 }, |
| { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 3, { 16, 2, 2 }, 0 }, // 20 |
| { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 3, { 16, -2, 8 }, 0 }, |
| { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T }, |
| { 2, { 16, 8 }, 0 }, // 24 |
| { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T }, |
| { 3, { 16, 8, 2 }, 0 }, |
| { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T }, |
| { 4, { 16, 8, 2, 2 }, 0 }, // 28 |
| { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T }, |
| { 3, { 16, -2, 16 }, 0 }, |
| { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T } |
| }; |
| |
| /* Return true if a shift left consisting of 1/2/8/16 shift instructions |
| will clobber the T bit. */ |
| bool |
| sh_ashlsi_clobbers_t_reg_p (rtx shift_amount) |
| { |
| gcc_assert (CONST_INT_P (shift_amount)); |
| |
| const int shift_amount_i = INTVAL (shift_amount) & 31; |
| |
| /* Special case for shift count of 31: use and-rotl sequence. */ |
| if (shift_amount_i == 31) |
| return true; |
| |
| return (ashl_lshr_seq[shift_amount_i].clobbers_t |
| & ASHL_CLOBBERS_T) != 0; |
| } |
| |
| /* Return true if a logical right shift consisting of 1/2/8/16 shift |
| instructions will clobber the T bit. */ |
| bool |
| sh_lshrsi_clobbers_t_reg_p (rtx shift_amount) |
| { |
| gcc_assert (CONST_INT_P (shift_amount)); |
| |
| /* For right shifts the constant might be negative. */ |
| const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31; |
| |
| /* Special case for shift count of 31: use shll-movt sequence. */ |
| if (shift_amount_i == 31) |
| return true; |
| |
| return (ashl_lshr_seq[shift_amount_i].clobbers_t |
| & LSHR_CLOBBERS_T) != 0; |
| } |
| |
| /* Return true if it is potentially beneficial to use a dynamic shift |
| instruction (shad / shar) instead of a combination of 1/2/8/16 |
| shift instructions for the specified shift count. |
| If dynamic shifts are not available, always return false. */ |
| bool |
| sh_dynamicalize_shift_p (rtx count) |
| { |
| gcc_assert (CONST_INT_P (count)); |
| |
| /* For right shifts the constant might be negative. */ |
| const int shift_amount_i = std::abs (INTVAL (count)) & 31; |
| int insn_count; |
| |
| /* For left and right shifts, there are shorter 2 insn sequences for |
| shift amounts of 31. */ |
| if (shift_amount_i == 31) |
| insn_count = 2; |
| else |
| insn_count = ashl_lshr_seq[shift_amount_i].insn_count; |
| |
| return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST); |
| } |
| |
| /* Assuming we have a value that has been sign-extended by at least one bit, |
| can we use the ext_ashl_lshr_seq sequences with the last shift turned to an |
| arithmetic shift to shift it by N without data loss, and quicker than by |
| other means? */ |
| #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15) |
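| /* I.e. N is 7 or 15: the ext_ashl_lshr_seq entries for these counts |
| are a left shift by 8 or 16 followed by a one-bit right shift, and |
| that final right shift can be made arithmetic at no extra cost. */ |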
| |
| /* Return the cost of a shift. */ |
| static inline int |
| shiftcosts (rtx x) |
| { |
| if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) |
| { |
| if (GET_MODE (x) == DImode |
| && CONST_INT_P (XEXP (x, 1)) |
| && INTVAL (XEXP (x, 1)) == 1) |
| return 2; |
| |
| /* Everything else is invalid, because there is no pattern for it. */ |
| return -1; |
| } |
| /* A shift by a non-constant amount will be expensive. */ |
| if (!CONST_INT_P (XEXP (x, 1))) |
| return SH_DYNAMIC_SHIFT_COST; |
| |
| /* Otherwise, return the true cost in instructions. Cope with out of range |
| shift counts more or less arbitrarily. */ |
| int value = INTVAL (XEXP (x, 1)) & 31; |
| |
| if (GET_CODE (x) == ASHIFTRT) |
| { |
| int cost = ashiftrt_insns[value]; |
| /* If dynamic shifts are available and profitable in this case, then we |
| put the constant in a reg and use shad. */ |
| if (cost > 1 + SH_DYNAMIC_SHIFT_COST) |
| cost = 1 + SH_DYNAMIC_SHIFT_COST; |
| return cost; |
| } |
| else |
| return ashl_lshr_seq[value].insn_count; |
| } |
| |
| /* Return the cost of an AND/XOR/IOR operation. */ |
| static inline int |
| and_xor_ior_costs (rtx x, int code) |
| { |
| /* On SH1-4 operations are at most SImode wide. |
| Double the cost for modes > SImode. */ |
| const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1; |
| |
| /* A logical operation with two registers is a single cycle |
| instruction. */ |
| if (!CONST_INT_P (XEXP (x, 1))) |
| return 1 * cost_scale; |
| |
| int i = INTVAL (XEXP (x, 1)); |
| |
| /* These constants are single cycle extu.[bw] instructions. */ |
| if ((i == 0xff || i == 0xffff) && code == AND) |
| return 1 * cost_scale; |
| /* Constants that can be used in an instruction as an immediate are |
| a single cycle, but this requires r0, so make it a little more |
| expensive. */ |
| if (CONST_OK_FOR_K08 (i)) |
| return 2 * cost_scale; |
| /* Constants that can be loaded with a mov immediate need one more cycle. |
| This case is probably unnecessary. */ |
| if (CONST_OK_FOR_I08 (i)) |
| return 2 * cost_scale; |
| /* Any other constant requires an additional 2 cycle pc-relative load. |
| This case is probably unnecessary. */ |
| return 3 * cost_scale; |
| } |
| |
| /* Return the cost of an addition or a subtraction. */ |
| static inline int |
| addsubcosts (rtx x) |
| { |
| if (GET_MODE (x) == SImode) |
| { |
| /* The addc or subc patterns will eventually become one or two |
| instructions. Below are some costs for some of the patterns |
| which combine would reject because the costs of the individual |
| insns in the patterns are lower. |
| |
| FIXME: It would be much easier if we had something like insn cost |
| attributes and the cost calculation machinery used those attributes |
| in the first place. This would eliminate redundant recog-like C |
| code to calculate costs of complex patterns. */ |
| rtx op0 = XEXP (x, 0); |
| rtx op1 = XEXP (x, 1); |
| |
| if (GET_CODE (x) == PLUS) |
| { |
| if (GET_CODE (op0) == AND |
| && XEXP (op0, 1) == const1_rtx |
| && (GET_CODE (op1) == PLUS |
| || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx))) |
| return 1; |
| |
| if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx |
| && GET_CODE (op1) == LSHIFTRT |
| && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31) |
| return 1; |
| } |
| /* Let's assume that adding the result of an insn that stores into |
| the T bit is cheap. */ |
| if (treg_set_expr (op1, SImode)) |
| return 1; |
| if (treg_set_expr (op0, SImode)) |
| return 1; |
| } |
| |
| /* On SH1-4 operations are at most SImode wide. |
| Double the cost for modes > SImode. */ |
| const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1; |
| |
| /* Adding a register is a single cycle insn. */ |
| if (REG_P (XEXP (x, 1)) |
| || GET_CODE (XEXP (x, 1)) == SUBREG) |
| return 1 * cost_scale; |
| |
| /* Likewise for small constants. */ |
| if (CONST_INT_P (XEXP (x, 1)) |
| && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1)))) |
| return 1 * cost_scale; |
| |
| /* Any other constant requires a 2 cycle pc-relative load plus an |
| addition. */ |
| return 3 * cost_scale; |
| } |
| |
| /* Return the cost of a multiply. */ |
| static inline int |
| multcosts (rtx x ATTRIBUTE_UNUSED) |
| { |
| if (sh_multcost >= 0) |
| return sh_multcost; |
| |
| if (TARGET_SH2) |
| { |
| /* We have a mul insn, so we can never take more than the mul and the |
| read of the mac reg, but count more because of the latency and extra |
| reg usage. */ |
| if (optimize_size) |
| return 2; |
| return 3; |
| } |
| |
| /* If we're aiming at small code, then just count the number of |
| insns in a multiply call sequence. */ |
| if (optimize_size) |
| return 5; |
| |
| /* Otherwise count all the insns in the routine we'd be calling too. */ |
| return 20; |
| } |
| |
| /* Compute a (partial) cost for rtx X. Return true if the complete |
| cost has been computed, and false if subexpressions should be |
| scanned. In either case, *TOTAL contains the cost result. */ |
| static bool |
| sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code, |
| int opno ATTRIBUTE_UNUSED, |
| int *total, bool speed ATTRIBUTE_UNUSED) |
| { |
| int code = GET_CODE (x); |
| |
| switch (code) |
| { |
| /* The lower-subreg pass decides whether to split multi-word regs |
| into individual regs by looking at the cost for a SET of certain |
| modes with the following patterns: |
| (set (reg) (reg)) |
| (set (reg) (const_int 0)) |
| On machines that support vector-move operations a multi-word move |
| is the same cost as individual reg move. On SH there is no |
| vector-move, so we have to provide the correct cost in the number |
| of move insns to load/store the reg of the mode in question. */ |
| case SET: |
| if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL) |
| { |
| *total = COSTS_N_INSNS (1); |
| return true; |
| } |
| |
| if (register_operand (SET_DEST (x), VOIDmode) |
| && (register_operand (SET_SRC (x), VOIDmode) |
| || satisfies_constraint_Z (SET_SRC (x)))) |
| { |
| const machine_mode mode = GET_MODE (SET_DEST (x)); |
| *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) |
| / mov_insn_size (mode, TARGET_SH2A)); |
| return true; |
| } |
| return false; |
| |
| /* The cost of a mem access is mainly the cost of the address mode. */ |
| case MEM: |
| *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x), |
| true); |
| return true; |
| |
| case IF_THEN_ELSE: |
| /* This case is required for the if_then_else negc pattern. */ |
| if (treg_set_expr (XEXP (x, 0), SImode)) |
| { |
| *total = COSTS_N_INSNS (1); |
| return true; |
| } |
| else |
| return false; |
| |
| /* Zero extracts of single bits are usually combine patterns for the |
| tst insns. */ |
| case ZERO_EXTRACT: |
| if (GET_CODE (XEXP (x, 0)) == XOR |
| && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode) |
| && XEXP (x, 1) == const1_rtx |
| && CONST_INT_P (XEXP (x, 2)) |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
| /* Check that the XOR constant overlaps with the extracted bit. */ |
| && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2))))) |
| { |
| *total = 1; //COSTS_N_INSNS (1); |
| return true; |
| } |
| |
| /* div0s variant. */ |
| if (GET_CODE (XEXP (x, 0)) == XOR |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1))) |
| { |
| *total = 1; |
| return true; |
| } |
| return false; |
| |
| /* The cost of a sign or zero extend depends on whether the source is a |
| reg or a mem. In case of a mem take the address into account. */ |
| case SIGN_EXTEND: |
| if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) |
| { |
| *total = COSTS_N_INSNS (1); |
| return true; |
| } |
| if (MEM_P (XEXP (x, 0))) |
| { |
| *total = sh_address_cost (XEXP (XEXP (x, 0), 0), |
| GET_MODE (XEXP (x, 0)), |
| MEM_ADDR_SPACE (XEXP (x, 0)), true); |
| return true; |
| } |
| return false; |
| |
| case ZERO_EXTEND: |
| if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0)))) |
| { |
| *total = COSTS_N_INSNS (1); |
| return true; |
| } |
| else if (TARGET_SH2A && MEM_P (XEXP (x, 0)) |
| && (GET_MODE (XEXP (x, 0)) == QImode |
| || GET_MODE (XEXP (x, 0)) == HImode)) |
| { |
| /* Handle SH2A's movu.b and movu.w insn. */ |
| *total = sh_address_cost (XEXP (XEXP (x, 0), 0), |
| GET_MODE (XEXP (x, 0)), |
| MEM_ADDR_SPACE (XEXP (x, 0)), true); |
| return true; |
| } |
| return false; |
| |
| /* mems for SFmode and DFmode can be inside a parallel due to |
| the way the fpscr is handled. */ |
| case PARALLEL: |
| for (int i = 0; i < XVECLEN (x, 0); i++) |
| { |
| rtx xx = XVECEXP (x, 0, i); |
| if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0))) |
| { |
| *total = sh_address_cost (XEXP (XEXP (xx, 0), 0), |
| GET_MODE (XEXP (xx, 0)), |
| MEM_ADDR_SPACE (XEXP (xx, 0)), true); |
| return true; |
| } |
| if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1))) |
| { |
| *total = sh_address_cost (XEXP (XEXP (xx, 1), 0), |
| GET_MODE (XEXP (xx, 1)), |
| MEM_ADDR_SPACE (XEXP (xx, 1)), true); |
| return true; |
| } |
| } |
| |
| if (sh_1el_vec (x, VOIDmode)) |
| *total = outer_code != SET; |
| else if (sh_rep_vec (x, VOIDmode)) |
| *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4 |
| + (outer_code != SET)); |
| else |
| *total = COSTS_N_INSNS (3) + (outer_code != SET); |
| return true; |
| |
| case CONST_INT: |
| if (CONST_OK_FOR_I08 (INTVAL (x))) |
| *total = 0; |
| else if ((outer_code == AND || outer_code == IOR || outer_code == XOR) |
| && CONST_OK_FOR_K08 (INTVAL (x))) |
| *total = 1; |
| /* prepare_cmp_insn will force costly constants into registers before |
| the cbranch[sd]i4 patterns can see them, so preserve potentially |
| interesting ones not covered by I08 above. */ |
| else if (outer_code == COMPARE |
| && ((unsigned HOST_WIDE_INT) INTVAL (x) |
| == (unsigned HOST_WIDE_INT) 0x7fffffff + 1 |
| || INTVAL (x) == 0x7fffffff |
| || INTVAL (x) == 0x80 || INTVAL (x) == -0x81)) |
| *total = 1; |
| else |
| *total = 8; |
| return true; |
| |
| case EQ: |
| /* An and with a constant compared against zero is |
| most likely going to be a TST #imm, R0 instruction. */ |
| if (XEXP (x, 1) == const0_rtx |
| && ((GET_CODE (XEXP (x, 0)) == AND |
| || (SUBREG_P (XEXP (x, 0)) |
| && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND)) |
| || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT)) |
| { |
| *total = 1; |
| return true; |
| } |
| |
| else if (XEXP (x, 1) == const0_rtx |
| && GET_CODE (XEXP (x, 0)) == AND |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT |
| && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode) |
| && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))) |
| { |
| *total = 1; |
| return true; |
| } |
| else |
| return false; |
| |
| case SMIN: |
| case SMAX: |
| /* This is most likely a clips.b or clips.w insn that is being made up |
| by combine. */ |
| if (TARGET_SH2A |
| && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN) |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
| && REG_P (XEXP (XEXP (x, 0), 0)) |
| && CONST_INT_P (XEXP (x, 1))) |
| { |
| *total = COSTS_N_INSNS (1); |
| return true; |
| } |
| else |
| return false; |
| |
| case CONST: |
| case LABEL_REF: |
| case SYMBOL_REF: |
| *total = 5; |
| return true; |
| |
| case CONST_DOUBLE: |
| /* prepare_cmp_insn will force costly constants into registers before |
| the cbranchdi4 pattern can see them, so preserve potentially |
| interesting ones. */ |
| if (outer_code == COMPARE && GET_MODE (x) == DImode) |
| *total = 1; |
| else |
| *total = 10; |
| return true; |
| |
| case CONST_VECTOR: |
| /* FIXME: This looks broken. Only the last statement has any effect. |
| Probably this could be folded with the PARALLEL case? */ |
| if (x == CONST0_RTX (GET_MODE (x))) |
| *total = 0; |
| else if (sh_1el_vec (x, VOIDmode)) |
| *total = outer_code != SET; |
| if (sh_rep_vec (x, VOIDmode)) |
| *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4 |
| + (outer_code != SET)); |
| *total = COSTS_N_INSNS (3) + (outer_code != SET); |
| return true; |
| |
| case PLUS: |
| case MINUS: |
| *total = COSTS_N_INSNS (addsubcosts (x)); |
| return true; |
| |
| case AND: |
| /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */ |
| if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx) |
| { |
| *total = COSTS_N_INSNS (1); |
| return true; |
| } |
| /* Fall through. */ |
| |
| case XOR: |
| case IOR: |
| *total = COSTS_N_INSNS (and_xor_ior_costs (x, code)); |
| return true; |
| |
| case MULT: |
| *total = COSTS_N_INSNS (multcosts (x)); |
| return true; |
| |
| case LT: |
| case GE: |
| /* div0s sign comparison. */ |
| if (GET_CODE (XEXP (x, 0)) == XOR |
| && REG_P ((XEXP (XEXP (x, 0), 0))) |
| && REG_P ((XEXP (XEXP (x, 0), 1))) |
| && satisfies_constraint_Z (XEXP (x, 1))) |
| { |
| *total = COSTS_N_INSNS (1); |
| return true; |
| } |
| else |
| return false; |
| |
| case LSHIFTRT: |
| /* div0s sign comparison. */ |
| if (GET_CODE (XEXP (x, 0)) == XOR |
| && REG_P ((XEXP (XEXP (x, 0), 0))) |
| && REG_P ((XEXP (XEXP (x, 0), 1))) |
| && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31) |
| { |
| *total = COSTS_N_INSNS (1); |
| return true; |
| } |
| /* FALLTHRU */ |
| case ASHIFT: |
| case ASHIFTRT: |
| { |
| int cost = shiftcosts (x); |
| if (cost < 0) |
| return false; |
| *total = COSTS_N_INSNS (cost); |
| return true; |
| } |
| |
| case DIV: |
| case UDIV: |
| case MOD: |
| case UMOD: |
| *total = COSTS_N_INSNS (20); |
| return true; |
| |
| case FLOAT: |
| case FIX: |
| *total = 100; |
| return true; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* Determine the size of the fundamental move insn that will be used |
| for the specified mode. */ |
| static inline int |
| mov_insn_size (machine_mode mode, bool consider_sh2a) |
| { |
| const int mode_sz = GET_MODE_SIZE (mode); |
| |
| if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode) |
| || (TARGET_FMOVD && mode == DFmode)) |
| return mode_sz; |
| else |
| { |
| /* The max. available mode for actual move insns is SImode. |
| Larger accesses will be split into multiple loads/stores. */ |
| const int max_mov_sz = GET_MODE_SIZE (SImode); |
| return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz; |
| } |
| } |
| |
| /* Determine the maximum possible displacement for a move insn for the |
| specified mode. */ |
| int |
| sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a) |
| { |
| /* The 4 byte displacement move insns are the same as the 2 byte |
| versions but take a 12 bit displacement. All we need to do is to |
| scale the max. displacement value accordingly. */ |
| const int disp_scale = consider_sh2a ? (4095 / 15) : 1; |
| |
| /* SH2A supports FPU move insns with 12 bit displacements. |
| Other variants do not support any kind of displacements for |
| FPU move insns. */ |
| if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT) |
| return 0; |
| else |
| { |
| const int mov_insn_sz = mov_insn_size (mode, consider_sh2a); |
| const int mode_sz = GET_MODE_SIZE (mode); |
| int r = 15 * mov_insn_sz * disp_scale; |
| |
| /* If the mov insn will be split into multiple loads/stores, the |
| maximum possible displacement is a bit smaller. */ |
| if (mode_sz > mov_insn_sz) |
| r -= mode_sz - mov_insn_sz; |
| return r; |
| } |
| } |
| |
| /* Determine the alignment mask for a move insn of the |
| specified mode. */ |
| static inline int |
| mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a) |
| { |
| const int mov_insn_sz = mov_insn_size (mode, consider_sh2a); |
| return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0; |
| } |
| |
| /* Return the displacement value of a displacement address. */ |
| HOST_WIDE_INT |
| sh_disp_addr_displacement (rtx x) |
| { |
| gcc_assert (satisfies_constraint_Sdd (x)); |
| return INTVAL (XEXP (XEXP (x, 0), 1)); |
| } |
| |
| /* Compute the cost of an address. */ |
| static int |
| sh_address_cost (rtx x, machine_mode mode, |
| addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) |
| { |
| /* 'GBR + 0'. Account one more because of R0 restriction. */ |
| if (REG_P (x) && REGNO (x) == GBR_REG) |
| return 2; |
| |
| /* Simple reg, post-inc, pre-dec addressing. */ |
| if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC) |
| return 1; |
| |
| /* 'reg + disp' addressing. */ |
| if (GET_CODE (x) == PLUS |
| && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))) |
| { |
| /* 'GBR + disp'. Account one more because of R0 restriction. */ |
| if (REGNO (XEXP (x, 0)) == GBR_REG |
| && gbr_displacement (XEXP (x, 1), mode)) |
| return 2; |
| |
| const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1)); |
| |
| if (offset == 0) |
| return 1; |
| |
| /* The displacement would fit into a 2 byte move insn. |
| HImode and QImode loads/stores with displacement put pressure on |
| R0 which will most likely require another reg copy. Thus account |
| a higher cost for that. */ |
| if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false)) |
| return (mode == HImode || mode == QImode) ? 2 : 1; |
| |
| /* The displacement would fit into a 4 byte move insn (SH2A). */ |
| if (TARGET_SH2A |
| && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true)) |
| return 2; |
| |
| /* The displacement is probably out of range and will require extra |
| calculations. */ |
| return 3; |
| } |
| |
| /* 'reg + reg' addressing. Account a slightly higher cost because of |
| increased pressure on R0. */ |
| if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1))) |
| return 3; |
| |
| /* Not sure what it is - probably expensive. */ |
| return 10; |
| } |
| |
| /* Code to expand a shift. */ |
| static void |
| gen_ashift (int type, int n, rtx reg) |
| { |
| rtx n_rtx; |
| |
| /* Negative values here come from the shift_amounts array. */ |
| if (n < 0) |
| { |
| if (type == ASHIFT) |
| type = LSHIFTRT; |
| else |
| type = ASHIFT; |
| n = -n; |
| } |
| |
| n_rtx = GEN_INT (n); |
| gcc_assert (satisfies_constraint_P27 (n_rtx)); |
| |
| switch (type) |
| { |
| case ASHIFTRT: |
| emit_insn (gen_ashrsi3_k (reg, reg, n_rtx)); |
| break; |
| case LSHIFTRT: |
| if (n == 1) |
| emit_insn (gen_shlr (reg, reg)); |
| else |
| emit_insn (gen_lshrsi3_k (reg, reg, n_rtx)); |
| break; |
| case ASHIFT: |
| emit_insn (gen_ashlsi3_k (reg, reg, n_rtx)); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Code to expand a HImode shift. */ |
| static void |
| gen_ashift_hi (int type, int n, rtx reg) |
| { |
| /* Negative values here come from the shift_amounts array. */ |
| if (n < 0) |
| { |
| if (type == ASHIFT) |
| type = LSHIFTRT; |
| else |
| type = ASHIFT; |
| n = -n; |
| } |
| |
| switch (type) |
| { |
| case ASHIFTRT: |
| case LSHIFTRT: |
| /* We don't have HImode right shift operations because using the |
| ordinary 32 bit shift instructions for that doesn't generate proper |
| zero/sign extension. |
| gen_ashift_hi is only called in contexts where we know that the |
| sign extension works out correctly. */ |
| { |
| int offset = 0; |
| if (GET_CODE (reg) == SUBREG) |
| { |
| offset = SUBREG_BYTE (reg); |
| reg = SUBREG_REG (reg); |
| } |
| gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset)); |
| break; |
| } |
| case ASHIFT: |
| emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n))); |
| break; |
| } |
| } |
| |
| /* Output RTL to split a constant shift into its component SH constant |
| shift instructions. */ |
| void |
| gen_shifty_op (int code, rtx *operands) |
| { |
| int value = INTVAL (operands[2]); |
| int max, i; |
| |
| /* Truncate the shift count in case it is out of bounds. */ |
| value = value & 31; |
| |
| if (value == 31) |
| { |
| if (code == LSHIFTRT) |
| { |
| emit_insn (gen_rotlsi3_1 (operands[0], operands[0])); |
| emit_insn (gen_movt (operands[0], get_t_reg_rtx ())); |
| return; |
| } |
| else if (code == ASHIFT) |
| { |
| /* There is a two instruction sequence for 31 bit left shifts, |
| but it requires r0. */ |
| if (REG_P (operands[0]) && REGNO (operands[0]) == 0) |
| { |
| emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx)); |
| emit_insn (gen_rotlsi3_31 (operands[0], operands[0])); |
| return; |
| } |
| } |
| } |
| else if (value == 0) |
| { |
| /* This can happen even when optimizing, if there were subregs before |
| reload. Don't output a nop here, as this is never optimized away; |
| use a no-op move instead. */ |
| emit_insn (gen_rtx_SET (operands[0], operands[0])); |
| return; |
| } |
| |
| max = ashl_lshr_seq[value].insn_count; |
| for (i = 0; i < max; i++) |
| gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]); |
| } |
| |
| /* Same as gen_shifty_op, but optimized for values where the topmost bits |
| don't matter. */ |
| void |
| gen_shifty_hi_op (int code, rtx *operands) |
| { |
| int value = INTVAL (operands[2]); |
| int max, i; |
| void (*gen_fun) (int, int, rtx); |
| |
| /* This operation is used by and_shl for SImode values with a few |
| high bits known to be cleared. */ |
| value &= 31; |
| if (value == 0) |
| { |
| emit_insn (gen_nop ()); |
| return; |
| } |
| |
| gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift; |
| if (code == ASHIFT) |
| { |
| max = ext_ashl_lshr_seq[value].insn_count; |
| for (i = 0; i < max; i++) |
| gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]); |
| } |
| else |
| /* When shifting right, emit the shifts in reverse order, so that |
| solitary negative values come first. */ |
| for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--) |
| gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]); |
| } |
| |
| /* Output RTL for an arithmetic right shift. |
| ??? Rewrite to use super-optimizer sequences. */ |
| bool |
| expand_ashiftrt (rtx *operands) |
| { |
| rtx wrk; |
| char func[18]; |
| int value; |
| |
| if (TARGET_DYNSHIFT) |
| { |
| if (!CONST_INT_P (operands[2])) |
| { |
| rtx count = copy_to_mode_reg (SImode, operands[2]); |
| emit_insn (gen_negsi2 (count, count)); |
| emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); |
| return true; |
| } |
| else if (ashiftrt_insns[INTVAL (operands[2]) & 31] |
| > 1 + SH_DYNAMIC_SHIFT_COST) |
| { |
| rtx count |
| = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31))); |
| emit_insn (gen_ashrsi3_d (operands[0], operands[1], count)); |
| return true; |
| } |
| } |
| if (!CONST_INT_P (operands[2])) |
| return false; |
| |
| value = INTVAL (operands[2]) & 31; |
| |
| if (value == 31) |
| { |
| /* If we are called from abs expansion, arrange things so that we |
| can use a single MT instruction that doesn't clobber the source, |
| if LICM can hoist out the load of the constant zero. */ |
| if (currently_expanding_to_rtl) |
| { |
| emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)), |
| operands[1])); |
| emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ())); |
| return true; |
| } |
| emit_insn (gen_ashrsi2_31 (operands[0], operands[1])); |
| return true; |
| } |
| else if (value >= 16 && value <= 19) |
| { |
| wrk = gen_reg_rtx (SImode); |
| emit_insn (gen_ashrsi2_16 (wrk, operands[1])); |
| value -= 16; |
| while (value--) |
| gen_ashift (ASHIFTRT, 1, wrk); |
| emit_move_insn (operands[0], wrk); |
| return true; |
| } |
| /* Expand a short sequence inline; for a longer one, call a magic routine. */ |
| else if (value <= 5) |
| { |
| wrk = gen_reg_rtx (SImode); |
| emit_move_insn (wrk, operands[1]); |
| while (value--) |
| gen_ashift (ASHIFTRT, 1, wrk); |
| emit_move_insn (operands[0], wrk); |
| return true; |
| } |
| |
| wrk = gen_reg_rtx (Pmode); |
| |
| /* Load the value into an arg reg and call a helper. */ |
| emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]); |
| sprintf (func, "__ashiftrt_r4_%d", value); |
| rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab; |
| emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab)); |
| emit_move_insn (operands[0], gen_rtx_REG (SImode, 4)); |
| return true; |
| } |
| |
| /* Try to find a good way to implement the combiner pattern |
| [(set (match_operand:SI 0 "register_operand" "r") |
| (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") |
| (match_operand:SI 2 "const_int_operand" "n")) |
| (match_operand:SI 3 "const_int_operand" "n")))]. |
| LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3. |
| return 0 for simple right / left or left/right shift combination. |
| return 1 for a combination of shifts with zero_extend. |
| return 2 for a combination of shifts with an AND that needs r0. |
| return 3 for a combination of shifts with an AND that needs an extra |
| scratch register, when the three highmost bits of the AND mask are clear. |
| return 4 for a combination of shifts with an AND that needs an extra |
| scratch register, when any of the three highmost bits of the AND mask |
| is set. |
| If ATTRP is set, store an initial right shift width in ATTRP[0], |
| and the instruction length in ATTRP[1] . These values are not valid |
| when returning 0. |
| When ATTRP is set and returning 1, ATTRP[2] gets set to the index into |
| shift_amounts for the last shift value that is to be used before the |
| sign extend. */ |
| int |
| shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp) |
| { |
| unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2; |
| int left = INTVAL (left_rtx), right; |
| int best = 0; |
| int cost, best_cost = 10000; |
| int best_right = 0, best_len = 0; |
| int i; |
| int can_ext; |
| |
| if (left < 0 || left > 31) |
| return 0; |
| if (CONST_INT_P (mask_rtx)) |
| mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left; |
| else |
| mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left; |
| /* Can this be expressed as a right shift / left shift pair? */ |
| lsb = ((mask ^ (mask - 1)) >> 1) + 1; |
| right = exact_log2 (lsb); |
| mask2 = ~(mask + lsb - 1); |
| lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1; |
| /* mask has no zeroes but trailing zeroes <==> ! mask2 */ |
| if (! mask2) |
| best_cost = ashl_lshr_seq[right].insn_count |
| + ashl_lshr_seq[right + left].insn_count; |
| /* mask has no trailing zeroes <==> ! right */ |
| else if (! right && mask2 == ~(lsb2 - 1)) |
| { |
| int late_right = exact_log2 (lsb2); |
| best_cost = ashl_lshr_seq[left + late_right].insn_count |
| + ashl_lshr_seq[late_right].insn_count; |
| } |
| /* Try to use zero extend. */ |
| if (mask2 == ~(lsb2 - 1)) |
| { |
| int width, first; |
| |
| for (width = 8; width <= 16; width += 8) |
| { |
| /* Can we zero-extend right away? */ |
| if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width) |
| { |
| cost = 1 + ext_ashl_lshr_seq[right].insn_count |
| + ext_ashl_lshr_seq[left + right].insn_count; |
| if (cost < best_cost) |
| { |
| best = 1; |
| best_cost = cost; |
| best_right = right; |
| best_len = cost; |
| if (attrp) |
| attrp[2] = -1; |
| } |
| continue; |
| } |
| /* ??? Could try to put zero extend into initial right shift, |
| or even shift a bit left before the right shift. */ |
| /* Determine value of first part of left shift, to get to the |
| zero extend cut-off point. */ |
| first = width - exact_log2 (lsb2) + right; |
| if (first >= 0 && right + left - first >= 0) |
| { |
| cost = ext_ashl_lshr_seq[right].insn_count |
| + ext_ashl_lshr_seq[first].insn_count + 1 |
| + ext_ashl_lshr_seq[right + left - first].insn_count; |
| |
| if (cost < best_cost) |
| { |
| best = 1; |
| best_cost = cost; |
| best_right = right; |
| best_len = cost; |
| if (attrp) |
| attrp[2] = first; |
| } |
| } |
| } |
| } |
| /* Try to use the r0 AND pattern. */ |
| for (i = 0; i <= 2; i++) |
| { |
| if (i > right) |
| break; |
| if (! CONST_OK_FOR_K08 (mask >> i)) |
| continue; |
| cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count; |
| if (cost < best_cost) |
| { |
| best = 2; |
| best_cost = cost; |
| best_right = i; |
| best_len = cost - 1; |
| } |
| } |
| /* Try to use a scratch register to hold the AND operand. */ |
| can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0; |
| for (i = 0; i <= 2; i++) |
| { |
| if (i > right) |
| break; |
| cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3) |
| + (can_ext |
| ? ext_ashl_lshr_seq |
| : ashl_lshr_seq)[left + i].insn_count; |
| if (cost < best_cost) |
| { |
| best = 4 - can_ext; |
| best_cost = cost; |
| best_right = i; |
| best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i); |
| } |
| } |
| |
| if (attrp) |
| { |
| attrp[0] = best_right; |
| attrp[1] = best_len; |
| } |
| return best; |
| } |
| |
| /* This is used in length attributes of the unnamed instructions |
| corresponding to shl_and_kind return values of 1 and 2. */ |
| int |
| shl_and_length (rtx insn) |
| { |
| rtx set_src, left_rtx, mask_rtx; |
| int attributes[3]; |
| |
| set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); |
| left_rtx = XEXP (XEXP (set_src, 0), 1); |
| mask_rtx = XEXP (set_src, 1); |
| shl_and_kind (left_rtx, mask_rtx, attributes); |
| return attributes[1]; |
| } |
| |
| /* This is used in length attribute of the and_shl_scratch instruction. */ |
| int |
| shl_and_scr_length (rtx insn) |
| { |
| rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); |
| int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count; |
| rtx op = XEXP (set_src, 0); |
| len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1; |
| op = XEXP (XEXP (op, 0), 0); |
| return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count; |
| } |
| |
| /* Generate rtl for instructions for which shl_and_kind advised a particular |
| method of generating them, i.e. returned nonzero. */ |
| bool |
| gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source) |
| { |
| int attributes[3]; |
| unsigned HOST_WIDE_INT mask; |
| int kind = shl_and_kind (left_rtx, mask_rtx, attributes); |
| int right, total_shift; |
| void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op; |
| |
| right = attributes[0]; |
| total_shift = INTVAL (left_rtx) + right; |
| mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift; |
| switch (kind) |
| { |
| default: |
| return true; |
| case 1: |
| { |
| int first = attributes[2]; |
| rtx operands[3]; |
| |
| if (first < 0) |
| { |
| emit_insn ((mask << right) <= 0xff |
| ? gen_zero_extendqisi2 (dest, |
| gen_lowpart (QImode, source)) |
| : gen_zero_extendhisi2 (dest, |
| gen_lowpart (HImode, source))); |
| source = dest; |
| } |
| if (source != dest) |
| emit_insn (gen_movsi (dest, source)); |
| operands[0] = dest; |
| if (right) |
| { |
| operands[2] = GEN_INT (right); |
| gen_shifty_hi_op (LSHIFTRT, operands); |
| } |
| if (first > 0) |
| { |
| operands[2] = GEN_INT (first); |
| gen_shifty_hi_op (ASHIFT, operands); |
| total_shift -= first; |
| mask <<= first; |
| } |
| if (first >= 0) |
| emit_insn (mask <= 0xff |
| ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest)) |
| : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest))); |
| if (total_shift > 0) |
| { |
| operands[2] = GEN_INT (total_shift); |
| gen_shifty_hi_op (ASHIFT, operands); |
| } |
| break; |
| } |
| case 4: |
| shift_gen_fun = gen_shifty_op; |
| /* FALLTHRU */ |
| case 3: |
| /* If the topmost bit that matters is set, set the topmost bits |
| that don't matter. This way, we might be able to get a shorter |
| signed constant. */ |
| if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift))) |
| mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift)); |
| /* FALLTHRU */ |
| case 2: |
| /* Don't expand fine-grained when combining, because that will |
| make the pattern fail. */ |
| if (currently_expanding_to_rtl |
| || reload_in_progress || reload_completed) |
| { |
| rtx operands[3]; |
| |
| /* Cases 3 and 4 should be handled by this split |
| only while combining. */ |
| gcc_assert (kind <= 2); |
| if (right) |
| { |
| emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right))); |
| source = dest; |
| } |
| emit_insn (gen_andsi3 (dest, source, GEN_INT (mask))); |
| if (total_shift) |
| { |
| operands[0] = dest; |
| operands[1] = dest; |
| operands[2] = GEN_INT (total_shift); |
| shift_gen_fun (ASHIFT, operands); |
| } |
| break; |
| } |
| else |
| { |
| int neg = 0; |
| if (kind != 4 && total_shift < 16) |
| { |
| neg = -ext_ashl_lshr_seq[total_shift].amount[1]; |
| if (neg > 0) |
| neg -= ext_ashl_lshr_seq[total_shift].amount[2]; |
| else |
| neg = 0; |
| } |
| emit_insn (gen_and_shl_scratch (dest, source, |
| GEN_INT (right), |
| GEN_INT (mask), |
| GEN_INT (total_shift + neg), |
| GEN_INT (neg))); |
| emit_insn (gen_movsi (dest, dest)); |
| break; |
| } |
| } |
| return false; |
| } |
| |
| /* Try to find a good way to implement the combiner pattern |
| [(set (match_operand:SI 0 "register_operand" "=r") |
| (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r") |
| (match_operand:SI 2 "const_int_operand" "n") |
| (match_operand:SI 3 "const_int_operand" "n") |
| (const_int 0))) |
| (clobber (reg:SI T_REG))] |
| LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3. |
| return 0 for simple left / right shift combination. |
| return 1 for left shift / 8 bit sign extend / left shift. |
| return 2 for left shift / 16 bit sign extend / left shift. |
| return 3 for left shift / 8 bit sign extend / shift / sign extend. |
| return 4 for left shift / 16 bit sign extend / shift / sign extend. |
| return 5 for left shift / 16 bit sign extend / right shift |
| return 6 for < 8 bit sign extend / left shift. |
| return 7 for < 8 bit sign extend / left shift / single right shift. |
| If COSTP is nonzero, assign the calculated cost to *COSTP. */ |
| int |
| shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp) |
| { |
| int left, size, insize, ext; |
| int cost = 0, best_cost; |
| int kind; |
| |
| left = INTVAL (left_rtx); |
| size = INTVAL (size_rtx); |
| insize = size - left; |
| gcc_assert (insize > 0); |
| /* Default to left / right shift. */ |
| kind = 0; |
| best_cost = ashl_lshr_seq[32 - insize].insn_count |
| + ashl_lshr_seq[32 - size].insn_count; |
| if (size <= 16) |
| { |
| /* 16 bit shift / sign extend / 16 bit shift */ |
| cost = ashl_lshr_seq[16 - insize].insn_count + 1 |
| + ashl_lshr_seq[16 - size].insn_count; |
| /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden |
| below, by alternative 3 or something even better. */ |
| if (cost < best_cost) |
| { |
| kind = 5; |
| best_cost = cost; |
| } |
| } |
| /* Try a plain sign extend between two shifts. */ |
| for (ext = 16; ext >= insize; ext -= 8) |
| { |
| if (ext <= size) |
| { |
| cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1 |
| + ashl_lshr_seq[size - ext].insn_count; |
| if (cost < best_cost) |
| { |
| kind = ext / (unsigned) 8; |
| best_cost = cost; |
| } |
| } |
| /* Check if we can do a sloppy shift with a final signed shift |
| restoring the sign. */ |
| if (EXT_SHIFT_SIGNED (size - ext)) |
| cost = ext_ashl_lshr_seq[ext - insize].insn_count |
| + ext_ashl_lshr_seq[size - ext].insn_count + 1; |
| /* If not, maybe it's still cheaper to do the second shift sloppy, |
| and do a final sign extend? */ |
| else if (size <= 16) |
| cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1 |
| + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count |
| + 1; |
| else |
| continue; |
| if (cost < best_cost) |
| { |
| kind = ext / (unsigned) 8 + 2; |
| best_cost = cost; |
| } |
| } |
| /* Check if we can sign extend in r0. */ |
| if (insize < 8) |
| { |
| cost = 3 + ashl_lshr_seq[left].insn_count; |
| if (cost < best_cost) |
| { |
| kind = 6; |
| best_cost = cost; |
| } |
| /* Try the same with a final signed shift. */ |
| if (left < 31) |
| { |
| cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1; |
| if (cost < best_cost) |
| { |
| kind = 7; |
| best_cost = cost; |
| } |
| } |
| } |
| if (TARGET_DYNSHIFT) |
| { |
| /* Try to use a dynamic shift. */ |
| cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST; |
| if (cost < best_cost) |
| { |
| kind = 0; |
| best_cost = cost; |
| } |
| } |
| if (costp) |
| *costp = cost; |
| return kind; |
| } |
| |
| /* Function to be used in the length attribute of the instructions |
| implementing this pattern. */ |
| int |
| shl_sext_length (rtx insn) |
| { |
| rtx set_src, left_rtx, size_rtx; |
| int cost; |
| |
| set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); |
| left_rtx = XEXP (XEXP (set_src, 0), 1); |
| size_rtx = XEXP (set_src, 1); |
| shl_sext_kind (left_rtx, size_rtx, &cost); |
| return cost; |
| } |
| |
| /* Generate rtl for this pattern */ |
| bool |
| gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source) |
| { |
| int kind; |
| int left, size, insize, cost; |
| rtx operands[3]; |
| |
| kind = shl_sext_kind (left_rtx, size_rtx, &cost); |
| left = INTVAL (left_rtx); |
| size = INTVAL (size_rtx); |
| insize = size - left; |
| switch (kind) |
| { |
| case 1: |
| case 2: |
| case 3: |
| case 4: |
| { |
| int ext = kind & 1 ? 8 : 16; |
| int shift2 = size - ext; |
| |
| /* Don't expand fine-grained when combining, because that will |
| make the pattern fail. */ |
| if (! currently_expanding_to_rtl |
| && ! reload_in_progress && ! reload_completed) |
| { |
| emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); |
| emit_insn (gen_movsi (dest, source)); |
| break; |
| } |
| if (dest != source) |
| emit_insn (gen_movsi (dest, source)); |
| operands[0] = dest; |
| if (ext - insize) |
| { |
| operands[2] = GEN_INT (ext - insize); |
| gen_shifty_hi_op (ASHIFT, operands); |
| } |
| emit_insn (kind & 1 |
| ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) |
| : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); |
| if (kind <= 2) |
| { |
| if (shift2) |
| { |
| operands[2] = GEN_INT (shift2); |
| gen_shifty_op (ASHIFT, operands); |
| } |
| } |
| else |
| { |
| if (shift2 > 0) |
| { |
| if (EXT_SHIFT_SIGNED (shift2)) |
| { |
| operands[2] = GEN_INT (shift2 + 1); |
| gen_shifty_op (ASHIFT, operands); |
| operands[2] = const1_rtx; |
| gen_shifty_op (ASHIFTRT, operands); |
| break; |
| } |
| operands[2] = GEN_INT (shift2); |
| gen_shifty_hi_op (ASHIFT, operands); |
| } |
| else if (shift2) |
| { |
| operands[2] = GEN_INT (-shift2); |
| gen_shifty_hi_op (LSHIFTRT, operands); |
| } |
| emit_insn (size <= 8 |
| ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest)) |
| : gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); |
| } |
| break; |
| } |
| case 5: |
| { |
| int i = 16 - size; |
| if (! currently_expanding_to_rtl |
| && ! reload_in_progress && ! reload_completed) |
| emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); |
| else |
| { |
| operands[0] = dest; |
| operands[2] = GEN_INT (16 - insize); |
| gen_shifty_hi_op (ASHIFT, operands); |
| emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest))); |
| } |
| /* Don't use gen_ashrsi3 because it generates new pseudos. */ |
| while (--i >= 0) |
| gen_ashift (ASHIFTRT, 1, dest); |
| break; |
| } |
| case 6: |
| case 7: |
| /* Don't expand fine-grained when combining, because that will |
| make the pattern fail. */ |
| if (! currently_expanding_to_rtl |
| && ! reload_in_progress && ! reload_completed) |
| { |
| emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx)); |
| emit_insn (gen_movsi (dest, source)); |
| break; |
| } |
| emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1))); |
| emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1)))); |
| emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1)))); |
| operands[0] = dest; |
| operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx; |
| gen_shifty_op (ASHIFT, operands); |
| if (kind == 7) |
| emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx)); |
| break; |
| default: |
| return true; |
| } |
| return false; |
| } |
| |
| typedef struct label_ref_list_d |
| { |
| rtx_code_label *label; |
| struct label_ref_list_d *next; |
| } *label_ref_list_t; |
| |
| static object_allocator<label_ref_list_d> label_ref_list_d_pool |
| ("label references list"); |
| |
| /* The SH cannot load a large constant into a register; constants have to |
| come from a pc relative load. The reference of a pc relative load |
| instruction must be less than 1k in front of the instruction. This |
| means that we often have to dump a constant inside a function, and |
| generate code to branch around it. |
| |
| It is important to minimize this, since the branches will slow things |
| down and make things bigger. |
| |
| Worst case code looks like: |
| |
| mov.l L1,rn |
| bra L2 |
| nop |
| align |
| L1: .long value |
| L2: |
| .. |
| |
| mov.l L3,rn |
| bra L4 |
| nop |
| align |
| L3: .long value |
| L4: |
| .. |
| |
| We fix this by performing a scan before scheduling, which notices which |
| instructions need to have their operands fetched from the constant table |
| and builds the table. |
| |
| The algorithm is: |
| |
| Scan and find an instruction which needs a pcrel move. Look forward, |
| find the last barrier which is within MAX_COUNT bytes of the |
| requirement. If there isn't one, make one. Process all the |
| instructions between the insn we found and the barrier. |
| |
| In the above example, we can tell that L3 is within 1k of L1, so |
| the first move can be shrunk from the 3 insn+constant sequence into |
| just 1 insn, and the constant moved to L3 to make: |
| |
| mov.l L1,rn |
| .. |
| mov.l L3,rn |
| bra L4 |
| nop |
| align |
| L3:.long value |
| L4:.long value |
| |
| Then the second move becomes the target for the shortening process. */ |
| |
| typedef struct |
| { |
| rtx value; /* Value in table. */ |
| rtx_code_label *label; /* Label of value. */ |
| label_ref_list_t wend; /* End of window. */ |
| machine_mode mode; /* Mode of value. */ |
| |
| /* True if this constant is accessed as part of a post-increment |
| sequence. Note that HImode constants are never accessed in this way. */ |
| bool part_of_sequence_p; |
| } pool_node; |
| |
| /* The maximum number of constants that can fit into one pool, since |
| constants in the range 0..510 are at least 2 bytes long, and in the |
| range from there to 1018 at least 4 bytes. */ |
| |
| #define MAX_POOL_SIZE 372 |
| static pool_node pool_vector[MAX_POOL_SIZE]; |
| static int pool_size; |
| static rtx_code_label *pool_window_label; |
| static int pool_window_last; |
| |
| static int max_labelno_before_reorg; |
| |
| /* ??? If we need a constant in HImode which is the truncated value of a |
| constant we need in SImode, we could combine the two entries thus saving |
| two bytes. Is this common enough to be worth the effort of implementing |
| it? */ |
| |
| /* ??? This stuff should be done at the same time that we shorten branches. |
| As it is now, we must assume that all branches are the maximum size, and |
| this causes us to almost always output constant pools sooner than |
| necessary. */ |
| |
| /* Add a constant to the pool and return its label. */ |
| static rtx_code_label * |
| add_constant (rtx x, machine_mode mode, rtx last_value) |
| { |
| rtx_code_label *lab, *new_rtx; |
| label_ref_list_t ref, newref; |
| |
| /* First see if we've already got it. */ |
| for (int i = 0; i < pool_size; i++) |
| { |
| if (x->code == pool_vector[i].value->code |
| && mode == pool_vector[i].mode) |
| { |
| if (x->code == CODE_LABEL) |
| { |
| if (XINT (x, 3) != XINT (pool_vector[i].value, 3)) |
| continue; |
| } |
| if (rtx_equal_p (x, pool_vector[i].value)) |
| { |
| lab = new_rtx = 0; |
| if (! last_value |
| || ! i |
| || ! rtx_equal_p (last_value, pool_vector[i-1].value)) |
| { |
| new_rtx = gen_label_rtx (); |
| LABEL_REFS (new_rtx) = pool_vector[i].label; |
| pool_vector[i].label = lab = new_rtx; |
| } |
| if (lab && pool_window_label) |
| { |
| newref = label_ref_list_d_pool.allocate (); |
| newref->label = pool_window_label; |
| ref = pool_vector[pool_window_last].wend; |
| newref->next = ref; |
| pool_vector[pool_window_last].wend = newref; |
| } |
| if (new_rtx) |
| pool_window_label = new_rtx; |
| pool_window_last = i; |
| return lab; |
| } |
| } |
| } |
| |
| /* Need a new one. */ |
| pool_vector[pool_size].value = x; |
| if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value)) |
| { |
| lab = 0; |
| pool_vector[pool_size - 1].part_of_sequence_p = true; |
| } |
| else |
| lab = gen_label_rtx (); |
| pool_vector[pool_size].mode = mode; |
| pool_vector[pool_size].label = lab; |
| pool_vector[pool_size].wend = NULL; |
| pool_vector[pool_size].part_of_sequence_p = (lab == 0); |
| if (lab && pool_window_label) |
| { |
| newref = label_ref_list_d_pool.allocate (); |
| newref->label = pool_window_label; |
| ref = pool_vector[pool_window_last].wend; |
| newref->next = ref; |
| pool_vector[pool_window_last].wend = newref; |
| } |
| if (lab) |
| pool_window_label = lab; |
| pool_window_last = pool_size; |
| pool_size++; |
| return lab; |
| } |
| |
| /* Output the literal table. START, if nonzero, is the first instruction |
| this table is needed for, and also indicates that there is at least one |
| casesi_worker_2 instruction; we have to emit the operand3 labels from |
| these insns at a 4-byte aligned position. BARRIER is the barrier |
| after which we are to place the table. */ |
| static void |
| dump_table (rtx_insn *start, rtx_insn *barrier) |
| { |
| rtx_insn *scan = barrier; |
| bool need_align = true; |
| rtx_code_label *lab; |
| label_ref_list_t ref; |
| bool have_df = false; |
| |
| /* Do two passes, first time dump out the HI sized constants. */ |
| |
| for (int i = 0; i < pool_size; i++) |
| { |
| pool_node *p = &pool_vector[i]; |
| |
| if (p->mode == HImode) |
| { |
| if (need_align) |
| { |
| scan = emit_insn_after (gen_align_2 (), scan); |
| need_align = false; |
| } |
| for (lab = p->label; lab; |
| lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) |
| scan = emit_label_after (lab, scan); |
| scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx), |
| scan); |
| for (ref = p->wend; ref; ref = ref->next) |
| { |
| lab = ref->label; |
| scan = emit_insn_after (gen_consttable_window_end (lab), scan); |
| } |
| } |
| else if (p->mode == DFmode) |
| have_df = true; |
| } |
| |
| need_align = true; |
| |
| if (start) |
| { |
| scan = emit_insn_after (gen_align_4 (), scan); |
| need_align = false; |
| for (; start != barrier; start = NEXT_INSN (start)) |
| if (NONJUMP_INSN_P (start) |
| && recog_memoized (start) == CODE_FOR_casesi_worker_2) |
| { |
| rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0)); |
| rtx lab = XEXP (XVECEXP (src, 0, 3), 0); |
| |
| scan = emit_label_after (as_a <rtx_insn *> (lab), scan); |
| } |
| } |
| if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df) |
| { |
| rtx_insn *align_insn = NULL; |
| |
| scan = emit_label_after (gen_label_rtx (), scan); |
| scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan); |
| need_align = false; |
| |
| for (int i = 0; i < pool_size; i++) |
| { |
| pool_node *p = &pool_vector[i]; |
| |
| switch (p->mode) |
| { |
| case E_HImode: |
| break; |
| case E_SImode: |
| case E_SFmode: |
| if (align_insn && !p->part_of_sequence_p) |
| { |
| for (lab = p->label; lab; |
| lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) |
| emit_label_before (lab, align_insn); |
| emit_insn_before (gen_consttable_4 (p->value, const0_rtx), |
| align_insn); |
| for (ref = p->wend; ref; ref = ref->next) |
| { |
| lab = ref->label; |
| emit_insn_before (gen_consttable_window_end (lab), |
| align_insn); |
| } |
| delete_insn (align_insn); |
| align_insn = NULL; |
| continue; |
| } |
| else |
| { |
| for (lab = p->label; lab; |
| lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) |
| scan = emit_label_after (lab, scan); |
| scan = emit_insn_after (gen_consttable_4 (p->value, |
| const0_rtx), scan); |
| need_align = ! need_align; |
| } |
| break; |
| case E_DFmode: |
| if (need_align) |
| { |
| scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan); |
| align_insn = scan; |
| need_align = false; |
| } |
| /* FALLTHRU */ |
| case E_DImode: |
| for (lab = p->label; lab; |
| lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) |
| scan = emit_label_after (lab, scan); |
| scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx), |
| scan); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (p->mode != HImode) |
| { |
| for (ref = p->wend; ref; ref = ref->next) |
| { |
| lab = ref->label; |
| scan = emit_insn_after (gen_consttable_window_end (lab), |
| scan); |
| } |
| } |
| } |
| |
| pool_size = 0; |
| } |
| |
| for (int i = 0; i < pool_size; i++) |
| { |
| pool_node *p = &pool_vector[i]; |
| |
| switch (p->mode) |
| { |
| case E_HImode: |
| break; |
| case E_SImode: |
| case E_SFmode: |
| if (need_align) |
| { |
| need_align = false; |
| scan = emit_label_after (gen_label_rtx (), scan); |
| scan = emit_insn_after (gen_align_4 (), scan); |
| } |
| for (lab = p->label; lab; |
| lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) |
| scan = emit_label_after (lab, scan); |
| scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx), |
| scan); |
| break; |
| case E_DFmode: |
| case E_DImode: |
| if (need_align) |
| { |
| need_align = false; |
| scan = emit_label_after (gen_label_rtx (), scan); |
| scan = emit_insn_after (gen_align_4 (), scan); |
| } |
| for (lab = p->label; lab; |
| lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab))) |
| scan = emit_label_after (lab, scan); |
| scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx), |
| scan); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (p->mode != HImode) |
| { |
| for (ref = p->wend; ref; ref = ref->next) |
| { |
| lab = ref->label; |
| scan = emit_insn_after (gen_consttable_window_end (lab), scan); |
| } |
| } |
| } |
| |
| scan = emit_insn_after (gen_consttable_end (), scan); |
| scan = emit_barrier_after (scan); |
| pool_size = 0; |
| pool_window_label = NULL; |
| pool_window_last = 0; |
| } |
| |
| #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0) |
| |
| /* Nonzero if the insn is a move instruction which needs to be fixed. */ |
| |
| /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the |
| CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't |
| need to fix it if the input value is CONST_OK_FOR_I08. */ |
| static bool |
| broken_move (rtx_insn *insn) |
| { |
| if (NONJUMP_INSN_P (insn)) |
| { |
| rtx pat = PATTERN (insn); |
| if (GET_CODE (pat) == PARALLEL) |
| pat = XVECEXP (pat, 0, 0); |
| if (GET_CODE (pat) == SET |
| /* We can load any 8-bit value if we don't care what the high |
| order bits end up as. */ |
| && GET_MODE (SET_DEST (pat)) != QImode |
| && (CONSTANT_P (SET_SRC (pat)) |
| || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE |
| && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B) |
| /* Match mova_const. */ |
| || (GET_CODE (SET_SRC (pat)) == UNSPEC |
| && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA |
| && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST)) |
| && ! (TARGET_SH2E |
| && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE |
| && (fp_zero_operand (SET_SRC (pat)) |
| || fp_one_operand (SET_SRC (pat))) |
| /* In general we don't know the current setting of fpscr, so |
| disable fldi. |
| There is an exception if this was a register-register move |
| before reload - and hence it was ascertained that we have |
| single precision setting - and in a post-reload optimization |
| we changed this to do a constant load. In that case |
| we don't have an r0 clobber, hence we must use fldi. */ |
| && (TARGET_FMOVD |
| || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0)) |
| == SCRATCH)) |
| && REG_P (SET_DEST (pat)) |
| && FP_REGISTER_P (REGNO (SET_DEST (pat)))) |
| && ! (TARGET_SH2A |
| && GET_MODE (SET_DEST (pat)) == SImode |
| && (satisfies_constraint_I20 (SET_SRC (pat)) |
| || satisfies_constraint_I28 (SET_SRC (pat)))) |
| && ! satisfies_constraint_I08 (SET_SRC (pat))) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Return true if the specified insn is a mova insn. */ |
| static bool |
| mova_p (rtx_insn *insn) |
| { |
| return (NONJUMP_INSN_P (insn) |
| && GET_CODE (PATTERN (insn)) == SET |
| && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC |
| && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA |
| /* Don't match mova_const. */ |
| && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF); |
| } |
| |
| /* Fix up a mova from a switch that went out of range. */ |
| static void |
| fixup_mova (rtx_insn *mova) |
| { |
| PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode); |
| if (! flag_pic) |
| { |
| SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova); |
| INSN_CODE (mova) = -1; |
| } |
| else |
| { |
| rtx_insn *worker = mova; |
| rtx_code_label *lab = gen_label_rtx (); |
| rtx wpat, wpat0, wpat1, wsrc, target, base, diff; |
| |
| do |
| { |
| worker = NEXT_INSN (worker); |
| gcc_assert (worker |
| && !LABEL_P (worker) |
| && !JUMP_P (worker)); |
| } while (NOTE_P (worker) |
| || recog_memoized (worker) != CODE_FOR_casesi_worker_1); |
| wpat = PATTERN (worker); |
| wpat0 = XVECEXP (wpat, 0, 0); |
| wpat1 = XVECEXP (wpat, 0, 1); |
| wsrc = SET_SRC (wpat0); |
| PATTERN (worker) = (gen_casesi_worker_2 |
| (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1), |
| XEXP (XVECEXP (wsrc, 0, 2), 0), lab, |
| XEXP (wpat1, 0))); |
| INSN_CODE (worker) = -1; |
| target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0); |
| base = gen_rtx_LABEL_REF (Pmode, lab); |
| diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF); |
| SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff); |
| INSN_CODE (mova) = -1; |
| } |
| } |
| |
| /* NEW_MOVA is a mova we've just encountered while scanning forward. Update |
| *num_mova, and check if the new mova is not nested within the first one. |
| return 0 if *first_mova was replaced, 1 if new_mova was replaced, |
| 2 if new_mova has been assigned to *first_mova, -1 otherwise. */ |
| static int |
| untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova) |
| { |
| int n_addr = 0; /* Initialization to shut up spurious warning. */ |
| int f_target, n_target = 0; /* Likewise. */ |
| |
| if (optimize) |
| { |
| /* If NEW_MOVA has no address yet, it will be handled later. */ |
| if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova)) |
| return -1; |
| |
| n_addr = INSN_ADDRESSES (INSN_UID (new_mova)); |
| n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0))); |
| if (n_addr > n_target || n_addr + 1022 < n_target) |
| { |
| /* Change the mova into a load. |
| broken_move will then return true for it. */ |
| fixup_mova (new_mova); |
| return 1; |
| } |
| } |
| if (!(*num_mova)++) |
| { |
| *first_mova = new_mova; |
| return 2; |
| } |
| if (!optimize |
| || ((f_target |
| = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0)))) |
| >= n_target)) |
| return -1; |
| |
| (*num_mova)--; |
| if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova)) |
| > n_target - n_addr) |
| { |
| fixup_mova (*first_mova); |
| return 0; |
| } |
| else |
| { |
| fixup_mova (new_mova); |
| return 1; |
| } |
| } |
| |
| /* Find the last barrier from insn FROM which is close enough to hold the |
| constant pool. If we can't find one, then create one near the end of |
| the range. */ |
| static rtx_insn * |
| find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from) |
| { |
| int count_si = 0; |
| int count_hi = 0; |
| int found_hi = 0; |
| int found_si = 0; |
| int hi_align = 2; |
| int si_align = 2; |
| int leading_mova = num_mova; |
| rtx_insn *barrier_before_mova = NULL; |
| rtx_insn *found_barrier = NULL; |
| rtx_insn *good_barrier = NULL; |
| int si_limit; |
| int hi_limit; |
| rtx_insn *orig = from; |
| rtx_insn *last_got = NULL; |
| rtx_insn *last_symoff = NULL; |
| |
| /* For HImode: range is 510, add 4 because pc counts from address of |
| second instruction after this one, subtract 2 for the jump instruction |
| that we may need to emit before the table, subtract 2 for the instruction |
| that fills the jump delay slot (in very rare cases, reorg will take an |
| instruction from after the constant pool or will leave the delay slot |
| empty). This gives 510. |
| For SImode: range is 1020, add 4 because pc counts from address of |
| second instruction after this one, subtract 2 in case pc is 2 byte |
| aligned, subtract 2 for the jump instruction that we may need to emit |
| before the table, subtract 2 for the instruction that fills the jump |
| delay slot. This gives 1018. */ |
| |
| /* The branch will always be shortened now that the reference address for |
| forward branches is the successor address, thus we no longer need to |
| make adjustments to the [sh]i_limit for -O0. */ |
| |
| si_limit = 1018; |
| hi_limit = 510; |
| |
| while (from && count_si < si_limit && count_hi < hi_limit) |
| { |
| int inc = get_attr_length (from); |
| int new_align = 1; |
| |
| /* If this is a label that existed at the time of the compute_alignments |
| call, determine the alignment. N.B. When find_barrier recurses for |
| an out-of-reach mova, we might see labels at the start of previously |
| inserted constant tables. */ |
| if (LABEL_P (from) |
| && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg) |
| { |
| if (optimize) |
| new_align = 1 << label_to_alignment (from).levels[0].log; |
| else if (BARRIER_P (prev_nonnote_insn (from))) |
| new_align = 1 << barrier_align (from); |
| else |
| new_align = 1; |
| inc = 0; |
| } |
| /* In case we are scanning a constant table because of recursion, check |
| for explicit alignments. If the table is long, we might be forced |
| to emit the new table in front of it; the length of the alignment |
| might be the last straw. */ |
| else if (NONJUMP_INSN_P (from) |
| && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE |
| && XINT (PATTERN (from), 1) == UNSPECV_ALIGN) |
| new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0)); |
| /* When we find the end of a constant table, paste the new constant |
| at the end. That is better than putting it in front because |
| this way, we don't need extra alignment for adding a 4-byte-aligned |
| mov(a) label to a 2/4 or 8/4 byte aligned table. */ |
| else if (NONJUMP_INSN_P (from) |
| && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE |
| && XINT (PATTERN (from), 1) == UNSPECV_CONST_END) |
| return from; |
| |
| if (BARRIER_P (from)) |
| { |
| rtx_insn *next; |
| |
| found_barrier = from; |
| |
| /* If we are at the end of the function, or in front of an alignment |
| instruction, we need not insert an extra alignment. We prefer |
| this kind of barrier. */ |
| if (barrier_align (from) > 2) |
| good_barrier = from; |
| |
| /* If we are at the end of a hot/cold block, dump the constants |
| here. */ |
| next = NEXT_INSN (from); |
| if (next |
| && NOTE_P (next) |
| && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS) |
| break; |
| } |
| |
| if (broken_move (from)) |
| { |
| rtx pat, src, dst; |
| machine_mode mode; |
| |
| pat = PATTERN (from); |
| if (GET_CODE (pat) == PARALLEL) |
| pat = XVECEXP (pat, 0, 0); |
| src = SET_SRC (pat); |
| dst = SET_DEST (pat); |
| mode = GET_MODE (dst); |
| |
| /* A GOT pc-relative setting comes as a pair of |
| mova .L8,r0 |
| mov.l .L8,r12 |
| instructions (plus add r0,r12). |
| Remember if we see one without the other. */ |
| if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0))) |
| last_got = last_got ? NULL : from; |
| else if (PIC_ADDR_P (src)) |
| last_got = last_got ? NULL : from; |
| |
| /* We must explicitly check the mode, because sometimes the |
| front end will generate code to load unsigned constants into |
| HImode targets without properly sign extending them. */ |
| if (mode == HImode |
| || (mode == SImode && satisfies_constraint_I16 (src) |
| && REGNO (dst) != FPUL_REG)) |
| { |
| found_hi += 2; |
| /* We put the short constants before the long constants, so |
| we must count the length of short constants in the range |
| for the long constants. */ |
| /* ??? This isn't optimal, but is easy to do. */ |
| si_limit -= 2; |
| } |
| else |
| { |
| /* We dump DF/DI constants before SF/SI ones, because |
| the limit is the same, but the alignment requirements |
| are higher. We may waste up to 4 additional bytes |
| for alignment, and the DF/DI constant may have |
| another SF/SI constant placed before it. */ |
| while (si_align > 2 && found_si + si_align - 2 > count_si) |
| si_align >>= 1; |
| if (found_si > count_si) |
| count_si = found_si; |
| found_si += GET_MODE_SIZE (mode); |
| if (num_mova) |
| si_limit -= GET_MODE_SIZE (mode); |
| } |
| } |
| |
| if (mova_p (from)) |
| { |
| switch (untangle_mova (&num_mova, &mova, from)) |
| { |
| case 1: |
| if (flag_pic) |
| { |
| rtx src = SET_SRC (PATTERN (from)); |
| if (GET_CODE (src) == CONST |
| && GET_CODE (XEXP (src, 0)) == UNSPEC |
| && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF) |
| last_symoff = from; |
| } |
| break; |
| case 0: return find_barrier (0, 0, mova); |
| case 2: |
| { |
| leading_mova = 0; |
| barrier_before_mova |
| = good_barrier ? good_barrier : found_barrier; |
| } |
| default: break; |
| } |
| if (found_si > count_si) |
| count_si = found_si; |
| } |
| else if (JUMP_TABLE_DATA_P (from) |
| && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC) |
| { |
| if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode) |
| || (num_mova |
| && (prev_nonnote_insn (from) |
| == XEXP (MOVA_LABELREF (mova), 0)))) |
| num_mova--; |
| if (barrier_align (next_real_insn (from)) == align_jumps.levels[0].log) |
| { |
| /* We have just passed the barrier in front of the |
| ADDR_DIFF_VEC, which is stored in found_barrier. Since |
| the ADDR_DIFF_VEC is accessed as data, just like our pool |
| constants, this is a good opportunity to accommodate what |
| we have gathered so far. |
| If we waited any longer, we could end up at a barrier in |
| front of code, which gives worse cache usage for separated |
| instruction / data caches. */ |
| good_barrier = found_barrier; |
| break; |
| } |
| else |
| { |
| rtx body = PATTERN (from); |
| inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body)); |
| } |
| } |
| /* For the SH1, we generate alignments even after jumps-around-jumps. */ |
| else if (JUMP_P (from) |
| && ! TARGET_SH2 |
| && ! optimize_size) |
| new_align = 4; |
| |
| /* There is a possibility that a bf is transformed into a bf/s by the |
| delay slot scheduler. */ |
| if (JUMP_P (from) |
| && get_attr_type (from) == TYPE_CBRANCH |
| && ! sequence_insn_p (from)) |
| inc += 2; |
| |
| if (found_si) |
| { |
| count_si += inc; |
| if (new_align > si_align) |
| { |
| si_limit -= (count_si - 1) & (new_align - si_align); |
| si_align = new_align; |
| } |
| count_si = (count_si + new_align - 1) & -new_align; |
| } |
| if (found_hi) |
| { |
| count_hi += inc; |
| if (new_align > hi_align) |
| { |
| hi_limit -= (count_hi - 1) & (new_align - hi_align); |
| hi_align = new_align; |
| } |
| count_hi = (count_hi + new_align - 1) & -new_align; |
| } |
| from = NEXT_INSN (from); |
| } |
| |
| if (num_mova) |
| { |
| if (leading_mova) |
| { |
| /* Try as we might, the leading mova is out of range. Change |
| it into a load (which will become a pcload) and retry. */ |
| fixup_mova (mova); |
| return find_barrier (0, 0, mova); |
| } |
| else |
| { |
| /* Insert the constant pool table before the mova instruction, |
| to prevent the mova label reference from going out of range. */ |
| from = mova; |
| good_barrier = found_barrier = barrier_before_mova; |
| } |
| } |
| |
| if (found_barrier) |
| { |
| if (good_barrier && next_real_insn (found_barrier)) |
| found_barrier = good_barrier; |
| } |
| else |
| { |
| /* We didn't find a barrier in time to dump our stuff, |
| so we'll make one. */ |
| rtx_code_label *label = gen_label_rtx (); |
| |
| /* Don't emit a constant table in the middle of insns for |
| casesi_worker_2. This is a bit of overkill, but suffices because |
| casesi_worker_2 appears only rarely. */ |
| if (last_symoff) |
| from = last_symoff; |
| |
| /* If we exceeded the range, then we must back up over the last |
| instruction we looked at. Otherwise, we just need to undo the |
| NEXT_INSN at the end of the loop. */ |
| if (PREV_INSN (from) != orig |
| && (count_hi > hi_limit || count_si > si_limit)) |
| from = PREV_INSN (PREV_INSN (from)); |
| else |
| from = PREV_INSN (from); |
| |
| /* Don't emit a constant table in the middle of global pointer setting, |
| since that would move the addressing base GOT into another table. |
| We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_ |
| in the pool anyway, so just move up the whole constant pool. |
| |
| However, avoid doing so when the last single GOT mov is the starting |
| insn itself. Going back past the start insn would create a negative |
| offset, causing errors. */ |
| if (last_got && last_got != orig) |
| from = PREV_INSN (last_got); |
| |
| /* Don't insert the constant pool table at the position which |
| may be the landing pad. */ |
| if (flag_exceptions |
| && CALL_P (from) |
| && find_reg_note (from, REG_EH_REGION, NULL_RTX)) |
| from = PREV_INSN (from); |
| |
| /* Walk back to be just before any jump or label. |
| Putting it before a label reduces the number of times the branch |
| around the constant pool table will be hit. Putting it before |
| a jump makes it more likely that the bra delay slot will be |
| filled. */ |
| while (NOTE_P (from) || JUMP_P (from) || LABEL_P (from)) |
| from = PREV_INSN (from); |
| |
| if (CALL_P (from)) |
| { |
| bool sibcall_p = SIBLING_CALL_P (from); |
| |
| /* If FROM was a sibling call, then we know that control |
| will not return. In fact, we were guaranteed to hit |
| a barrier before another real insn. |
| |
| The jump around the constant pool is unnecessary. It |
| costs space, but more importantly it confuses dwarf2cfi |
| generation. */ |
| if (sibcall_p) |
| return emit_barrier_after (from); |
| } |
| |
| from = emit_jump_insn_after (gen_jump (label), from); |
| JUMP_LABEL (from) = label; |
| LABEL_NUSES (label) = 1; |
| found_barrier = emit_barrier_after (from); |
| emit_label_after (label, found_barrier); |
| } |
| |
| return found_barrier; |
| } |
| |
| /* If the instruction INSN is implemented by a special function, and we can |
| positively find the register that is used to call the sfunc, and this |
| register is not used anywhere else in this instruction - except as the |
| destination of a set - return this register; else return NULL_RTX. */ |
| rtx |
| sfunc_uses_reg (rtx_insn *insn) |
| { |
| int i; |
| rtx pattern, part, reg_part, reg; |
| |
| if (!NONJUMP_INSN_P (insn)) |
| return NULL_RTX; |
| pattern = PATTERN (insn); |
| if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC) |
| return NULL_RTX; |
| |
| for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--) |
| { |
| part = XVECEXP (pattern, 0, i); |
| if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode) |
| reg_part = part; |
| } |
| if (! reg_part) |
| return NULL_RTX; |
| reg = XEXP (reg_part, 0); |
| for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--) |
| { |
| part = XVECEXP (pattern, 0, i); |
| if (part == reg_part || GET_CODE (part) == CLOBBER) |
| continue; |
| if (reg_mentioned_p (reg, ((GET_CODE (part) == SET |
| && REG_P (SET_DEST (part))) |
| ? SET_SRC (part) : part))) |
| return NULL_RTX; |
| } |
| return reg; |
| } |
| |
| /* See if the only way in which INSN uses REG is by calling it, or by |
| setting it while calling it. Set *SET to a SET rtx if the register |
| is set by INSN. */ |
| static bool |
| noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set) |
| { |
| *set = NULL_RTX; |
| |
| rtx reg2 = sfunc_uses_reg (insn); |
| if (reg2 && REGNO (reg2) == REGNO (reg)) |
| { |
| rtx pattern = single_set (insn); |
| if (pattern |
| && REG_P (SET_DEST (pattern)) |
| && REGNO (reg) == REGNO (SET_DEST (pattern))) |
| *set = pattern; |
| return false; |
| } |
| if (!CALL_P (insn)) |
| { |
| /* We don't use rtx_equal_p because we don't care if the mode is |
| different. */ |
| rtx pattern = single_set (insn); |
| if (pattern |
| && REG_P (SET_DEST (pattern)) |
| && REGNO (reg) == REGNO (SET_DEST (pattern))) |
| { |
| rtx par, part; |
| int i; |
| |
| *set = pattern; |
| par = PATTERN (insn); |
| if (GET_CODE (par) == PARALLEL) |
| for (i = XVECLEN (par, 0) - 1; i >= 0; i--) |
| { |
| part = XVECEXP (par, 0, i); |
| if (GET_CODE (part) != SET && reg_mentioned_p (reg, part)) |
| return true; |
| } |
| return reg_mentioned_p (reg, SET_SRC (pattern)); |
| } |
| |
| return true; |
| } |
| |
| rtx pattern = PATTERN (insn); |
| |
| if (GET_CODE (pattern) == PARALLEL) |
| { |
| for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--) |
| if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i))) |
| return true; |
| pattern = XVECEXP (pattern, 0, 0); |
| } |
| |
| if (GET_CODE (pattern) == SET) |
| { |
| if (reg_mentioned_p (reg, SET_DEST (pattern))) |
| { |
| /* We don't use rtx_equal_p, because we don't care if the |
| mode is different. */ |
| if (!REG_P (SET_DEST (pattern)) |
| || REGNO (reg) != REGNO (SET_DEST (pattern))) |
| return true; |
| |
| *set = pattern; |
| } |
| |
| pattern = SET_SRC (pattern); |
| } |
| |
| if (GET_CODE (pattern) != CALL |
| || !MEM_P (XEXP (pattern, 0)) |
| || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0))) |
| return true; |
| |
| return false; |
| } |
| |
| /* Given X, a pattern of an insn or a part of it, return a mask of used |
| general registers. Bits 0..15 mean that the respective registers |
| are used as inputs in the instruction. Bits 16..31 mean that the |
| registers 0..15, respectively, are used as outputs, or are clobbered. |
| IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */ |
| int |
| regs_used (rtx x, int is_dest) |
| { |
| enum rtx_code code; |
| const char *fmt; |
| int used = 0; |
| |
| if (! x) |
| return used; |
| code = GET_CODE (x); |
| switch (code) |
| { |
| case REG: |
| if (REGNO (x) < 16) |
| return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1) |
| << (REGNO (x) + is_dest)); |
| return 0; |
| case SUBREG: |
| { |
| rtx y = SUBREG_REG (x); |
| |
| if (!REG_P (y)) |
| break; |
| if (REGNO (y) < 16) |
| return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1) |
| << (REGNO (y) + |
| subreg_regno_offset (REGNO (y), |
| GET_MODE (y), |
| SUBREG_BYTE (x), |
| GET_MODE (x)) + is_dest)); |
| return 0; |
| } |
| case SET: |
| return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16); |
| case RETURN: |
| /* If there was a return value, it must have been indicated with USE. */ |
| return 0x00ffff00; |
| case CLOBBER: |
| is_dest = 1; |
| break; |
| case MEM: |
| is_dest = 0; |
| break; |
| case CALL: |
| used |= 0x00ff00f0; |
| break; |
| default: |
| break; |
| } |
| |
| fmt = GET_RTX_FORMAT (code); |
| |
| for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'E') |
| { |
| for (int j = XVECLEN (x, i) - 1; j >= 0; j--) |
| used |= regs_used (XVECEXP (x, i, j), is_dest); |
| } |
| else if (fmt[i] == 'e') |
| used |= regs_used (XEXP (x, i), is_dest); |
| } |
| return used; |
| } |
| |
| /* Create an instruction that prevents redirection of a conditional branch |
| to the destination of the JUMP with address ADDR. |
| If the branch needs to be implemented as an indirect jump, try to find |
| a scratch register for it. |
   If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
   Pass 1 if any preceding insn that doesn't fit into a delay slot is good
   enough; pass 2 if a definite blocking insn is needed.
   -1 is used internally to avoid deep recursion.
| If a blocking instruction is made or recognized, return it. */ |
| static rtx_insn * |
| gen_block_redirect (rtx_insn *jump, int addr, int need_block) |
| { |
| int dead = 0; |
| rtx_insn *prev = prev_nonnote_insn (jump); |
| |
| /* First, check if we already have an instruction that satisfies our need. */ |
| if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ()) |
| { |
| if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch) |
| return prev; |
| if (GET_CODE (PATTERN (prev)) == USE |
| || GET_CODE (PATTERN (prev)) == CLOBBER |
| || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES) |
| prev = jump; |
| else if ((need_block &= ~1) < 0) |
| return prev; |
| else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect) |
| need_block = 0; |
| } |
| if (GET_CODE (PATTERN (jump)) == RETURN) |
| { |
| if (! need_block) |
| return prev; |
      /* Reorg even does nasty things with return insns that cause branches
	 to go out of range; see find_end_label and callers.  */
      return emit_insn_before (gen_block_branch_redirect (const0_rtx), jump);
| } |
| /* We can't use JUMP_LABEL here because it might be undefined |
| when not optimizing. */ |
| rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0); |
| /* If the branch is out of range, try to find a scratch register for it. */ |
| if (optimize |
| && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 |
| > 4092 + 4098)) |
| { |
| rtx_insn *scan; |
| /* Don't look for the stack pointer as a scratch register, |
| it would cause trouble if an interrupt occurred. */ |
| unsigned attempt = 0x7fff, used; |
| int jump_left = flag_expensive_optimizations + 1; |
| |
| /* It is likely that the most recent eligible instruction is wanted for |
| the delay slot. Therefore, find out which registers it uses, and |
| try to avoid using them. */ |
| |
| for (scan = jump; (scan = PREV_INSN (scan)); ) |
| { |
| if (scan->deleted ()) |
| continue; |
| rtx_code code = GET_CODE (scan); |
| if (code == CODE_LABEL || code == JUMP_INSN) |
| break; |
| if (code == INSN |
| && GET_CODE (PATTERN (scan)) != USE |
| && GET_CODE (PATTERN (scan)) != CLOBBER |
| && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES) |
| { |
| attempt &= ~regs_used (PATTERN (scan), 0); |
| break; |
| } |
| } |
| for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump); |
| (scan = NEXT_INSN (scan)); ) |
| { |
| if (scan->deleted ()) |
| continue; |
| rtx_code code = GET_CODE (scan); |
| if (INSN_P (scan)) |
| { |
| used |= regs_used (PATTERN (scan), 0); |
| if (code == CALL_INSN) |
| used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0); |
| dead |= (used >> 16) & ~used; |
| if (dead & attempt) |
| { |
| dead &= attempt; |
| break; |
| } |
| if (code == JUMP_INSN) |
| { |
| if (jump_left-- && simplejump_p (scan)) |
| scan = JUMP_LABEL_AS_INSN (scan); |
| else |
| break; |
| } |
| } |
| } |
| /* Mask out the stack pointer again, in case it was |
| the only 'free' register we have found. */ |
| dead &= 0x7fff; |
| } |
| /* If the immediate destination is still in range, check for possible |
| threading with a jump beyond the delay slot insn. |
     Don't check if we are called recursively; the jump has been or will be
     checked in a different invocation in that case.  */
| |
| else if (optimize && need_block >= 0) |
| { |
| rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest)); |
| next = next_active_insn (next); |
| if (next && JUMP_P (next) |
| && GET_CODE (PATTERN (next)) == SET |
| && recog_memoized (next) == CODE_FOR_jump_compact) |
| { |
| dest = JUMP_LABEL (next); |
| if (dest |
| && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092 |
| > 4092 + 4098)) |
| gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1); |
| } |
| } |
| |
| if (dead) |
| { |
| rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead)); |
| |
      /* It would be nice if we could convert the jump into an indirect
	 jump / far branch right now, and thus expose all constituent
	 instructions to further optimization.  However, reorg uses
	 simplejump_p to determine if there is an unconditional jump where
	 it should try to schedule instructions from the target of the
	 branch; simplejump_p fails for indirect jumps even if they have
	 a JUMP_LABEL.  */
| rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch |
| (reg, GEN_INT (unspec_bbr_uid++)), |
| jump); |
| /* ??? We would like this to have the scope of the jump, but that |
| scope will change when a delay slot insn of an inner scope is added. |
| Hence, after delay slot scheduling, we'll have to expect |
| NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and |
| the jump. */ |
| |
| INSN_LOCATION (insn) = INSN_LOCATION (jump); |
| INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch; |
| return insn; |
| } |
| else if (need_block) |
| /* We can't use JUMP_LABEL here because it might be undefined |
| when not optimizing. */ |
| return emit_insn_before (gen_block_branch_redirect |
| (GEN_INT (unspec_bbr_uid++)), |
| jump); |
| return prev; |
| } |
| |
| #define CONDJUMP_MIN -252 |
| #define CONDJUMP_MAX 262 |
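/* These bounds follow from the 8-bit displacement of the conditional
   branch insns bt/bf: -128 .. +127 words from PC + 4 gives targets within
   [ADDR - 252, ADDR + 258]; CONDJUMP_MAX apparently includes 4 bytes of
   extra slack beyond that.  */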
| struct far_branch |
| { |
| /* A label (to be placed) in front of the jump |
| that jumps to our ultimate destination. */ |
| rtx_insn *near_label; |
| /* Where we are going to insert it if we cannot move the jump any farther, |
| or the jump itself if we have picked up an existing jump. */ |
| rtx_insn *insert_place; |
| /* The ultimate destination. */ |
| rtx_insn *far_label; |
| struct far_branch *prev; |
| /* If the branch has already been created, its address; |
| else the address of its first prospective user. */ |
| int address; |
| }; |
| |
| enum mdep_reorg_phase_e mdep_reorg_phase; |
| |
| static void |
| gen_far_branch (struct far_branch *bp) |
| { |
| rtx_insn *insn = bp->insert_place; |
| rtx_jump_insn *jump; |
| rtx_code_label *label = gen_label_rtx (); |
| |
| emit_label_after (label, insn); |
| if (bp->far_label) |
| { |
| jump = emit_jump_insn_after (gen_jump (bp->far_label), insn); |
| LABEL_NUSES (bp->far_label)++; |
| } |
| else |
| jump = emit_jump_insn_after (gen_return (), insn); |
| |
| /* Emit a barrier so that reorg knows that any following instructions |
| are not reachable via a fall-through path. |
| But don't do this when not optimizing, since we wouldn't suppress the |
| alignment for the barrier then, and could end up with out-of-range |
| pc-relative loads. */ |
| if (optimize) |
| emit_barrier_after (jump); |
| emit_label_after (bp->near_label, insn); |
| |
| if (bp->far_label) |
| JUMP_LABEL (jump) = bp->far_label; |
| else |
| { |
| rtx pat = PATTERN (jump); |
| gcc_assert (ANY_RETURN_P (pat)); |
| JUMP_LABEL (jump) = pat; |
| } |
| |
| bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1); |
| gcc_assert (ok); |
| |
  /* If we are branching around a jump (rather than a return), prevent
     reorg from using an insn from the jump target as the delay slot insn;
     when reorg did this, it pessimized code (we'd rather hide the delay
     slot) and it could cause branches to go out of range.  */
| if (bp->far_label) |
| (emit_insn_after |
| (gen_stuff_delay_slot |
| (GEN_INT (unspec_bbr_uid++), |
| GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)), |
| insn)); |
| /* Prevent reorg from undoing our splits. */ |
| gen_block_redirect (jump, bp->address += 2, 2); |
| } |
| |
| /* Fix up ADDR_DIFF_VECs. */ |
| void |
| fixup_addr_diff_vecs (rtx_insn *first) |
| { |
| rtx_insn *insn; |
| |
| for (insn = first; insn; insn = NEXT_INSN (insn)) |
| { |
| rtx vec_lab, pat, prevpat, x, braf_label; |
| rtx_insn *prev; |
| |
| if (! JUMP_TABLE_DATA_P (insn) |
| || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) |
| continue; |
| pat = PATTERN (insn); |
| vec_lab = XEXP (XEXP (pat, 0), 0); |
| |
      /* Search for the matching casesi_jump_2.  */
| for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev)) |
| { |
| if (!JUMP_P (prev)) |
| continue; |
| prevpat = PATTERN (prev); |
| if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2) |
| continue; |
| x = XVECEXP (prevpat, 0, 1); |
| if (GET_CODE (x) != USE) |
| continue; |
| x = XEXP (x, 0); |
| if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab) |
| break; |
| } |
      /* FIXME: This is a bug in the optimizer, but it seems harmless
	 to just avoid panicking.  */
| if (!prev) |
| continue; |
| |
| /* Emit the reference label of the braf where it belongs, right after |
| the casesi_jump_2 (i.e. braf). */ |
| braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0); |
| emit_label_after (as_a <rtx_insn *> (braf_label), prev); |
| |
      /* Fix up the ADDR_DIFF_VEC to be relative
	 to the reference address of the braf.  */
| XEXP (XEXP (pat, 0), 0) = braf_label; |
| } |
| } |
| |
| /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following |
| a barrier. Return the base 2 logarithm of the desired alignment. */ |
| int |
| barrier_align (rtx_insn *barrier_or_label) |
| { |
| if (! barrier_or_label) |
| return 0; |
| |
| if (LABEL_P (barrier_or_label) |
| && NEXT_INSN (barrier_or_label) |
| && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label))) |
| return 2; |
| |
| if (BARRIER_P (barrier_or_label) |
| && PREV_INSN (barrier_or_label) |
| && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label))) |
| { |
| rtx pat = PATTERN (PREV_INSN (barrier_or_label)); |
| /* If this is a very small table, we want to keep the alignment after |
| the table to the minimum for proper code alignment. */ |
| return ((optimize_size |
| || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat)) |
| <= (unsigned) 1 << (CACHE_LOG - 2))) |
| ? 1 : align_jumps.levels[0].log); |
| } |
| |
| rtx_insn *next = next_active_insn (barrier_or_label); |
| |
| if (! next) |
| return 0; |
| |
| rtx pat = PATTERN (next); |
| |
| if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN) |
| /* This is a barrier in front of a constant table. */ |
| return 0; |
| |
| if (optimize_size) |
| return 0; |
| |
| if (! TARGET_SH2 || ! optimize) |
| return align_jumps.levels[0].log; |
| |
| /* When fixing up pcloads, a constant table might be inserted just before |
| the basic block that ends with the barrier. Thus, we can't trust the |
| instruction lengths before that. */ |
| if (mdep_reorg_phase > SH_FIXUP_PCLOAD) |
| { |
      /* Check if there is an immediately preceding branch to the insn beyond
	 the barrier.  We must weigh the cost of discarding useful information
	 from the current cache line when executing this branch and there is
	 an alignment, against that of fetching unneeded insns in front of the
	 branch target when there is no alignment.  */
| |
| /* There are two delay_slot cases to consider. One is the simple case |
| where the preceding branch is to the insn beyond the barrier (simple |
| delay slot filling), and the other is where the preceding branch has |
| a delay slot that is a duplicate of the insn after the barrier |
| (fill_eager_delay_slots) and the branch is to the insn after the insn |
| after the barrier. */ |
| |
| int slot, credit; |
| bool jump_to_next = false; |
| |
| /* Skip to the insn before the JUMP_INSN before the barrier under |
| investigation. */ |
| rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label)); |
| |
| for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2; |
| credit >= 0 && prev && NONJUMP_INSN_P (prev); |
| prev = prev_real_insn (prev)) |
| { |
| jump_to_next = false; |
| if (GET_CODE (PATTERN (prev)) == USE |
| || GET_CODE (PATTERN (prev)) == CLOBBER) |
| continue; |
| if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev))) |
| { |
| prev = prev_seq->insn (1); |
| if (INSN_UID (prev) == INSN_UID (next)) |
| { |
| /* Delay slot was filled with insn at jump target. */ |
| jump_to_next = true; |
| continue; |
| } |
| } |
| |
	  if (slot
	      && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
| slot = 0; |
| credit -= get_attr_length (prev); |
| } |
| if (prev && jump_to_label_p (prev)) |
| { |
| rtx_insn *x; |
| if (jump_to_next |
| || next_real_insn (JUMP_LABEL_AS_INSN (prev)) == next |
| /* If relax_delay_slots() decides NEXT was redundant |
| with some previous instruction, it will have |
| redirected PREV's jump to the following insn. */ |
| || JUMP_LABEL (prev) == next_nonnote_insn (next) |
| /* There is no upper bound on redundant instructions |
| that might have been skipped, but we must not put an |
| alignment where none had been before. */ |
| || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))), |
| (INSN_P (x) |
| && (INSN_CODE (x) == CODE_FOR_block_branch_redirect |
| || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch |
| || INSN_CODE (x) == CODE_FOR_stuff_delay_slot)))) |
| { |
| rtx pat = PATTERN (prev); |
| if (GET_CODE (pat) == PARALLEL) |
| pat = XVECEXP (pat, 0, 0); |
| if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0)) |
| return 0; |
| } |
| } |
| } |
| |
| return align_jumps.levels[0].log; |
| } |
| |
| /* If we are inside a phony loop, almost any kind of label can turn up as the |
| first one in the loop. Aligning a braf label causes incorrect switch |
| destination addresses; we can detect braf labels because they are |
| followed by a BARRIER. |
| Applying loop alignment to small constant or switch tables is a waste |
| of space, so we suppress this too. */ |
| int |
| sh_loop_align (rtx_insn *label) |
| { |
| rtx_insn *next = label; |
| |
| if (! optimize || optimize_size) |
| return 0; |
| |
| do |
| next = next_nonnote_insn (next); |
| while (next && LABEL_P (next)); |
| |
| if (! next |
| || ! INSN_P (next) |
| || recog_memoized (next) == CODE_FOR_consttable_2) |
| return 0; |
| |
| return align_loops.levels[0].log; |
| } |
| |
| /* Do a final pass over the function, just before delayed branch |
| scheduling. */ |
| static void |
| sh_reorg (void) |
| { |
| rtx_insn *first, *insn, *mova = NULL; |
| int num_mova; |
| rtx r0_rtx = gen_rtx_REG (Pmode, 0); |
| rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx); |
| |
| first = get_insns (); |
| max_labelno_before_reorg = max_label_num (); |
| |
| /* We must split call insns before introducing `mova's. If we're |
| optimizing, they'll have already been split. Otherwise, make |
| sure we don't split them too late. */ |
| if (! optimize) |
| split_all_insns_noflow (); |
| |
| /* If relaxing, generate pseudo-ops to associate function calls with |
| the symbols they call. It does no harm to not generate these |
| pseudo-ops. However, when we can generate them, it enables the |
| linker to potentially relax the jsr to a bsr, and eliminate the |
| register load and, possibly, the constant pool entry. */ |
| |
| mdep_reorg_phase = SH_INSERT_USES_LABELS; |
| if (TARGET_RELAX) |
| { |
| /* Remove all REG_LABEL_OPERAND notes. We want to use them for our |
| own purposes. This works because none of the remaining passes |
| need to look at them. |
| |
| ??? But it may break in the future. We should use a machine |
| dependent REG_NOTE, or some other approach entirely. */ |
| for (insn = first; insn; insn = NEXT_INSN (insn)) |
| { |
| if (INSN_P (insn)) |
| { |
| rtx note; |
| |
| while ((note = find_reg_note (insn, REG_LABEL_OPERAND, |
| NULL_RTX)) != 0) |
| remove_note (insn, note); |
| } |
| } |
| |
| for (insn = first; insn; insn = NEXT_INSN (insn)) |
| { |
| rtx pattern, reg, set, dies; |
| rtx_code_label *label; |
| rtx_insn *link, *scan; |
| int rescan = 0, foundinsn = 0; |
| |
| if (CALL_P (insn)) |
| { |
| pattern = PATTERN (insn); |
| |
| if (GET_CODE (pattern) == PARALLEL) |
| pattern = XVECEXP (pattern, 0, 0); |
| if (GET_CODE (pattern) == SET) |
| pattern = SET_SRC (pattern); |
| |
| if (GET_CODE (pattern) != CALL |
| || !MEM_P (XEXP (pattern, 0))) |
| continue; |
| |
| reg = XEXP (XEXP (pattern, 0), 0); |
| } |
| else |
| { |
| reg = sfunc_uses_reg (insn); |
| if (! reg) |
| continue; |
| } |
| |
| if (!REG_P (reg)) |
| continue; |
| |
| /* Try scanning backward to find where the register is set. */ |
| link = NULL; |
| for (scan = PREV_INSN (insn); |
| scan && !LABEL_P (scan); |
| scan = PREV_INSN (scan)) |
| { |
| if (! INSN_P (scan)) |
| continue; |
| |
| if (! reg_mentioned_p (reg, scan)) |
| continue; |
| |
| if (noncall_uses_reg (reg, scan, &set)) |
| break; |
| |
| if (set) |
| { |
| link = scan; |
| break; |
| } |
| } |
| |
| if (! link) |
| continue; |
| |
| /* The register is set at LINK. */ |
| |
| /* We can only optimize the function call if the register is |
| being set to a symbol. In theory, we could sometimes |
| optimize calls to a constant location, but the assembler |
| and linker do not support that at present. */ |
| if (GET_CODE (SET_SRC (set)) != SYMBOL_REF |
| && GET_CODE (SET_SRC (set)) != LABEL_REF) |
| continue; |
| |
| /* Scan forward from LINK to the place where REG dies, and |
| make sure that the only insns which use REG are |
| themselves function calls. */ |
| |
| /* ??? This doesn't work for call targets that were allocated |
| by reload, since there may not be a REG_DEAD note for the |
| register. */ |
| |
| dies = NULL_RTX; |
| for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan)) |
| { |
| rtx scanset; |
| |
| /* Don't try to trace forward past a CODE_LABEL if we haven't |
| seen INSN yet. Ordinarily, we will only find the setting insn |
| if it is in the same basic block. However, |
| cross-jumping can insert code labels in between the load and |
| the call, and can result in situations where a single call |
| insn may have two targets depending on where we came from. */ |
| |
| if (LABEL_P (scan) && ! foundinsn) |
| break; |
| |
| if (! INSN_P (scan)) |
| continue; |
| |
| /* Don't try to trace forward past a JUMP. To optimize |
| safely, we would have to check that all the |
| instructions at the jump destination did not use REG. */ |
| |
| if (JUMP_P (scan)) |
| break; |
| |
| if (! reg_mentioned_p (reg, scan)) |
| continue; |
| |
| if (noncall_uses_reg (reg, scan, &scanset)) |
| break; |
| |
| if (scan == insn) |
| foundinsn = 1; |
| |
| if (scan != insn |
| && (CALL_P (scan) || sfunc_uses_reg (scan))) |
| { |
| /* There is a function call to this register other |
| than the one we are checking. If we optimize |
| this call, we need to rescan again below. */ |
| rescan = 1; |
| } |
| |
| /* ??? We shouldn't have to worry about SCANSET here. |
| We should just be able to check for a REG_DEAD note |
| on a function call. However, the REG_DEAD notes are |
| apparently not dependable around libcalls; c-torture |
| execute/920501-2 is a test case. If SCANSET is set, |
| then this insn sets the register, so it must have |
| died earlier. Unfortunately, this will only handle |
| the cases in which the register is, in fact, set in a |
| later insn. */ |
| |
| /* ??? We shouldn't have to use FOUNDINSN here. |
| This dates back to when we used LOG_LINKS to find |
| the most recent insn which sets the register. */ |
| |
| if (foundinsn |
| && (scanset |
| || find_reg_note (scan, REG_DEAD, reg))) |
| { |
| dies = scan; |
| break; |
| } |
| } |
| |
| if (! dies) |
| { |
| /* Either there was a branch, or some insn used REG |
| other than as a function call address. */ |
| continue; |
| } |
| |
| /* Create a code label, and put it in a REG_LABEL_OPERAND note |
| on the insn which sets the register, and on each call insn |
| which uses the register. In final_prescan_insn we look for |
| the REG_LABEL_OPERAND notes, and output the appropriate label |
| or pseudo-op. */ |
| |
| label = gen_label_rtx (); |
| add_reg_note (link, REG_LABEL_OPERAND, label); |
| add_reg_note (insn, REG_LABEL_OPERAND, label); |
| if (rescan) |
| { |
| scan = link; |
| do |
| { |
| rtx reg2; |
| |
| scan = NEXT_INSN (scan); |
| if (scan != insn |
| && ((CALL_P (scan) |
| && reg_mentioned_p (reg, scan)) |
| || ((reg2 = sfunc_uses_reg (scan)) |
| && REGNO (reg2) == REGNO (reg)))) |
| add_reg_note (scan, REG_LABEL_OPERAND, label); |
| } |
| while (scan != dies); |
| } |
| } |
| } |
| |
| if (TARGET_SH2) |
| fixup_addr_diff_vecs (first); |
| |
| if (optimize) |
| { |
| mdep_reorg_phase = SH_SHORTEN_BRANCHES0; |
| shorten_branches (first); |
| } |
| |
| /* Scan the function looking for move instructions which have to be |
| changed to pc-relative loads and insert the literal tables. */ |
| mdep_reorg_phase = SH_FIXUP_PCLOAD; |
| for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn)) |
| { |
| if (mova_p (insn)) |
| { |
| /* ??? basic block reordering can move a switch table dispatch |
| below the switch table. Check if that has happened. |
| We only have the addresses available when optimizing; but then, |
| this check shouldn't be needed when not optimizing. */ |
| if (!untangle_mova (&num_mova, &mova, insn)) |
| { |
| insn = mova; |
| num_mova = 0; |
| } |
| } |
| else if (JUMP_TABLE_DATA_P (insn) |
| && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC |
| && num_mova |
| /* ??? loop invariant motion can also move a mova out of a |
| loop. Since loop does this code motion anyway, maybe we |
| should wrap UNSPEC_MOVA into a CONST, so that reload can |
| move it back. */ |
| && ((num_mova > 1 |
| && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode) |
| || (prev_nonnote_insn (insn) |
| == XEXP (MOVA_LABELREF (mova), 0)))) |
| { |
| rtx_insn *scan; |
| int total; |
| |
| num_mova--; |
| |
| /* Some code might have been inserted between the mova and |
| its ADDR_DIFF_VEC. Check if the mova is still in range. */ |
| for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan)) |
| total += get_attr_length (scan); |
| |
	  /* The range of mova is 1020; add 4 because the PC counts from the
	     address of the second instruction after this one, and subtract 2
	     in case the PC is 2-byte aligned.  Possible alignment needed for
	     the ADDR_DIFF_VEC cancels out with the alignment effects of the
	     mova itself.  */
| if (total > 1022) |
| { |
| /* Change the mova into a load, and restart scanning |
| there. broken_move will then return true for mova. */ |
| fixup_mova (mova); |
| insn = mova; |
| } |
| } |
| if (broken_move (insn) |
| || (NONJUMP_INSN_P (insn) |
| && recog_memoized (insn) == CODE_FOR_casesi_worker_2)) |
| { |
| rtx_insn *scan; |
| /* Scan ahead looking for a barrier to stick the constant table |
| behind. */ |
| rtx_insn *barrier = find_barrier (num_mova, mova, insn); |
| rtx_insn *last_float_move = NULL; |
| rtx last_float = 0, *last_float_addr = NULL; |
| int need_aligned_label = 0; |
| |
| if (num_mova && ! mova_p (mova)) |
| { |
| /* find_barrier had to change the first mova into a |
| pcload; thus, we have to start with this new pcload. */ |
| insn = mova; |
| num_mova = 0; |
| } |
| /* Now find all the moves between the points and modify them. */ |
| for (scan = insn; scan != barrier; scan = NEXT_INSN (scan)) |
| { |
| if (LABEL_P (scan)) |
| last_float = 0; |
| if (NONJUMP_INSN_P (scan) |
| && recog_memoized (scan) == CODE_FOR_casesi_worker_2) |
| need_aligned_label = 1; |
| if (broken_move (scan)) |
| { |
| rtx *patp = &PATTERN (scan), pat = *patp; |
| rtx src, dst; |
| rtx lab; |
| rtx newsrc; |
| machine_mode mode; |
| |
| if (GET_CODE (pat) == PARALLEL) |
| patp = &XVECEXP (pat, 0, 0), pat = *patp; |
| src = SET_SRC (pat); |
| dst = SET_DEST (pat); |
| mode = GET_MODE (dst); |
| |
| if (mode == SImode && satisfies_constraint_I16 (src) |
| && REGNO (dst) != FPUL_REG) |
| { |
| int offset = 0; |
| |
| mode = HImode; |
| while (GET_CODE (dst) == SUBREG) |
| { |
| offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)), |
| GET_MODE (SUBREG_REG (dst)), |
| SUBREG_BYTE (dst), |
| GET_MODE (dst)); |
| dst = SUBREG_REG (dst); |
| } |
| dst = gen_rtx_REG (HImode, REGNO (dst) + offset); |
| } |
| if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst))) |
| { |
| /* This must be an insn that clobbers r0. */ |
| rtx *clobberp = &XVECEXP (PATTERN (scan), 0, |
| XVECLEN (PATTERN (scan), 0) |
| - 1); |
| rtx clobber = *clobberp; |
| |
| gcc_assert (GET_CODE (clobber) == CLOBBER |
| && rtx_equal_p (XEXP (clobber, 0), r0_rtx)); |
| |
| if (last_float |
| && reg_set_between_p (r0_rtx, last_float_move, scan)) |
| last_float = 0; |
| lab = add_constant (src, mode, last_float); |
| if (lab) |
| emit_insn_before (gen_mova (lab), scan); |
| else |
| { |
		      /* There will be a REG_UNUSED note for r0 on
			 LAST_FLOAT_MOVE; we have to change it to REG_INC,
			 since otherwise reorg's mark_target_live_regs will
			 not consider r0 to be used, and we could end up with
			 a delay slot insn in front of SCAN that clobbers
			 r0.  */
| rtx note |
| = find_regno_note (last_float_move, REG_UNUSED, 0); |
| |
| /* If we are not optimizing, then there may not be |
| a note. */ |
| if (note) |
| PUT_REG_NOTE_KIND (note, REG_INC); |
| |
| *last_float_addr = r0_inc_rtx; |
| } |
| last_float_move = scan; |
| last_float = src; |
| newsrc = gen_const_mem (mode, |
| (((TARGET_SH4 && ! TARGET_FMOVD) |
| || REGNO (dst) == FPUL_REG) |
| ? r0_inc_rtx |
| : r0_rtx)); |
| last_float_addr = &XEXP (newsrc, 0); |
| |
| /* Remove the clobber of r0. */ |
| *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber), |
| gen_rtx_SCRATCH (Pmode)); |
| } |
| /* This is a mova needing a label. Create it. */ |
| else if (GET_CODE (src) == UNSPEC |
| && XINT (src, 1) == UNSPEC_MOVA |
| && GET_CODE (XVECEXP (src, 0, 0)) == CONST) |
| { |
| lab = add_constant (XVECEXP (src, 0, 0), mode, 0); |
| newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); |
| newsrc = gen_rtx_UNSPEC (SImode, |
| gen_rtvec (1, newsrc), |
| UNSPEC_MOVA); |
| } |
| else if (GET_CODE (src) == UNSPEC_VOLATILE |
| && XINT (src, 1) == UNSPECV_SP_SWITCH_B) |
| { |
| newsrc = XVECEXP (src, 0, 0); |
| XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc); |
| INSN_CODE (scan) = -1; |
| continue; |
| } |
| else |
| { |
| lab = add_constant (src, mode, 0); |
| newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); |
| newsrc = gen_const_mem (mode, newsrc); |
| } |
| *patp = gen_rtx_SET (dst, newsrc); |
| INSN_CODE (scan) = -1; |
| } |
| } |
| dump_table (need_aligned_label ? insn : 0, barrier); |
| insn = barrier; |
| } |
| } |
| label_ref_list_d_pool.release (); |
| for (insn = first; insn; insn = NEXT_INSN (insn)) |
| PUT_MODE (insn, VOIDmode); |
| |
| mdep_reorg_phase = SH_SHORTEN_BRANCHES1; |
| INSN_ADDRESSES_FREE (); |
| split_branches (first); |
| |
  /* The INSN_REFERENCES_ARE_DELAYED macro in sh.h is problematic because
     it also has an effect on the register that holds the address of the
     sfunc.  Insert an extra dummy insn in front of each sfunc that
     pretends to use this register.  */
| if (flag_delayed_branch) |
| { |
| for (insn = first; insn; insn = NEXT_INSN (insn)) |
| { |
| rtx reg = sfunc_uses_reg (insn); |
| |
| if (! reg) |
| continue; |
| emit_insn_before (gen_use_sfunc_addr (reg), insn); |
| } |
| } |
| mdep_reorg_phase = SH_AFTER_MDEP_REORG; |
| } |
| |
| /* Return the UID of the insn that follows the specified label. */ |
| int |
| get_dest_uid (rtx_insn *label, int max_uid) |
| { |
| rtx_insn *dest = next_real_insn (label); |
| |
| if (! dest) |
| /* This can happen for an undefined label. */ |
| return 0; |
| int dest_uid = INSN_UID (dest); |
| /* If this is a newly created branch redirection blocking instruction, |
| we cannot index the branch_uid or insn_addresses arrays with its |
| uid. But then, we won't need to, because the actual destination is |
| the following branch. */ |
| while (dest_uid >= max_uid) |
| { |
| dest = NEXT_INSN (dest); |
| dest_uid = INSN_UID (dest); |
| } |
| if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN) |
| return 0; |
| return dest_uid; |
| } |
| |
| /* Split condbranches that are out of range. Also add clobbers for |
| scratch registers that are needed in far jumps. |
| We do this before delay slot scheduling, so that it can take our |
| newly created instructions into account. It also allows us to |
| find branches with common targets more easily. */ |
| static void |
| split_branches (rtx_insn *first) |
| { |
| rtx_insn *insn; |
| struct far_branch **uid_branch, *far_branch_list = 0; |
| int max_uid = get_max_uid (); |
| int ok; |
| |
| /* Find out which branches are out of range. */ |
| shorten_branches (first); |
| |
| uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch); |
| memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch); |
| |
| for (insn = first; insn; insn = NEXT_INSN (insn)) |
| if (! INSN_P (insn)) |
| continue; |
| else if (insn->deleted ()) |
| { |
| /* Shorten_branches would split this instruction again, |
| so transform it into a note. */ |
| SET_INSN_DELETED (insn); |
| } |
| else if (JUMP_P (insn)) |
| { |
| enum attr_type type = get_attr_type (insn); |
| if (type == TYPE_CBRANCH) |
| { |
| rtx_insn *next, *beyond; |
| |
| if (get_attr_length (insn) > 4) |
| { |
| rtx src = SET_SRC (PATTERN (insn)); |
| rtx_insn *olabel = safe_as_a <rtx_insn *> (XEXP (XEXP (src, 1), 0)); |
| int addr = INSN_ADDRESSES (INSN_UID (insn)); |
| rtx_insn *label = 0; |
| int dest_uid = get_dest_uid (olabel, max_uid); |
| struct far_branch *bp = uid_branch[dest_uid]; |
| |
| /* redirect_jump needs a valid JUMP_LABEL, and it might delete |
| the label if the LABEL_NUSES count drops to zero. There is |
| always a jump_optimize pass that sets these values, but it |
| proceeds to delete unreferenced code, and then if not |
| optimizing, to un-delete the deleted instructions, thus |
| leaving labels with too low uses counts. */ |
| if (! optimize) |
| { |
| JUMP_LABEL (insn) = olabel; |
| LABEL_NUSES (olabel)++; |
| } |
| if (! bp) |
| { |
| bp = (struct far_branch *) alloca (sizeof *bp); |
| uid_branch[dest_uid] = bp; |
| bp->prev = far_branch_list; |
| far_branch_list = bp; |
| bp->far_label = as_a <rtx_insn *> ( |
| XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), |
| 0)); |
| LABEL_NUSES (bp->far_label)++; |
| } |
| else |
| { |
| label = bp->near_label; |
| if (! label && bp->address - addr >= CONDJUMP_MIN) |
| { |
| rtx_insn *block = bp->insert_place; |
| |
| if (GET_CODE (PATTERN (block)) == RETURN) |
| block = PREV_INSN (block); |
| else |
| block = gen_block_redirect (block, |
| bp->address, 2); |
| label = emit_label_after (gen_label_rtx (), |
| PREV_INSN (block)); |
| bp->near_label = label; |
| } |
| else if (label && ! NEXT_INSN (label)) |
| { |
| if (addr + 2 - bp->address <= CONDJUMP_MAX) |
| bp->insert_place = insn; |
| else |
| gen_far_branch (bp); |
| } |
| } |
| if (! label |
| || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)) |
| { |
| bp->near_label = label = gen_label_rtx (); |
| bp->insert_place = insn; |
| bp->address = addr; |
| } |
| ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0); |
| gcc_assert (ok); |
| } |
| else |
| { |
| /* get_attr_length (insn) == 2 */ |
| /* Check if we have a pattern where reorg wants to redirect |
| the branch to a label from an unconditional branch that |
| is too far away. */ |
| /* We can't use JUMP_LABEL here because it might be undefined |
| when not optimizing. */ |
| /* A syntax error might cause beyond to be NULL_RTX. */ |
| rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0); |
| beyond = next_active_insn (as_a<rtx_insn *> (temp)); |
| |
| if (beyond |
| && (JUMP_P (beyond) |
| || ((beyond = next_active_insn (beyond)) |
| && JUMP_P (beyond))) |
| && GET_CODE (PATTERN (beyond)) == SET |
| && recog_memoized (beyond) == CODE_FOR_jump_compact |
| && ((INSN_ADDRESSES |
| (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))) |
| - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252) |
| > 252 + 258 + 2)) |
| gen_block_redirect (beyond, |
| INSN_ADDRESSES (INSN_UID (beyond)), 1); |
| } |
| |
| next = next_active_insn (insn); |
| |
| if (next |
| && (JUMP_P (next) |
| || ((next = next_active_insn (next)) |
| && JUMP_P (next))) |
| && GET_CODE (PATTERN (next)) == SET |
| && recog_memoized (next) == CODE_FOR_jump_compact |
| && ((INSN_ADDRESSES |
| (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))) |
| - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252) |
| > 252 + 258 + 2)) |
| gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1); |
| } |
| else if (type == TYPE_JUMP || type == TYPE_RETURN) |
| { |
| int addr = INSN_ADDRESSES (INSN_UID (insn)); |
| rtx_insn *far_label = 0; |
| int dest_uid = 0; |
| struct far_branch *bp; |
| |
| if (type == TYPE_JUMP) |
| { |
| if (CROSSING_JUMP_P (insn)) |
| { |
| emit_insn_before (gen_block_branch_redirect (const0_rtx), |
| insn); |
| continue; |
| } |
| |
| far_label = as_a <rtx_insn *> ( |
| XEXP (SET_SRC (PATTERN (insn)), 0)); |
| dest_uid = get_dest_uid (far_label, max_uid); |
| if (! dest_uid) |
| { |
| /* Parse errors can lead to labels outside |
| the insn stream. */ |
| if (! NEXT_INSN (far_label)) |
| continue; |
| |
| if (! optimize) |
| { |
| JUMP_LABEL (insn) = far_label; |
| LABEL_NUSES (far_label)++; |
| } |
| redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1); |
| far_label = 0; |
| } |
| } |
| bp = uid_branch[dest_uid]; |
| if (! bp) |
| { |
| bp = (struct far_branch *) alloca (sizeof *bp); |
| uid_branch[dest_uid] = bp; |
| bp->prev = far_branch_list; |
| far_branch_list = bp; |
| bp->near_label = 0; |
| bp->far_label = far_label; |
| if (far_label) |
| LABEL_NUSES (far_label)++; |
| } |
| else if (bp->near_label && ! NEXT_INSN (bp->near_label)) |
| if (addr - bp->address <= CONDJUMP_MAX) |
| emit_label_after (bp->near_label, PREV_INSN (insn)); |
| else |
| { |
| gen_far_branch (bp); |
| bp->near_label = 0; |
| } |
| else |
| bp->near_label = 0; |
| bp->address = addr; |
| bp->insert_place = insn; |
| if (! far_label) |
| emit_insn_before (gen_block_branch_redirect (const0_rtx), insn); |
| else |
| gen_block_redirect (insn, addr, bp->near_label ? 2 : 0); |
| } |
| } |
| /* Generate all pending far branches, |
| and free our references to the far labels. */ |
| while (far_branch_list) |
| { |
| if (far_branch_list->near_label |
| && ! NEXT_INSN (far_branch_list->near_label)) |
| gen_far_branch (far_branch_list); |
| if (optimize |
| && far_branch_list->far_label |
| && ! --LABEL_NUSES (far_branch_list->far_label)) |
| delete_insn (far_branch_list->far_label); |
| far_branch_list = far_branch_list->prev; |
| } |
| |
| /* Instruction length information is no longer valid due to the new |
| instructions that have been generated. */ |
| init_insn_lengths (); |
| } |
| |
| /* Dump out instruction addresses, which is useful for debugging the |
| constant pool table stuff. |
| |
   If relaxing, output the label and pseudo-ops used to link together
   calls and the instructions which set the registers.
| |
| ??? The addresses printed by this routine for insns are nonsense for |
| insns which are inside of a sequence where none of the inner insns have |
| variable length. This is because the second pass of shorten_branches |
| does not bother to update them. */ |
| void |
| final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED, |
| int noperands ATTRIBUTE_UNUSED) |
| { |
| if (TARGET_DUMPISIZE) |
| fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn))); |
| |
| if (TARGET_RELAX) |
| { |
| if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX)) |
| { |
| rtx pattern = PATTERN (insn); |
| if (GET_CODE (pattern) == PARALLEL) |
| pattern = XVECEXP (pattern, 0, 0); |
| switch (GET_CODE (pattern)) |
| { |
| case SET: |
| if (GET_CODE (SET_SRC (pattern)) != CALL |
| && get_attr_type (insn) != TYPE_SFUNC) |
| { |
| targetm.asm_out.internal_label |
| (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0))); |
| break; |
| } |
| /* FALLTHROUGH */ |
| case CALL: |
| asm_fprintf (asm_out_file, "\t.uses %LL%d\n", |
| CODE_LABEL_NUMBER (XEXP (note, 0))); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| } |
| } |
| |
| /* Dump out any constants accumulated in the final pass. These will |
| only be labels. */ |
| const char * |
| output_jump_label_table (void) |
| { |
| if (pool_size) |
| { |
| fprintf (asm_out_file, "\t.align 2\n"); |
| for (int i = 0; i < pool_size; i++) |
| { |
| pool_node *p = &pool_vector[i]; |
| |
| (*targetm.asm_out.internal_label) (asm_out_file, "L", |
| CODE_LABEL_NUMBER (p->label)); |
| output_asm_insn (".long %O0", &p->value); |
| } |
| pool_size = 0; |
| } |
| |
| return ""; |
| } |
| |
| /* A full frame looks like: |
| |
| arg-5 |
| arg-4 |
| [ if current_function_anonymous_args |
| arg-3 |
| arg-2 |
| arg-1 |
| arg-0 ] |
| saved-fp |
| saved-r10 |
| saved-r11 |
| saved-r12 |
| saved-pr |
| local-n |
| .. |
| local-1 |
   local-0 <- fp points here.  */

/* Adjust the stack by SIZE bytes.  REG holds the rtl of the register to be
   adjusted.  If EPILOGUE_P is zero, this is for a prologue; otherwise, it's
   for an epilogue and a negative value means that it's for a sibcall
   epilogue.  If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
   all the registers that are about to be restored, and hence dead.  If
   FRAME_P is true, the emitted insns are marked as frame related.  */
| static void |
| output_stack_adjust (int size, rtx reg, int epilogue_p, |
| HARD_REG_SET *live_regs_mask, bool frame_p) |
| { |
| rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn; |
| if (size) |
| { |
| HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT; |
| |
| /* This test is bogus, as output_stack_adjust is used to re-align the |
| stack. */ |
| #if 0 |
| gcc_assert (!(size % align)); |
| #endif |
| |
| if (CONST_OK_FOR_ADD (size)) |
| emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size))); |
| /* Try to do it with two partial adjustments; however, we must make |
| sure that the stack is properly aligned at all times, in case |
| an interrupt occurs between the two partial adjustments. */ |
| else if (CONST_OK_FOR_ADD (size / 2 & -align) |
| && CONST_OK_FOR_ADD (size - (size / 2 & -align))) |
| { |
| emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align))); |
| emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align)))); |
| } |
| else |
| { |
| rtx const_reg; |
| rtx insn; |
| int temp = epilogue_p ? 7 : 1; |
| int i; |
| |
| /* If TEMP is invalid, we could temporarily save a general |
| register to MACL. However, there is currently no need |
| to handle this case, so just die when we see it. */ |
| if (epilogue_p < 0 |
| || current_function_interrupt |
| || ! call_used_regs[temp] || fixed_regs[temp]) |
| temp = -1; |
| if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0) |
| { |
| HARD_REG_SET temps = (regs_invalidated_by_call |
| & ~fixed_reg_set |
| & savable_regs); |
| if (epilogue_p > 0) |
| { |
| int nreg = 0; |
| if (crtl->return_rtx) |
| { |
| machine_mode mode; |
| mode = GET_MODE (crtl->return_rtx); |
| if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG) |
| nreg = hard_regno_nregs (FIRST_RET_REG, mode); |
| } |
| for (i = 0; i < nreg; i++) |
| CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i); |
| if (crtl->calls_eh_return) |
| { |
| CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO); |
| for (i = 0; i <= 3; i++) |
| CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i)); |
| } |
| } |
| if (epilogue_p <= 0) |
| { |
| for (i = FIRST_PARM_REG; |
| i < FIRST_PARM_REG + NPARM_REGS (SImode); i++) |
| CLEAR_HARD_REG_BIT (temps, i); |
| if (cfun->static_chain_decl != NULL) |
| CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM); |
| } |
| temp = scavenge_reg (&temps); |
| } |
| if (temp < 0 && live_regs_mask) |
| { |
| HARD_REG_SET temps; |
| |
| temps = *live_regs_mask; |
| CLEAR_HARD_REG_BIT (temps, REGNO (reg)); |
| temp = scavenge_reg (&temps); |
| } |
| if (temp < 0) |
| { |
| rtx adj_reg, tmp_reg, mem; |
| |
	      /* If we reached here, the most likely case is the (sibcall)
		 epilogue.  Put a special push/pop sequence for such case as
		 the last resort.  This looks lengthy, but it would not be a
		 problem because it seems to be very rare.  */
| gcc_assert (epilogue_p); |
| |
| /* ??? There is still the slight possibility that r4 or |
| r5 have been reserved as fixed registers or assigned |
| as global registers, and they change during an |
| interrupt. There are possible ways to handle this: |
| |
| - If we are adjusting the frame pointer (r14), we can do |
| with a single temp register and an ordinary push / pop |
| on the stack. |
| - Grab any call-used or call-saved registers (i.e. not |
| fixed or globals) for the temps we need. We might |
| also grab r14 if we are adjusting the stack pointer. |
| If we can't find enough available registers, issue |
| a diagnostic and die - the user must have reserved |
| way too many registers. |
| But since all this is rather unlikely to happen and |
| would require extra testing, we just die if r4 / r5 |
| are not available. */ |
| gcc_assert (!fixed_regs[4] && !fixed_regs[5] |
| && !global_regs[4] && !global_regs[5]); |
| |
| adj_reg = gen_rtx_REG (GET_MODE (reg), 4); |
| tmp_reg = gen_rtx_REG (GET_MODE (reg), 5); |
| emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg); |
| emit_insn (GEN_MOV (adj_reg, GEN_INT (size))); |
| emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg)); |
| mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg)); |
| emit_move_insn (mem, tmp_reg); |
| emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg)); |
| mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg)); |
| emit_move_insn (mem, tmp_reg); |
| emit_move_insn (reg, adj_reg); |
| mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg)); |
| emit_move_insn (adj_reg, mem); |
| mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg)); |
| emit_move_insn (tmp_reg, mem); |
| /* Tell flow the insns that pop r4/r5 aren't dead. */ |
| emit_use (tmp_reg); |
| emit_use (adj_reg); |
| return; |
| } |
| const_reg = gen_rtx_REG (GET_MODE (reg), temp); |
| |
| /* If SIZE is negative, subtract the positive value. |
| This sometimes allows a constant pool entry to be shared |
| between prologue and epilogue code. */ |
| if (size < 0) |
| { |
| emit_insn (GEN_MOV (const_reg, GEN_INT (-size))); |
| insn = emit_fn (GEN_SUB3 (reg, reg, const_reg)); |
| } |
| else |
| { |
| emit_insn (GEN_MOV (const_reg, GEN_INT (size))); |
| insn = emit_fn (GEN_ADD3 (reg, reg, const_reg)); |
| } |
| add_reg_note (insn, REG_FRAME_RELATED_EXPR, |
| gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg, |
| GEN_INT (size)))); |
| } |
| } |
| } |
| |
| /* Emit the specified insn and mark it as frame related. */ |
| static rtx_insn * |
| emit_frame_insn (rtx x) |
| { |
| rtx_insn *insn = emit_insn (x); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| return insn; |
| } |
| |
| /* Output RTL to push register RN onto the stack. */ |
| static rtx |
| push (int rn) |
| { |
| rtx x; |
| if (rn == FPUL_REG) |
| x = gen_push_fpul (); |
| else if (rn == FPSCR_REG) |
| x = gen_push_fpscr (); |
| else if (TARGET_FPU_DOUBLE && TARGET_FMOVD |
| && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn)) |
| { |
| if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1) |
| return NULL_RTX; |
| x = gen_push_4 (gen_rtx_REG (DFmode, rn)); |
| } |
| else if (TARGET_SH2E && FP_REGISTER_P (rn)) |
| x = gen_push_e (gen_rtx_REG (SFmode, rn)); |
| else |
| x = gen_push (gen_rtx_REG (SImode, rn)); |
| |
| x = emit_frame_insn (x); |
| add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM)); |
| return x; |
| } |
| |
| /* Output RTL to pop register RN from the stack. */ |
| static void |
| pop (int rn) |
| { |
| rtx x, sp_reg, reg; |
| if (rn == FPUL_REG) |
| x = gen_pop_fpul (); |
| else if (rn == FPSCR_REG) |
| x = gen_pop_fpscr (); |
| else if (TARGET_FPU_DOUBLE && TARGET_FMOVD |
| && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn)) |
| { |
| if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1) |
| return; |
| x = gen_pop_4 (gen_rtx_REG (DFmode, rn)); |
| } |
| else if (TARGET_SH2E && FP_REGISTER_P (rn)) |
| x = gen_pop_e (gen_rtx_REG (SFmode, rn)); |
| else |
| x = gen_pop (gen_rtx_REG (SImode, rn)); |
| |
| x = emit_insn (x); |
| |
| sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); |
| reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL |
| ? SET_DEST (XVECEXP (PATTERN (x), 0, 0)) |
| : SET_DEST (PATTERN (x))); |
| add_reg_note (x, REG_CFA_RESTORE, reg); |
| add_reg_note (x, REG_CFA_ADJUST_CFA, |
| gen_rtx_SET (sp_reg, |
| plus_constant (SImode, sp_reg, |
| GET_MODE_SIZE (GET_MODE (reg))))); |
| add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM)); |
| RTX_FRAME_RELATED_P (x) = 1; |
| } |
| |
| /* Generate code to push the regs specified in the mask. */ |
| static void |
| push_regs (HARD_REG_SET *mask, bool interrupt_handler) |
| { |
| bool skip_fpscr = false; |
| |
  /* Push PR last; this gives better latencies after the prologue, and
     provides candidates for the return delay slot when there are no general
     registers pushed.  */
| for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0; |
| i < FIRST_PSEUDO_REGISTER; i++) |
| { |
| /* If this is an interrupt handler, and the SZ bit varies, |
| and we have to push any floating point register, we need |
| to switch to the correct precision first. */ |
| if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD |
| && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS])) |
| { |
| push (FPSCR_REG); |
| fpscr_set_from_mem (NORMAL_MODE (FP_MODE), ~*mask); |
| skip_fpscr = true; |
| } |
| if (i != PR_REG |
| && (i != FPSCR_REG || ! skip_fpscr) |
| && TEST_HARD_REG_BIT (*mask, i)) |
| { |
	  /* If the ISR has the RESBANK attribute assigned, don't push any of
	     the following registers: R0-R14, MACH, MACL and GBR.  */
| if (! (sh_cfun_resbank_handler_p () |
| && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG) |
| || i == MACH_REG |
| || i == MACL_REG |
| || i == GBR_REG))) |
| push (i); |
| } |
| } |
| |
| /* Push banked registers last to improve delay slot opportunities. */ |
| if (interrupt_handler) |
| { |
| bool use_movml = false; |
| |
| if (TARGET_SH2A) |
| { |
| unsigned int count = 0; |
| |
| for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) |
| if (TEST_HARD_REG_BIT (*mask, i)) |
| count++; |
| else |
| break; |
| |
| /* Use movml when all banked registers are pushed. */ |
| if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1) |
| use_movml = true; |
| } |
| |
| if (sh_cfun_resbank_handler_p ()) |
| ; /* Do nothing. */ |
| else if (use_movml) |
| { |
| rtx x, mem, reg, set; |
| rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); |
| |
	  /* We must avoid scheduling the multiple store insn together with
	     other insns.  */
| emit_insn (gen_blockage ()); |
| x = gen_movml_push_banked (sp_reg); |
| x = emit_frame_insn (x); |
| for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) |
| { |
| mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4)); |
| reg = gen_rtx_REG (SImode, i); |
| add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg)); |
| } |
| |
	  set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, -32));
| add_reg_note (x, REG_CFA_ADJUST_CFA, set); |
| emit_insn (gen_blockage ()); |
| } |
| else |
| for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) |
| if (TEST_HARD_REG_BIT (*mask, i)) |
| push (i); |
| } |
| |
| /* Don't push PR register for an ISR with RESBANK attribute assigned. */ |
| if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ()) |
| push (PR_REG); |
| } |
| |
| /* Work out the registers which need to be saved, both as a mask and a |
| count of saved words. Return the count. |
| |
| If doing a pragma interrupt function, then push all regs used by the |
| function, and if we call another function (we can tell by looking at PR), |
| make sure that all the regs it clobbers are safe too. */ |
| static int |
| calc_live_regs (HARD_REG_SET *live_regs_mask) |
| { |
| unsigned int reg; |
| tree attrs; |
| bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler; |
| bool nosave_low_regs; |
| |
| attrs = DECL_ATTRIBUTES (current_function_decl); |
| interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p (); |
| trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE; |
| interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler; |
| nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE; |
| |
| CLEAR_HARD_REG_SET (*live_regs_mask); |
| if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler |
| && df_regs_ever_live_p (FPSCR_REG)) |
| target_flags &= ~MASK_FPU_SINGLE; |
  /* If switching to double mode would save a lot of register saves, do it.  */
| else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE) |
| for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2) |
| if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1) |
| && (! call_used_regs[reg] |
| || interrupt_handler) |
| && ++count > 2) |
| { |
| target_flags &= ~MASK_FPU_SINGLE; |
| break; |
| } |
| |
| |
| rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG); |
| bool pr_live = (pr_initial |
| ? (!REG_P (pr_initial) |
| || REGNO (pr_initial) != (PR_REG)) |
| : df_regs_ever_live_p (PR_REG)); |
  /* For SHcompact, if not optimizing, we end up with a memory reference
     using the return address pointer for __builtin_return_address even
     though there is no actual need to put the PR register on the stack.  */
| pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM); |
| |
| /* Force PR to be live if the prologue has to call the SHmedia |
| argument decoder or register saver. */ |
| bool has_call = pr_live; |
| |
| int count; |
| for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; ) |
| { |
| if (reg == PR_REG |
| ? pr_live |
| : interrupt_handler |
| ? (/* Need to save all the regs ever live. */ |
| (df_regs_ever_live_p (reg) |
| || (call_used_regs[reg] |
| && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG |
| || reg == PIC_OFFSET_TABLE_REGNUM) |
| && has_call)) |
| && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM |
| && reg != RETURN_ADDRESS_POINTER_REGNUM |
| && reg != T_REG && reg != GBR_REG |
| && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG |
	     /* Push FPSCR only on targets which have an FPU.  */
| && (reg != FPSCR_REG || TARGET_FPU_ANY)) |
| : (/* Only push those regs which are used and need to be saved. */ |
| (false) |
| || (df_regs_ever_live_p (reg) |
| && ((!call_used_regs[reg] |
| && !(reg != PIC_OFFSET_TABLE_REGNUM |
| && fixed_regs[reg] |
| && call_used_or_fixed_reg_p (reg))) |
| || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY))) |
| || (crtl->calls_eh_return |
| && (reg == EH_RETURN_DATA_REGNO (0) |
| || reg == EH_RETURN_DATA_REGNO (1) |
| || reg == EH_RETURN_DATA_REGNO (2) |
| || reg == EH_RETURN_DATA_REGNO (3))) |
| || ((reg == MACL_REG || reg == MACH_REG) |
| && df_regs_ever_live_p (reg) |
| && sh_cfun_attr_renesas_p ()) |
| )) |
| { |
| SET_HARD_REG_BIT (*live_regs_mask, reg); |
| count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg)); |
| |
| if (TARGET_FPU_DOUBLE && TARGET_FMOVD |
| && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT) |
| { |
| if (FP_REGISTER_P (reg)) |
| { |
| if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1)) |
| { |
| SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1)); |
| count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1)); |
| } |
| } |
| else if (XD_REGISTER_P (reg)) |
| { |
| /* Must switch to double mode to access these registers. */ |
| target_flags &= ~MASK_FPU_SINGLE; |
| } |
| } |
| } |
| if (nosave_low_regs && reg == R8_REG) |
| break; |
| } |
| |
| return count; |
| } |
| |
| /* Code to generate prologue and epilogue sequences */ |
| |
| /* PUSHED is the number of bytes that are being pushed on the |
| stack for register saves. Return the frame size, padded |
| appropriately so that the stack stays properly aligned. */ |
| static HOST_WIDE_INT |
| rounded_frame_size (int pushed) |
| { |
| HOST_WIDE_INT size = get_frame_size (); |
| HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT; |
| |
| if (ACCUMULATE_OUTGOING_ARGS) |
| size += crtl->outgoing_args_size; |
| |
| return ((size + pushed + align - 1) & -align) - pushed; |
| } |
| |
| /* Expand code for the function prologue. */ |
| void |
| sh_expand_prologue (void) |
| { |
| int save_flags = target_flags; |
| tree sp_switch_attr |
| = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)); |
| |
| current_function_interrupt = sh_cfun_interrupt_handler_p (); |
| |
| /* We have pretend args if we had an object sent partially in registers |
| and partially on the stack, e.g. a large structure. */ |
| int pretend_args = crtl->args.pretend_args_size; |
| if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl) |
| && (NPARM_REGS(SImode) |
| > crtl->args.info.arg_count[(int) SH_ARG_INT])) |
| pretend_args = 0; |
| |
| output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true); |
| int stack_usage = pretend_args; |
| |
| /* Emit the code for SETUP_VARARGS. */ |
| if (cfun->stdarg) |
| { |
| if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)) |
| { |
	  /* Push arg regs as if they'd been provided by the caller on the
	     stack.  */
| for (int i = 0; i < NPARM_REGS(SImode); i++) |
| { |
| int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1; |
| |
| if (i >= (NPARM_REGS(SImode) |
| - crtl->args.info.arg_count[(int) SH_ARG_INT] |
| )) |
| break; |
| push (rn); |
| stack_usage += GET_MODE_SIZE (SImode); |
| } |
| } |
| } |
| |
| /* If we're supposed to switch stacks at function entry, do so now. */ |
| if (sp_switch_attr) |
| { |
| rtx lab, newsrc; |
| /* The argument specifies a variable holding the address of the |
| stack the interrupt function should switch to/from at entry/exit. */ |
| tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr)); |
| const char* s = ggc_strdup (TREE_STRING_POINTER (arg)); |
| rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s); |
| |
| lab = add_constant (sp_switch, SImode, 0); |
| newsrc = gen_rtx_LABEL_REF (VOIDmode, lab); |
| |
| emit_insn (gen_sp_switch_1 (newsrc)); |
| } |
| |
| HARD_REG_SET live_regs_mask; |
| int d = calc_live_regs (&live_regs_mask); |
| /* ??? Maybe we could save some switching if we can move a mode switch |
| that already happens to be at the function start into the prologue. */ |
| if (target_flags != save_flags && ! current_function_interrupt) |
| emit_insn (gen_toggle_sz ()); |
| |
| push_regs (&live_regs_mask, current_function_interrupt); |
| stack_usage += d; |
| |
| if (flag_pic && !TARGET_FDPIC |
| && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) |
| emit_insn (gen_GOTaddr2picreg (const0_rtx)); |
| |
| if (target_flags != save_flags && ! current_function_interrupt) |
| emit_insn (gen_toggle_sz ()); |
| |
| target_flags = save_flags; |
| |
| output_stack_adjust (-rounded_frame_size (d), |
| stack_pointer_rtx, 0, NULL, true); |
| stack_usage += rounded_frame_size (d); |
| |
| if (frame_pointer_needed) |
| emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx)); |
| |
  /* If we are profiling, make sure no instructions are scheduled before
     the call to mcount.  Similarly, if call instructions get scheduled
     ahead of frame-related insns, the unwinder will be confused, because
     currently SH has no unwind info for function epilogues.  */
| if (crtl->profile || flag_exceptions || flag_unwind_tables) |
| emit_insn (gen_blockage ()); |
| |
| if (flag_stack_usage_info) |
| current_function_static_stack_size = stack_usage; |
| } |
| |
| /* Expand code for the function epilogue. */ |
| void |
| sh_expand_epilogue (bool sibcall_p) |
| { |
| int save_flags = target_flags; |
| bool fpscr_deferred = false; |
| int e = sibcall_p ? -1 : 1; |
| |
| HARD_REG_SET live_regs_mask; |
| int d = calc_live_regs (&live_regs_mask); |
| |
| int save_size = d; |
| int frame_size = rounded_frame_size (d); |
| |
| if (frame_pointer_needed) |
| { |
| /* We must avoid scheduling the epilogue with previous basic blocks. |
| See PR/18032 and PR/40313. */ |
| emit_insn (gen_blockage ()); |
| output_stack_adjust (frame_size, hard_frame_pointer_rtx, e, |
| &live_regs_mask, true); |
| |
| /* We must avoid moving the stack pointer adjustment past code |
| which reads from the local frame, else an interrupt could |
| occur after the SP adjustment and clobber data in the local |
| frame. */ |
| emit_insn (gen_blockage ()); |
| emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx)); |
| } |
| else if (frame_size) |
| { |
| /* We must avoid moving the stack pointer adjustment past code |
| which reads from the local frame, else an interrupt could |
| occur after the SP adjustment and clobber data in the local |
| frame. */ |
| emit_insn (gen_blockage ()); |
| output_stack_adjust (frame_size, stack_pointer_rtx, e, |
| &live_regs_mask, true); |
| } |
| |
| /* Pop all the registers. */ |
| |
| if (target_flags != save_flags && ! current_function_interrupt) |
| emit_insn (gen_toggle_sz ()); |
| |
| { |
| int last_reg; |
| |
| save_size = 0; |
/* For an ISR with the RESBANK attribute assigned, don't pop the PR
register.  */
| if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG) |
| && !sh_cfun_resbank_handler_p ()) |
| { |
| if (!frame_pointer_needed) |
| emit_insn (gen_blockage ()); |
| pop (PR_REG); |
| } |
| |
/* Banked registers are popped first to avoid being scheduled in the
delay slot.  RTE switches banks before the delay-slot instruction.  */
| if (current_function_interrupt) |
| { |
| bool use_movml = false; |
| |
| if (TARGET_SH2A) |
| { |
| unsigned int count = 0; |
| |
| for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++) |
| if (TEST_HARD_REG_BIT (live_regs_mask, i)) |
| count++; |
| else |
| break; |
| |
/* Use movml when all banked registers are popped.  */
| if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1) |
| use_movml = true; |
| } |
| |
| if (sh_cfun_resbank_handler_p ()) |
| ; /* Do nothing. */ |
| else if (use_movml) |
| { |
| rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); |
| |
/* We must avoid scheduling the multiple-load insn together with
other insns.  */
| emit_insn (gen_blockage ()); |
| emit_insn (gen_movml_pop_banked (sp_reg)); |
| emit_insn (gen_blockage ()); |
| } |
| else |
| for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--) |
| if (TEST_HARD_REG_BIT (live_regs_mask, i)) |
| pop (i); |
| |
| last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1; |
| } |
| else |
| last_reg = FIRST_PSEUDO_REGISTER; |
| |
| for (int i = 0; i < last_reg; i++) |
| { |
| int j = (FIRST_PSEUDO_REGISTER - 1) - i; |
| |
| if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD |
| && hard_reg_set_intersect_p (live_regs_mask, |
| reg_class_contents[DF_REGS])) |
| fpscr_deferred = true; |
/* For an ISR with the RESBANK attribute assigned, don't pop the
following registers: R0-R14, MACH, MACL and GBR.  */
| else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j) |
| && ! (sh_cfun_resbank_handler_p () |
| && ((j >= FIRST_GENERAL_REG |
| && j < LAST_GENERAL_REG) |
| || j == MACH_REG |
| || j == MACL_REG |
| || j == GBR_REG))) |
| pop (j); |
| |
| if (j == FIRST_FP_REG && fpscr_deferred) |
| pop (FPSCR_REG); |
| } |
| } |
| if (target_flags != save_flags && ! current_function_interrupt) |
| emit_insn (gen_toggle_sz ()); |
| target_flags = save_flags; |
| |
| output_stack_adjust (crtl->args.pretend_args_size + save_size, |
| stack_pointer_rtx, e, NULL, true); |
| |
| if (crtl->calls_eh_return) |
| emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx, |
| EH_RETURN_STACKADJ_RTX)); |
| |
| /* Switch back to the normal stack if necessary. */ |
| if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl))) |
| emit_insn (gen_sp_switch_2 ()); |
| |
| /* Tell flow the insn that pops PR isn't dead. */ |
| if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)) |
| emit_use (gen_rtx_REG (SImode, PR_REG)); |
| } |
| |
| /* Emit code to change the current function's return address to RA. |
| TEMP is available as a scratch register, if needed. */ |
| void |
| sh_set_return_address (rtx ra, rtx tmp) |
| { |
| HARD_REG_SET live_regs_mask; |
| int d = calc_live_regs (&live_regs_mask); |
| |
/* If PR_REG isn't live, we can set it directly.  */
| if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG)) |
| { |
| rtx rr = gen_rtx_REG (SImode, PR_REG); |
| emit_insn (GEN_MOV (rr, ra)); |
| /* Tell flow the register for return isn't dead. */ |
| emit_use (rr); |
| return; |
| } |
| |
| int pr_offset = rounded_frame_size (d); |
| |
| emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset))); |
| |
| if (frame_pointer_needed) |
| emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx)); |
| else |
| emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx)); |
| |
| tmp = gen_frame_mem (Pmode, tmp); |
| emit_insn (GEN_MOV (tmp, ra)); |
/* Tell flow this store isn't dead.  */
| emit_use (tmp); |
| } |
| |
| /* Clear variables at function end. */ |
| static void |
| sh_output_function_epilogue (FILE *) |
| { |
| } |
| |
| static rtx |
| sh_builtin_saveregs (void) |
| { |
| /* First unnamed integer register. */ |
| int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT]; |
| /* Number of integer registers we need to save. */ |
| int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg); |
/* First unnamed SFmode float reg.  */
| int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT]; |
| /* Number of SFmode float regs to save. */ |
| int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg); |
| rtx regbuf, fpregs; |
| int bufsize, regno; |
| alias_set_type alias_set; |
| |
| if (!TARGET_FPU_ANY) |
| { |
| error ("%<__builtin_saveregs%> not supported by this subtarget"); |
| return const0_rtx; |
| } |
| |
| /* Allocate block of memory for the regs. */ |
| /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte? |
| Or can assign_stack_local accept a 0 SIZE argument? */ |
| bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD); |
| |
| if (n_floatregs & 1) |
| { |
| rtx addr; |
| |
| regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0); |
| addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0)); |
| emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD))); |
| regbuf = change_address (regbuf, BLKmode, addr); |
| } |
| else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs) |
| { |
| rtx addr, mask; |
| |
| regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0); |
| addr = copy_to_mode_reg (Pmode, plus_constant (Pmode, |
| XEXP (regbuf, 0), 4)); |
| mask = copy_to_mode_reg (Pmode, GEN_INT (-8)); |
| emit_insn (gen_andsi3 (addr, addr, mask)); |
| regbuf = change_address (regbuf, BLKmode, addr); |
| } |
| else |
| regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0); |
| alias_set = get_varargs_alias_set (); |
| set_mem_alias_set (regbuf, alias_set); |
| |
| /* Save int args. |
| This is optimized to only save the regs that are necessary. Explicitly |
| named args need not be saved. */ |
| if (n_intregs > 0) |
| move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg, |
| adjust_address (regbuf, BLKmode, |
| n_floatregs * UNITS_PER_WORD), |
| n_intregs); |
| |
| /* Save float args. |
| This is optimized to only save the regs that are necessary. Explicitly |
| named args need not be saved. |
| We explicitly build a pointer to the buffer because it halves the insn |
| count when not optimizing (otherwise the pointer is built for each reg |
| saved). |
| We emit the moves in reverse order so that we can use predecrement. */ |
| |
| fpregs = copy_to_mode_reg (Pmode, |
| plus_constant (Pmode, XEXP (regbuf, 0), |
| n_floatregs * UNITS_PER_WORD)); |
| if (TARGET_FPU_DOUBLE) |
| { |
| rtx mem; |
| for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2) |
| { |
| emit_insn (gen_addsi3 (fpregs, fpregs, |
| GEN_INT (-2 * UNITS_PER_WORD))); |
| mem = change_address (regbuf, DFmode, fpregs); |
| emit_move_insn (mem, |
| gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno)); |
| } |
| regno = first_floatreg; |
| if (regno & 1) |
| { |
| emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD))); |
| mem = change_address (regbuf, SFmode, fpregs); |
| emit_move_insn (mem, |
| gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) |
| + regno - SH_REG_MSW_OFFSET)); |
| } |
| } |
| else |
| for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--) |
| { |
| rtx mem; |
| |
| emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD))); |
| mem = change_address (regbuf, SFmode, fpregs); |
| emit_move_insn (mem, |
| gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno)); |
| } |
| |
| /* Return the address of the regbuf. */ |
| return XEXP (regbuf, 0); |
| } |
| |
| /* Define the `__builtin_va_list' type for the ABI. */ |
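/* A sketch of the layout built below (not a definition used anywhere):
     struct __va_list_tag {
       void *__va_next_o, *__va_next_o_limit;
       void *__va_next_fp, *__va_next_fp_limit;
       void *__va_next_stack;
     };  */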
| static tree |
| sh_build_builtin_va_list (void) |
| { |
| tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; |
| tree record, type_decl; |
| |
| if ((! TARGET_SH2E && ! TARGET_SH4) |
| || TARGET_HITACHI || sh_cfun_attr_renesas_p ()) |
| return ptr_type_node; |
| |
| record = (*lang_hooks.types.make_type) (RECORD_TYPE); |
| type_decl = build_decl (BUILTINS_LOCATION, |
| TYPE_DECL, get_identifier ("__va_list_tag"), record); |
| |
| f_next_o = build_decl (BUILTINS_LOCATION, |
| FIELD_DECL, get_identifier ("__va_next_o"), |
| ptr_type_node); |
| f_next_o_limit = build_decl (BUILTINS_LOCATION, |
| FIELD_DECL, |
| get_identifier ("__va_next_o_limit"), |
| ptr_type_node); |
| f_next_fp = build_decl (BUILTINS_LOCATION, |
| FIELD_DECL, get_identifier ("__va_next_fp"), |
| ptr_type_node); |
| f_next_fp_limit = build_decl (BUILTINS_LOCATION, |
| FIELD_DECL, |
| get_identifier ("__va_next_fp_limit"), |
| ptr_type_node); |
| f_next_stack = build_decl (BUILTINS_LOCATION, |
| FIELD_DECL, get_identifier ("__va_next_stack"), |
| ptr_type_node); |
| |
| DECL_FIELD_CONTEXT (f_next_o) = record; |
| DECL_FIELD_CONTEXT (f_next_o_limit) = record; |
| DECL_FIELD_CONTEXT (f_next_fp) = record; |
| DECL_FIELD_CONTEXT (f_next_fp_limit) = record; |
| DECL_FIELD_CONTEXT (f_next_stack) = record; |
| |
| TYPE_STUB_DECL (record) = type_decl; |
| TYPE_NAME (record) = type_decl; |
| TYPE_FIELDS (record) = f_next_o; |
| DECL_CHAIN (f_next_o) = f_next_o_limit; |
| DECL_CHAIN (f_next_o_limit) = f_next_fp; |
| DECL_CHAIN (f_next_fp) = f_next_fp_limit; |
| DECL_CHAIN (f_next_fp_limit) = f_next_stack; |
| |
| layout_type (record); |
| |
| return record; |
| } |
| |
| /* Implement `va_start' for varargs and stdarg. */ |
| static void |
| sh_va_start (tree valist, rtx nextarg) |
| { |
| tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; |
| tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack; |
| tree t, u; |
| int nfp, nint; |
| |
| if ((! TARGET_SH2E && ! TARGET_SH4) |
| || TARGET_HITACHI || sh_cfun_attr_renesas_p ()) |
| { |
| std_expand_builtin_va_start (valist, nextarg); |
| return; |
| } |
| |
| f_next_o = TYPE_FIELDS (va_list_type_node); |
| f_next_o_limit = DECL_CHAIN (f_next_o); |
| f_next_fp = DECL_CHAIN (f_next_o_limit); |
| f_next_fp_limit = DECL_CHAIN (f_next_fp); |
| f_next_stack = DECL_CHAIN (f_next_fp_limit); |
| |
| next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o, |
| NULL_TREE); |
| next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit), |
| valist, f_next_o_limit, NULL_TREE); |
| next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp, |
| NULL_TREE); |
| next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit), |
| valist, f_next_fp_limit, NULL_TREE); |
| next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack), |
| valist, f_next_stack, NULL_TREE); |
| |
| /* Call __builtin_saveregs. */ |
| u = make_tree (sizetype, expand_builtin_saveregs ()); |
| u = fold_convert (ptr_type_node, u); |
| t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| |
| nfp = crtl->args.info.arg_count[SH_ARG_FLOAT]; |
| if (nfp < 8) |
| nfp = 8 - nfp; |
| else |
| nfp = 0; |
| u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp); |
| t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| |
| t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| |
| nint = crtl->args.info.arg_count[SH_ARG_INT]; |
| if (nint < 4) |
| nint = 4 - nint; |
| else |
| nint = 0; |
| u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint); |
| t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| |
| u = make_tree (ptr_type_node, nextarg); |
| t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u); |
| TREE_SIDE_EFFECTS (t) = 1; |
| expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); |
| } |
| |
| /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized |
| member, return it. */ |
| static tree |
| find_sole_member (tree type) |
| { |
| tree field, member = NULL_TREE; |
| |
| for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) != FIELD_DECL) |
| continue; |
| if (!DECL_SIZE (field)) |
| return NULL_TREE; |
| if (integer_zerop (DECL_SIZE (field))) |
| continue; |
| if (member) |
| return NULL_TREE; |
| member = field; |
| } |
| return member; |
| } |
| |
| /* Implement `va_arg'. */ |
| static tree |
| sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, |
| gimple_seq *post_p ATTRIBUTE_UNUSED) |
| { |
| tree tmp; |
| tree addr, lab_over = NULL, result = NULL; |
| tree eff_type; |
| |
| const bool pass_by_ref |
| = !VOID_TYPE_P (type) && must_pass_va_arg_in_stack (type); |
| |
| if (pass_by_ref) |
| type = build_pointer_type (type); |
| |
| HOST_WIDE_INT size = int_size_in_bytes (type); |
| HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD; |
| tree pptr_type_node = build_pointer_type (ptr_type_node); |
| |
| if ((TARGET_SH2E || TARGET_SH4) |
| && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ())) |
| { |
| tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack; |
| tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack; |
| tree lab_false; |
| tree member; |
| |
| f_next_o = TYPE_FIELDS (va_list_type_node); |
| f_next_o_limit = DECL_CHAIN (f_next_o); |
| f_next_fp = DECL_CHAIN (f_next_o_limit); |
| f_next_fp_limit = DECL_CHAIN (f_next_fp); |
| f_next_stack = DECL_CHAIN (f_next_fp_limit); |
| |
| next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o, |
| NULL_TREE); |
| next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit), |
| valist, f_next_o_limit, NULL_TREE); |
| next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), |
| valist, f_next_fp, NULL_TREE); |
| next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit), |
| valist, f_next_fp_limit, NULL_TREE); |
| next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack), |
| valist, f_next_stack, NULL_TREE); |
| |
| /* Structures with a single member with a distinct mode are passed |
| like their member. This is relevant if the latter has a REAL_TYPE |
| or COMPLEX_TYPE type. */ |
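/* E.g. (illustrative) struct s { float f; }; has SFmode and is
passed here like a plain float.  */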
| eff_type = type; |
| while (TREE_CODE (eff_type) == RECORD_TYPE |
| && (member = find_sole_member (eff_type)) |
| && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE |
| || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE |
| || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE)) |
| { |
| tree field_type = TREE_TYPE (member); |
| |
| if (TYPE_MODE (eff_type) == TYPE_MODE (field_type)) |
| eff_type = field_type; |
| else |
| { |
| gcc_assert ((TYPE_ALIGN (eff_type) |
| < GET_MODE_ALIGNMENT (TYPE_MODE (field_type))) |
| || (TYPE_ALIGN (eff_type) |
| > GET_MODE_BITSIZE (TYPE_MODE (field_type)))); |
| break; |
| } |
| } |
| |
| bool pass_as_float; |
| if (TARGET_FPU_DOUBLE) |
| { |
| pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8) |
| || (TREE_CODE (eff_type) == COMPLEX_TYPE |
| && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE |
| && size <= 16)); |
| } |
| else |
| { |
| pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4); |
| } |
| |
| addr = create_tmp_var (pptr_type_node); |
| lab_false = create_artificial_label (UNKNOWN_LOCATION); |
| lab_over = create_artificial_label (UNKNOWN_LOCATION); |
| |
| valist = build_simple_mem_ref (addr); |
| |
| if (pass_as_float) |
| { |
| tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp)); |
| tree cmp; |
| bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE; |
| |
| tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp)); |
| gimplify_assign (unshare_expr (addr), tmp, pre_p); |
| |
| gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p); |
| tmp = next_fp_limit; |
| if (size > 4 && !is_double) |
| tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size); |
| tmp = build2 (GE_EXPR, boolean_type_node, |
| unshare_expr (next_fp_tmp), unshare_expr (tmp)); |
| cmp = build3 (COND_EXPR, void_type_node, tmp, |
| build1 (GOTO_EXPR, void_type_node, |
| unshare_expr (lab_false)), NULL_TREE); |
| if (!is_double) |
| gimplify_and_add (cmp, pre_p); |
| |
| if (TYPE_ALIGN (eff_type) > BITS_PER_WORD |
| || (is_double || size == 16)) |
| { |
| tmp = fold_convert (sizetype, next_fp_tmp); |
| tmp = build2 (BIT_AND_EXPR, sizetype, tmp, |
| size_int (UNITS_PER_WORD)); |
| tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp); |
| gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p); |
| } |
| if (is_double) |
| gimplify_and_add (cmp, pre_p); |
| |
| #ifdef FUNCTION_ARG_SCmode_WART |
| if (TYPE_MODE (eff_type) == SCmode |
| && TARGET_SH4 && TARGET_LITTLE_ENDIAN) |
| { |
| tree subtype = TREE_TYPE (eff_type); |
| tree real, imag; |
| |
| imag |
| = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL); |
| imag = get_initialized_tmp_var (imag, pre_p, NULL); |
| |
| real |
| = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL); |
| real = get_initialized_tmp_var (real, pre_p, NULL); |
| |
| result = build2 (COMPLEX_EXPR, eff_type, real, imag); |
| if (type != eff_type) |
| result = build1 (VIEW_CONVERT_EXPR, type, result); |
| result = get_initialized_tmp_var (result, pre_p, NULL); |
| } |
| #endif /* FUNCTION_ARG_SCmode_WART */ |
| |
| tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over)); |
| gimplify_and_add (tmp, pre_p); |
| |
| tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false)); |
| gimplify_and_add (tmp, pre_p); |
| |
| tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack)); |
| gimplify_assign (unshare_expr (addr), tmp, pre_p); |
| gimplify_assign (unshare_expr (next_fp_tmp), |
| unshare_expr (valist), pre_p); |
| |
| gimplify_assign (unshare_expr (valist), |
| unshare_expr (next_fp_tmp), post_p); |
| valist = next_fp_tmp; |
| } |
| else |
| { |
| tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize); |
| tmp = build2 (GT_EXPR, boolean_type_node, tmp, |
| unshare_expr (next_o_limit)); |
| tmp = build3 (COND_EXPR, void_type_node, tmp, |
| build1 (GOTO_EXPR, void_type_node, |
| unshare_expr (lab_false)), |
| NULL_TREE); |
| gimplify_and_add (tmp, pre_p); |
| |
| tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o)); |
| gimplify_assign (unshare_expr (addr), tmp, pre_p); |
| |
| tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over)); |
| gimplify_and_add (tmp, pre_p); |
| |
| tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false)); |
| gimplify_and_add (tmp, pre_p); |
| |
| if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A)) |
| gimplify_assign (unshare_expr (next_o), |
| unshare_expr (next_o_limit), pre_p); |
| |
| tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack)); |
| gimplify_assign (unshare_expr (addr), tmp, pre_p); |
| } |
| |
| if (!result) |
| { |
| tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over)); |
| gimplify_and_add (tmp, pre_p); |
| } |
| } |
| |
| /* ??? In va-sh.h, there had been code to make values larger than |
| size 8 indirect. This does not match the FUNCTION_ARG macros. */ |
| |
| tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL); |
| if (result) |
| { |
| gimplify_assign (result, tmp, pre_p); |
| result = build1 (NOP_EXPR, TREE_TYPE (result), result); |
| tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over)); |
| gimplify_and_add (tmp, pre_p); |
| } |
| else |
| result = tmp; |
| |
| if (pass_by_ref) |
| result = build_va_arg_indirect_ref (result); |
| |
| return result; |
| } |
| |
/* 64-bit floating point memory transfers are paired single precision
loads or stores, so the DWARF information needs fixing in little
endian mode (unless PR=SZ=1 in FPSCR).  */
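/* E.g. a DFmode value held in fr4 (dr4) is described as the SFmode
pair fr5/fr4 here (register names illustrative).  */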
| rtx |
| sh_dwarf_register_span (rtx reg) |
| { |
| unsigned regno = REGNO (reg); |
| |
| if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode) |
| return NULL_RTX; |
| |
| return |
| gen_rtx_PARALLEL (VOIDmode, |
| gen_rtvec (2, |
| gen_rtx_REG (SFmode, regno + 1), |
| gen_rtx_REG (SFmode, regno))); |
| } |
| |
| static machine_mode |
| sh_promote_function_mode (const_tree type, machine_mode mode, |
| int *punsignedp, const_tree funtype, |
| int for_return) |
| { |
| if (sh_promote_prototypes (funtype)) |
| return promote_mode (type, mode, punsignedp); |
| else |
| return default_promote_function_mode (type, mode, punsignedp, funtype, |
| for_return); |
| } |
| |
| static bool |
| sh_promote_prototypes (const_tree type) |
| { |
| if (TARGET_HITACHI) |
| return false; |
| if (! type) |
| return true; |
| return ! sh_attr_renesas_p (type); |
| } |
| |
| static bool |
| sh_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| |
| if (targetm.calls.must_pass_in_stack (arg)) |
| return true; |
| |
| /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function |
| wants to know about pass-by-reference semantics for incoming |
| arguments. */ |
| if (! cum) |
| return false; |
| |
| return false; |
| } |
| |
| static bool |
| sh_callee_copies (cumulative_args_t cum, const function_arg_info &arg) |
| { |
| /* ??? How can it possibly be correct to return true only on the |
| caller side of the equation? Is there someplace else in the |
| sh backend that's magically producing the copies? */ |
| return (get_cumulative_args (cum)->outgoing |
| && ((arg.mode == BLKmode |
| ? TYPE_ALIGN (arg.type) |
| : GET_MODE_ALIGNMENT (arg.mode)) |
| % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0)); |
| } |
| |
| static sh_arg_class |
| get_sh_arg_class (machine_mode mode) |
| { |
| if (TARGET_FPU_ANY && mode == SFmode) |
| return SH_ARG_FLOAT; |
| |
| if (TARGET_FPU_DOUBLE |
| && (GET_MODE_CLASS (mode) == MODE_FLOAT |
| || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)) |
| return SH_ARG_FLOAT; |
| |
| return SH_ARG_INT; |
| } |
| |
/* Round a register number up to a proper boundary for an arg of mode
MODE.
The SH doesn't care about double alignment, so we only round doubles
to even regs when explicitly asked to.  */
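/* E.g. with -mdalign (TARGET_ALIGN_DOUBLE), a DFmode argument that
would start in an odd register is bumped to the next even one
(a sketch; the exact registers depend on the ABI in effect).  */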
| static int |
| sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode) |
| { |
| /* FIXME: This used to be a macro and has been copy pasted into this |
| function as is. Make this more readable. */ |
| return |
| (((TARGET_ALIGN_DOUBLE |
| || (TARGET_FPU_DOUBLE |
| && (mode == DFmode || mode == DCmode) |
| && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode))) |
| && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD) |
| ? (cum.arg_count[(int) get_sh_arg_class (mode)] |
| + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1)) |
| : cum.arg_count[(int) get_sh_arg_class (mode)]); |
| } |
| |
| /* Return true if arg of the specified mode should be passed in a register |
| or false otherwise. */ |
| static bool |
| sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode, |
| const_tree type) |
| { |
| /* FIXME: This used to be a macro and has been copy pasted into this |
| function as is. Make this more readable. */ |
| return |
| ((type == 0 |
| || (! TREE_ADDRESSABLE (type) |
| && (! (TARGET_HITACHI || cum.renesas_abi) |
| || ! (AGGREGATE_TYPE_P (type) |
| || (!TARGET_FPU_ANY |
| && (GET_MODE_CLASS (mode) == MODE_FLOAT |
| && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode))))))) |
| && ! cum.force_mem |
| && (TARGET_SH2E |
| ? ((mode) == BLKmode |
| ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD |
| + int_size_in_bytes (type)) |
| <= NPARM_REGS (SImode) * UNITS_PER_WORD) |
| : ((sh_round_reg (cum, mode) |
| + sh_hard_regno_nregs (BASE_ARG_REG (mode), mode)) |
| <= NPARM_REGS (mode))) |
| : sh_round_reg (cum, mode) < NPARM_REGS (mode))); |
| } |
| |
| static int |
| sh_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| int words = 0; |
| |
| if (sh_pass_in_reg_p (*cum, arg.mode, arg.type) |
| && !TARGET_FPU_DOUBLE |
| && (sh_round_reg (*cum, arg.mode) |
| + CEIL (arg.promoted_size_in_bytes (), UNITS_PER_WORD) |
| > NPARM_REGS (arg.mode))) |
| words = NPARM_REGS (arg.mode) - sh_round_reg (*cum, arg.mode); |
| |
| return words * UNITS_PER_WORD; |
| } |
| |
| |
| /* Define where to put the arguments to a function. |
| Value is zero to push the argument on the stack, |
| or a hard register in which to store the argument. |
| |
| CUM is a variable of type CUMULATIVE_ARGS which gives info about |
| the preceding args and about the function being called. |
| ARG is a description of the argument. |
| |
| On SH the first args are normally in registers |
| and the rest are pushed. Any arg that starts within the first |
| NPARM_REGS words is at least partially passed in a register unless |
| its data type forbids. */ |
| static rtx |
| sh_function_arg (cumulative_args_t ca_v, const function_arg_info &arg) |
| { |
| CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); |
| machine_mode mode = arg.mode; |
| |
| if (arg.end_marker_p ()) |
| return ca->renesas_abi ? const1_rtx : const0_rtx; |
| |
| if (sh_pass_in_reg_p (*ca, mode, arg.type) |
| && (arg.named || ! (TARGET_HITACHI || ca->renesas_abi))) |
| { |
| int regno; |
| |
| if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN |
| && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1))) |
| { |
| rtx r1 = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (SFmode, |
| BASE_ARG_REG (mode) |
| + (sh_round_reg (*ca, mode) ^ 1)), |
| const0_rtx); |
| rtx r2 = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (SFmode, |
| BASE_ARG_REG (mode) |
| + ((sh_round_reg (*ca, mode) + 1) ^ 1)), |
| GEN_INT (4)); |
return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
| } |
| |
| /* If the alignment of a DF value causes an SF register to be |
| skipped, we will use that skipped register for the next SF |
| value. */ |
| if ((TARGET_HITACHI || ca->renesas_abi) |
| && ca->free_single_fp_reg |
| && mode == SFmode) |
| return gen_rtx_REG (mode, ca->free_single_fp_reg); |
| |
| regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode)) |
| ^ (mode == SFmode && TARGET_SH4 |
| && TARGET_LITTLE_ENDIAN |
| && ! TARGET_HITACHI && ! ca->renesas_abi); |
| return gen_rtx_REG (mode, regno); |
| |
| } |
| |
| return NULL_RTX; |
| } |
| |
| /* Update the data in CUM to advance over argument ARG. */ |
| static void |
| sh_function_arg_advance (cumulative_args_t ca_v, |
| const function_arg_info &arg) |
| { |
| CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); |
| |
| if (ca->force_mem) |
| ca->force_mem = false; |
| |
| if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE) |
| { |
| /* Note that we've used the skipped register. */ |
| if (arg.mode == SFmode && ca->free_single_fp_reg) |
| { |
| ca->free_single_fp_reg = 0; |
| return; |
| } |
/* When we have a DF after an SF, there's an SF register that gets
skipped in order to align the DF value.  We note this skipped
register, because the next SF value will use it, and not the SF
that follows the DF.  */
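/* E.g. (Renesas ABI sketch) for f (float a, double b, float c):
A takes one float register, B is aligned to an even register pair,
and C later reuses the register that was skipped for B.  */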
| if (arg.mode == DFmode |
| && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode)) |
| { |
| ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode) |
| + BASE_ARG_REG (arg.mode)); |
| } |
| } |
| |
| if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi) |
| || sh_pass_in_reg_p (*ca, arg.mode, arg.type)) |
| (ca->arg_count[(int) get_sh_arg_class (arg.mode)] |
| = (sh_round_reg (*ca, arg.mode) |
| + CEIL (arg.promoted_size_in_bytes (), UNITS_PER_WORD))); |
| } |
| |
| /* The Renesas calling convention doesn't quite fit into this scheme since |
| the address is passed like an invisible argument, but one that is always |
| passed in memory. */ |
| static rtx |
| sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED) |
| { |
| if (TARGET_HITACHI || sh_attr_renesas_p (fndecl)) |
| return NULL_RTX; |
| return gen_rtx_REG (Pmode, 2); |
| } |
| |
| /* Worker function for TARGET_FUNCTION_VALUE. |
| |
| For the SH, this is like LIBCALL_VALUE, except that we must change the |
| mode like PROMOTE_MODE does. |
| ??? PROMOTE_MODE is ignored for non-scalar types. The set of types |
| tested here has to be kept in sync with the one in |
| explow.c:promote_mode. */ |
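/* E.g. a function whose declared return type is 'short' returns the
value promoted to SImode under these rules (an illustration of the
mode selection done below).  */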
| static rtx |
| sh_function_value (const_tree valtype, |
| const_tree fn_decl_or_type, |
| bool outgoing ATTRIBUTE_UNUSED) |
| { |
| if (fn_decl_or_type |
| && !DECL_P (fn_decl_or_type)) |
| fn_decl_or_type = NULL; |
| |
| return gen_rtx_REG ( |
| ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT |
| && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4 |
| && (TREE_CODE (valtype) == INTEGER_TYPE |
| || TREE_CODE (valtype) == ENUMERAL_TYPE |
| || TREE_CODE (valtype) == BOOLEAN_TYPE |
| || TREE_CODE (valtype) == REAL_TYPE |
| || TREE_CODE (valtype) == OFFSET_TYPE)) |
| && sh_promote_prototypes (fn_decl_or_type) |
| ? SImode : TYPE_MODE (valtype)), |
| BASE_RETURN_VALUE_REG (TYPE_MODE (valtype))); |
| } |
| |
| /* Worker function for TARGET_LIBCALL_VALUE. */ |
| static rtx |
| sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) |
| { |
| return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode)); |
| } |
| |
| /* Return true if N is a possible register number of function value. */ |
| static bool |
| sh_function_value_regno_p (const unsigned int regno) |
| { |
| return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG); |
| } |
| |
| /* Worker function for TARGET_RETURN_IN_MEMORY. */ |
| static bool |
| sh_return_in_memory (const_tree type, const_tree fndecl) |
| { |
| return TYPE_MODE (type) == BLKmode |
| || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl)) |
| && TREE_CODE (type) == RECORD_TYPE); |
| } |
| |
/* We actually emit the code in sh_expand_prologue.  We used to use
a static variable to flag that we need to emit this code, but that
doesn't work when inlining, when functions are deferred and then
emitted later.  Fortunately, we already have two flags that are
part of struct function that tell whether a function uses varargs
or stdarg.  */
| static void |
| sh_setup_incoming_varargs (cumulative_args_t ca, |
| const function_arg_info &arg, |
| int *pretend_arg_size, |
| int second_time ATTRIBUTE_UNUSED) |
| { |
| gcc_assert (cfun->stdarg); |
| if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)) |
| { |
| int named_parm_regs, anon_parm_regs; |
| |
| named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), arg.mode) |
| + CEIL (arg.promoted_size_in_bytes (), |
| UNITS_PER_WORD)); |
| anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs; |
| if (anon_parm_regs > 0) |
| *pretend_arg_size = anon_parm_regs * 4; |
| } |
| } |
| |
| static bool |
| sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED) |
| { |
| return false; |
| } |
| |
| static bool |
| sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v) |
| { |
| CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v); |
| |
| return ! (TARGET_HITACHI || ca->renesas_abi); |
| } |
| |
| |
| /* Define the offset between two registers, one to be eliminated, and |
| the other its replacement, at the start of a routine. */ |
| int |
| initial_elimination_offset (int from, int to) |
| { |
| const int regs_saved_rounding = 0; |
| int save_flags = target_flags; |
| HARD_REG_SET live_regs_mask; |
| |
| int regs_saved = calc_live_regs (&live_regs_mask); |
| |
| int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding; |
| target_flags = save_flags; |
| |
| int total_saved_regs_space = regs_saved + regs_saved_rounding; |
| |
| if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) |
| return total_saved_regs_space + total_auto_space; |
| |
| if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM) |
| return total_saved_regs_space + total_auto_space; |
| |
| /* Initial gap between fp and sp is 0. */ |
| if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) |
| return 0; |
| |
| if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM) |
| return rounded_frame_size (0); |
| |
| if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) |
| return rounded_frame_size (0); |
| |
| gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM |
| && (to == HARD_FRAME_POINTER_REGNUM |
| || to == STACK_POINTER_REGNUM)); |
| return total_auto_space; |
| } |
| |
| /* Parse the -mfixed-range= option string. */ |
| void |
| sh_fix_range (const char *const_str) |
| { |
/* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
REG2 are either register names or register numbers.  The effect
of this option is to mark the registers in the range from REG1 to
REG2 as ``fixed'' so they won't be used by the compiler.  */
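/* E.g. -mfixed-range=r4-r6,r10-r11 (registers illustrative) marks
r4, r5, r6, r10 and r11 as fixed.  */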
| |
| char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str); |
| |
| while (1) |
| { |
| char* dash = strchr (str, '-'); |
| if (!dash) |
| { |
| warning (0, "value of %<-mfixed-range%> must have form REG1-REG2"); |
| return; |
| } |
| *dash = '\0'; |
| char* comma = strchr (dash + 1, ','); |
| if (comma) |
| *comma = '\0'; |
| |
| int first = decode_reg_name (str); |
| if (first < 0) |
| { |
| warning (0, "unknown register name: %s", str); |
| return; |
| } |
| |
| int last = decode_reg_name (dash + 1); |
| if (last < 0) |
| { |
| warning (0, "unknown register name: %s", dash + 1); |
| return; |
| } |
| |
| *dash = '-'; |
| |
| if (first > last) |
| { |
| warning (0, "%s-%s is an empty range", str, dash + 1); |
| return; |
| } |
| |
| for (int i = first; i <= last; ++i) |
| fixed_regs[i] = 1; |
| |
| if (!comma) |
| break; |
| |
| *comma = ','; |
| str = comma + 1; |
| } |
| } |
| |
| /* Insert any deferred function attributes from earlier pragmas. */ |
| static void |
| sh_insert_attributes (tree node, tree *attributes) |
| { |
| if (TREE_CODE (node) != FUNCTION_DECL) |
| return; |
| |
/* We are only interested in declarations.  */
| if (!DECL_P (node)) |
| return; |
| |
| /* Append the attributes to the deferred attributes. */ |
| *sh_deferred_function_attributes_tail = *attributes; |
| tree attrs = sh_deferred_function_attributes; |
| if (!attrs) |
| return; |
| |
| /* Some attributes imply or require the interrupt attribute. */ |
| if (!lookup_attribute ("interrupt_handler", attrs) |
| && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node))) |
| { |
| /* If we have a trapa_handler, but no interrupt_handler attribute, |
| insert an interrupt_handler attribute. */ |
| if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE) |
/* We can't use sh_pr_interrupt here because that's not in the
Java frontend.  */
| attrs |
| = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs); |
| /* However, for sp_switch, trap_exit, nosave_low_regs and resbank, |
| if the interrupt attribute is missing, we ignore the attribute |
| and warn. */ |
| else if (lookup_attribute ("sp_switch", attrs) |
| || lookup_attribute ("trap_exit", attrs) |
| || lookup_attribute ("nosave_low_regs", attrs) |
| || lookup_attribute ("resbank", attrs)) |
| { |
| tree *tail; |
| |
| for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs)) |
| { |
| if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs)) |
| || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs)) |
| || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)) |
| || is_attribute_p ("resbank", TREE_PURPOSE (attrs))) |
| warning (OPT_Wattributes, |
| "%qE attribute only applies to interrupt functions", |
| TREE_PURPOSE (attrs)); |
| else |
| { |
| *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE, |
| NULL_TREE); |
| tail = &TREE_CHAIN (*tail); |
| } |
| } |
| attrs = *attributes; |
| } |
| } |
| |
| /* Install the processed list. */ |
| *attributes = attrs; |
| |
| /* Clear deferred attributes. */ |
| sh_deferred_function_attributes = NULL_TREE; |
| sh_deferred_function_attributes_tail = &sh_deferred_function_attributes; |
| |
| return; |
| } |
| |
| /*------------------------------------------------------------------------------ |
| Target specific attributes |
| Supported attributes are: |
| |
| * interrupt_handler |
| Specifies this function is an interrupt handler. |
| |
| * trapa_handler |
| Like interrupt_handler, but don't save all registers. |
| |
| * sp_switch |
| Specifies an alternate stack for an interrupt handler to run on. |
| |
| * trap_exit |
| Use a trapa to exit an interrupt function instead of rte. |
| |
| * nosave_low_regs |
| Don't save r0..r7 in an interrupt handler function. |
| This is useful on SH3* and SH4*, which have a separate set of low |
| regs for user and privileged modes. |
| This is mainly to be used for non-reentrant interrupt handlers (i.e. |
| those that run with interrupts disabled and thus can't be |
interrupted themselves).
| |
| * renesas |
| Use Renesas calling/layout conventions (functions and structures). |
| |
| * resbank |
| In case of an interrupt handler function, use a register bank to |
| save registers R0-R14, MACH, MACL, GBR and PR. |
| This is available only on SH2A targets. |
| |
| * function_vector |
| Declares a function to be called using the TBR relative addressing |
| mode. Takes an argument that specifies the slot number in the table |
| where this function can be looked up by the JSR/N @@(disp8,TBR) insn. |
| */ |
| |
| /* Handle a 'resbank' attribute. */ |
| static tree |
| sh_handle_resbank_handler_attribute (tree * node, tree name, |
| tree args ATTRIBUTE_UNUSED, |
| int flags ATTRIBUTE_UNUSED, |
| bool * no_add_attrs) |
| { |
| if (!TARGET_SH2A) |
| { |
| warning (OPT_Wattributes, "%qE attribute is supported only for SH2A", |
| name); |
| *no_add_attrs = true; |
| } |
| if (TREE_CODE (*node) != FUNCTION_DECL) |
| { |
| warning (OPT_Wattributes, "%qE attribute only applies to functions", |
| name); |
| *no_add_attrs = true; |
| } |
| |
| return NULL_TREE; |
| } |
| |
| /* Handle an "interrupt_handler" attribute; arguments as in |
| struct attribute_spec.handler. */ |
| static tree |
| sh_handle_interrupt_handler_attribute (tree *node, tree name, |
| tree args ATTRIBUTE_UNUSED, |
| int flags ATTRIBUTE_UNUSED, |
| bool *no_add_attrs) |
| { |
| if (TREE_CODE (*node) != FUNCTION_DECL) |
| { |
| warning (OPT_Wattributes, "%qE attribute only applies to functions", |
| name); |
| *no_add_attrs = true; |
| } |
| |
| return NULL_TREE; |
| } |
| |
/* Handle a 'function_vector' attribute; arguments as in
| struct attribute_spec.handler. */ |
| static tree |
| sh2a_handle_function_vector_handler_attribute (tree * node, tree name, |
| tree args ATTRIBUTE_UNUSED, |
| int flags ATTRIBUTE_UNUSED, |
| bool * no_add_attrs) |
| { |
| if (!TARGET_SH2A) |
| { |
| warning (OPT_Wattributes, "%qE attribute only applies to SH2A", |
| name); |
| *no_add_attrs = true; |
| } |
| else if (TREE_CODE (*node) != FUNCTION_DECL) |
| { |
| warning (OPT_Wattributes, "%qE attribute only applies to functions", |
| name); |
| *no_add_attrs = true; |
| } |
| else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) |
| { |
| /* The argument must be a constant integer. */ |
| warning (OPT_Wattributes, |
| "%qE attribute argument not an integer constant", |
| name); |
| *no_add_attrs = true; |
| } |
| else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255) |
| { |
/* The argument value must be between 0 and 255.  */
warning (OPT_Wattributes,
"%qE attribute argument should be between 0 and 255",
| name); |
| *no_add_attrs = true; |
| } |
| return NULL_TREE; |
| } |
| |
/* Returns true if the rtx X is a SYMBOL_REF for a function that has
been assigned the attribute 'function_vector'.  */
| bool |
| sh2a_is_function_vector_call (rtx x) |
| { |
| if (GET_CODE (x) == SYMBOL_REF |
| && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION)) |
| { |
| tree tr = SYMBOL_REF_DECL (x); |
| |
| if (sh2a_function_vector_p (tr)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Returns the function vector number, if the attribute |
| 'function_vector' is assigned, otherwise returns zero. */ |
| int |
| sh2a_get_function_vector_number (rtx x) |
| { |
| if ((GET_CODE (x) == SYMBOL_REF) |
| && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION)) |
| { |
| tree t = SYMBOL_REF_DECL (x); |
| |
| if (TREE_CODE (t) != FUNCTION_DECL) |
| return 0; |
| |
| for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list)) |
| if (is_attribute_p ("function_vector", TREE_PURPOSE (list))) |
| return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list))); |
| |
| return 0; |
| } |
| else |
| return 0; |
| } |
| |
| /* Handle an "sp_switch" attribute; arguments as in |
| struct attribute_spec.handler. */ |
| static tree |
| sh_handle_sp_switch_attribute (tree *node, tree name, tree args, |
| int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) |
| { |
| if (TREE_CODE (*node) != FUNCTION_DECL) |
| { |
| warning (OPT_Wattributes, "%qE attribute only applies to functions", |
| name); |
| *no_add_attrs = true; |
| } |
| else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST) |
| { |
| /* The argument must be a constant string. */ |
| warning (OPT_Wattributes, "%qE attribute argument not a string constant", |
| name); |
| *no_add_attrs = true; |
| } |
| |
| return NULL_TREE; |
| } |
| |
| /* Handle an "trap_exit" attribute; arguments as in |
| struct attribute_spec.handler. */ |
| static tree |
| sh_handle_trap_exit_attribute (tree *node, tree name, tree args, |
| int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) |
| { |
| if (TREE_CODE (*node) != FUNCTION_DECL) |
| { |
| warning (OPT_Wattributes, "%qE attribute only applies to functions", |
| name); |
| *no_add_attrs = true; |
| } |
| /* The argument specifies a trap number to be used in a trapa instruction |
| at function exit (instead of an rte instruction). */ |
| else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST) |
| { |
| /* The argument must be a constant integer. */ |
| warning (OPT_Wattributes, "%qE attribute argument not an " |
| "integer constant", name); |
| *no_add_attrs = true; |
| } |
| |
| return NULL_TREE; |
| } |
| |
| static tree |
| sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED, |
| tree name ATTRIBUTE_UNUSED, |
| tree args ATTRIBUTE_UNUSED, |
| int flags ATTRIBUTE_UNUSED, |
| bool *no_add_attrs ATTRIBUTE_UNUSED) |
| { |
| return NULL_TREE; |
| } |
| |
| /* True if __attribute__((renesas)) or -mrenesas. */ |
| bool |
| sh_attr_renesas_p (const_tree td) |
| { |
| if (TARGET_HITACHI) |
| return true; |
| if (td == NULL_TREE) |
| return false; |
| if (DECL_P (td)) |
| td = TREE_TYPE (td); |
| if (td == error_mark_node) |
| return false; |
| return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE; |
| } |
| |
| /* True if __attribute__((renesas)) or -mrenesas, for the current |
| function. */ |
| bool |
| sh_cfun_attr_renesas_p (void) |
| { |
| return sh_attr_renesas_p (current_function_decl); |
| } |
| |
| /* Returns true if the current function has the "interrupt_handler" |
| attribute set. */ |
| bool |
| sh_cfun_interrupt_handler_p (void) |
| { |
| return (lookup_attribute ("interrupt_handler", |
| DECL_ATTRIBUTES (current_function_decl)) |
| != NULL_TREE); |
| } |
| |
| /* Returns true if FUNC has been assigned the attribute |
| "function_vector". */ |
| bool |
| sh2a_function_vector_p (tree func) |
| { |
| if (TREE_CODE (func) != FUNCTION_DECL) |
| return false; |
| |
| for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list)) |
| if (is_attribute_p ("function_vector", get_attribute_name (list))) |
| return true; |
| |
| return false; |
| } |
| |
/* Returns true if the current function has the "resbank" and
"interrupt_handler" attributes set and we are compiling for SH2A.  */
| bool |
| sh_cfun_resbank_handler_p (void) |
| { |
| return ((lookup_attribute ("resbank", |
| DECL_ATTRIBUTES (current_function_decl)) |
| != NULL_TREE) |
| && (lookup_attribute ("interrupt_handler", |
| DECL_ATTRIBUTES (current_function_decl)) |
| != NULL_TREE) && TARGET_SH2A); |
| } |
| |
| /* Returns true if the current function has a "trap_exit" attribute set. */ |
| bool |
| sh_cfun_trap_exit_p (void) |
| { |
| return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl)) |
| != NULL_TREE; |
| } |
| |
| /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */ |
| static const char * |
| sh_check_pch_target_flags (int old_flags) |
| { |
| if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3 |
| | MASK_SH_E | MASK_HARD_SH4 |
| | MASK_FPU_SINGLE | MASK_SH4)) |
| return _("created and used with different architectures / ABIs"); |
| if ((old_flags ^ target_flags) & MASK_HITACHI) |
| return _("created and used with different ABIs"); |
| if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN) |
| return _("created and used with different endianness"); |
| return NULL; |
| } |
| |
| /* Predicates used by the templates. */ |
| |
| /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx. |
| Used only in general_movsrc_operand. */ |
| bool |
| system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED) |
| { |
| switch (REGNO (op)) |
| { |
| case PR_REG: |
| case MACL_REG: |
| case MACH_REG: |
| return true; |
| } |
| return false; |
| } |
| |
| /* Returns true if OP is a floating point value with value 0.0. */ |
| bool |
| fp_zero_operand (rtx op) |
| { |
| if (GET_MODE (op) != SFmode) |
| return false; |
| |
| const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op); |
| return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r); |
| } |
| |
| /* Returns true if OP is a floating point value with value 1.0. */ |
| bool |
| fp_one_operand (rtx op) |
| { |
| if (GET_MODE (op) != SFmode) |
| return false; |
| |
| return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1); |
| } |
| |
| /* Return the TLS type for TLS symbols. */ |
| enum tls_model |
| tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED) |
| { |
| if (GET_CODE (op) != SYMBOL_REF) |
| return TLS_MODEL_NONE; |
| return SYMBOL_REF_TLS_MODEL (op); |
| } |
| |
| /* Return the destination address of a branch. */ |
| static int |
| branch_dest (rtx branch) |
| { |
| rtx dest = SET_SRC (PATTERN (branch)); |
| |
| if (GET_CODE (dest) == IF_THEN_ELSE) |
| dest = XEXP (dest, 1); |
| |
| return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0))); |
| } |
| |
/* Return true if REG is not used after INSN.
We assume REG is a reload reg, and therefore does
not live past labels.  It may live past calls or jumps though.  */
| bool |
| reg_unused_after (rtx reg, rtx_insn *insn) |
| { |
| /* If the reg is set by this instruction, then it is safe for our |
| case. Disregard the case where this is a store to memory, since |
| we are checking a register used in the store address. */ |
| rtx set = single_set (insn); |
| if (set && !MEM_P (SET_DEST (set)) |
| && reg_overlap_mentioned_p (reg, SET_DEST (set))) |
| return true; |
| |
| while ((insn = NEXT_INSN (insn))) |
| { |
| if (!INSN_P (insn)) |
| continue; |
| |
| rtx_code code = GET_CODE (insn); |
| |
| #if 0 |
| /* If this is a label that existed before reload, then the register |
| is dead here. However, if this is a label added by reorg, then |
| the register may still be live here. We can't tell the difference, |
| so we just ignore labels completely. */ |
| if (code == CODE_LABEL) |
| return 1; |
| /* else */ |
| #endif |
| |
| if (code == JUMP_INSN) |
| return false; |
| |
| /* If this is a sequence, we must handle them all at once. |
| We could have for instance a call that sets the target register, |
| and an insn in a delay slot that uses the register. In this case, |
we must return false.  */
| else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) |
| { |
| rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn)); |
| bool retval = false; |
| |
| for (int i = 0; i < seq->len (); i++) |
| { |
| rtx_insn *this_insn = seq->insn (i); |
| rtx set = single_set (this_insn); |
| |
| if (CALL_P (this_insn)) |
| code = CALL_INSN; |
| else if (JUMP_P (this_insn)) |
| { |
| if (INSN_ANNULLED_BRANCH_P (this_insn)) |
| return false; |
| code = JUMP_INSN; |
| } |
| |
| if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) |
| return false; |
| if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) |
| { |
| if (!MEM_P (SET_DEST (set))) |
| retval = true; |
| else |
| return false; |
| } |
| if (set == NULL_RTX |
| && reg_overlap_mentioned_p (reg, PATTERN (this_insn))) |
| return false; |
| } |
| if (retval) |
| return true; |
| else if (code == JUMP_INSN) |
| return false; |
| } |
| |
| rtx set = single_set (insn); |
| if (set && reg_overlap_mentioned_p (reg, SET_SRC (set))) |
| return false; |
| if (set && reg_overlap_mentioned_p (reg, SET_DEST (set))) |
| return !MEM_P (SET_DEST (set)); |
| if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn))) |
| return false; |
| |
| if (code == CALL_INSN && call_used_regs[REGNO (reg)]) |
| return true; |
| } |
| return true; |
| } |
| |
| |
| static GTY(()) rtx t_reg_rtx; |
| rtx |
| get_t_reg_rtx (void) |
| { |
| if (! t_reg_rtx) |
| t_reg_rtx = gen_rtx_REG (SImode, T_REG); |
| return t_reg_rtx; |
| } |
| |
| static GTY(()) tree fpscr_values; |
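/* Emit code to load FPSCR from __fpscr_values[INDEX].  The code below
declares __fpscr_values as an external array of two ints that the
runtime is expected to provide (a sketch of the implied contract).  */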
| |
| static void |
| emit_fpu_switch (rtx scratch, int index) |
| { |
| if (fpscr_values == NULL) |
| { |
| tree t = build_index_type (integer_one_node); |
| t = build_array_type (integer_type_node, t); |
| t = build_decl (BUILTINS_LOCATION, |
| VAR_DECL, get_identifier ("__fpscr_values"), t); |
| DECL_ARTIFICIAL (t) = 1; |
| DECL_IGNORED_P (t) = 1; |
| DECL_EXTERNAL (t) = 1; |
| TREE_STATIC (t) = 1; |
| TREE_PUBLIC (t) = 1; |
| TREE_USED (t) = 1; |
| |
| fpscr_values = t; |
| } |
| |
| rtx src = DECL_RTL (fpscr_values); |
| if (!can_create_pseudo_p ()) |
| { |
| emit_move_insn (scratch, XEXP (src, 0)); |
| if (index != 0) |
| emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4))); |
| src = adjust_automodify_address (src, SImode, scratch, index * 4); |
| } |
| else |
| src = adjust_address (src, SImode, index * 4); |
| |
| emit_insn (gen_lds_fpscr (src)); |
| } |
| |
| static rtx get_free_reg (HARD_REG_SET); |
| |
/* This function returns a register to use for loading the address
from which the fpscr is loaded.  Currently it always returns r1 or
r7, but when we are able to use pseudo registers after combine, or
have a better mechanism for choosing a register, it should be done
here.  */
/* REGS_LIVE is the liveness information for the point for which we
need this allocation.  In some bare-bones exit blocks, r1 is live at the
start.  We can even have all of r0..r3 being live:
__complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
The insn before which the new insns are placed will clobber the
register we return.  If a basic block consists only of setting the
return value register to a pseudo and using that register, the return
value is not live before or after this block, yet we'll insert our
insns right in the middle.  */
| static rtx |
| get_free_reg (HARD_REG_SET regs_live) |
| { |
| if (! TEST_HARD_REG_BIT (regs_live, 1)) |
| return gen_rtx_REG (Pmode, 1); |
| |
| /* Hard reg 1 is live; since this is a small register classes target, |
| there shouldn't be anything but a jump before the function end. */ |
| gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7)); |
| return gen_rtx_REG (Pmode, 7); |
| } |
| |
| /* This function will set the fpscr from memory. |
| MODE is the mode we are setting it to. */ |
| void |
| fpscr_set_from_mem (int mode, HARD_REG_SET regs_live) |
| { |
| enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode; |
| enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE); |
| |
| rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX; |
| emit_fpu_switch (addr_reg, fp_mode == norm_mode); |
| } |
| |
| /* Is the given character a logical line separator for the assembler? */ |
| #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR |
| #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';') |
| #endif |
| |
| static bool |
| sequence_insn_p (rtx_insn *insn) |
| { |
| rtx_insn* prev = PREV_INSN (insn); |
| if (prev == NULL) |
| return false; |
| |
| rtx_insn* next = NEXT_INSN (prev); |
| if (next == NULL) |
| return false; |
| |
| return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE; |
| } |
| |
| int |
| sh_insn_length_adjustment (rtx_insn *insn) |
| { |
| /* Instructions with unfilled delay slots take up an extra two bytes for |
| the nop in the delay slot. */ |
| if (((NONJUMP_INSN_P (insn) |
| && GET_CODE (PATTERN (insn)) != USE |
| && GET_CODE (PATTERN (insn)) != CLOBBER) |
| || CALL_P (insn) || JUMP_P (insn)) |
| && ! sequence_insn_p (insn) |
| && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES) |
| return 2; |
| |
| /* Increase the insn length of a cbranch without a delay slot insn to |
| force a delay slot which will be stuffed with a nop. */ |
| if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2 |
| && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH |
| && ! sequence_insn_p (insn)) |
| return 2; |
| |
/* SH-DSP parallel processing insns take four bytes instead of two.  */
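/* E.g. a "padd" or "pmuls" mnemonic in an inline asm template gets a
two-byte length adjustment here, making it four bytes in total
(mnemonics illustrative of the DSP set).  */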
| |
| if (NONJUMP_INSN_P (insn)) |
| { |
| int sum = 0; |
| rtx body = PATTERN (insn); |
| const char *templ; |
| char c; |
| bool maybe_label = true; |
| |
| if (GET_CODE (body) == ASM_INPUT) |
| templ = XSTR (body, 0); |
| else if (asm_noperands (body) >= 0) |
| templ |
| = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL); |
| else |
| return 0; |
| do |
| { |
| int ppi_adjust = 0; |
| |
| do |
| c = *templ++; |
| while (c == ' ' || c == '\t'); |
/* All SH-DSP parallel-processing insns start with p.
The only non-ppi SH insn starting with p is pref.
The only ppi starting with pr is prnd.  */
| if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2)) |
| ppi_adjust = 2; |
/* The repeat pseudo-insn expands to three insns, a total of
six bytes in size.  */
| else if ((c == 'r' || c == 'R') |
| && ! strncasecmp ("epeat", templ, 5)) |
| ppi_adjust = 4; |
| while (c && c != '\n' |
| && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ)) |
| { |
| /* If this is a label, it is obviously not a ppi insn. */ |
| if (c == ':' && maybe_label) |
| { |
| ppi_adjust = 0; |
| break; |
| } |
| else if (c == '\'' || c == '"') |
| maybe_label = false; |
| c = *templ++; |
| } |
| sum += ppi_adjust; |
| maybe_label = c != ':'; |
| } |
| while (c); |
| return sum; |
| } |
| return 0; |
| } |
| |
| /* Return TRUE for a valid displacement for the REG+disp addressing |
| with MODE. */ |
| bool |
| sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a, |
| bool allow_zero) |
| { |
| if (! CONST_INT_P (op)) |
| return false; |
| |
| { |
| const HOST_WIDE_INT offset = INTVAL (op); |
| const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a); |
| const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a); |
| |
| /* If the mode does not support any displacement always return false. |
| Even though an index of '0' is actually always valid, it will cause |
| troubles when e.g. a DFmode move is split into two SFmode moves, |
| where one SFmode move will have index '0' and the other move will |
| have index '4'. */ |
| if (!allow_zero && max_disp < 1) |
| return false; |
| |
| return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0; |
| } |
| } |
| |
| /* Recognize an RTL expression that is a valid memory address for |
| an instruction. |
| The MODE argument is the machine mode for the MEM expression |
| that wants to use this address. |
| Allow REG |
| REG+disp |
| REG+r0 |
| REG++ |
| --REG |
| GBR |
| GBR+disp */ |
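/* In SH assembly terms these are roughly @Rn, @(disp,Rn), @(R0,Rn),
@Rn+, @-Rn and @(disp,GBR) (a mapping sketch, not used by the code).  */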
| static bool |
| sh_legitimate_address_p (machine_mode mode, rtx x, bool strict) |
| { |
| if (REG_P (x) && REGNO (x) == GBR_REG) |
| return true; |
| |
| if (MAYBE_BASE_REGISTER_RTX_P (x, strict)) |
| return true; |
| else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC) |
| && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict)) |
| return true; |
| else if (GET_CODE (x) == PLUS) |
| { |
| rtx xop0 = XEXP (x, 0); |
| rtx xop1 = XEXP (x, 1); |
| |
| if (REG_P (xop0) && REGNO (xop0) == GBR_REG) |
| return gbr_displacement (xop1, mode); |
| |
| if (GET_MODE_SIZE (mode) <= 8 |
| && MAYBE_BASE_REGISTER_RTX_P (xop0, strict) |
| && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false)) |
| return true; |
| |
| if (GET_MODE_SIZE (mode) <= 4 |
| || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode)) |
| { |
| if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict) |
| && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict)) |
| return true; |
| if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict) |
| && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)) |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol |
| isn't protected by a PIC unspec. */ |
| bool |
| nonpic_symbol_mentioned_p (rtx x) |
| { |
| if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF |
| || GET_CODE (x) == PC) |
| return true; |
| |
| /* We don't want to look into the possible MEM location of a |
| CONST_DOUBLE, since we're not going to use it, in general. */ |
| if (GET_CODE (x) == CONST_DOUBLE) |
| return false; |
| |
| if (GET_CODE (x) == UNSPEC |
| && (XINT (x, 1) == UNSPEC_PIC |
| || XINT (x, 1) == UNSPEC_GOT |
| || XINT (x, 1) == UNSPEC_GOTOFF |
| || XINT (x, 1) == UNSPEC_GOTPLT |
| || XINT (x, 1) == UNSPEC_GOTTPOFF |
| || XINT (x, 1) == UNSPEC_DTPOFF |
| || XINT (x, 1) == UNSPEC_TPOFF |
| || XINT (x, 1) == UNSPEC_PLT |
| || XINT (x, 1) == UNSPEC_PCREL |
| || XINT (x, 1) == UNSPEC_SYMOFF |
| || XINT (x, 1) == UNSPEC_PCREL_SYMOFF |
| || XINT (x, 1) == UNSPEC_GOTFUNCDESC |
| || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC)) |
| return false; |
| |
| const char* fmt = GET_RTX_FORMAT (GET_CODE (x)); |
| for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'E') |
| { |
| for (int j = XVECLEN (x, i) - 1; j >= 0; j--) |
| if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j))) |
| return true; |
| } |
| else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i))) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Convert a non-PIC address in `orig' to a PIC address using @GOT or |
| @GOTOFF in `reg'. */ |
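/* E.g. for a local symbol this loads the sym@GOTOFF offset and adds the
   GOT base (r12 on SH), while a global symbol is loaded from its sym@GOT
   slot; the exact insn sequence depends on the sym* expanders used
   below.  */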
| rtx |
| legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg) |
| { |
| if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE) |
| return orig; |
| |
| if (GET_CODE (orig) == LABEL_REF |
| || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig))) |
| { |
| if (reg == NULL_RTX) |
| reg = gen_reg_rtx (Pmode); |
| |
| if (TARGET_FDPIC |
| && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig)) |
| { |
	  /* Weak functions may resolve to NULL, which doesn't work with
	     GOTOFFFUNCDESC because the runtime offset is not known.  */
| if (SYMBOL_REF_WEAK (orig)) |
| emit_insn (gen_symGOTFUNCDESC2reg (reg, orig)); |
| else |
| emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig)); |
| } |
| else if (TARGET_FDPIC |
| && (GET_CODE (orig) == LABEL_REF |
| || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig) |
| && (TREE_READONLY (SYMBOL_REF_DECL (orig)) |
| || SYMBOL_REF_EXTERNAL_P (orig) |
| || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig)))))) |
| /* In FDPIC, GOTOFF can only be used for writable data. */ |
| emit_insn (gen_symGOT2reg (reg, orig)); |
| else |
| emit_insn (gen_symGOTOFF2reg (reg, orig)); |
| return reg; |
| } |
| else if (GET_CODE (orig) == SYMBOL_REF) |
| { |
| if (reg == NULL_RTX) |
| reg = gen_reg_rtx (Pmode); |
| |
| if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig)) |
| emit_insn (gen_symGOTFUNCDESC2reg (reg, orig)); |
| else |
| emit_insn (gen_symGOT2reg (reg, orig)); |
| return reg; |
| } |
| return orig; |
| } |
| |
/* Given a (logical) mode size and an offset in bytes, try to find the
   appropriate displacement value for a mov insn.  On SH the displacements
   are limited to max. 60 bytes for SImode, max. 30 bytes for HImode and
   max. 15 bytes for QImode.  To compensate for this we create a new base
   address by adding an adjustment value to it.
| |
| If the originally requested offset is greater than 127 we prefer using |
| values 124..127 over 128..131 to increase opportunities to use the |
| add #imm, Rn insn. |
| |
| In some cases it is possible that a requested offset might seem unaligned |
| or inappropriate for the mode size, like offset = 2 and mode size = 4. |
| This is compensated by adjusting the base address so that the effective |
| address of the displacement move insn will be aligned. |
| |
   This is not the best possible way of rebasing the base address, as it
   does not take other displacement addressings in the vicinity into
   account.  In some cases this can create more base address adjustments
   than would actually be necessary.  */
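/* For example, for an SImode access (max_disp = 60, mov_insn_sz = 4) at
   offset 130, this computes offset_adjust = ((130 + 4) & ~60) - 4 = 126
   and mov_disp = 4: the base is rebased with add #126,Rn (which still
   fits the signed 8-bit immediate of add #imm) and the move itself uses
   displacement 4.  A seemingly unaligned request such as offset 2 in
   SImode becomes offset_adjust = 2, mov_disp = 0.  */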
| struct disp_adjust |
| { |
| rtx offset_adjust; |
| rtx mov_disp; |
| }; |
| |
| static struct disp_adjust |
| sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset) |
| { |
| struct disp_adjust res = { NULL_RTX, NULL_RTX }; |
| |
| /* Do not try to use SH2A's large displacements here, because this would |
| effectively disable the small displacement insns. */ |
| const int mode_sz = GET_MODE_SIZE (mode); |
| const int mov_insn_sz = mov_insn_size (mode, false); |
| const int max_disp = sh_max_mov_insn_displacement (mode, false); |
| const int max_disp_next = max_disp + mov_insn_sz; |
| HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0; |
| HOST_WIDE_INT offset_adjust; |
| |
| /* In some cases this actually does happen and we must check for it. */ |
| if (mode_sz < 1 || mode_sz > 8 || max_disp < 1) |
| return res; |
| |
  /* This keeps the previous behavior for QImode displacement addressing.
     It just decides how the offset is re-based.  Removing this special
     case would result in slightly bigger code on average, but it's not
     that bad actually.  */
| if (mov_insn_sz == 1) |
| align_modifier = 0; |
| |
| offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier; |
| |
| if (mode_sz + offset - offset_adjust <= max_disp_next) |
| { |
| res.offset_adjust = GEN_INT (offset_adjust); |
| res.mov_disp = GEN_INT (offset - offset_adjust); |
| } |
| |
| return res; |
| } |
| |
| /* Try to modify an illegitimate address and make it legitimate. |
| If we find one, return the new, valid address. |
| Otherwise, return the original address. */ |
| static rtx |
| sh_legitimize_address (rtx x, rtx oldx, machine_mode mode) |
| { |
| if (flag_pic) |
| x = legitimize_pic_address (oldx, mode, NULL_RTX); |
| |
| if ((TARGET_FPU_DOUBLE && mode == DFmode) |
| || (TARGET_SH2E && mode == SFmode)) |
| return x; |
| |
| if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1)) |
| && BASE_REGISTER_RTX_P (XEXP (x, 0))) |
| { |
| struct disp_adjust adj = sh_find_mov_disp_adjust (mode, |
| INTVAL (XEXP (x, 1))); |
| |
| if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX) |
| { |
| rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0), |
| adj.offset_adjust, NULL_RTX, 0, |
| OPTAB_LIB_WIDEN); |
| return gen_rtx_PLUS (Pmode, sum, adj.mov_disp); |
| } |
| } |
| return x; |
| } |
| |
| /* Attempt to replace *p, which is an address that needs reloading, with |
| a valid memory address for an operand of mode MODE. |
| Like for sh_legitimize_address, for the SH we try to get a normal form |
| of the address. That will allow inheritance of the address reloads. */ |
| bool |
| sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum, |
| int itype) |
| { |
| enum reload_type type = (enum reload_type) itype; |
| const int mode_sz = GET_MODE_SIZE (mode); |
| |
| if (sh_lra_p ()) |
| return false; |
| |
| if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1)) |
| && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true)) |
| { |
| const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1)); |
| struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset); |
| |
| if (TARGET_SH2A && mode == DFmode && (offset & 0x7)) |
| { |
| push_reload (*p, NULL_RTX, p, NULL, |
| BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); |
| return true; |
| } |
| |
| if (TARGET_SH2E && mode == SFmode) |
| { |
| *p = copy_rtx (*p); |
| push_reload (*p, NULL_RTX, p, NULL, |
| BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); |
| return true; |
| } |
| |
      /* FIXME: Do not allow QImode and HImode displacement moves to be
	 legitimized here, because reload then has a problem figuring out
	 the constraint that the move insn target/source reg must be R0.
	 Or maybe some handling is wrong in sh_secondary_reload for this
	 to work properly?  */
| if ((mode_sz == 4 || mode_sz == 8) |
| && ! (TARGET_SH4 && mode == DFmode) |
| && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX) |
| { |
| rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust); |
| *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp); |
| push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL, |
| BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); |
| return true; |
| } |
| } |
| |
| /* We must re-recognize what we created before. */ |
| if (GET_CODE (*p) == PLUS |
| && (mode_sz == 4 || mode_sz == 8) |
| && GET_CODE (XEXP (*p, 0)) == PLUS |
| && CONST_INT_P (XEXP (XEXP (*p, 0), 1)) |
| && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true) |
| && CONST_INT_P (XEXP (*p, 1)) |
| && ! (TARGET_SH2E && mode == SFmode)) |
| { |
| /* Because this address is so complex, we know it must have |
| been created by LEGITIMIZE_RELOAD_ADDRESS before; thus, |
| it is already unshared, and needs no further unsharing. */ |
| push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, |
| BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* In the name of slightly smaller debug output, and to cater to |
| general assembler lossage, recognize various UNSPEC sequences |
| and turn them back into a direct symbol reference. */ |
| static rtx |
| sh_delegitimize_address (rtx orig_x) |
| { |
| orig_x = delegitimize_mem_from_attrs (orig_x); |
| |
| rtx x = orig_x; |
| if (MEM_P (x)) |
| x = XEXP (x, 0); |
| if (GET_CODE (x) == CONST) |
| { |
| rtx y = XEXP (x, 0); |
| if (GET_CODE (y) == UNSPEC) |
| { |
| if (XINT (y, 1) == UNSPEC_GOT |
| || XINT (y, 1) == UNSPEC_GOTOFF |
| || XINT (y, 1) == UNSPEC_SYMOFF) |
| return XVECEXP (y, 0, 0); |
| else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF) |
| { |
| if (GET_CODE (XVECEXP (y, 0, 0)) == CONST) |
| { |
| rtx symplt = XEXP (XVECEXP (y, 0, 0), 0); |
| |
| if (GET_CODE (symplt) == UNSPEC |
| && (XINT (symplt, 1) == UNSPEC_PLT |
| || XINT (symplt, 1) == UNSPEC_PCREL)) |
| return XVECEXP (symplt, 0, 0); |
| } |
| } |
| } |
| } |
| |
| return orig_x; |
| } |
| |
| /* Mark the use of a constant in the literal table. If the constant |
| has multiple labels, make it unique. */ |
| static rtx |
| mark_constant_pool_use (rtx x) |
| { |
| if (x == NULL_RTX) |
| return x; |
| |
| switch (GET_CODE (x)) |
| { |
| case LABEL_REF: |
| x = XEXP (x, 0); |
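      /* Fall through.  */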
| case CODE_LABEL: |
| break; |
| default: |
| return x; |
| } |
| |
  /* Get the first label in the list of labels for the same constant
     and delete the other labels in the list.  */
| rtx_insn* lab = as_a <rtx_insn*> (x); |
| for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn)) |
| { |
| if (!LABEL_P (insn) |
| || LABEL_REFS (insn) != NEXT_INSN (insn)) |
| break; |
| lab = insn; |
| } |
| |
| for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn)) |
| as_a<rtx_insn *> (insn)->set_deleted (); |
| |
| /* Mark constants in a window. */ |
| for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn; |
| insn = NEXT_INSN (insn)) |
| { |
| if (!NONJUMP_INSN_P (insn)) |
| continue; |
| |
| rtx pattern = PATTERN (insn); |
| if (GET_CODE (pattern) != UNSPEC_VOLATILE) |
| continue; |
| |
| switch (XINT (pattern, 1)) |
| { |
| case UNSPECV_CONST2: |
| case UNSPECV_CONST4: |
| case UNSPECV_CONST8: |
| XVECEXP (pattern, 0, 1) = const1_rtx; |
| break; |
| case UNSPECV_WINDOW_END: |
| if (XVECEXP (pattern, 0, 0) == x) |
| return lab; |
| break; |
| case UNSPECV_CONST_END: |
| return lab; |
| default: |
| break; |
| } |
| } |
| |
| return lab; |
| } |
| |
| /* Return true if it's possible to redirect BRANCH1 to the destination |
| of an unconditional jump BRANCH2. We only want to do this if the |
| resulting branch will have a short displacement. */ |
| static bool |
| sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2) |
| { |
  /* Don't follow if BRANCH2 may be a jump crossing between
     hot and cold partitions.  */
| if (flag_reorder_blocks_and_partition |
| && simplejump_p (branch2) |
| && CROSSING_JUMP_P (branch2)) |
| return false; |
| |
| if (flag_expensive_optimizations && simplejump_p (branch2)) |
| { |
| rtx dest = XEXP (SET_SRC (single_set (branch2)), 0); |
| rtx_insn *insn; |
| int distance; |
| |
      for (distance = 0, insn = PREV_INSN (branch1);
	   insn && distance < 256;
	   insn = PREV_INSN (insn))
| { |
| if (insn == dest) |
| return true; |
| else |
| distance += get_attr_length (insn); |
| } |
| for (distance = 0, insn = NEXT_INSN (branch1); |
| insn && distance < 256; |
| insn = NEXT_INSN (insn)) |
| { |
| if (insn == dest) |
| return true; |
| else |
| distance += get_attr_length (insn); |
| } |
| } |
| return false; |
| } |
| |
/* Return true if register OLD_REG can be renamed to register NEW_REG.  */
| bool |
| sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED, |
| unsigned int new_reg) |
| { |
| /* Interrupt functions can only use registers that have already been |
| saved by the prologue, even if they would normally be |
| call-clobbered. */ |
| if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg)) |
| return false; |
| |
| return true; |
| } |
| |
| /* Function to update the integer COST |
| based on the relationship between INSN that is dependent on |
| DEP_INSN through the dependence LINK. The default is to make no |
| adjustment to COST. This can be used for example to specify to |
| the scheduler that an output- or anti-dependence does not incur |
| the same cost as a data-dependence. The return value should be |
| the new value for COST. */ |
| static int |
| sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, |
| unsigned int) |
| { |
| rtx reg, use_pat; |
| |
| if (dep_type == 0) |
| { |
| if (recog_memoized (insn) < 0 |
| || recog_memoized (dep_insn) < 0) |
| return cost; |
| |
| rtx dep_set = single_set (dep_insn); |
| |
| /* The latency that we specify in the scheduling description refers |
| to the actual output, not to an auto-increment register; for that, |
| the latency is one. */ |
| if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1) |
| { |
| rtx set = single_set (insn); |
| |
| if (set |
| && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set)) |
| && (!MEM_P (SET_DEST (set)) |
| || !reg_mentioned_p (SET_DEST (dep_set), |
| XEXP (SET_DEST (set), 0)))) |
| cost = 1; |
| } |
| /* The only input for a call that is timing-critical is the |
| function's address. */ |
| if (CALL_P (insn)) |
| { |
| rtx call = get_call_rtx_from (insn); |
| if (call |
| /* sibcalli_thunk uses a symbol_ref in an unspec. */ |
| && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC |
| || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))) |
| cost -= TARGET_SH4_300 ? 3 : 6; |
| } |
| /* Likewise, the most timing critical input for an sfuncs call |
| is the function address. However, sfuncs typically start |
| using their arguments pretty quickly. |
| Assume a four cycle delay for SH4 before they are needed. |
| Cached ST40-300 calls are quicker, so assume only a one |
| cycle delay there. |
| ??? Maybe we should encode the delays till input registers |
| are needed by sfuncs into the sfunc call insn. */ |
| /* All sfunc calls are parallels with at least four components. |
| Exploit this to avoid unnecessary calls to sfunc_uses_reg. */ |
| else if (GET_CODE (PATTERN (insn)) == PARALLEL |
| && XVECLEN (PATTERN (insn), 0) >= 4 |
| && (reg = sfunc_uses_reg (insn))) |
| { |
| if (! reg_set_p (reg, dep_insn)) |
| cost -= TARGET_SH4_300 ? 1 : 4; |
| } |
| if (TARGET_HARD_SH4 && !TARGET_SH4_300) |
| { |
| attr_type dep_type = get_attr_type (dep_insn); |
| attr_type type; |
| if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD) |
| cost--; |
| else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI) |
| && (type = get_attr_type (insn)) != TYPE_CALL |
| && type != TYPE_SFUNC) |
| cost--; |
| /* When the preceding instruction loads the shift amount of |
| the following SHAD/SHLD, the latency of the load is increased |
| by 1 cycle. */ |
| if (get_attr_type (insn) == TYPE_DYN_SHIFT |
| && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES |
| && reg_overlap_mentioned_p (SET_DEST (dep_set), |
| XEXP (SET_SRC (single_set (insn)), |
| 1))) |
| cost++; |
| /* When an LS group instruction with a latency of less than |
| 3 cycles is followed by a double-precision floating-point |
| instruction, FIPR, or FTRV, the latency of the first |
| instruction is increased to 3 cycles. */ |
| else if (cost < 3 |
| && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP |
| && get_attr_dfp_comp (insn) == DFP_COMP_YES) |
| cost = 3; |
| /* The lsw register of a double-precision computation is ready one |
| cycle earlier. */ |
| else if (reload_completed |
| && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES |
| && (use_pat = single_set (insn)) |
| && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))), |
| SET_SRC (use_pat))) |
| cost -= 1; |
| |
| if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES |
| && get_attr_late_fp_use (insn) == LATE_FP_USE_YES) |
| cost -= 1; |
| } |
| else if (TARGET_SH4_300) |
| { |
| /* Stores need their input register two cycles later. */ |
| attr_type type; |
| if (dep_set && cost >= 1 |
| && ((type = get_attr_type (insn)) == TYPE_STORE |
| || type == TYPE_PSTORE |
| || type == TYPE_FSTORE || type == TYPE_MAC_MEM)) |
| { |
| rtx set = single_set (insn); |
| |
| if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0)) |
| && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set))) |
| { |
| cost -= 2; |
| /* But don't reduce the cost below 1 if the address depends |
| on a side effect of dep_insn. */ |
| if (cost < 1 |
| && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn)) |
| cost = 1; |
| } |
| } |
| } |
| } |
| /* An anti-dependence penalty of two applies if the first insn is a double |
| precision fadd / fsub / fmul. */ |
| else if (!TARGET_SH4_300 |
| && dep_type == REG_DEP_ANTI |
| && recog_memoized (dep_insn) >= 0 |
| && (get_attr_type (dep_insn) == TYPE_DFP_ARITH |
| || get_attr_type (dep_insn) == TYPE_DFP_MUL) |
| /* A lot of alleged anti-flow dependences are fake, |
| so check this one is real. */ |
| && flow_dependent_p (dep_insn, insn)) |
| cost = 2; |
| |
| return cost; |
| } |
| |
| /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check |
| if DEP_INSN is anti-flow dependent on INSN. */ |
| static bool |
| flow_dependent_p (rtx_insn *insn, rtx_insn *dep_insn) |
| { |
| rtx tmp = PATTERN (insn); |
| |
| note_stores (dep_insn, flow_dependent_p_1, &tmp); |
| return tmp == NULL_RTX; |
| } |
| |
| /* A helper function for flow_dependent_p called through note_stores. */ |
| static void |
| flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data) |
| { |
| rtx * pinsn = (rtx *) data; |
| |
| if (*pinsn && reg_referenced_p (x, *pinsn)) |
| *pinsn = NULL_RTX; |
| } |
| |
| /* For use by sh_allocate_initial_value. Note that sh.md contains some |
| 'special function' patterns (type sfunc) that clobber pr, but that |
| do not look like function calls to leaf_function_p. Hence we must |
| do this extra check. */ |
| static int |
| sh_pr_n_sets (void) |
| { |
| return DF_REG_DEF_COUNT (PR_REG); |
| } |
| |
| /* Return where to allocate pseudo for a given hard register initial |
| value. */ |
| static rtx |
| sh_allocate_initial_value (rtx hard_reg) |
| { |
| if (REGNO (hard_reg) == PR_REG) |
| { |
| if (crtl->is_leaf && ! sh_pr_n_sets ()) |
| return hard_reg; |
| else |
| return gen_frame_mem (Pmode, return_address_pointer_rtx); |
| } |
| |
| return NULL_RTX; |
| } |
| |
| /* This function returns "2" to indicate dual issue for the SH4 |
| processor. To be used by the DFA pipeline description. */ |
| static int |
| sh_issue_rate (void) |
| { |
| if (TARGET_SUPERSCALAR) |
| return 2; |
| else |
| return 1; |
| } |
| |
| /* Functions for ready queue reordering for sched1. */ |
| |
/* Get the weight for MODE of a set X.  */
| static short |
| find_set_regmode_weight (rtx x, machine_mode mode) |
| { |
| if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode)) |
| return 1; |
| if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode)) |
| { |
| if (REG_P (SET_DEST (x))) |
| { |
| if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x))) |
| return 1; |
| else |
| return 0; |
| } |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* Get regmode weight for insn. */ |
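/* E.g. (set (reg:SI r1) (plus:SI (reg:SI r2) (reg:SI r3))) with a
   REG_DEAD note for r2 gets an SImode weight of 1 - 1 = 0: one SImode
   register is born by the set and one dies in the notes.  */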
| static short |
| find_insn_regmode_weight (rtx insn, machine_mode mode) |
| { |
| /* Increment weight for each register born here. */ |
| rtx x = PATTERN (insn); |
| short reg_weight = find_set_regmode_weight (x, mode); |
| if (GET_CODE (x) == PARALLEL) |
| { |
| int j; |
| for (j = XVECLEN (x, 0) - 1; j >= 0; j--) |
| { |
| x = XVECEXP (PATTERN (insn), 0, j); |
| reg_weight += find_set_regmode_weight (x, mode); |
| } |
| } |
| /* Decrement weight for each register that dies here. */ |
| for (x = REG_NOTES (insn); x; x = XEXP (x, 1)) |
| { |
| if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED) |
| { |
| rtx note = XEXP (x, 0); |
| if (REG_P (note) && GET_MODE (note) == mode) |
| reg_weight--; |
| } |
| } |
| return reg_weight; |
| } |
| |
| /* Calculate regmode weights for all insns of a basic block. */ |
| static void |
| find_regmode_weight (basic_block b, machine_mode mode) |
| { |
| rtx_insn *insn, *next_tail, *head, *tail; |
| |
| get_ebb_head_tail (b, b, &head, &tail); |
| next_tail = NEXT_INSN (tail); |
| |
| for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) |
| { |
| /* Handle register life information. */ |
| if (!INSN_P (insn)) |
| continue; |
| |
| if (mode == SFmode) |
| INSN_REGMODE_WEIGHT (insn, mode) = |
| find_insn_regmode_weight (insn, mode) |
| + 2 * find_insn_regmode_weight (insn, DFmode); |
| else if (mode == SImode) |
| INSN_REGMODE_WEIGHT (insn, mode) = |
| find_insn_regmode_weight (insn, mode) |
| + 2 * find_insn_regmode_weight (insn, DImode); |
| } |
| } |
| |
| /* Comparison function for ready queue sorting. */ |
| static int |
| rank_for_reorder (const void *x, const void *y) |
| { |
| rtx_insn *tmp = *(rtx_insn * const *) y; |
| rtx_insn *tmp2 = *(rtx_insn * const *) x; |
| |
  /* The insn in a schedule group should be issued first.  */
| if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2)) |
| return SCHED_GROUP_P (tmp2) ? 1 : -1; |
| |
  /* If insns are equally good, sort by INSN_LUID (original insn order);
     this minimizes instruction movement, thus minimizing sched's effect
     on register pressure.  */
| return INSN_LUID (tmp) - INSN_LUID (tmp2); |
| } |
| |
/* Resort the array A, in which only the element at index N may be
   out of order.  */
| static void |
| swap_reorder (rtx_insn **a, int n) |
| { |
| rtx_insn *insn = a[n - 1]; |
| int i = n - 2; |
| |
| while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0) |
| { |
| a[i + 1] = a[i]; |
| i -= 1; |
| } |
| a[i + 1] = insn; |
| } |
| |
| /* Sort the ready list by ascending priority. */ |
| static void |
| ready_reorder (rtx_insn **ready, int nready) |
| { |
| if (nready == 2) |
| swap_reorder (ready, nready); |
| else if (nready > 2) |
| qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder); |
| } |
| |
| /* Count life regions of r0 for a block. */ |
| static int |
| find_r0_life_regions (basic_block b) |
| { |
| bool live; |
| int set; |
| int death = 0; |
| |
| if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG)) |
| { |
| set = 1; |
| live = true; |
| } |
| else |
| { |
| set = 0; |
| live = false; |
| } |
| |
| rtx_insn* insn = BB_HEAD (b); |
| rtx_insn* end = BB_END (b); |
| rtx r0_reg = gen_rtx_REG (SImode, R0_REG); |
| while (1) |
| { |
| if (INSN_P (insn)) |
| { |
| if (find_regno_note (insn, REG_DEAD, R0_REG)) |
| { |
| death++; |
| live = false; |
| } |
| |
| rtx pset; |
| if (!live |
| && (pset = single_set (insn)) |
| && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset)) |
| && !find_regno_note (insn, REG_UNUSED, R0_REG)) |
| { |
| set++; |
| live = true; |
| } |
| } |
| if (insn == end) |
| break; |
| insn = NEXT_INSN (insn); |
| } |
| return set - death; |
| } |
| |
/* Calculate regmode weights for all insns of all basic blocks.  */
| static void |
| sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED, |
| int verbose ATTRIBUTE_UNUSED, |
| int old_max_uid) |
| { |
| basic_block b; |
| |
| regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short)); |
| regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short)); |
| r0_life_regions = 0; |
| |
| FOR_EACH_BB_REVERSE_FN (b, cfun) |
| { |
| find_regmode_weight (b, SImode); |
| find_regmode_weight (b, SFmode); |
| if (!reload_completed) |
| r0_life_regions += find_r0_life_regions (b); |
| } |
| |
| CURR_REGMODE_PRESSURE (SImode) = 0; |
| CURR_REGMODE_PRESSURE (SFmode) = 0; |
| } |
| |
| /* Cleanup. */ |
| static void |
| sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED, |
| int verbose ATTRIBUTE_UNUSED) |
| { |
| if (regmode_weight[0]) |
| { |
| free (regmode_weight[0]); |
| regmode_weight[0] = NULL; |
| } |
| if (regmode_weight[1]) |
| { |
| free (regmode_weight[1]); |
| regmode_weight[1] = NULL; |
| } |
| } |
| |
/* Cache can_issue_more so that we can return it from reorder2.  Also,
   keep count of register pressure for SImode and SFmode.  */
| static int |
| sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED, |
| int sched_verbose ATTRIBUTE_UNUSED, |
| rtx_insn *insn, |
| int can_issue_more) |
| { |
| if (GET_CODE (PATTERN (insn)) != USE |
| && GET_CODE (PATTERN (insn)) != CLOBBER) |
| cached_can_issue_more = can_issue_more - 1; |
| else |
| cached_can_issue_more = can_issue_more; |
| |
| if (reload_completed) |
| return cached_can_issue_more; |
| |
| CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode); |
| CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode); |
| |
| return cached_can_issue_more; |
| } |
| |
| static void |
| sh_md_init (FILE *dump ATTRIBUTE_UNUSED, |
| int verbose ATTRIBUTE_UNUSED, |
| int veclen ATTRIBUTE_UNUSED) |
| { |
| CURR_REGMODE_PRESSURE (SImode) = 0; |
| CURR_REGMODE_PRESSURE (SFmode) = 0; |
| } |
| |
| /* Some magic numbers. */ |
/* Pressure on register r0 can lead to spill failures, so avoid sched1 for
   functions that already have high pressure on r0.  */
| #define R0_MAX_LIFE_REGIONS 2 |
/* Register pressure thresholds for SImode and SFmode registers.  */
| #define SIMODE_MAX_WEIGHT 5 |
| #define SFMODE_MAX_WEIGHT 10 |
| |
| /* Return true if the pressure is high for MODE. */ |
| static bool |
| high_pressure (machine_mode mode) |
| { |
  /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
     functions that already have high pressure on r0.  */
| if (r0_life_regions >= R0_MAX_LIFE_REGIONS) |
| return true; |
| |
| if (mode == SFmode) |
| return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT); |
| else |
| return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT); |
| } |
| |
| /* Reorder ready queue if register pressure is high. */ |
| static int |
| sh_reorder (FILE *dump ATTRIBUTE_UNUSED, |
| int sched_verbose ATTRIBUTE_UNUSED, |
| rtx_insn **ready, |
| int *n_readyp, |
| int clock_var ATTRIBUTE_UNUSED) |
| { |
| if (reload_completed) |
| return sh_issue_rate (); |
| |
| if (high_pressure (SFmode) || high_pressure (SImode)) |
| { |
| ready_reorder (ready, *n_readyp); |
| } |
| |
| return sh_issue_rate (); |
| } |
| |
| /* Skip cycles if the current register pressure is high. */ |
| static int |
| sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED, |
| int sched_verbose ATTRIBUTE_UNUSED, |
| rtx_insn **ready ATTRIBUTE_UNUSED, |
| int *n_readyp ATTRIBUTE_UNUSED, |
| int clock_var ATTRIBUTE_UNUSED) |
| { |
| if (reload_completed) |
| return cached_can_issue_more; |
| |
| if (high_pressure(SFmode) || high_pressure (SImode)) |
| skip_cycles = 1; |
| |
| return cached_can_issue_more; |
| } |
| |
/* Skip cycles without sorting the ready queue.  This will move insns from
   Q -> R.  If this is the last cycle we are skipping, allow sorting of the
   ready queue by sh_reorder.  */

/* Generally, skipping this many cycles is sufficient for all insns to move
   from Q -> R.  */
| #define MAX_SKIPS 8 |
| |
| static int |
| sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED, |
| int sched_verbose ATTRIBUTE_UNUSED, |
| rtx_insn *insn ATTRIBUTE_UNUSED, |
| int last_clock_var, |
| int clock_var, |
| int *sort_p) |
| { |
| if (reload_completed) |
| return 0; |
| |
| if (skip_cycles) |
| { |
| if ((clock_var - last_clock_var) < MAX_SKIPS) |
| { |
| *sort_p = 0; |
| return 1; |
| } |
| /* If this is the last cycle we are skipping, allow reordering of R. */ |
| if ((clock_var - last_clock_var) == MAX_SKIPS) |
| { |
| *sort_p = 1; |
| return 1; |
| } |
| } |
| |
| skip_cycles = 0; |
| |
| return 0; |
| } |
| |
| static bool |
| sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED) |
| { |
| return TARGET_HITACHI || sh_attr_renesas_p (record_type); |
| } |
| |
| /* |
| On the SH1..SH4, the trampoline looks like |
| 2 0002 D202 mov.l l2,r2 |
| 1 0000 D301 mov.l l1,r3 |
| 3 0004 422B jmp @r2 |
| 4 0006 0009 nop |
| 5 0008 00000000 l1: .long area |
| 6 000c 00000000 l2: .long function |
| |
| FDPIC needs a form that includes a function descriptor and |
| code to load the GOT register: |
| 0 0000 00000000 .long l0 |
| 1 0004 00000000 .long gotval |
| 2 0008 D302 l0: mov.l l1,r3 |
| 3 000a D203 mov.l l2,r2 |
| 4 000c 6122 mov.l @r2,r1 |
| 5 000e 5C21 mov.l @(4,r2),r12 |
| 6 0010 412B jmp @r1 |
| 7 0012 0009 nop |
| 8 0014 00000000 l1: .long area |
| 9 0018 00000000 l2: .long function |
| |
| SH5 (compact) uses r1 instead of r3 for the static chain. */ |
| |
| /* Emit insns to store a value at memory address + offset. */ |
| static void |
| sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value) |
| { |
| gcc_assert ((offset & 3) == 0); |
| emit_move_insn (offset == 0 |
| ? change_address (addr, SImode, NULL_RTX) |
| : adjust_address (addr, SImode, offset), value); |
| } |
| |
| /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */ |
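/* E.g. sh_emit_storehi (mem, 0, 0xd202, 0xd301) stores the SImode word
   0xd301d202 on a little-endian target, so that memory holds 0xd202 at
   offset 0 and 0xd301 at offset 2: the two insn opcodes in execution
   order.  */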
| static void |
| sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1) |
| { |
| sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN |
| ? (w0 | (w1 << 16)) |
| : (w1 | (w0 << 16)), SImode)); |
| } |
| |
| /* Emit RTL insns to initialize the variable parts of a trampoline. |
| FNADDR is an RTX for the address of the function's pure code. |
| CXT is an RTX for the static chain value for the function. */ |
| static void |
| sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt) |
| { |
| rtx fnaddr = XEXP (DECL_RTL (fndecl), 0); |
| rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0)); |
| |
| if (TARGET_FDPIC) |
| { |
| rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8)); |
| |
| sh_emit_storesi (tramp_mem, 0, a); |
| sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ()); |
| |
| sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203); |
| sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21); |
| sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009); |
| |
| sh_emit_storesi (tramp_mem, 20, cxt); |
| sh_emit_storesi (tramp_mem, 24, fnaddr); |
| } |
| else |
| { |
| sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301); |
| sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009); |
| |
| sh_emit_storesi (tramp_mem, 8, cxt); |
| sh_emit_storesi (tramp_mem, 12, fnaddr); |
| } |
| if (TARGET_HARD_SH4) |
| { |
| if (!TARGET_INLINE_IC_INVALIDATE |
| || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE)) |
| emit_library_call (function_symbol (NULL, "__ic_invalidate", |
| FUNCTION_ORDINARY).sym, |
| LCT_NORMAL, VOIDmode, tramp, SImode); |
| else |
| emit_insn (gen_ic_invalidate_line (tramp)); |
| } |
| } |
| |
/* On SH5, trampolines were SHmedia code and needed 1 added to the
   address.  SH5 support has been removed, so just return the address
   unchanged.  */
| static rtx |
| sh_trampoline_adjust_address (rtx tramp) |
| { |
| return tramp; |
| } |
| |
| /* If PIC, we cannot make sibling calls to global functions |
| because the PLT requires r12 to be live. */ |
| static bool |
| sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) |
| { |
  return (! sh_cfun_interrupt_handler_p ()
	  && (! flag_pic || TARGET_FDPIC
	      || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
	      || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
| } |
| |
| /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */ |
| void |
| sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p) |
| { |
| const_tree decl = SYMBOL_REF_DECL (sym); |
| bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl)); |
| |
| if (!is_weak && SYMBOL_REF_LOCAL_P (sym)) |
| emit_insn (gen_sym_label2reg (reg, sym, lab)); |
| else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym)) |
| emit_insn (gen_symPCREL_label2reg (reg, sym, lab)); |
| else |
| emit_insn (gen_symPLT_label2reg (reg, sym, lab)); |
| } |
| |
| /* Machine specific built-in functions. */ |
| |
| struct builtin_description |
| { |
| bool (* const is_enabled) (void); |
| const enum insn_code icode; |
| const char *const name; |
| int signature; |
| tree fndecl; |
| }; |
| |
/* Predicate for built-ins that are available on all SH1 and later
   targets.  */
| static bool |
| sh1_builtin_p (void) |
| { |
| return TARGET_SH1; |
| } |
| |
/* Describe number and signedness of arguments; arg[0] == result
   (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument;
   9: 64-bit pointer, 10: 32-bit pointer).  */
| static const char signature_args[][4] = |
| { |
| #define SH_BLTIN_V2SI2 0 |
| { 4, 4 }, |
| #define SH_BLTIN_V4HI2 1 |
| { 4, 4 }, |
| #define SH_BLTIN_V2SI3 2 |
| { 4, 4, 4 }, |
| #define SH_BLTIN_V4HI3 3 |
| { 4, 4, 4 }, |
| #define SH_BLTIN_V8QI3 4 |
| { 4, 4, 4 }, |
| #define SH_BLTIN_MAC_HISI 5 |
| { 1, 4, 4, 1 }, |
| #define SH_BLTIN_SH_HI 6 |
| { 4, 4, 1 }, |
| #define SH_BLTIN_SH_SI 7 |
| { 4, 4, 1 }, |
| #define SH_BLTIN_V4HI2V2SI 8 |
| { 4, 4, 4 }, |
| #define SH_BLTIN_V4HI2V8QI 9 |
| { 4, 4, 4 }, |
| #define SH_BLTIN_SISF 10 |
| { 4, 2 }, |
| #define SH_BLTIN_LDUA_L 11 |
| { 2, 10 }, |
| #define SH_BLTIN_LDUA_Q 12 |
| { 1, 10 }, |
| #define SH_BLTIN_STUA_L 13 |
| { 0, 10, 2 }, |
| #define SH_BLTIN_STUA_Q 14 |
| { 0, 10, 1 }, |
| #define SH_BLTIN_LDUA_L64 15 |
| { 2, 9 }, |
| #define SH_BLTIN_LDUA_Q64 16 |
| { 1, 9 }, |
| #define SH_BLTIN_STUA_L64 17 |
| { 0, 9, 2 }, |
| #define SH_BLTIN_STUA_Q64 18 |
| { 0, 9, 1 }, |
| #define SH_BLTIN_NUM_SHARED_SIGNATURES 19 |
| #define SH_BLTIN_2 19 |
| #define SH_BLTIN_SU 19 |
| { 1, 2 }, |
| #define SH_BLTIN_3 20 |
| #define SH_BLTIN_SUS 20 |
| { 2, 2, 1 }, |
| #define SH_BLTIN_PSSV 21 |
| { 0, 8, 2, 2 }, |
| #define SH_BLTIN_XXUU 22 |
| #define SH_BLTIN_UUUU 22 |
| { 1, 1, 1, 1 }, |
| #define SH_BLTIN_PV 23 |
| { 0, 8 }, |
| #define SH_BLTIN_VP 24 |
| { 8, 0 }, |
| #define SH_BLTIN_UV 25 |
| { 1, 0 }, |
| #define SH_BLTIN_VU 26 |
| { 0, 1 }, |
| }; |
| /* mcmv: operands considered unsigned. */ |
| /* mmulsum_wq, msad_ubq: result considered unsigned long long. */ |
| /* mperm: control value considered unsigned int. */ |
| /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */ |
| /* mshards_q: returns signed short. */ |
| /* nsb: takes long long arg, returns unsigned char. */ |
| static struct builtin_description bdesc[] = |
| { |
| { sh1_builtin_p, |
| CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 }, |
| { sh1_builtin_p, |
| CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 }, |
| }; |
| |
| static tree sh_builtin_get_fpscr; |
| static tree sh_builtin_set_fpscr; |
| |
| static void |
| sh_init_builtins (void) |
| { |
| tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES]; |
| memset (shared, 0, sizeof shared); |
| |
| for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di) |
| { |
| builtin_description* d = &bdesc[di]; |
| |
| if (!d->is_enabled ()) |
| continue; |
| |
| tree type, arg_type = NULL_TREE; |
| int signature = d->signature; |
| |
| if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature]) |
| type = shared[signature]; |
| else |
| { |
| int has_result = signature_args[signature][0] != 0; |
| tree args[3]; |
| |
| if (! TARGET_FPU_ANY |
| && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode)) |
| continue; |
| for (unsigned int i = 0; i < ARRAY_SIZE (args); i++) |
| args[i] = NULL_TREE; |
| for (int i = 3; ; i--) |
| { |
| int arg = signature_args[signature][i]; |
| int opno = i - 1 + has_result; |
| |
| if (arg & 8) |
| arg_type = ptr_type_node; |
| else if (arg) |
| arg_type = (*lang_hooks.types.type_for_mode) |
| (insn_data[d->icode].operand[opno].mode, (arg & 1)); |
| else if (i) |
| continue; |
| else |
| arg_type = void_type_node; |
| if (i == 0) |
| break; |
| args[i-1] = arg_type; |
| } |
| type = build_function_type_list (arg_type, args[0], args[1], |
| args[2], NULL_TREE); |
| if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES) |
| shared[signature] = type; |
| } |
| d->fndecl = |
| add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD, |
| NULL, NULL_TREE); |
      /* Record {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv.  */
| if (d->icode == CODE_FOR_sts_fpscr) |
| sh_builtin_get_fpscr = d->fndecl; |
| else if (d->icode == CODE_FOR_set_fpscr) |
| sh_builtin_set_fpscr = d->fndecl; |
| } |
| } |
| |
| /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */ |
| |
| static void |
| sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) |
| { |
| const unsigned SH_FE_INVALID = 64; |
| const unsigned SH_FE_DIVBYZERO = 32; |
| const unsigned SH_FE_OVERFLOW = 16; |
| const unsigned SH_FE_UNDERFLOW = 8; |
| const unsigned SH_FE_INEXACT = 4; |
| const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID |
| | SH_FE_DIVBYZERO |
| | SH_FE_OVERFLOW |
| | SH_FE_UNDERFLOW |
| | SH_FE_INEXACT); |
| const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5; |
| tree fenv_var, mask, ld_fenv, masked_fenv; |
| tree new_fenv_var, reload_fenv, restore_fnenv; |
| tree update_call, atomic_feraiseexcept, hold_fnclex; |
| |
| if (! TARGET_FPU_ANY) |
| return; |
| |
  /* Generate the equivalent of:
| unsigned int fenv_var; |
| fenv_var = __builtin_sh_get_fpscr (); |
| |
| unsigned int masked_fenv; |
| masked_fenv = fenv_var & mask; |
| |
| __builtin_sh_set_fpscr (masked_fenv); */ |
| |
| fenv_var = create_tmp_var_raw (unsigned_type_node); |
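  /* With the constants above, the mask below is ~0xffc = 0xfffff003 for a
     32-bit FPSCR: it clears both the exception flag bits and the
     corresponding enable bits, which sit SH_FE_EXCEPT_SHIFT bits
     higher.  */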
| mask = build_int_cst (unsigned_type_node, |
| ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT) |
| | SH_FE_ALL_EXCEPT)); |
| ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node, |
| fenv_var, build_call_expr (sh_builtin_get_fpscr, 0)); |
| masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask); |
| hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv); |
| fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var, |
| build2 (COMPOUND_EXPR, void_type_node, masked_fenv, |
| ld_fenv), |
| NULL_TREE, NULL_TREE); |
| *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex); |
| |
| /* Store the value of masked_fenv to clear the exceptions: |
| __builtin_sh_set_fpscr (masked_fenv); */ |
| |
| *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv); |
| |
  /* Generate the equivalent of:
| unsigned int new_fenv_var; |
| new_fenv_var = __builtin_sh_get_fpscr (); |
| |
| __builtin_sh_set_fpscr (fenv_var); |
| |
| __atomic_feraiseexcept (new_fenv_var); */ |
| |
| new_fenv_var = create_tmp_var_raw (unsigned_type_node); |
| reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var, |
| build_call_expr (sh_builtin_get_fpscr, 0)); |
| restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var); |
| atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); |
| update_call = build_call_expr (atomic_feraiseexcept, 1, |
| fold_convert (integer_type_node, |
| new_fenv_var)); |
| *update = build2 (COMPOUND_EXPR, void_type_node, |
| build2 (COMPOUND_EXPR, void_type_node, |
| reload_fenv, restore_fnenv), update_call); |
| } |
| |
| /* Implements target hook vector_mode_supported_p. */ |
| bool |
| sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED) |
| { |
| return false; |
| } |
| |
| bool |
| sh_frame_pointer_required (void) |
| { |
| /* If needed override this in other tm.h files to cope with various OS |
| lossage requiring a frame pointer. */ |
| if (SUBTARGET_FRAME_POINTER_REQUIRED) |
| return true; |
| |
| if (crtl->profile) |
| return true; |
| |
| return false; |
| } |
| |
| /* Implements target hook dwarf_calling_convention. Return an enum |
| of dwarf_calling_convention. */ |
| int |
| sh_dwarf_calling_convention (const_tree func) |
| { |
| if (sh_attr_renesas_p (func)) |
| return DW_CC_GNU_renesas_sh; |
| |
| return DW_CC_normal; |
| } |
| |
| /* Returns the sh builtin decl for CODE. */ |
| static tree |
| sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) |
| { |
| if (code >= ARRAY_SIZE (bdesc)) |
| return error_mark_node; |
| |
| if (!bdesc[code].is_enabled ()) |
| return error_mark_node; |
| |
| return bdesc[code].fndecl; |
| } |
| |
| /* Expand an expression EXP that calls a built-in function, |
| with result going to TARGET if that's convenient |
| (and in mode MODE if that's convenient). |
| SUBTARGET may be used as the target for computing one of EXP's operands. |
| IGNORE is nonzero if the value is to be ignored. */ |
| static rtx |
| sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, |
| machine_mode mode ATTRIBUTE_UNUSED, int ignore) |
| { |
| tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); |
| unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); |
| const struct builtin_description *d = &bdesc[fcode]; |
| enum insn_code icode = d->icode; |
| int signature = d->signature; |
| int nop = 0; |
| rtx op[4]; |
| |
| if (signature_args[signature][0]) |
| { |
| if (ignore) |
| return NULL_RTX; |
| |
| machine_mode tmode = insn_data[icode].operand[0].mode; |
| if (! target || GET_MODE (target) != tmode |
| || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) |
| target = gen_reg_rtx (tmode); |
| op[nop++] = target; |
| } |
| else |
| target = NULL_RTX; |
| |
| for (int i = 1; i <= 3; i++, nop++) |
| { |
| if (! signature_args[signature][i]) |
| break; |
| tree arg = CALL_EXPR_ARG (exp, i - 1); |
| if (arg == error_mark_node) |
| return const0_rtx; |
| |
| machine_mode opmode; |
| tree optype; |
| if (signature_args[signature][i] & 8) |
| { |
| opmode = ptr_mode; |
| optype = ptr_type_node; |
| } |
| else |
| { |
| opmode = insn_data[icode].operand[nop].mode; |
| optype = (*lang_hooks.types.type_for_mode) (opmode, 0); |
| } |
| |
| machine_mode argmode = TYPE_MODE (TREE_TYPE (arg)); |
| if (argmode != opmode) |
| arg = build1 (NOP_EXPR, optype, arg); |
| op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL); |
| if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode)) |
| op[nop] = copy_to_mode_reg (opmode, op[nop]); |
| } |
| |
| rtx pat = NULL_RTX; |
| |
| switch (nop) |
| { |
| case 1: |
| pat = (*insn_data[d->icode].genfun) (op[0]); |
| break; |
| case 2: |
| pat = (*insn_data[d->icode].genfun) (op[0], op[1]); |
| break; |
| case 3: |
| pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]); |
| break; |
| case 4: |
| pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| if (! pat) |
| return NULL_RTX; |
| emit_insn (pat); |
| return target; |
| } |
| |
/* Implement TARGET_HARD_REGNO_NREGS.  On the SH all but the XD regs are
   one word (UNITS_PER_WORD bytes) wide; the XD regs are two words wide.  */
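/* E.g. DFmode (8 bytes) needs two general registers but only one XD
   register.  */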
| |
| static unsigned int |
| sh_hard_regno_nregs (unsigned int regno, machine_mode mode) |
| { |
| if (XD_REGISTER_P (regno)) |
| return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD); |
| return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD); |
| } |
| |
| /* Implement TARGET_HARD_REGNO_MODE_OK. |
| |
| We can allow any mode in any general register. The special registers |
| only allow SImode. Don't allow any mode in the PR. |
| |
| We cannot hold DCmode values in the XD registers because alter_reg |
| handles subregs of them incorrectly. We could work around this by |
| spacing the XD registers like the DR registers, but this would require |
| additional memory in every compilation to hold larger register vectors. |
| We could hold SFmode / SCmode values in XD registers, but that |
| would require a tertiary reload when reloading from / to memory, |
| and a secondary reload to reload from / to general regs; that |
| seems to be a losing proposition. |
| |
| We want to allow TImode FP regs so that when V4SFmode is loaded as TImode, |
| it won't be ferried through GP registers first. */ |
| static bool |
| sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode) |
| { |
| if (SPECIAL_REGISTER_P (regno)) |
| return mode == SImode; |
| |
| if (regno == FPUL_REG) |
| return (mode == SImode || mode == SFmode); |
| |
| if (FP_REGISTER_P (regno) && mode == SFmode) |
| return true; |
| |
| if (mode == V2SFmode) |
| { |
| if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0) |
| || GENERAL_REGISTER_P (regno))) |
| return true; |
| else |
| return false; |
| } |
| |
| if (mode == V4SFmode) |
| { |
| if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0) |
| || GENERAL_REGISTER_P (regno)) |
| return true; |
| else |
| return false; |
| } |
| |
| if (mode == V16SFmode) |
| return regno == FIRST_XD_REG; |
| |
| if (FP_REGISTER_P (regno)) |
| { |
| if (mode == SFmode |
| || mode == SImode |
| || ((TARGET_SH2E) && mode == SCmode) |
| || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode) |
| && ((regno - FIRST_FP_REG) & 1) == 0) |
| || (TARGET_SH4 && mode == TImode |
| && ((regno - FIRST_FP_REG) & 3) == 0)) |
| return true; |
| else |
| return false; |
| } |
| |
| if (XD_REGISTER_P (regno)) |
| return mode == DFmode; |
| |
| if (regno == PR_REG) |
| return mode == SImode; |
| |
| if (regno == FPSCR_REG) |
| return mode == SImode; |
| |
| return true; |
| } |
| |
| /* Implement TARGET_MODES_TIEABLE_P. |
| |
| If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1 |
| and MODE2, for any hard reg, then this must be false for correct output. |
   That's the case for the XD registers: we don't hold SFmode values in
   them, so we cannot tie an SFmode pseudo to one in another
   floating-point mode.  */
| |
| static bool |
| sh_modes_tieable_p (machine_mode mode1, machine_mode mode2) |
| { |
| return (mode1 == mode2 |
| || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2) |
| && (mode1 != SFmode && mode2 != SFmode))); |
| } |
| |
/* Specify the modes required to caller save a given hard regno.
   choose_hard_reg_mode chooses a mode based on TARGET_HARD_REGNO_MODE_OK
   and returns ?Imode (some integer mode) for float regs when
   sh_hard_regno_mode_ok permits integer modes on them.  That makes
   LRA's split process unhappy.  See PR55212.  */
| machine_mode |
| sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs, |
| machine_mode mode) |
| { |
| if (FP_REGISTER_P (regno) |
| && (mode == SFmode |
| || mode == SCmode |
| || ((mode == DFmode || mode == DCmode) |
| && ((regno - FIRST_FP_REG) & 1) == 0))) |
| return mode; |
| |
| return choose_hard_reg_mode (regno, nregs, NULL); |
| } |
| |
| /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ |
| static bool |
| sh_can_change_mode_class (machine_mode from, machine_mode to, |
| reg_class_t rclass) |
| { |
| /* We want to enable the use of SUBREGs as a means to |
| VEC_SELECT a single element of a vector. */ |
| |
  /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
     This can be problematic when SFmode vector subregs need to be accessed
     on the stack with displacement addressing, as it happens with -O0.
     Thus we only apply this restriction when optimizing; at -O0 the mode
     change is always allowed.  */
| if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode) |
| return optimize ? !reg_classes_intersect_p (GENERAL_REGS, rclass) : true; |
| |
| if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to)) |
| { |
| if (TARGET_LITTLE_ENDIAN) |
| { |
| if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8) |
| return !reg_classes_intersect_p (DF_REGS, rclass); |
| } |
| else |
| { |
| if (GET_MODE_SIZE (from) < 8) |
| return !reg_classes_intersect_p (DF_REGS, rclass); |
| } |
| } |
| return true; |
| } |
| |
| /* Return true if registers in machine mode MODE will likely be |
| allocated to registers in small register classes. */ |
| bool |
| sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED) |
| { |
| return true; |
| } |
| |
| /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times |
| that label is used. */ |
| void |
| sh_mark_label (rtx address, int nuses) |
| { |
| if (GOTOFF_P (address)) |
| { |
| /* Extract the label or symbol. */ |
| address = XEXP (address, 0); |
| if (GET_CODE (address) == PLUS) |
| address = XEXP (address, 0); |
| address = XVECEXP (address, 0, 0); |
| } |
| if (GET_CODE (address) == LABEL_REF |
| && LABEL_P (XEXP (address, 0))) |
| LABEL_NUSES (XEXP (address, 0)) += nuses; |
| } |
| |
| /* Compute extra cost of moving data between one register class |
| and another. |
| |
| If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass |
| uses this information. Hence, the general register <-> floating point |
| register information here is not used for SFmode. */ |
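/* E.g. an SImode move between a general and a floating point register
   class below is charged (8 or 12) + 40, since SImode is Pmode here;
   the large addend discourages using FP regs for pointers and SImode
   values.  */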
| static int |
| sh_register_move_cost (machine_mode mode, |
| reg_class_t srcclass, reg_class_t dstclass) |
| { |
| if (dstclass == T_REGS || dstclass == PR_REGS) |
| return 10; |
| |
| if (dstclass == MAC_REGS && srcclass == MAC_REGS) |
| return 4; |
| |
| if (mode == SImode && TARGET_FMOVD |
| && REGCLASS_HAS_FP_REG (srcclass) |
| && REGCLASS_HAS_FP_REG (dstclass)) |
| return 4; |
| |
| if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS) |
| return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7); |
| |
| if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS) |
| || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass))) |
| return 9; |
| |
| if ((REGCLASS_HAS_FP_REG (dstclass) |
| && REGCLASS_HAS_GENERAL_REG (srcclass)) |
| || (REGCLASS_HAS_GENERAL_REG (dstclass) |
| && REGCLASS_HAS_FP_REG (srcclass))) |
| { |
| /* Discourage trying to use fp regs for a pointer. This also |
| discourages fp regs with SImode because Pmode is an alias |
| of SImode on this target. See PR target/48596. */ |
| int addend = (mode == Pmode) ? 40 : 0; |
| |
| return ((TARGET_FMOVD ? 8 : 12) + addend) |
| * ((GET_MODE_SIZE (mode) + 7) / 8U); |
| } |
| |
| if ((dstclass == FPUL_REGS |
| && REGCLASS_HAS_GENERAL_REG (srcclass)) |
| || (srcclass == FPUL_REGS |
| && REGCLASS_HAS_GENERAL_REG (dstclass))) |
| return 5; |
| |
| if ((dstclass == FPUL_REGS |
| && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS)) |
| || (srcclass == FPUL_REGS |
| && (dstclass == PR_REGS || dstclass == MAC_REGS))) |
| return 7; |
| |
| if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass)) |
| || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass))) |
| return 4; |
| |
| if (TARGET_FMOVD |
| && ! REGCLASS_HAS_GENERAL_REG (srcclass) |
| && ! REGCLASS_HAS_GENERAL_REG (dstclass)) |
| return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U); |
| |
| return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U); |
| } |
| |
| static rtx |
| emit_load_ptr (rtx reg, rtx addr) |
| { |
| rtx mem = gen_const_mem (ptr_mode, addr); |
| |
| if (Pmode != ptr_mode) |
| mem = gen_rtx_SIGN_EXTEND (Pmode, mem); |
| return emit_move_insn (reg, mem); |
| } |
| |
| static void |
| sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, |
| HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, |
| tree function) |
| { |
| const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); |
| CUMULATIVE_ARGS cum; |
| int structure_value_byref = 0; |
| rtx this_rtx, this_value, sibcall, funexp; |
| rtx_insn *insns; |
| tree funtype = TREE_TYPE (function); |
| int simple_add = CONST_OK_FOR_ADD (delta); |
| int did_load = 0; |
| rtx scratch0, scratch1, scratch2; |
| |
| reload_completed = 1; |
| epilogue_completed = 1; |
| crtl->uses_only_leaf_regs = 1; |
| |
| emit_note (NOTE_INSN_PROLOGUE_END); |
| |
| /* Find the "this" pointer. We have such a wide range of ABIs for the |
| SH that it's best to do this completely machine independently. |
| "this" is passed as first argument, unless a structure return pointer |
| comes first, in which case "this" comes second. */ |
| INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1); |
| #ifndef PCC_STATIC_STRUCT_RETURN |
| if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) |
| structure_value_byref = 1; |
| #endif /* not PCC_STATIC_STRUCT_RETURN */ |
| if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0) |
| { |
| tree ptype = build_pointer_type (TREE_TYPE (funtype)); |
| |
| function_arg_info ptr_arg (ptype, Pmode, /*named=*/true); |
| sh_function_arg_advance (pack_cumulative_args (&cum), ptr_arg); |
| } |
| function_arg_info ptr_arg (ptr_type_node, Pmode, /*named=*/true); |
| this_rtx = sh_function_arg (pack_cumulative_args (&cum), ptr_arg); |
| |
| /* For SHcompact, we only have r0 for a scratch register: r1 is the |
| static chain pointer (even if you can't have nested virtual functions |
| right now, someone might implement them sometime), and the rest of the |
| registers are used for argument passing, are callee-saved, or reserved. */ |
  /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
     -ffixed-reg has been used.  */
| if (! call_used_or_fixed_reg_p (0) || fixed_regs[0]) |
| error ("r0 needs to be available as a call-clobbered register"); |
| scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0); |
| |
| { |
| if (call_used_or_fixed_reg_p (1) && ! fixed_regs[1]) |
| scratch1 = gen_rtx_REG (ptr_mode, 1); |
| /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer |
| pointing where to return struct values. */ |
| if (call_used_or_fixed_reg_p (3) && ! fixed_regs[3]) |
| scratch2 = gen_rtx_REG (Pmode, 3); |
| } |
| |
| this_value = plus_constant (Pmode, this_rtx, delta); |
| if (vcall_offset |
| && (simple_add || scratch0 != scratch1) |
| && strict_memory_address_p (ptr_mode, this_value)) |
| { |
| emit_load_ptr (scratch0, this_value); |
| did_load = 1; |
| } |
| |
| if (!delta) |
| ; /* Do nothing. */ |
| else if (simple_add) |
| emit_move_insn (this_rtx, this_value); |
| else |
| { |
| emit_move_insn (scratch1, GEN_INT (delta)); |
| emit_insn (gen_add2_insn (this_rtx, scratch1)); |
| } |
| |
| if (vcall_offset) |
| { |
| rtx offset_addr; |
| |
| if (!did_load) |
| emit_load_ptr (scratch0, this_rtx); |
| |
| offset_addr = plus_constant (Pmode, scratch0, vcall_offset); |
| if (strict_memory_address_p (ptr_mode, offset_addr)) |
| ; /* Do nothing. */ |
| else if (scratch0 != scratch1) |
| { |
| /* scratch0 != scratch1, and we have indexed loads. Get better |
| schedule by loading the offset into r1 and using an indexed |
| load - then the load of r1 can issue before the load from |
| (this_rtx + delta) finishes. */ |
| emit_move_insn (scratch1, GEN_INT (vcall_offset)); |
| offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1); |
| } |
| else if (CONST_OK_FOR_ADD (vcall_offset)) |
| { |
| emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset))); |
| offset_addr = scratch0; |
| } |
| else |
| gcc_unreachable (); /* FIXME */ |
| emit_load_ptr (scratch0, offset_addr); |
| |
| if (Pmode != ptr_mode) |
| scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0); |
| emit_insn (gen_add2_insn (this_rtx, scratch0)); |
| } |
| |
| /* Generate a tail call to the target function. */ |
| if (! TREE_USED (function)) |
| { |
| assemble_external (function); |
| TREE_USED (function) = 1; |
| } |
| funexp = XEXP (DECL_RTL (function), 0); |
| /* If the function is overridden, so is the thunk, hence we don't |
| need GOT addressing even if this is a public symbol. */ |
| #if 0 |
| if (TARGET_SH1 && ! flag_weak) |
| sibcall = gen_sibcalli_thunk (funexp, const0_rtx); |
| else |
| #endif |
| if (TARGET_SH2 && flag_pic) |
| { |
| if (TARGET_FDPIC) |
| { |
| sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx); |
| XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2; |
| } |
| else |
| { |
| sibcall = gen_sibcall_pcrel (funexp, const0_rtx); |
| XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2; |
| } |
| } |
| else |
| { |
| emit_move_insn (scratch2, funexp); |
| funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2); |
| sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX); |
| } |
| sibcall = emit_call_insn (sibcall); |
| SIBLING_CALL_P (sibcall) = 1; |
| use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx); |
| emit_barrier (); |
| |
| /* Run just enough of rest_of_compilation to do scheduling and get |
| the insns emitted. */ |
| |
| insns = get_insns (); |
| |
| if (optimize > 0) |
| { |
| if (! cfun->cfg) |
| init_flow (cfun); |
| split_all_insns_noflow (); |
| } |
| |
| sh_reorg (); |
| shorten_branches (insns); |
| assemble_start_function (thunk_fndecl, fnname); |
| final_start_function (insns, file, 1); |
| final (insns, file, 1); |
| final_end_function (); |
| assemble_end_function (thunk_fndecl, fnname); |
| |
| reload_completed = 0; |
| epilogue_completed = 0; |
| } |
| |
| /* Return an RTX pair for the address and call site label of a function |
| NAME of kind KIND, placing the result in TARGET if not NULL. For |
| SFUNC_STATIC, if FDPIC, the LAB member of result will be set to |
| (const_int 0) if jsr should be used, or a label_ref if bsrf should |
| be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the |
| address of the function itself, not a function descriptor, so they |
| can only be used with functions not using the FDPIC register that |
   are known to be called directly without a PLT entry.  */
| |
| function_symbol_result |
| function_symbol (rtx target, const char *name, sh_function_kind kind) |
| { |
| /* If this is not an ordinary function, the name usually comes from a |
| string literal or an sprintf buffer. Make sure we use the same |
| string consistently, so that cse will be able to unify address loads. */ |
| if (kind != FUNCTION_ORDINARY) |
| name = IDENTIFIER_POINTER (get_identifier (name)); |
| rtx sym = gen_rtx_SYMBOL_REF (Pmode, name); |
| rtx lab = const0_rtx; |
| SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION; |
| if (flag_pic) |
| switch (kind) |
| { |
| case FUNCTION_ORDINARY: |
| break; |
| case SFUNC_GOT: |
| { |
| rtx reg = target ? target : gen_reg_rtx (Pmode); |
| |
| emit_insn (gen_symGOT2reg (reg, sym)); |
| sym = reg; |
| break; |
| } |
| case SFUNC_STATIC: |
| { |
| rtx reg = target ? target : gen_reg_rtx (Pmode); |
| |
| if (TARGET_FDPIC) |
| { |
| /* We use PC-relative calls, since GOTOFF can only refer |
| to writable data. This works along with sh_sfunc_call. */ |
| lab = PATTERN (gen_call_site ()); |
| emit_insn (gen_sym_label2reg (reg, sym, lab)); |
| } |
| else |
| { |
| /* ??? To allow cse to work, we use GOTOFF relocations. |
| we could add combiner patterns to transform this into |
| straight pc-relative calls with sym2PIC / bsrf when |
| label load and function call are still 1:1 and in the |
| same basic block during combine. */ |
| emit_insn (gen_symGOTOFF2reg (reg, sym)); |
| } |
| |
| sym = reg; |
| break; |
| } |
| } |
| if (target && sym != target) |
| { |
| emit_move_insn (target, sym); |
| return function_symbol_result (target, lab); |
| } |
| return function_symbol_result (sym, lab); |
| } |
| |
/* Find the number of the first general purpose register in S that
   is set.  */
| static int |
| scavenge_reg (HARD_REG_SET *s) |
| { |
| for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++) |
| if (TEST_HARD_REG_BIT (*s, r)) |
| return r; |
| return -1; |
| } |
| |
| rtx |
| sh_get_pr_initial_val (void) |
| { |
| /* If we haven't finished rtl generation, there might be a nonlocal label |
| that we haven't seen yet. |
| ??? get_hard_reg_initial_val fails if it is called after register |
| allocation has started, unless it has been called before for the |
     same register.  And even then, we end up in trouble if we didn't use
| the register in the same basic block before. So call |
| get_hard_reg_initial_val now and wrap it in an unspec if we might |
| need to replace it. */ |
| /* ??? We also must do this for TARGET_SH1 in general, because otherwise |
| combine can put the pseudo returned by get_hard_reg_initial_val into |
     instructions that need a general purpose register, which will fail to
| be recognized when the pseudo becomes allocated to PR. */ |
| rtx val = get_hard_reg_initial_val (Pmode, PR_REG); |
| return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA); |
| } |
| |
/* Expand an scc operation whose comparison (operands[1], applied to
   operands[2] and operands[3]) tests the T bit against a constant 0 or 1.
   Store the 0/1 result in operands[0]; return true on success.  */
bool
sh_expand_t_scc (rtx operands[])
| { |
| enum rtx_code code = GET_CODE (operands[1]); |
| rtx target = operands[0]; |
| rtx op0 = operands[2]; |
| rtx op1 = operands[3]; |
| rtx result = target; |
| |
| if (!REG_P (op0) || REGNO (op0) != T_REG |
| || !CONST_INT_P (op1)) |
| return false; |
| if (!REG_P (result)) |
| result = gen_reg_rtx (SImode); |
| HOST_WIDE_INT val = INTVAL (op1); |
| if ((code == EQ && val == 1) || (code == NE && val == 0)) |
| emit_insn (gen_movt (result, get_t_reg_rtx ())); |
| else if ((code == EQ && val == 0) || (code == NE && val == 1)) |
| emit_insn (gen_movnegt (result, get_t_reg_rtx ())); |
| else if (code == EQ || code == NE) |
| emit_insn (gen_move_insn (result, GEN_INT (code == NE))); |
| else |
| return false; |
| if (result != target) |
| emit_move_insn (target, result); |
| return true; |
| } |
| |
| /* INSN is an sfunc; return the rtx that describes the address used. */ |
| static rtx |
| extract_sfunc_addr (rtx insn) |
| { |
| rtx pattern = PATTERN (insn); |
| const int len = XVECLEN (pattern, 0); |
| for (int i = 0; i < len; i++) |
| { |
| rtx part = XVECEXP (pattern, 0, i); |
| if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode |
| && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0)))) |
| return XEXP (part, 0); |
| } |
| gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE); |
| return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1); |
| } |
| |
| /* Verify that the register in use_sfunc_addr still agrees with the address |
| used in the sfunc. This prevents fill_slots_from_thread from changing |
| use_sfunc_addr. |
| INSN is the use_sfunc_addr instruction, and REG is the register it |
| guards. */ |
| bool |
| check_use_sfunc_addr (rtx_insn *insn, rtx reg) |
| { |
| /* Search for the sfunc. It should really come right after INSN. */ |
| while ((insn = NEXT_INSN (insn))) |
| { |
| if (LABEL_P (insn) || JUMP_P (insn)) |
| break; |
| if (! INSN_P (insn)) |
| continue; |
| |
| if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn))) |
| insn = seq->insn (0); |
| if (GET_CODE (PATTERN (insn)) != PARALLEL |
| || get_attr_type (insn) != TYPE_SFUNC) |
| continue; |
| return rtx_equal_p (extract_sfunc_addr (insn), reg); |
| } |
| gcc_unreachable (); |
| } |
| |
/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale SFmode angles, in radians, to a fixed-point
   signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000.  */
| static GTY(()) rtx sh_fsca_sf2int_rtx; |
| |
| rtx |
| sh_fsca_sf2int (void) |
| { |
| if (! sh_fsca_sf2int_rtx) |
| { |
| REAL_VALUE_TYPE rv; |
| |
| real_from_string (&rv, "10430.378350470453"); |
| sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode); |
| } |
| |
| return sh_fsca_sf2int_rtx; |
| } |
| |
/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction of
   a full circle back to an SFmode value, i.e. 0x10000 maps to 2*pi.  */
| static GTY(()) rtx sh_fsca_int2sf_rtx; |
| |
| rtx |
| sh_fsca_int2sf (void) |
| { |
| if (! sh_fsca_int2sf_rtx) |
| { |
| REAL_VALUE_TYPE rv; |
| |
| real_from_string (&rv, "9.587379924285257e-5"); |
| sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode); |
| } |
| |
| return sh_fsca_int2sf_rtx; |
| } |
| |
| /* Initialize the CUMULATIVE_ARGS structure. */ |
| void |
| sh_init_cumulative_args (CUMULATIVE_ARGS * pcum, |
| tree fntype, |
| rtx libname ATTRIBUTE_UNUSED, |
| tree fndecl, |
| signed int n_named_args, |
| machine_mode mode) |
| { |
| pcum->arg_count [(int) SH_ARG_FLOAT] = 0; |
| pcum->free_single_fp_reg = 0; |
| pcum->outgoing = n_named_args != -1; |
| |
| /* FIXME: Should we check TARGET_HITACHI here ??? */ |
| pcum->renesas_abi = sh_attr_renesas_p (fntype); |
| |
| if (fntype) |
| { |
| pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi) |
| && aggregate_value_p (TREE_TYPE (fntype), fndecl)); |
| pcum->prototype_p = prototype_p (fntype); |
      pcum->arg_count [(int) SH_ARG_INT] = 0;
| } |
| else |
| { |
| pcum->arg_count [(int) SH_ARG_INT] = 0; |
| pcum->prototype_p = false; |
| if (mode != VOIDmode) |
| { |
| /* If the default ABI is the Renesas ABI then all library |
| calls must assume that the library will be using the |
| Renesas ABI. So if the function would return its result |
| in memory then we must force the address of this memory |
| block onto the stack. Ideally we would like to call |
| targetm.calls.return_in_memory() here but we do not have |
| the TYPE or the FNDECL available so we synthesize the |
| contents of that function as best we can. */ |
| pcum->force_mem = |
| (TARGET_DEFAULT & MASK_HITACHI) |
| && (mode == BLKmode |
| || (GET_MODE_SIZE (mode) > 4 |
| && !(mode == DFmode |
| && TARGET_FPU_DOUBLE))); |
| } |
| else |
| pcum->force_mem = false; |
| } |
| } |
| |
/* Return an expression of mode MODE holding X truncated to MODE.  If X is
   a sign/zero extension whose operand already has mode MODE, return that
   operand; if the operand is narrower than MODE, re-extend it instead of
   truncating, but for sign extensions only when NEED_SIGN_EXT is set.  */
rtx
sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
| { |
| enum rtx_code code = TRUNCATE; |
| |
| if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND) |
| { |
| rtx inner = XEXP (x, 0); |
| machine_mode inner_mode = GET_MODE (inner); |
| |
| if (inner_mode == mode) |
| return inner; |
| else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode)) |
| x = inner; |
| else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode) |
| && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND)) |
| { |
| code = GET_CODE (x); |
| x = inner; |
| } |
| } |
| return gen_rtx_fmt_e (code, mode, x); |
| } |
| |
| /* Load and store depend on the highpart of the address. However, |
| set_attr_alternative does not give well-defined results before reload, |
| so we must look at the rtl ourselves to see if any of the feeding |
| registers is used in a memref. |
| |
| Return true iff INSN contains a MEM. */ |
| bool |
| sh_contains_memref_p (rtx insn) |
| { |
| subrtx_iterator::array_type array; |
| FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) |
| if (MEM_P (*iter)) |
| return true; |
| return false; |
| } |
| |
| /* Return true iff INSN loads a banked register. */ |
| bool |
| sh_loads_bankedreg_p (rtx insn) |
| { |
| if (GET_CODE (PATTERN (insn)) == SET) |
| { |
      rtx op = SET_DEST (PATTERN (insn));
| if (REG_P (op) && BANKED_REGISTER_P (REGNO (op))) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Implement TARGET_PREFERRED_RELOAD_CLASS. */ |
| static reg_class_t |
| sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass) |
| { |
| return rclass; |
| } |
| |
| /* Implement TARGET_SECONDARY_RELOAD. */ |
| static reg_class_t |
| sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, |
| machine_mode mode, secondary_reload_info *sri) |
| { |
| enum reg_class rclass = (enum reg_class) rclass_i; |
| |
| if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS |
| && REG_P (XEXP (XEXP (x, 0), 0)) |
| && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG) |
| return rclass == R0_REGS ? NO_REGS : R0_REGS; |
| |
| if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG) |
| return rclass == R0_REGS ? NO_REGS : R0_REGS; |
| |
| if (REG_P (x) && REGNO (x) == GBR_REG) |
| return NO_REGS; |
| |
| if (in_p) |
| { |
| if (REGCLASS_HAS_FP_REG (rclass) |
| && immediate_operand ((x), mode) |
| && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode)) |
| switch (mode) |
| { |
| case E_SFmode: |
| sri->icode = CODE_FOR_reload_insf__frn; |
| return NO_REGS; |
| case E_DFmode: |
| sri->icode = CODE_FOR_reload_indf__frn; |
| return NO_REGS; |
| case E_SImode: |
| /* ??? If we knew that we are in the appropriate mode - |
| single precision - we could use a reload pattern directly. */ |
| return FPUL_REGS; |
| default: |
| abort (); |
| } |
| if (rclass == FPUL_REGS |
| && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG |
| || REGNO (x) == T_REG)) |
| || GET_CODE (x) == PLUS)) |
| return GENERAL_REGS; |
| if (rclass == FPUL_REGS && immediate_operand (x, mode)) |
| { |
| if (satisfies_constraint_I08 (x) || fp_zero_operand (x)) |
| return GENERAL_REGS; |
| else if (mode == SFmode) |
| return FP_REGS; |
| sri->icode = CODE_FOR_reload_insi__i_fpul; |
| return NO_REGS; |
| } |
| if (rclass == FPSCR_REGS |
| && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER) |
| || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS))) |
| return GENERAL_REGS; |
| } /* end of input-only processing. */ |
| |
| if (((REGCLASS_HAS_FP_REG (rclass) |
| && (REG_P (x) |
| && (GENERAL_OR_AP_REGISTER_P (REGNO (x)) |
| || (FP_REGISTER_P (REGNO (x)) && mode == SImode |
| && TARGET_FMOVD)))) |
| || (REGCLASS_HAS_GENERAL_REG (rclass) |
| && REG_P (x) |
| && FP_REGISTER_P (REGNO (x)))) |
| && (mode == SFmode || mode == SImode)) |
| return FPUL_REGS; |
| if ((rclass == FPUL_REGS |
| || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode)) |
| && (MEM_P (x) |
| || (REG_P (x) |
| && (REGNO (x) >= FIRST_PSEUDO_REGISTER |
| || REGNO (x) == T_REG |
| || system_reg_operand (x, VOIDmode))))) |
| { |
| if (rclass == FPUL_REGS) |
| return GENERAL_REGS; |
      return NO_REGS;  /* LRA wants NO_REGS here; it used to be FPUL_REGS.  */
| } |
| |
| if ((rclass == MAC_REGS || rclass == PR_REGS) |
| && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x)) |
| && rclass != REGNO_REG_CLASS (REGNO (x))) |
| return GENERAL_REGS; |
| |
  /* Otherwise, fall back to loading the FPUL register through general
     registers.  This case can happen when the movsi_ie insn is picked
     initially to load/store the FPUL register from/to another register,
     and then that other register is allocated on the stack.  */
| if (rclass == FPUL_REGS && true_regnum (x) == -1) |
| return GENERAL_REGS; |
| |
| /* Force mov.b / mov.w displacement addressing insn to use R0 as |
| the other operand. |
     On SH2A we could also just leave it alone here, which would result in a
| 4 byte move insn being generated instead. However, for this to work |
| the insns must have the appropriate alternatives. */ |
| if ((mode == QImode || mode == HImode) && rclass != R0_REGS |
| && satisfies_constraint_Sdd (x) |
| && sh_disp_addr_displacement (x) |
| <= sh_max_mov_insn_displacement (mode, false)) |
| return R0_REGS; |
| |
| /* When reload is trying to address a QImode or HImode subreg on the stack, |
| force any subreg byte into R0_REGS, as this is going to become a |
| displacement address. |
| We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg |
| is on the stack, the memref to it might already require a displacement |
| and that has to be added to the final address. At this point we don't |
| know the cumulative displacement so we assume the worst case. */ |
| if ((mode == QImode || mode == HImode) && rclass != R0_REGS |
| && GET_CODE (x) == SUBREG && true_regnum (x) == -1) |
| return R0_REGS; |
| |
| return NO_REGS; |
| } |
| |
| /* Return true if SUBST can't safely replace its equivalent during RA. */ |
| static bool |
| sh_cannot_substitute_mem_equiv_p (rtx) |
| { |
| /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn |
| uses R0 and may cause spill failure when R0 is already used. |
| We have to return true for that case at least. |
     Moreover, SH has a strong affinity for R0 and not enough hard
     registers to make the equiv substitution a win in either size or
     speed on average working sets.  The pseudos produced to hold the
     equiv values can't get good hard registers in the bad cases and end
     up as memory save/restore insns, which makes the code worse.  */
| return true; |
| } |
| |
| /* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */ |
| static bool |
| sh_legitimize_address_displacement (rtx *offset1, rtx *offset2, |
| poly_int64 orig_offset, |
| machine_mode mode) |
| { |
| if ((TARGET_FPU_DOUBLE && mode == DFmode) |
| || (TARGET_SH2E && mode == SFmode)) |
| return false; |
| |
| struct disp_adjust adj = sh_find_mov_disp_adjust (mode, orig_offset); |
| if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX) |
| { |
| *offset1 = adj.offset_adjust; |
| *offset2 = adj.mov_disp; |
| return true; |
| } |
| |
| return false; |
| } |
| |
/* Return true if a movsf insn should be split with an additional
   register.  */
| bool |
| sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2) |
| { |
| /* op0 == op1 */ |
| if (rtx_equal_p (op0, op1)) |
| return true; |
| /* fy, FQ, reg */ |
| if (GET_CODE (op1) == CONST_DOUBLE |
| && ! satisfies_constraint_G (op1) |
| && ! satisfies_constraint_H (op1) |
| && REG_P (op0) |
| && REG_P (op2)) |
| return true; |
| /* f, r, y */ |
| if (REG_P (op0) && FP_REGISTER_P (REGNO (op0)) |
| && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1)) |
| && REG_P (op2) && (REGNO (op2) == FPUL_REG)) |
| return true; |
| /* r, f, y */ |
| if (REG_P (op1) && FP_REGISTER_P (REGNO (op1)) |
| && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0)) |
| && REG_P (op2) && (REGNO (op2) == FPUL_REG)) |
| return true; |
| |
| return false; |
| } |
| |
| static void |
| sh_conditional_register_usage (void) |
| { |
| for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++) |
| if (! VALID_REGISTER_P (regno)) |
| fixed_regs[regno] = 1; |
| /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */ |
| if (flag_pic) |
| fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; |
| if (TARGET_FDPIC) |
| { |
| fixed_regs[PIC_REG] = 1; |
| call_used_regs[PIC_REG] = 1; |
| } |
| /* Renesas saves and restores mac registers on call. */ |
| if (TARGET_HITACHI && ! TARGET_NOMACSAVE) |
| { |
| call_used_regs[MACH_REG] = 0; |
| call_used_regs[MACL_REG] = 0; |
| } |
| |
| for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++) |
| if (! fixed_regs[regno] && call_used_regs[regno]) |
| SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno); |
| |
| call_used_regs[FPSCR_MODES_REG] = 0; |
| call_used_regs[FPSCR_STAT_REG] = 0; |
| } |
| |
| /* Implement TARGET_LEGITIMATE_CONSTANT_P |
| |
| can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */ |
| static bool |
| sh_legitimate_constant_p (machine_mode mode, rtx x) |
| { |
| if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) |
| { |
| rtx base, offset; |
| split_const (x, &base, &offset); |
| |
| if (GET_CODE (base) == SYMBOL_REF |
| && !offset_within_block_p (base, INTVAL (offset))) |
| return false; |
| } |
| |
| if (TARGET_FDPIC |
| && (SYMBOLIC_CONST_P (x) |
| || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS |
| && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0))))) |
| return false; |
| |
| return GET_CODE (x) != CONST_DOUBLE |
| || mode == DFmode || mode == SFmode |
| || mode == DImode || GET_MODE (x) == VOIDmode; |
| } |
| |
| enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT; |
| |
| static void |
| sh_init_sync_libfuncs (void) |
| { |
| init_sync_libfuncs (UNITS_PER_WORD); |
| } |
| |
| /* Return true if it is appropriate to emit `ret' instructions in the |
| body of a function. */ |
| bool |
| sh_can_use_simple_return_p (void) |
| { |
| if (! reload_completed || frame_pointer_needed) |
| return false; |
| |
  /* Moving the prologue around doesn't reduce the size.  */
| if (optimize_function_for_size_p (cfun)) |
| return false; |
| |
| /* Finally, allow for pr save. */ |
| HARD_REG_SET live_regs_mask; |
| int d = calc_live_regs (&live_regs_mask); |
| |
| if (rounded_frame_size (d) > 4) |
| return false; |
| |
| return true; |
| } |
| |
| /*------------------------------------------------------------------------------ |
| Address mode optimization support code |
| */ |
| |
| typedef HOST_WIDE_INT disp_t; |
| static const disp_t MIN_DISP = HOST_WIDE_INT_MIN; |
| static const disp_t MAX_DISP = HOST_WIDE_INT_MAX; |
| static const disp_t INVALID_DISP = MAX_DISP; |
| |
| /* A memory reference which is described by a base register and a |
| displacement. */ |
| class base_reg_disp |
| { |
| public: |
| base_reg_disp (rtx br, disp_t d); |
| |
| bool is_reg (void) const; |
| bool is_disp (void) const; |
| rtx reg (void) const; |
| disp_t disp (void) const; |
| |
| private: |
| rtx reg_; |
| disp_t disp_; |
| }; |
| |
| inline |
| base_reg_disp::base_reg_disp (rtx br, disp_t d) |
| : reg_ (br), disp_ (d) |
| { |
| } |
| |
| inline bool |
| base_reg_disp::is_reg (void) const |
| { |
| return reg_ != NULL_RTX && disp_ != INVALID_DISP; |
| } |
| |
| inline bool |
| base_reg_disp::is_disp (void) const |
| { |
| return reg_ == NULL_RTX && disp_ != INVALID_DISP; |
| } |
| |
| inline rtx |
| base_reg_disp::reg (void) const |
| { |
| return reg_; |
| } |
| |
| inline disp_t |
| base_reg_disp::disp (void) const |
| { |
| return disp_; |
| } |
| |
| /* Find the base register and calculate the displacement for a given |
| address rtx 'x'. */ |
| static base_reg_disp |
| sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0, |
| rtx base_reg = NULL) |
| { |
| if (REG_P (x)) |
| { |
| if (REGNO (x) == GBR_REG) |
| return base_reg_disp (x, disp); |
| |
| /* We've reached a hard-reg. This is probably the point where |
| function args are copied to pseudos. Do not go any further and |
| stick to the pseudo. If the original mem addr was in a hard reg |
| from the beginning, it will become the base reg. */ |
| if (REGNO (x) < FIRST_PSEUDO_REGISTER) |
| return base_reg_disp (base_reg != NULL ? base_reg : x, disp); |
| |
      /* Find the def of the reg and trace it.  If there is more than one
	 def and they are not all the same, assume it's not safe to proceed.  */
| rtx_insn* last_i = NULL; |
| rtx last_set = NULL; |
| for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL; |
| d = DF_REF_NEXT_REG (d)) |
| { |
| rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d))); |
| |
| /* Accept multiple defs, as long as they are equal. */ |
| if (last_set == NULL || rtx_equal_p (last_set, set)) |
| { |
| last_i = DF_REF_INSN (d); |
| last_set = set; |
| } |
| else |
| { |
| last_i = NULL; |
| last_set = NULL; |
| break; |
| } |
| } |
| |
| if (last_set != NULL && last_i != NULL) |
| return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp, |
| XEXP (last_set, 0)); |
| |
| /* When here, no previous insn was found that sets the reg. |
| The input reg is already the base reg. */ |
| return base_reg_disp (x, disp); |
| } |
| |
| else if (GET_CODE (x) == PLUS) |
| { |
| base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0)); |
| base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1)); |
| |
| /* Either left or right val must be a reg. |
| We don't handle the case of 'reg + reg' here. */ |
| if (left_val.is_reg () && right_val.is_disp ()) |
| return base_reg_disp (left_val.reg (), left_val.disp () |
| + right_val.disp () + disp); |
| else if (right_val.is_reg () && left_val.is_disp ()) |
| return base_reg_disp (right_val.reg (), right_val.disp () |
| + left_val.disp () + disp); |
| else |
| return base_reg_disp (base_reg, disp); |
| } |
| |
| else if (CONST_INT_P (x)) |
| return base_reg_disp (NULL, disp + INTVAL (x)); |
| |
| /* Didn't find anything useful. */ |
| return base_reg_disp (base_reg, disp); |
| } |
| |
| /* Given an insn and a memory operand, try to find an equivalent GBR |
| based memory address and return the corresponding new memory address. |
| Return NULL_RTX if not found. */ |
| rtx |
| sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem) |
| { |
| if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem))) |
| return NULL_RTX; |
| |
| /* Leave post/pre inc/dec or any other side effect addresses alone. */ |
| if (side_effects_p (XEXP (mem, 0))) |
| return NULL_RTX; |
| |
| /* When not optimizing there might be no dataflow available. */ |
| if (df == NULL) |
| return NULL_RTX; |
| |
| base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0)); |
| |
| if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG) |
| { |
| /* If GBR is marked as call clobbered we bail out if we see a call. |
| FIXME: Actually should check if this mem refers to the gbr value |
	 before or after the call.  If there is a store_gbr preceding this
| mem, it's safe to use GBR for this mem. |
| |
| If GBR is not marked as call clobbered, but there is some other |
| def than a call, it's probably a load_gbr upon which we also |
| bail out to be on the safe side. |
| FIXME: Should check if we have a use-after-def case, such as |
| the call case above. */ |
| for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL; |
| d = DF_REF_NEXT_REG (d)) |
| { |
| if (CALL_P (DF_REF_INSN (d))) |
| { |
| if (TEST_HARD_REG_BIT (regs_invalidated_by_call, GBR_REG)) |
| return NULL_RTX; |
| else |
| continue; |
| } |
| else |
| return NULL_RTX; |
| } |
| |
| rtx disp = GEN_INT (gbr_disp.disp ()); |
| if (gbr_displacement (disp, GET_MODE (mem))) |
| return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp); |
| } |
| |
| return NULL_RTX; |
| } |
| |
| /*------------------------------------------------------------------------------ |
| Manual insn combine support code. |
| */ |
| |
| /* Return true if the specified insn contains any UNSPECs or |
| UNSPEC_VOLATILEs. */ |
| static bool |
| sh_unspec_insn_p (rtx x) |
| { |
| subrtx_iterator::array_type array; |
| FOR_EACH_SUBRTX (i, array, x, ALL) |
| if (*i != NULL |
| && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE)) |
| return true; |
| |
| return false; |
| } |
| |
| /* Return true if the register operands of the specified insn are modified |
| between the specified from and to insns (exclusive of those two). */ |
| bool |
| sh_insn_operands_modified_between_p (rtx_insn* operands_insn, |
| const rtx_insn* from, |
| const rtx_insn* to) |
| { |
| /* FIXME: Return true for multiple sets for now. */ |
| rtx s = single_set (operands_insn); |
| if (s == NULL_RTX) |
| return true; |
| |
| subrtx_iterator::array_type array; |
| FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL) |
    if (*i != NULL
	&& (REG_P (*i) || SUBREG_P (*i))
	&& reg_set_between_p (*i, from, to))
| return true; |
| |
| return false; |
| } |
| |
| /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that |
| negates the T bit and stores the result in the T bit. */ |
| bool |
| sh_is_nott_insn (const rtx_insn* i) |
| { |
| return i != NULL && GET_CODE (PATTERN (i)) == SET |
| && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode) |
| && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode); |
| } |
| |
/* Given an insn, check whether it's a 'movt' kind of insn, i.e. an insn
   that stores the T bit in a register, and return the destination
   register rtx, or null.  */
rtx
sh_movt_set_dest (const rtx_insn* i)
| { |
| return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i)); |
| } |
| |
| rtx |
| sh_movt_set_dest (const_rtx pat) |
| { |
| return GET_CODE (pat) == SET |
| && arith_reg_dest (XEXP (pat, 0), SImode) |
| && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL; |
| } |
| |
| /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn |
| that stores the negated T bit in a register, and return the destination |
| register rtx, or null. */ |
| rtx |
| sh_movrt_set_dest (const rtx_insn* i) |
| { |
| return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i)); |
| } |
| |
| rtx |
| sh_movrt_set_dest (const_rtx pat) |
| { |
| /* The negc movrt replacement is inside a parallel. */ |
| if (GET_CODE (pat) == PARALLEL) |
| pat = XVECEXP (pat, 0, 0); |
| |
| return GET_CODE (pat) == SET |
| && arith_reg_dest (XEXP (pat, 0), SImode) |
	 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
}
| |
| /* Given an insn and a reg number, tell whether the reg dies or is unused |
| after the insn. */ |
| bool |
| sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno) |
| { |
| return find_regno_note (i, REG_DEAD, regno) != NULL |
| || find_regno_note (i, REG_UNUSED, regno) != NULL; |
| } |
| |
| /* Given an insn and a reg number, remove reg dead or reg unused notes to |
| mark it as being used after the insn. */ |
| void |
| sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno) |
| { |
| if (rtx n = find_regno_note (i, REG_DEAD, regno)) |
| remove_note (i, n); |
| if (rtx n = find_regno_note (i, REG_UNUSED, regno)) |
| remove_note (i, n); |
| } |
| |
| /* Given an insn check if it contains any post/pre inc/dec mem operands and |
| add the REG_INC notes accordingly. |
| FIXME: This function is very similar to lra.c (add_auto_inc_notes). |
| FIXME: This function is currently used by peephole2 patterns because |
| the peephole2 pass does not preserve REG_INC notes. If the notes |
| are dropped the following passes will do wrong things. */ |
| rtx_insn* |
| sh_check_add_incdec_notes (rtx_insn* i) |
| { |
| struct for_each_inc_dec_clb |
| { |
| static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED, |
| rtx dest, rtx src ATTRIBUTE_UNUSED, |
| rtx srcoff ATTRIBUTE_UNUSED, void* arg) |
| { |
| gcc_assert (REG_P (dest)); |
| |
| rtx_insn* i = (rtx_insn*)arg; |
| if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL) |
| add_reg_note (i, REG_INC, dest); |
| |
| return 0; |
| } |
| }; |
| |
| for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i); |
| return i; |
| } |
| |
/* Given a move insn destination and a source, make sure that the move source
| operand is not a post-inc mem load with the same address reg as the |
| destination. Returns the modified source operand with the post-inc removed |
| if necessary. */ |
| rtx |
| sh_remove_overlapping_post_inc (rtx dst, rtx src) |
| { |
| if (!MEM_P (src)) |
| return src; |
| |
| rtx addr = XEXP (src, 0); |
| |
| if (GET_CODE (addr) == POST_INC |
| && reg_overlap_mentioned_p (XEXP (addr, 0), dst)) |
| return replace_equiv_address (src, XEXP (addr, 0)); |
| |
| gcc_assert (GET_CODE (addr) != POST_MODIFY); |
| return src; |
| } |
| |
| /* Emit a move insn that is safe to be used in peephole patterns. */ |
| rtx_insn* |
| sh_peephole_emit_move_insn (rtx dst, rtx src) |
| { |
| return sh_check_add_incdec_notes ( |
| emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src))); |
| } |
| |
| /* Given an op rtx and an insn, try to find out whether the result of the |
| specified op consists only of logical operations on T bit stores. */ |
| bool |
| sh_is_logical_t_store_expr (rtx op, rtx_insn* insn) |
| { |
| if (!logical_operator (op, SImode)) |
| return false; |
| |
| rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) }; |
| int op_is_t_count = 0; |
| |
| for (int i = 0; i < 2; ++i) |
| { |
| if (t_reg_operand (ops[i], VOIDmode) |
| || negt_reg_operand (ops[i], VOIDmode)) |
| op_is_t_count++; |
| |
| else |
| { |
| set_of_reg op_set = sh_find_set_of_reg |
| (ops[i], insn, prev_nonnote_nondebug_insn_bb); |
| if (op_set.set_src == NULL_RTX) |
| continue; |
| |
| if (t_reg_operand (op_set.set_src, VOIDmode) |
| || negt_reg_operand (op_set.set_src, VOIDmode) |
| || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn)) |
| op_is_t_count++; |
| } |
| } |
| |
| return op_is_t_count == 2; |
| } |
| |
| /* Given the operand that is extended in a sign/zero extend insn, and the |
| insn, try to figure out whether the sign/zero extension can be replaced |
| by a simple reg-reg copy. If so, the replacement reg rtx is returned, |
| NULL_RTX otherwise. */ |
| rtx |
| sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn) |
| { |
  if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
    extended_op = SUBREG_REG (extended_op);
  else if (! REG_P (extended_op))
    return NULL_RTX;
| |
| /* Reg moves must be of the same mode. */ |
| if (GET_MODE (extended_op) != SImode) |
| return NULL_RTX; |
| |
| set_of_reg s = sh_find_set_of_reg (extended_op, insn, |
| prev_nonnote_nondebug_insn_bb); |
| if (s.set_src == NULL_RTX) |
| return NULL_RTX; |
| |
| if (t_reg_operand (s.set_src, VOIDmode) |
| || negt_reg_operand (s.set_src, VOIDmode)) |
| return extended_op; |
| |
| /* If the zero extended reg was formed by a logical operation, check the |
| operands of the logical operation. If both originated from T bit |
| stores the zero extension can be eliminated. */ |
| else if (sh_is_logical_t_store_expr (s.set_src, s.insn)) |
| return extended_op; |
| |
| return NULL_RTX; |
| } |
| |
| /* Given the current insn, which is assumed to be a movrt_negc insn, try to |
| figure out whether it should be converted into a movt-xor sequence in |
| the movrt_negc splitter. |
| Returns true if insns have been modified and the splitter has succeeded. */ |
| bool |
| sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[]) |
| { |
| /* In cases such as |
| tst r4,r4 |
| mov #-1,r1 |
| negc r1,r1 |
| tst r4,r4 |
| we can replace the T bit clobbering negc with a movt-xor sequence and |
| eliminate the redundant comparison. |
| Because the xor insn depends on register allocation results, allow this |
| only before reload. */ |
| if (!can_create_pseudo_p ()) |
| return false; |
| |
| set_of_reg t_before_negc = sh_find_set_of_reg |
| (get_t_reg_rtx (), curr_insn, prev_nonnote_nondebug_insn_bb); |
| set_of_reg t_after_negc = sh_find_set_of_reg |
| (get_t_reg_rtx (), curr_insn, next_nonnote_nondebug_insn_bb); |
| |
| if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX |
| && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx) |
| && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn) |
| && !sh_insn_operands_modified_between_p (t_before_negc.insn, |
| t_before_negc.insn, |
| t_after_negc.insn) |
| && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn) |
| && !sh_unspec_insn_p (t_after_negc.insn) |
| && !volatile_insn_p (PATTERN (t_after_negc.insn)) |
| && !side_effects_p (PATTERN (t_after_negc.insn)) |
| && !may_trap_or_fault_p (PATTERN (t_after_negc.insn))) |
| { |
| emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ())); |
| set_insn_deleted (t_after_negc.insn); |
| return true; |
| } |
| else |
| return false; |
| } |
| |
| /* Given a reg and the current insn, see if the value of the reg originated |
| from a sign or zero extension and return the discovered information. */ |
| sh_extending_set_of_reg |
| sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn) |
| { |
| if (reg == NULL) |
| return sh_extending_set_of_reg (curr_insn); |
| |
| if (SUBREG_P (reg)) |
| reg = SUBREG_REG (reg); |
| |
| if (!REG_P (reg)) |
| return sh_extending_set_of_reg (curr_insn); |
| |
| /* FIXME: Also search the predecessor basic blocks. It seems that checking |
| only the adjacent predecessor blocks would cover most of the cases. |
     Also try to look through the first extension that we hit.  There are
     some cases where a zero_extend is followed by an (implicit) sign_extend,
     and it fails to see the sign_extend.  */
| sh_extending_set_of_reg result = sh_find_set_of_reg |
| (reg, curr_insn, prev_nonnote_nondebug_insn_bb, true); |
| |
| if (result.set_src != NULL) |
| { |
| if (GET_CODE (result.set_src) == SIGN_EXTEND |
| || GET_CODE (result.set_src) == ZERO_EXTEND) |
| { |
| if (dump_file) |
| fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is " |
| "explicitly sign/zero extended in insn %d\n", |
| REGNO (reg), INSN_UID (result.insn)); |
| result.from_mode = GET_MODE (XEXP (result.set_src, 0)); |
| result.ext_code = GET_CODE (result.set_src); |
| } |
| else if (MEM_P (result.set_src) |
| && (GET_MODE (result.set_src) == QImode |
| || GET_MODE (result.set_src) == HImode) |
| && !sh_unspec_insn_p (result.insn)) |
| { |
	  /* On SH, QImode and HImode memory loads always sign extend.
	     However, in some cases where it seems that the higher bits are
	     not interesting, the loads will not be expanded as sign
	     extending insns, but as QImode/HImode loads into QImode/HImode
	     regs.  We report that the reg has been sign extended by the mem
	     load.  When it is used as such, we must convert the mem load
	     into a sign extending insn, see also
	     sh_extending_set_of_reg::use_as_extended_reg.  */
| if (dump_file) |
| fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is " |
| "implicitly sign extended in insn %d\n", |
| REGNO (reg), INSN_UID (result.insn)); |
| result.from_mode = GET_MODE (result.set_src); |
| result.ext_code = SIGN_EXTEND; |
| } |
| } |
| |
| return result; |
| } |
| |
| /* Given a reg that is known to be sign or zero extended at some insn, |
| take the appropriate measures so that the extended value can be used as |
| a reg at the specified insn and return the resulting reg rtx. */ |
| rtx |
| sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const |
| { |
| gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL); |
| gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND); |
| gcc_assert (from_mode == QImode || from_mode == HImode); |
| |
| if (MEM_P (set_src) && ext_code == SIGN_EXTEND) |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| "use_as_extended_reg: converting non-extending mem load in " |
| "insn %d into sign-extending load\n", INSN_UID (insn)); |
| |
| rtx r = gen_reg_rtx (SImode); |
| rtx_insn* i0; |
| if (from_mode == QImode) |
| i0 = sh_check_add_incdec_notes ( |
| emit_insn_after (gen_extendqisi2 (r, set_src), insn)); |
| else if (from_mode == HImode) |
| i0 = sh_check_add_incdec_notes ( |
| emit_insn_after (gen_extendhisi2 (r, set_src), insn)); |
| else |
| gcc_unreachable (); |
| |
| emit_insn_after ( |
| gen_move_insn (XEXP (set_rtx, 0), |
| gen_lowpart (GET_MODE (set_src), r)), i0); |
| set_insn_deleted (insn); |
| return r; |
| } |
| else |
| { |
| rtx extension_dst = XEXP (set_rtx, 0); |
| if (GET_MODE (extension_dst) != SImode) |
| extension_dst = simplify_gen_subreg (SImode, extension_dst, |
| GET_MODE (extension_dst), 0); |
| if (modified_between_p (extension_dst, insn, use_at_insn)) |
| { |
| if (dump_file) |
| fprintf (dump_file, |
| "use_as_extended_reg: dest reg %d of extending insn %d is " |
| "modified, inserting a reg-reg copy\n", |
| REGNO (extension_dst), INSN_UID (insn)); |
| |
| rtx r = gen_reg_rtx (SImode); |
| emit_insn_after (gen_move_insn (r, extension_dst), insn); |
| return r; |
| } |
| else |
| { |
| sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst)); |
| return extension_dst; |
| } |
| } |
| } |
| |
| bool |
| sh_extending_set_of_reg::can_use_as_unextended_reg (void) const |
| { |
| if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND) |
| && (from_mode == QImode || from_mode == HImode) |
| && set_src != NULL) |
| return arith_reg_operand (XEXP (set_src, 0), from_mode); |
| else |
| return false; |
| } |
| |
| rtx |
| sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const |
| { |
| gcc_assert (can_use_as_unextended_reg ()); |
| |
| rtx r = XEXP (set_src, 0); |
| rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0); |
| |
| if (modified_between_p (r, insn, use_at_insn)) |
| { |
| rtx r1 = gen_reg_rtx (SImode); |
| emit_insn_after (gen_move_insn (r1, r0), insn); |
| return r1; |
| } |
| else |
| { |
| sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r) |
| ? REGNO (SUBREG_REG (r)) |
| : REGNO (r)); |
| return r0; |
| } |
| } |
| |
| /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn, |
| perform the necessary checks on the operands and split it accordingly. */ |
| void |
| sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode, |
| int subreg_offset, rtx operands[]) |
| { |
| gcc_assert (subreg_mode == QImode || subreg_mode == HImode); |
| |
| sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0], |
| curr_insn); |
| sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1], |
| curr_insn); |
| |
| /* If one of the operands is known to be zero extended, that's already |
| sufficient to mask out the unwanted high bits. */ |
| if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode) |
| { |
| emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn), |
| operands[1])); |
| return; |
| } |
| if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode) |
| { |
| emit_insn (gen_tstsi_t (operands[0], |
| eop1.use_as_extended_reg (curr_insn))); |
| return; |
| } |
| |
| /* None of the operands seem to be zero extended. |
| If both are sign extended it's OK, too. */ |
| if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND |
| && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode) |
| { |
| emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn), |
| eop1.use_as_extended_reg (curr_insn))); |
| return; |
| } |
| |
| /* Otherwise we have to insert a zero extension on one of the operands to |
| mask out the unwanted high bits. |
| Prefer the operand that has no known extension. */ |
| if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN) |
| std::swap (operands[0], operands[1]); |
| |
| rtx tmp0 = gen_reg_rtx (SImode); |
| rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0], |
| GET_MODE (operands[0]), subreg_offset); |
| emit_insn (subreg_mode == QImode |
| ? gen_zero_extendqisi2 (tmp0, tmp1) |
| : gen_zero_extendhisi2 (tmp0, tmp1)); |
| emit_insn (gen_tstsi_t (tmp0, operands[1])); |
| } |
| |
| /* A helper class to increment/decrement a counter variable each time a |
| function is entered/left. */ |
| class scope_counter |
| { |
| public: |
| scope_counter (int& counter) : m_counter (counter) { ++m_counter; } |
| |
| ~scope_counter (void) |
| { |
| --m_counter; |
| gcc_assert (m_counter >= 0); |
| } |
| |
| int count (void) const { return m_counter; } |
| |
| private: |
| int& m_counter; |
| }; |
| |
/* Given an rtx x, determine whether the expression can be used to create
   an insn that calculates x and stores the result in the T bit.
   This is used by the 'treg_set_expr' predicate to construct insn
   sequences where T bit results are fed into other insns, such as addc,
   subc, negc insns.

   FIXME: The patterns that expand 'treg_set_expr' operands tend to
   distinguish between 'positive' and 'negative' forms.  For now this has
   to be done in the preparation code.  We could also introduce
   'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and
   write two different patterns for the 'positive' and 'negative' forms.
   However, the total amount of lines of code seems to be about the same
   and the '{pos|neg}_treg_set_expr' predicates would be more expensive,
   because the recog function would need to look inside the expression by
   temporarily splitting it.  */
| static int sh_recog_treg_set_expr_reent_count = 0; |
| |
| bool |
| sh_recog_treg_set_expr (rtx op, machine_mode mode) |
| { |
| scope_counter recursion (sh_recog_treg_set_expr_reent_count); |
| |
| /* Limit the recursion count to avoid nested expressions which we can't |
| resolve to a single treg set insn. */ |
| if (recursion.count () > 1) |
| return false; |
| |
| /* Early accept known possible operands before doing recog. */ |
| if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode) |
| || negt_reg_operand (op, mode)) |
| return true; |
| |
| /* Early reject impossible operands before doing recog. |
| There are some (set ((t) (subreg ...))) patterns, but we must be careful |
| not to allow any invalid reg-reg or mem-reg moves, or else other passes |
| such as lower-subreg will bail out. Some insns such as SH4A movua are |
| done with UNSPEC, so must reject those, too, or else it would result |
| in an invalid reg -> treg move. */ |
| if (CONST_INT_P (op) || register_operand (op, mode) |
| || memory_operand (op, mode) || sh_unspec_insn_p (op)) |
| return false; |
| |
| if (!can_create_pseudo_p ()) |
| return false; |
| |
  /* expand_debug_locations may call this to compute rtx costs at a
     very early stage.  In that case, don't make new insns here to
     avoid codegen differences with -g.  */
| if (currently_expanding_to_rtl) |
| return false; |
| |
| /* We are going to invoke recog in a re-entrant way and thus |
| have to capture its current state and restore it afterwards. */ |
| recog_data_d prev_recog_data = recog_data; |
| |
| rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op)); |
| SET_PREV_INSN (i) = NULL; |
| SET_NEXT_INSN (i) = NULL; |
| |
| /* If the comparison op doesn't have a result mode, set it to SImode. */ |
| machine_mode prev_op_mode = GET_MODE (op); |
| if (COMPARISON_P (op) && prev_op_mode == VOIDmode) |
| PUT_MODE (op, SImode); |
| |
| int result = recog (PATTERN (i), i, 0); |
| |
| /* It seems there is no insn like that. Create a negated version and |
| try again. If we hit a negated form, we'll allow that and append a |
| nott sequence when splitting out the insns. Insns that do the split |
| can then remove the trailing nott if they know how to deal with it. */ |
| if (result < 0 && COMPARISON_P (op)) |
| { |
| machine_mode cmp_mode = GET_MODE (XEXP (op, 0)); |
| if (cmp_mode == VOIDmode) |
| cmp_mode = GET_MODE (XEXP (op, 1)); |
| |
| rtx_code prev_code = GET_CODE (op); |
| PUT_CODE (op, reverse_condition (GET_CODE (op))); |
| result = recog (PATTERN (i), i, 0); |
| PUT_CODE (op, prev_code); |
| } |
| |
| PUT_MODE (op, prev_op_mode); |
| recog_data = prev_recog_data; |
| return result >= 0; |
| } |
| |
/* Returns true when recog of a 'treg_set_expr' is currently in progress.
   This can be used as a condition for insn/split patterns to allow certain
   T bit setting patterns only to be matched as sub expressions of other
   patterns.  */
| bool |
| sh_in_recog_treg_set_expr (void) |
| { |
| return sh_recog_treg_set_expr_reent_count > 0; |
| } |
| |
| /* Given an rtx x, which is assumed to be some expression that has been |
| matched by the 'treg_set_expr' predicate before, split and emit the |
| insns that are necessary to calculate the expression and store the result |
| in the T bit. |
   The splitting is done recursively similar to 'try_split' in emit-rtl.c.
| Unfortunately we can't use 'try_split' here directly, as it tries to invoke |
| 'delete_insn' which then causes the DF parts to bail out, because we |
| currently are inside another gen_split* function and would invoke |
| 'try_split' in a reentrant way. */ |
| static std::pair<rtx_insn*, rtx_insn*> |
| sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0) |
| { |
| if (dump_file) |
| { |
| fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n); |
| print_rtl_single (dump_file, i); |
| fprintf (dump_file, "\n"); |
| } |
| |
| rtx_insn* seq = split_insns (PATTERN (i), curr_insn); |
| |
| if (seq == NULL) |
| return std::make_pair (i, i); |
| |
| /* Avoid infinite splitter loops if any insn of the result matches |
| the original pattern. */ |
| for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s)) |
| if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i))) |
| return std::make_pair (i, i); |
| |
| unshare_all_rtl_in_chain (seq); |
| |
| /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in |
| a linked list, replace the single insn with the new insns. */ |
| rtx_insn* seqlast = seq; |
| while (NEXT_INSN (seqlast) != NULL) |
| seqlast = NEXT_INSN (seqlast); |
| |
| if (rtx_insn* iprev = PREV_INSN (i)) |
| SET_NEXT_INSN (iprev) = seq; |
| if (rtx_insn* inext = NEXT_INSN (i)) |
| SET_PREV_INSN (inext) = seqlast; |
| |
| SET_PREV_INSN (seq) = PREV_INSN (i); |
| SET_NEXT_INSN (seqlast) = NEXT_INSN (i); |
| |
| SET_PREV_INSN (i) = NULL; |
| SET_NEXT_INSN (i) = NULL; |
| |
| /* Recursively split all insns. */ |
| for (i = seq; ; i = NEXT_INSN (i)) |
| { |
| std::pair<rtx_insn*, rtx_insn*> ii = |
| sh_try_split_insn_simple (i, curr_insn, n + 1); |
| if (i == seq) |
| seq = ii.first; |
| if (i == seqlast) |
| { |
| seqlast = ii.second; |
| break; |
| } |
| i = ii.first; |
| } |
| |
| return std::make_pair (seq, seqlast); |
| } |
| |
| sh_treg_insns |
| sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn) |
| { |
| if (t_reg_operand (x, VOIDmode)) |
| return sh_treg_insns (); |
| |
| scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count); |
| |
| rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x)); |
| SET_PREV_INSN (i) = NULL; |
| SET_NEXT_INSN (i) = NULL; |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "split_treg_set_expr insn:\n"); |
| print_rtl (dump_file, i); |
| fprintf (dump_file, "\n"); |
| } |
| |
| /* If the insn is not found, we will try a negated form and append |
| a nott. */ |
| bool append_nott = false; |
| |
| /* We are going to invoke recog/split_insns in a re-entrant way and thus |
| have to capture its current state and restore it afterwards. */ |
| recog_data_d prev_recog_data = recog_data; |
| |
| if (negt_reg_operand (x, GET_MODE (x))) |
| { |
| /* This is a normal movt followed by a nott. It will be converted |
| into a movrt after initial expansion. */ |
| XEXP (PATTERN (i), 1) = get_t_reg_rtx (); |
| append_nott = true; |
| } |
| else |
| { |
| /* If the comparison op doesn't have a mode set, set it to SImode. */ |
| if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode) |
| PUT_MODE (x, SImode); |
| |
| int insn_code = recog (PATTERN (i), i, 0); |
| |
| if (insn_code < 0 && COMPARISON_P (x)) |
| { |
| machine_mode cmp_mode = GET_MODE (XEXP (x, 0)); |
| if (cmp_mode == VOIDmode) |
| cmp_mode = GET_MODE (XEXP (x, 1)); |
| |
| PUT_CODE (x, reverse_condition (GET_CODE (x))); |
| insn_code = recog (PATTERN (i), i, 0); |
| append_nott = true; |
| } |
| |
| gcc_assert (insn_code >= 0); |
| } |
| |
| /* Try to recursively split the insn. Some insns might refuse to split |
| any further while we are in the treg_set_expr splitting phase. They |
| will be emitted as part of the outer insn and then split again. */ |
| std::pair<rtx_insn*, rtx_insn*> insnlist = |
| sh_try_split_insn_simple (i, curr_insn); |
| |
| /* Restore recog state. */ |
| recog_data = prev_recog_data; |
| |
| rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second) |
| ? insnlist.second |
| : NULL; |
| if (dump_file) |
| { |
| fprintf (dump_file, "split_treg_set_expr insnlist:\n"); |
| print_rtl (dump_file, insnlist.first); |
| fprintf (dump_file, "\n"); |
| |
| if (nott_insn != NULL) |
| fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn)); |
| } |
| |
| emit_insn (insnlist.first); |
| |
| if (nott_insn != NULL && append_nott) |
| { |
| if (dump_file) |
| fprintf (dump_file, "removing trailing nott\n"); |
| remove_insn (nott_insn); |
| nott_insn = NULL; |
| append_nott = false; |
| } |
| |
| if (append_nott) |
| nott_insn = emit_insn (gen_nott (get_t_reg_rtx ())); |
| |
| rtx_insn* first_insn = get_insns (); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, "resulting insns:\n"); |
| print_rtl (dump_file, first_insn); |
| fprintf (dump_file, "\n"); |
| } |
| |
| return sh_treg_insns (first_insn, nott_insn); |
| } |
| |
| /*------------------------------------------------------------------------------ |
| Mode switching support code. |
| */ |
| |
/* Implement TARGET_MODE_EMIT.  */
static void
| sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode, |
| int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED) |
| { |
| if ((TARGET_SH4A_FP || TARGET_FPU_SH4_300) |
| && prev_mode != FP_MODE_NONE && prev_mode != mode) |
| { |
| emit_insn (gen_toggle_pr ()); |
| if (TARGET_FMOVD) |
| emit_insn (gen_toggle_sz ()); |
| } |
| else if (mode != FP_MODE_NONE) |
| { |
| rtx tmp = gen_reg_rtx (SImode); |
| emit_insn (gen_sts_fpscr (tmp)); |
| rtx i = NULL; |
| |
| const unsigned HOST_WIDE_INT fpbits = |
| TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR; |
| |
| if (prev_mode != FP_MODE_NONE && prev_mode != mode) |
| i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits))); |
| else if (mode == FP_MODE_SINGLE) |
| i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits))); |
| else if (mode == FP_MODE_DOUBLE) |
| i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits))); |
| else |
| gcc_unreachable (); |
| |
| emit_insn (i); |
| emit_insn (gen_lds_fpscr (tmp)); |
| } |
| } |
| |
/* Implement TARGET_MODE_NEEDED.  */
static int
| sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn) |
| { |
| return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE; |
| } |
| |
/* Implement TARGET_MODE_AFTER.  */
static int
| sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn) |
| { |
  if (TARGET_HITACHI && recog_memoized (insn) >= 0
      && get_attr_fp_set (insn) != FP_SET_NONE)
| return (int) get_attr_fp_set (insn); |
| else |
| return mode; |
| } |
| |
/* Implement TARGET_MODE_ENTRY.  */
static int
| sh_mode_entry (int entity ATTRIBUTE_UNUSED) |
| { |
| return NORMAL_MODE (entity); |
| } |
| |
/* Implement TARGET_MODE_EXIT.  */
static int
| sh_mode_exit (int entity ATTRIBUTE_UNUSED) |
| { |
| return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity); |
| } |
| |
/* Implement TARGET_MODE_PRIORITY.  */
static int
| sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n) |
| { |
| return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE); |
| } |
| |
| /*------------------------------------------------------------------------------ |
| Misc |
| */ |
| |
| /* Return true if we use LRA instead of reload pass. */ |
| bool |
| sh_lra_p (void) |
| { |
| return sh_lra_flag; |
| } |
| |
| /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */ |
| |
| static bool |
| sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size, |
| unsigned int align, |
| enum by_pieces_operation op, |
| bool speed_p) |
| { |
| switch (op) |
| { |
| case MOVE_BY_PIECES: |
| return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op) |
| < (!speed_p ? 2 : (align >= 32) ? 16 : 2); |
| case STORE_BY_PIECES: |
| case SET_BY_PIECES: |
| return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op) |
| < (!speed_p ? 2 : (align >= 32) ? 16 : 2); |
| default: |
| return default_use_by_pieces_infrastructure_p (size, align, |
| op, speed_p); |
| } |
| } |
| |
/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  With FDPIC, constants are
   never forced into the constant pool.  */
bool
sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
| rtx x ATTRIBUTE_UNUSED) |
| { |
| return TARGET_FDPIC; |
| } |
| |
| /* Emit insns to load the function address from FUNCDESC (an FDPIC |
| function descriptor) into r1 and the GOT address into r12, |
| returning an rtx for r1. */ |
| |
| rtx |
| sh_load_function_descriptor (rtx funcdesc) |
| { |
| rtx r1 = gen_rtx_REG (Pmode, R1_REG); |
| rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG); |
| rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc); |
| rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4)); |
| |
| emit_move_insn (r1, fnaddr); |
| /* The ABI requires the entry point address to be loaded first, so |
| prevent the load from being moved after that of the GOT |
| address. */ |
| emit_insn (gen_blockage ()); |
| emit_move_insn (pic_reg, gotaddr); |
| return r1; |
| } |
| |
| /* Return an rtx holding the initial value of the FDPIC register (the |
| FDPIC pointer passed in from the caller). */ |
| |
| rtx |
| sh_get_fdpic_reg_initial_val (void) |
| { |
| return get_hard_reg_initial_val (Pmode, PIC_REG); |
| } |
| |
| #include "gt-sh.h" |