| /* Output routines for GCC for ARM. |
| Copyright (C) 1991-2015 Free Software Foundation, Inc. |
| Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) |
| and Martin Simmons (@harleqn.co.uk). |
| More major hacks by Richard Earnshaw (rearnsha@arm.com). |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published |
| by the Free Software Foundation; either version 3, or (at your |
| option) any later version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT |
| ANY WARRANTY; without even the implied warranty of MERCHANTABILITY |
| or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public |
| License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "hash-table.h" |
| #include "tm.h" |
| #include "rtl.h" |
| #include "hash-set.h" |
| #include "machmode.h" |
| #include "vec.h" |
| #include "double-int.h" |
| #include "input.h" |
| #include "alias.h" |
| #include "symtab.h" |
| #include "wide-int.h" |
| #include "inchash.h" |
| #include "tree.h" |
| #include "fold-const.h" |
| #include "stringpool.h" |
| #include "stor-layout.h" |
| #include "calls.h" |
| #include "varasm.h" |
| #include "obstack.h" |
| #include "regs.h" |
| #include "hard-reg-set.h" |
| #include "insn-config.h" |
| #include "conditions.h" |
| #include "output.h" |
| #include "insn-attr.h" |
| #include "flags.h" |
| #include "reload.h" |
| #include "function.h" |
| #include "hashtab.h" |
| #include "statistics.h" |
| #include "real.h" |
| #include "fixed-value.h" |
| #include "expmed.h" |
| #include "dojump.h" |
| #include "explow.h" |
| #include "emit-rtl.h" |
| #include "stmt.h" |
| #include "expr.h" |
| #include "insn-codes.h" |
| #include "optabs.h" |
| #include "diagnostic-core.h" |
| #include "recog.h" |
| #include "predict.h" |
| #include "dominance.h" |
| #include "cfg.h" |
| #include "cfgrtl.h" |
| #include "cfganal.h" |
| #include "lcm.h" |
| #include "cfgbuild.h" |
| #include "cfgcleanup.h" |
| #include "basic-block.h" |
| #include "hash-map.h" |
| #include "is-a.h" |
| #include "plugin-api.h" |
| #include "ipa-ref.h" |
| #include "cgraph.h" |
| #include "ggc.h" |
| #include "except.h" |
| #include "tm_p.h" |
| #include "target.h" |
| #include "sched-int.h" |
| #include "target-def.h" |
| #include "debug.h" |
| #include "langhooks.h" |
| #include "df.h" |
| #include "intl.h" |
| #include "libfuncs.h" |
| #include "params.h" |
| #include "opts.h" |
| #include "dumpfile.h" |
| #include "gimple-expr.h" |
| #include "builtins.h" |
| #include "tm-constrs.h" |
| #include "rtl-iter.h" |
| #include "sched-int.h" |
| |
| /* Forward definitions of types. */ |
| typedef struct minipool_node Mnode; |
| typedef struct minipool_fixup Mfix; |
| |
| void (*arm_lang_output_object_attributes_hook)(void); |
| |
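| /* Holds up to four immediate values; optimal_immediate_sequence below |
| fills one of these in when working out how to build a constant from |
| a short sequence of instructions. */ |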
| struct four_ints |
| { |
| int i[4]; |
| }; |
| |
| /* Forward function declarations. */ |
| static bool arm_const_not_ok_for_debug_p (rtx); |
| static bool arm_needs_doubleword_align (machine_mode, const_tree); |
| static int arm_compute_static_chain_stack_bytes (void); |
| static arm_stack_offsets *arm_get_frame_offsets (void); |
| static void arm_add_gc_roots (void); |
| static int arm_gen_constant (enum rtx_code, machine_mode, rtx, |
| HOST_WIDE_INT, rtx, rtx, int, int); |
| static unsigned bit_count (unsigned long); |
| static int arm_address_register_rtx_p (rtx, int); |
| static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int); |
| static int thumb2_legitimate_index_p (machine_mode, rtx, int); |
| static int thumb1_base_register_rtx_p (rtx, machine_mode, int); |
| static rtx arm_legitimize_address (rtx, rtx, machine_mode); |
| static reg_class_t arm_preferred_reload_class (rtx, reg_class_t); |
| static rtx thumb_legitimize_address (rtx, rtx, machine_mode); |
| inline static int thumb1_index_register_rtx_p (rtx, int); |
| static int thumb_far_jump_used_p (void); |
| static bool thumb_force_lr_save (void); |
| static unsigned arm_size_return_regs (void); |
| static bool arm_assemble_integer (rtx, unsigned int, int); |
| static void arm_print_operand (FILE *, rtx, int); |
| static void arm_print_operand_address (FILE *, rtx); |
| static bool arm_print_operand_punct_valid_p (unsigned char code); |
| static const char *fp_const_from_val (REAL_VALUE_TYPE *); |
| static arm_cc get_arm_condition_code (rtx); |
| static HOST_WIDE_INT int_log2 (HOST_WIDE_INT); |
| static const char *output_multi_immediate (rtx *, const char *, const char *, |
| int, HOST_WIDE_INT); |
| static const char *shift_op (rtx, HOST_WIDE_INT *); |
| static struct machine_function *arm_init_machine_status (void); |
| static void thumb_exit (FILE *, int); |
| static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *); |
| static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT); |
| static Mnode *add_minipool_forward_ref (Mfix *); |
| static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT); |
| static Mnode *add_minipool_backward_ref (Mfix *); |
| static void assign_minipool_offsets (Mfix *); |
| static void arm_print_value (FILE *, rtx); |
| static void dump_minipool (rtx_insn *); |
| static int arm_barrier_cost (rtx); |
| static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT); |
| static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT); |
| static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *, |
| machine_mode, rtx); |
| static void arm_reorg (void); |
| static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int); |
| static unsigned long arm_compute_save_reg0_reg12_mask (void); |
| static unsigned long arm_compute_save_reg_mask (void); |
| static unsigned long arm_isr_value (tree); |
| static unsigned long arm_compute_func_type (void); |
| static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *); |
| static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *); |
| static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *); |
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); |
| #endif |
| static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT); |
| static void arm_output_function_prologue (FILE *, HOST_WIDE_INT); |
| static int arm_comp_type_attributes (const_tree, const_tree); |
| static void arm_set_default_type_attributes (tree); |
| static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int); |
| static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int); |
| static int optimal_immediate_sequence (enum rtx_code code, |
| unsigned HOST_WIDE_INT val, |
| struct four_ints *return_sequence); |
| static int optimal_immediate_sequence_1 (enum rtx_code code, |
| unsigned HOST_WIDE_INT val, |
| struct four_ints *return_sequence, |
| int i); |
| static int arm_get_strip_length (int); |
| static bool arm_function_ok_for_sibcall (tree, tree); |
| static machine_mode arm_promote_function_mode (const_tree, |
| machine_mode, int *, |
| const_tree, int); |
| static bool arm_return_in_memory (const_tree, const_tree); |
| static rtx arm_function_value (const_tree, const_tree, bool); |
| static rtx arm_libcall_value_1 (machine_mode); |
| static rtx arm_libcall_value (machine_mode, const_rtx); |
| static bool arm_function_value_regno_p (const unsigned int); |
| static void arm_internal_label (FILE *, const char *, unsigned long); |
| static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, |
| tree); |
| static bool arm_have_conditional_execution (void); |
| static bool arm_cannot_force_const_mem (machine_mode, rtx); |
| static bool arm_legitimate_constant_p (machine_mode, rtx); |
| static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool); |
| static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *); |
| static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); |
| static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); |
| static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); |
| static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool); |
| static bool arm_rtx_costs (rtx, int, int, int, int *, bool); |
| static int arm_address_cost (rtx, machine_mode, addr_space_t, bool); |
| static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t); |
| static int arm_memory_move_cost (machine_mode, reg_class_t, bool); |
| static void emit_constant_insn (rtx cond, rtx pattern); |
| static rtx_insn *emit_set_insn (rtx, rtx); |
| static rtx emit_multi_reg_push (unsigned long, unsigned long); |
| static int arm_arg_partial_bytes (cumulative_args_t, machine_mode, |
| tree, bool); |
| static rtx arm_function_arg (cumulative_args_t, machine_mode, |
| const_tree, bool); |
| static void arm_function_arg_advance (cumulative_args_t, machine_mode, |
| const_tree, bool); |
| static unsigned int arm_function_arg_boundary (machine_mode, const_tree); |
| static rtx aapcs_allocate_return_reg (machine_mode, const_tree, |
| const_tree); |
| static rtx aapcs_libcall_value (machine_mode); |
| static int aapcs_select_return_coproc (const_tree, const_tree); |
| |
| #ifdef OBJECT_FORMAT_ELF |
| static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; |
| static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; |
| #endif |
| #ifndef ARM_PE |
| static void arm_encode_section_info (tree, rtx, int); |
| #endif |
| |
| static void arm_file_end (void); |
| static void arm_file_start (void); |
| |
| static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode, |
| tree, int *, int); |
| static bool arm_pass_by_reference (cumulative_args_t, |
| machine_mode, const_tree, bool); |
| static bool arm_promote_prototypes (const_tree); |
| static bool arm_default_short_enums (void); |
| static bool arm_align_anon_bitfield (void); |
| static bool arm_return_in_msb (const_tree); |
| static bool arm_must_pass_in_stack (machine_mode, const_tree); |
| static bool arm_return_in_memory (const_tree, const_tree); |
| #if ARM_UNWIND_INFO |
| static void arm_unwind_emit (FILE *, rtx_insn *); |
| static bool arm_output_ttype (rtx); |
| static void arm_asm_emit_except_personality (rtx); |
| static void arm_asm_init_sections (void); |
| #endif |
| static rtx arm_dwarf_register_span (rtx); |
| |
| static tree arm_cxx_guard_type (void); |
| static bool arm_cxx_guard_mask_bit (void); |
| static tree arm_get_cookie_size (tree); |
| static bool arm_cookie_has_size (void); |
| static bool arm_cxx_cdtor_returns_this (void); |
| static bool arm_cxx_key_method_may_be_inline (void); |
| static void arm_cxx_determine_class_data_visibility (tree); |
| static bool arm_cxx_class_data_always_comdat (void); |
| static bool arm_cxx_use_aeabi_atexit (void); |
| static void arm_init_libfuncs (void); |
| static tree arm_build_builtin_va_list (void); |
| static void arm_expand_builtin_va_start (tree, rtx); |
| static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); |
| static void arm_option_override (void); |
| static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode); |
| static bool arm_macro_fusion_p (void); |
| static bool arm_cannot_copy_insn_p (rtx_insn *); |
| static int arm_issue_rate (void); |
| static int arm_first_cycle_multipass_dfa_lookahead (void); |
| static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int); |
| static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; |
| static bool arm_output_addr_const_extra (FILE *, rtx); |
| static bool arm_allocate_stack_slots_for_args (void); |
| static bool arm_warn_func_return (tree); |
| static const char *arm_invalid_parameter_type (const_tree t); |
| static const char *arm_invalid_return_type (const_tree t); |
| static tree arm_promoted_type (const_tree t); |
| static tree arm_convert_to_type (tree type, tree expr); |
| static bool arm_scalar_mode_supported_p (machine_mode); |
| static bool arm_frame_pointer_required (void); |
| static bool arm_can_eliminate (const int, const int); |
| static void arm_asm_trampoline_template (FILE *); |
| static void arm_trampoline_init (rtx, tree, rtx); |
| static rtx arm_trampoline_adjust_address (rtx); |
| static rtx arm_pic_static_addr (rtx orig, rtx reg); |
| static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *); |
| static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *); |
| static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *); |
| static bool arm_array_mode_supported_p (machine_mode, |
| unsigned HOST_WIDE_INT); |
| static machine_mode arm_preferred_simd_mode (machine_mode); |
| static bool arm_class_likely_spilled_p (reg_class_t); |
| static HOST_WIDE_INT arm_vector_alignment (const_tree type); |
| static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); |
| static bool arm_builtin_support_vector_misalignment (machine_mode mode, |
| const_tree type, |
| int misalignment, |
| bool is_packed); |
| static void arm_conditional_register_usage (void); |
| static reg_class_t arm_preferred_rename_class (reg_class_t rclass); |
| static unsigned int arm_autovectorize_vector_sizes (void); |
| static int arm_default_branch_cost (bool, bool); |
| static int arm_cortex_a5_branch_cost (bool, bool); |
| static int arm_cortex_m_branch_cost (bool, bool); |
| static int arm_cortex_m7_branch_cost (bool, bool); |
| |
| static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode, |
| const unsigned char *sel); |
| |
| static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*); |
| |
| static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, |
| tree vectype, |
| int misalign ATTRIBUTE_UNUSED); |
| static unsigned arm_add_stmt_cost (void *data, int count, |
| enum vect_cost_for_stmt kind, |
| struct _stmt_vec_info *stmt_info, |
| int misalign, |
| enum vect_cost_model_location where); |
| |
| static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, |
| bool op0_preserve_value); |
| static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void); |
| |
| static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*); |
| |
| /* Table of machine attributes. */ |
| static const struct attribute_spec arm_attribute_table[] = |
| { |
| /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, |
| affects_type_identity } */ |
| /* Function calls made to this symbol must be done indirectly, because |
| it may lie outside of the 26 bit addressing range of a normal function |
| call. */ |
| { "long_call", 0, 0, false, true, true, NULL, false }, |
| /* Whereas these functions are always known to reside within the 26 bit |
| addressing range. */ |
| { "short_call", 0, 0, false, true, true, NULL, false }, |
| /* Specify the procedure call conventions for a function. */ |
| { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute, |
| false }, |
| /* Interrupt Service Routines have special prologue and epilogue requirements. */ |
| { "isr", 0, 1, false, false, false, arm_handle_isr_attribute, |
| false }, |
| { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute, |
| false }, |
| { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute, |
| false }, |
| #ifdef ARM_PE |
| /* ARM/PE has three new attributes: |
| interfacearm - ? |
| dllexport - for exporting a function/variable that will live in a dll |
| dllimport - for importing a function/variable from a dll |
| |
| Microsoft allows multiple declspecs in one __declspec, separating |
| them with spaces. We do NOT support this. Instead, use __declspec |
| multiple times. |
| */ |
| { "dllimport", 0, 0, true, false, false, NULL, false }, |
| { "dllexport", 0, 0, true, false, false, NULL, false }, |
| { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute, |
| false }, |
| #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false }, |
| { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false }, |
| { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute, |
| false }, |
| #endif |
| { NULL, 0, 0, false, false, false, NULL, false } |
| }; |
| |
| /* Initialize the GCC target structure. */ |
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| #undef TARGET_MERGE_DECL_ATTRIBUTES |
| #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes |
| #endif |
| |
| #undef TARGET_LEGITIMIZE_ADDRESS |
| #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address |
| |
| #undef TARGET_LRA_P |
| #define TARGET_LRA_P hook_bool_void_true |
| |
| #undef TARGET_ATTRIBUTE_TABLE |
| #define TARGET_ATTRIBUTE_TABLE arm_attribute_table |
| |
| #undef TARGET_ASM_FILE_START |
| #define TARGET_ASM_FILE_START arm_file_start |
| #undef TARGET_ASM_FILE_END |
| #define TARGET_ASM_FILE_END arm_file_end |
| |
| #undef TARGET_ASM_ALIGNED_SI_OP |
| #define TARGET_ASM_ALIGNED_SI_OP NULL |
| #undef TARGET_ASM_INTEGER |
| #define TARGET_ASM_INTEGER arm_assemble_integer |
| |
| #undef TARGET_PRINT_OPERAND |
| #define TARGET_PRINT_OPERAND arm_print_operand |
| #undef TARGET_PRINT_OPERAND_ADDRESS |
| #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address |
| #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P |
| #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p |
| |
| #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA |
| #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra |
| |
| #undef TARGET_ASM_FUNCTION_PROLOGUE |
| #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue |
| |
| #undef TARGET_ASM_FUNCTION_EPILOGUE |
| #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue |
| |
| #undef TARGET_OPTION_OVERRIDE |
| #define TARGET_OPTION_OVERRIDE arm_option_override |
| |
| #undef TARGET_COMP_TYPE_ATTRIBUTES |
| #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes |
| |
| #undef TARGET_SCHED_MACRO_FUSION_P |
| #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p |
| |
| #undef TARGET_SCHED_MACRO_FUSION_PAIR_P |
| #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p |
| |
| #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES |
| #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes |
| |
| #undef TARGET_SCHED_ADJUST_COST |
| #define TARGET_SCHED_ADJUST_COST arm_adjust_cost |
| |
| #undef TARGET_SCHED_REORDER |
| #define TARGET_SCHED_REORDER arm_sched_reorder |
| |
| #undef TARGET_REGISTER_MOVE_COST |
| #define TARGET_REGISTER_MOVE_COST arm_register_move_cost |
| |
| #undef TARGET_MEMORY_MOVE_COST |
| #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost |
| |
| #undef TARGET_ENCODE_SECTION_INFO |
| #ifdef ARM_PE |
| #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info |
| #else |
| #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info |
| #endif |
| |
| #undef TARGET_STRIP_NAME_ENCODING |
| #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding |
| |
| #undef TARGET_ASM_INTERNAL_LABEL |
| #define TARGET_ASM_INTERNAL_LABEL arm_internal_label |
| |
| #undef TARGET_FUNCTION_OK_FOR_SIBCALL |
| #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall |
| |
| #undef TARGET_FUNCTION_VALUE |
| #define TARGET_FUNCTION_VALUE arm_function_value |
| |
| #undef TARGET_LIBCALL_VALUE |
| #define TARGET_LIBCALL_VALUE arm_libcall_value |
| |
| #undef TARGET_FUNCTION_VALUE_REGNO_P |
| #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p |
| |
| #undef TARGET_ASM_OUTPUT_MI_THUNK |
| #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk |
| #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK |
| #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall |
| |
| #undef TARGET_RTX_COSTS |
| #define TARGET_RTX_COSTS arm_rtx_costs |
| #undef TARGET_ADDRESS_COST |
| #define TARGET_ADDRESS_COST arm_address_cost |
| |
| #undef TARGET_SHIFT_TRUNCATION_MASK |
| #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask |
| #undef TARGET_VECTOR_MODE_SUPPORTED_P |
| #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p |
| #undef TARGET_ARRAY_MODE_SUPPORTED_P |
| #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p |
| #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE |
| #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode |
| #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES |
| #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ |
| arm_autovectorize_vector_sizes |
| |
| #undef TARGET_MACHINE_DEPENDENT_REORG |
| #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg |
| |
| #undef TARGET_INIT_BUILTINS |
| #define TARGET_INIT_BUILTINS arm_init_builtins |
| #undef TARGET_EXPAND_BUILTIN |
| #define TARGET_EXPAND_BUILTIN arm_expand_builtin |
| #undef TARGET_BUILTIN_DECL |
| #define TARGET_BUILTIN_DECL arm_builtin_decl |
| |
| #undef TARGET_INIT_LIBFUNCS |
| #define TARGET_INIT_LIBFUNCS arm_init_libfuncs |
| |
| #undef TARGET_PROMOTE_FUNCTION_MODE |
| #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode |
| #undef TARGET_PROMOTE_PROTOTYPES |
| #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes |
| #undef TARGET_PASS_BY_REFERENCE |
| #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference |
| #undef TARGET_ARG_PARTIAL_BYTES |
| #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes |
| #undef TARGET_FUNCTION_ARG |
| #define TARGET_FUNCTION_ARG arm_function_arg |
| #undef TARGET_FUNCTION_ARG_ADVANCE |
| #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance |
| #undef TARGET_FUNCTION_ARG_BOUNDARY |
| #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary |
| |
| #undef TARGET_SETUP_INCOMING_VARARGS |
| #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs |
| |
| #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS |
| #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args |
| |
| #undef TARGET_ASM_TRAMPOLINE_TEMPLATE |
| #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template |
| #undef TARGET_TRAMPOLINE_INIT |
| #define TARGET_TRAMPOLINE_INIT arm_trampoline_init |
| #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS |
| #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address |
| |
| #undef TARGET_WARN_FUNC_RETURN |
| #define TARGET_WARN_FUNC_RETURN arm_warn_func_return |
| |
| #undef TARGET_DEFAULT_SHORT_ENUMS |
| #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums |
| |
| #undef TARGET_ALIGN_ANON_BITFIELD |
| #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield |
| |
| #undef TARGET_NARROW_VOLATILE_BITFIELD |
| #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false |
| |
| #undef TARGET_CXX_GUARD_TYPE |
| #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type |
| |
| #undef TARGET_CXX_GUARD_MASK_BIT |
| #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit |
| |
| #undef TARGET_CXX_GET_COOKIE_SIZE |
| #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size |
| |
| #undef TARGET_CXX_COOKIE_HAS_SIZE |
| #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size |
| |
| #undef TARGET_CXX_CDTOR_RETURNS_THIS |
| #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this |
| |
| #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE |
| #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline |
| |
| #undef TARGET_CXX_USE_AEABI_ATEXIT |
| #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit |
| |
| #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY |
| #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \ |
| arm_cxx_determine_class_data_visibility |
| |
| #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT |
| #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat |
| |
| #undef TARGET_RETURN_IN_MSB |
| #define TARGET_RETURN_IN_MSB arm_return_in_msb |
| |
| #undef TARGET_RETURN_IN_MEMORY |
| #define TARGET_RETURN_IN_MEMORY arm_return_in_memory |
| |
| #undef TARGET_MUST_PASS_IN_STACK |
| #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack |
| |
| #if ARM_UNWIND_INFO |
| #undef TARGET_ASM_UNWIND_EMIT |
| #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit |
| |
| /* EABI unwinding tables use a different format for the typeinfo tables. */ |
| #undef TARGET_ASM_TTYPE |
| #define TARGET_ASM_TTYPE arm_output_ttype |
| |
| #undef TARGET_ARM_EABI_UNWINDER |
| #define TARGET_ARM_EABI_UNWINDER true |
| |
| #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY |
| #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality |
| |
| #undef TARGET_ASM_INIT_SECTIONS |
| #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections |
| #endif /* ARM_UNWIND_INFO */ |
| |
| #undef TARGET_DWARF_REGISTER_SPAN |
| #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span |
| |
| #undef TARGET_CANNOT_COPY_INSN_P |
| #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p |
| |
| #ifdef HAVE_AS_TLS |
| #undef TARGET_HAVE_TLS |
| #define TARGET_HAVE_TLS true |
| #endif |
| |
| #undef TARGET_HAVE_CONDITIONAL_EXECUTION |
| #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution |
| |
| #undef TARGET_LEGITIMATE_CONSTANT_P |
| #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p |
| |
| #undef TARGET_CANNOT_FORCE_CONST_MEM |
| #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem |
| |
| #undef TARGET_MAX_ANCHOR_OFFSET |
| #define TARGET_MAX_ANCHOR_OFFSET 4095 |
| |
| /* The minimum is set such that the total size of the block |
| for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is |
| divisible by eight, ensuring natural spacing of anchors. */ |
| #undef TARGET_MIN_ANCHOR_OFFSET |
| #define TARGET_MIN_ANCHOR_OFFSET -4088 |
| |
| #undef TARGET_SCHED_ISSUE_RATE |
| #define TARGET_SCHED_ISSUE_RATE arm_issue_rate |
| |
| #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD |
| #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ |
| arm_first_cycle_multipass_dfa_lookahead |
| |
| #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD |
| #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \ |
| arm_first_cycle_multipass_dfa_lookahead_guard |
| |
| #undef TARGET_MANGLE_TYPE |
| #define TARGET_MANGLE_TYPE arm_mangle_type |
| |
| #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV |
| #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv |
| |
| #undef TARGET_BUILD_BUILTIN_VA_LIST |
| #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list |
| #undef TARGET_EXPAND_BUILTIN_VA_START |
| #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start |
| #undef TARGET_GIMPLIFY_VA_ARG_EXPR |
| #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr |
| |
| #ifdef HAVE_AS_TLS |
| #undef TARGET_ASM_OUTPUT_DWARF_DTPREL |
| #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel |
| #endif |
| |
| #undef TARGET_LEGITIMATE_ADDRESS_P |
| #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p |
| |
| #undef TARGET_PREFERRED_RELOAD_CLASS |
| #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class |
| |
| #undef TARGET_INVALID_PARAMETER_TYPE |
| #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type |
| |
| #undef TARGET_INVALID_RETURN_TYPE |
| #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type |
| |
| #undef TARGET_PROMOTED_TYPE |
| #define TARGET_PROMOTED_TYPE arm_promoted_type |
| |
| #undef TARGET_CONVERT_TO_TYPE |
| #define TARGET_CONVERT_TO_TYPE arm_convert_to_type |
| |
| #undef TARGET_SCALAR_MODE_SUPPORTED_P |
| #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p |
| |
| #undef TARGET_FRAME_POINTER_REQUIRED |
| #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required |
| |
| #undef TARGET_CAN_ELIMINATE |
| #define TARGET_CAN_ELIMINATE arm_can_eliminate |
| |
| #undef TARGET_CONDITIONAL_REGISTER_USAGE |
| #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage |
| |
| #undef TARGET_CLASS_LIKELY_SPILLED_P |
| #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p |
| |
| #undef TARGET_VECTORIZE_BUILTINS |
| #define TARGET_VECTORIZE_BUILTINS |
| |
| #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION |
| #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ |
| arm_builtin_vectorized_function |
| |
| #undef TARGET_VECTOR_ALIGNMENT |
| #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment |
| |
| #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE |
| #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ |
| arm_vector_alignment_reachable |
| |
| #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT |
| #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ |
| arm_builtin_support_vector_misalignment |
| |
| #undef TARGET_PREFERRED_RENAME_CLASS |
| #define TARGET_PREFERRED_RENAME_CLASS \ |
| arm_preferred_rename_class |
| |
| #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK |
| #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \ |
| arm_vectorize_vec_perm_const_ok |
| |
| #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST |
| #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ |
| arm_builtin_vectorization_cost |
| #undef TARGET_VECTORIZE_ADD_STMT_COST |
| #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost |
| |
| #undef TARGET_CANONICALIZE_COMPARISON |
| #define TARGET_CANONICALIZE_COMPARISON \ |
| arm_canonicalize_comparison |
| |
| #undef TARGET_ASAN_SHADOW_OFFSET |
| #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset |
| |
| #undef MAX_INSN_PER_IT_BLOCK |
| #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4) |
| |
| #undef TARGET_CAN_USE_DOLOOP_P |
| #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost |
| |
| #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P |
| #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p |
| |
| #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS |
| #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true |
| |
| #undef TARGET_SCHED_FUSION_PRIORITY |
| #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority |
| |
| struct gcc_target targetm = TARGET_INITIALIZER; |
| |
| /* Obstack for minipool constant handling. */ |
| static struct obstack minipool_obstack; |
| static char * minipool_startobj; |
| |
| /* The maximum number of insns skipped which |
| will be conditionalised if possible. */ |
| static int max_insns_skipped = 5; |
| |
| extern FILE * asm_out_file; |
| |
| /* True if we are currently building a constant table. */ |
| int making_const_table; |
| |
| /* The processor for which instructions should be scheduled. */ |
| enum processor_type arm_tune = arm_none; |
| |
| /* The current tuning set. */ |
| const struct tune_params *current_tune; |
| |
| /* Which floating point hardware to schedule for. */ |
| int arm_fpu_attr; |
| |
| /* Which floating point hardware to use. */ |
| const struct arm_fpu_desc *arm_fpu_desc; |
| |
| /* Used for Thumb call_via trampolines. */ |
| rtx thumb_call_via_label[14]; |
| static int thumb_call_reg_needed; |
| |
| /* The bits in this mask specify which |
| instructions we are allowed to generate. */ |
| unsigned long insn_flags = 0; |
| |
| /* The bits in this mask specify which instruction scheduling options should |
| be used. */ |
| unsigned long tune_flags = 0; |
| |
| /* The highest ARM architecture version supported by the |
| target. */ |
| enum base_architecture arm_base_arch = BASE_ARCH_0; |
| |
| /* The following are used in the arm.md file as equivalents to bits |
| in the above two flag variables. */ |
| |
| /* Nonzero if this chip supports the ARM Architecture 3M extensions. */ |
| int arm_arch3m = 0; |
| |
| /* Nonzero if this chip supports the ARM Architecture 4 extensions. */ |
| int arm_arch4 = 0; |
| |
| /* Nonzero if this chip supports the ARM Architecture 4t extensions. */ |
| int arm_arch4t = 0; |
| |
| /* Nonzero if this chip supports the ARM Architecture 5 extensions. */ |
| int arm_arch5 = 0; |
| |
| /* Nonzero if this chip supports the ARM Architecture 5E extensions. */ |
| int arm_arch5e = 0; |
| |
| /* Nonzero if this chip supports the ARM Architecture 6 extensions. */ |
| int arm_arch6 = 0; |
| |
| /* Nonzero if this chip supports the ARM 6K extensions. */ |
| int arm_arch6k = 0; |
| |
| /* Nonzero if instructions present in ARMv6-M can be used. */ |
| int arm_arch6m = 0; |
| |
| /* Nonzero if this chip supports the ARM 7 extensions. */ |
| int arm_arch7 = 0; |
| |
| /* Nonzero if instructions not present in the 'M' profile can be used. */ |
| int arm_arch_notm = 0; |
| |
| /* Nonzero if instructions present in ARMv7E-M can be used. */ |
| int arm_arch7em = 0; |
| |
| /* Nonzero if instructions present in ARMv8 can be used. */ |
| int arm_arch8 = 0; |
| |
| /* Nonzero if this chip can benefit from load scheduling. */ |
| int arm_ld_sched = 0; |
| |
| /* Nonzero if this chip is a StrongARM. */ |
| int arm_tune_strongarm = 0; |
| |
| /* Nonzero if this chip supports Intel Wireless MMX technology. */ |
| int arm_arch_iwmmxt = 0; |
| |
| /* Nonzero if this chip supports Intel Wireless MMX2 technology. */ |
| int arm_arch_iwmmxt2 = 0; |
| |
| /* Nonzero if this chip is an XScale. */ |
| int arm_arch_xscale = 0; |
| |
| /* Nonzero if tuning for XScale. */ |
| int arm_tune_xscale = 0; |
| |
| /* Nonzero if we want to tune for stores that access the write-buffer. |
| This typically means an ARM6 or ARM7 with MMU or MPU. */ |
| int arm_tune_wbuf = 0; |
| |
| /* Nonzero if tuning for Cortex-A9. */ |
| int arm_tune_cortex_a9 = 0; |
| |
| /* Nonzero if generating Thumb instructions. */ |
| int thumb_code = 0; |
| |
| /* Nonzero if generating Thumb-1 instructions. */ |
| int thumb1_code = 0; |
| |
| /* Nonzero if we should define __THUMB_INTERWORK__ in the |
| preprocessor. |
| XXX This is a bit of a hack, it's intended to help work around |
| problems in GLD which doesn't understand that armv5t code is |
| interworking clean. */ |
| int arm_cpp_interwork = 0; |
| |
| /* Nonzero if chip supports Thumb 2. */ |
| int arm_arch_thumb2; |
| |
| /* Nonzero if chip supports integer division instruction. */ |
| int arm_arch_arm_hwdiv; |
| int arm_arch_thumb_hwdiv; |
| |
| /* Nonzero if this chip supports the Large Physical Address Extension. */ |
| int arm_arch_lpae; |
| |
| /* Nonzero if chip disallows volatile memory access in IT block. */ |
| int arm_arch_no_volatile_ce; |
| |
| /* Nonzero if we should use Neon to handle 64-bit operations rather |
| than core registers. */ |
| int prefer_neon_for_64bits = 0; |
| |
| /* Nonzero if we shouldn't use literal pools. */ |
| bool arm_disable_literal_pool = false; |
| |
| /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, |
| we must report the mode of the memory reference from |
| TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */ |
| machine_mode output_memory_reference_mode; |
| |
| /* The register number to be used for the PIC offset register. */ |
| unsigned arm_pic_register = INVALID_REGNUM; |
| |
| enum arm_pcs arm_pcs_default; |
| |
| /* For an explanation of these variables, see final_prescan_insn below. */ |
| int arm_ccfsm_state; |
| /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ |
| enum arm_cond_code arm_current_cc; |
| |
| rtx arm_target_insn; |
| int arm_target_label; |
| /* The number of conditionally executed insns, including the current insn. */ |
| int arm_condexec_count = 0; |
| /* A bitmask specifying the patterns for the IT block. |
| Zero means do not output an IT block before this insn. */ |
| int arm_condexec_mask = 0; |
| /* The number of bits used in arm_condexec_mask. */ |
| int arm_condexec_masklen = 0; |
| |
| /* Nonzero if chip supports the ARMv8 CRC instructions. */ |
| int arm_arch_crc = 0; |
| |
| /* Nonzero if the core has a very small, high-latency, multiply unit. */ |
| int arm_m_profile_small_mul = 0; |
| |
| /* The condition codes of the ARM, and the inverse function. */ |
| static const char * const arm_condition_codes[] = |
| { |
| "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", |
| "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" |
| }; |
| |
| /* The register numbers in sequence, for passing to arm_gen_load_multiple. */ |
| int arm_regs_in_sequence[] = |
| { |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 |
| }; |
| |
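| /* Assembler mnemonic used for a left shift: "lsl" in unified syntax, |
| "asl" in the older divided syntax. */ |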
| #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl") |
| #define streq(string1, string2) (strcmp (string1, string2) == 0) |
| |
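| /* The set of low registers (r0-r7) available as work registers in |
| Thumb-2 code, excluding the frame pointer, stack pointer, PC and the |
| PIC register. */ |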
| #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \ |
| | (1 << SP_REGNUM) | (1 << PC_REGNUM) \ |
| | (1 << PIC_OFFSET_TABLE_REGNUM))) |
| |
| /* Initialization code. */ |
| |
| struct processors |
| { |
| const char *const name; |
| enum processor_type core; |
| const char *arch; |
| enum base_architecture base_arch; |
| const unsigned long flags; |
| const struct tune_params *const tune; |
| }; |
| |
| |
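| /* Shorthand initializers for the prefetch-related tuning fields: number |
| of prefetch slots, L1 cache size and L1 cache line size. */ |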
| #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1 |
| #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \ |
| prefetch_slots, \ |
| l1_size, \ |
| l1_line_size |
| |
| /* arm generic vectorizer costs. */ |
| static const |
| struct cpu_vec_costs arm_default_vec_cost = { |
| 1, /* scalar_stmt_cost. */ |
| 1, /* scalar load_cost. */ |
| 1, /* scalar_store_cost. */ |
| 1, /* vec_stmt_cost. */ |
| 1, /* vec_to_scalar_cost. */ |
| 1, /* scalar_to_vec_cost. */ |
| 1, /* vec_align_load_cost. */ |
| 1, /* vec_unalign_load_cost. */ |
| 1, /* vec_unalign_store_cost. */ |
| 1, /* vec_store_cost. */ |
| 3, /* cond_taken_branch_cost. */ |
| 1, /* cond_not_taken_branch_cost. */ |
| }; |
| |
| /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */ |
| #include "aarch-cost-tables.h" |
| |
| |
| |
| const struct cpu_cost_table cortexa9_extra_costs = |
| { |
| /* ALU */ |
| { |
| 0, /* arith. */ |
| 0, /* logical. */ |
| 0, /* shift. */ |
| COSTS_N_INSNS (1), /* shift_reg. */ |
| COSTS_N_INSNS (1), /* arith_shift. */ |
| COSTS_N_INSNS (2), /* arith_shift_reg. */ |
| 0, /* log_shift. */ |
| COSTS_N_INSNS (1), /* log_shift_reg. */ |
| COSTS_N_INSNS (1), /* extend. */ |
| COSTS_N_INSNS (2), /* extend_arith. */ |
| COSTS_N_INSNS (1), /* bfi. */ |
| COSTS_N_INSNS (1), /* bfx. */ |
| 0, /* clz. */ |
| 0, /* rev. */ |
| 0, /* non_exec. */ |
| true /* non_exec_costs_exec. */ |
| }, |
| { |
| /* MULT SImode */ |
| { |
| COSTS_N_INSNS (3), /* simple. */ |
| COSTS_N_INSNS (3), /* flag_setting. */ |
| COSTS_N_INSNS (2), /* extend. */ |
| COSTS_N_INSNS (3), /* add. */ |
| COSTS_N_INSNS (2), /* extend_add. */ |
| COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */ |
| }, |
| /* MULT DImode */ |
| { |
| 0, /* simple (N/A). */ |
| 0, /* flag_setting (N/A). */ |
| COSTS_N_INSNS (4), /* extend. */ |
| 0, /* add (N/A). */ |
| COSTS_N_INSNS (4), /* extend_add. */ |
| 0 /* idiv (N/A). */ |
| } |
| }, |
| /* LD/ST */ |
| { |
| COSTS_N_INSNS (2), /* load. */ |
| COSTS_N_INSNS (2), /* load_sign_extend. */ |
| COSTS_N_INSNS (2), /* ldrd. */ |
| COSTS_N_INSNS (2), /* ldm_1st. */ |
| 1, /* ldm_regs_per_insn_1st. */ |
| 2, /* ldm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (5), /* loadf. */ |
| COSTS_N_INSNS (5), /* loadd. */ |
| COSTS_N_INSNS (1), /* load_unaligned. */ |
| COSTS_N_INSNS (2), /* store. */ |
| COSTS_N_INSNS (2), /* strd. */ |
| COSTS_N_INSNS (2), /* stm_1st. */ |
| 1, /* stm_regs_per_insn_1st. */ |
| 2, /* stm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (1), /* storef. */ |
| COSTS_N_INSNS (1), /* stored. */ |
| COSTS_N_INSNS (1) /* store_unaligned. */ |
| }, |
| { |
| /* FP SFmode */ |
| { |
| COSTS_N_INSNS (14), /* div. */ |
| COSTS_N_INSNS (4), /* mult. */ |
| COSTS_N_INSNS (7), /* mult_addsub. */ |
| COSTS_N_INSNS (30), /* fma. */ |
| COSTS_N_INSNS (3), /* addsub. */ |
| COSTS_N_INSNS (1), /* fpconst. */ |
| COSTS_N_INSNS (1), /* neg. */ |
| COSTS_N_INSNS (3), /* compare. */ |
| COSTS_N_INSNS (3), /* widen. */ |
| COSTS_N_INSNS (3), /* narrow. */ |
| COSTS_N_INSNS (3), /* toint. */ |
| COSTS_N_INSNS (3), /* fromint. */ |
| COSTS_N_INSNS (3) /* roundint. */ |
| }, |
| /* FP DFmode */ |
| { |
| COSTS_N_INSNS (24), /* div. */ |
| COSTS_N_INSNS (5), /* mult. */ |
| COSTS_N_INSNS (8), /* mult_addsub. */ |
| COSTS_N_INSNS (30), /* fma. */ |
| COSTS_N_INSNS (3), /* addsub. */ |
| COSTS_N_INSNS (1), /* fpconst. */ |
| COSTS_N_INSNS (1), /* neg. */ |
| COSTS_N_INSNS (3), /* compare. */ |
| COSTS_N_INSNS (3), /* widen. */ |
| COSTS_N_INSNS (3), /* narrow. */ |
| COSTS_N_INSNS (3), /* toint. */ |
| COSTS_N_INSNS (3), /* fromint. */ |
| COSTS_N_INSNS (3) /* roundint. */ |
| } |
| }, |
| /* Vector */ |
| { |
| COSTS_N_INSNS (1) /* alu. */ |
| } |
| }; |
| |
| const struct cpu_cost_table cortexa8_extra_costs = |
| { |
| /* ALU */ |
| { |
| 0, /* arith. */ |
| 0, /* logical. */ |
| COSTS_N_INSNS (1), /* shift. */ |
| 0, /* shift_reg. */ |
| COSTS_N_INSNS (1), /* arith_shift. */ |
| 0, /* arith_shift_reg. */ |
| COSTS_N_INSNS (1), /* log_shift. */ |
| 0, /* log_shift_reg. */ |
| 0, /* extend. */ |
| 0, /* extend_arith. */ |
| 0, /* bfi. */ |
| 0, /* bfx. */ |
| 0, /* clz. */ |
| 0, /* rev. */ |
| 0, /* non_exec. */ |
| true /* non_exec_costs_exec. */ |
| }, |
| { |
| /* MULT SImode */ |
| { |
| COSTS_N_INSNS (1), /* simple. */ |
| COSTS_N_INSNS (1), /* flag_setting. */ |
| COSTS_N_INSNS (1), /* extend. */ |
| COSTS_N_INSNS (1), /* add. */ |
| COSTS_N_INSNS (1), /* extend_add. */ |
| COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */ |
| }, |
| /* MULT DImode */ |
| { |
| 0, /* simple (N/A). */ |
| 0, /* flag_setting (N/A). */ |
| COSTS_N_INSNS (2), /* extend. */ |
| 0, /* add (N/A). */ |
| COSTS_N_INSNS (2), /* extend_add. */ |
| 0 /* idiv (N/A). */ |
| } |
| }, |
| /* LD/ST */ |
| { |
| COSTS_N_INSNS (1), /* load. */ |
| COSTS_N_INSNS (1), /* load_sign_extend. */ |
| COSTS_N_INSNS (1), /* ldrd. */ |
| COSTS_N_INSNS (1), /* ldm_1st. */ |
| 1, /* ldm_regs_per_insn_1st. */ |
| 2, /* ldm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (1), /* loadf. */ |
| COSTS_N_INSNS (1), /* loadd. */ |
| COSTS_N_INSNS (1), /* load_unaligned. */ |
| COSTS_N_INSNS (1), /* store. */ |
| COSTS_N_INSNS (1), /* strd. */ |
| COSTS_N_INSNS (1), /* stm_1st. */ |
| 1, /* stm_regs_per_insn_1st. */ |
| 2, /* stm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (1), /* storef. */ |
| COSTS_N_INSNS (1), /* stored. */ |
| COSTS_N_INSNS (1) /* store_unaligned. */ |
| }, |
| { |
| /* FP SFmode */ |
| { |
| COSTS_N_INSNS (36), /* div. */ |
| COSTS_N_INSNS (11), /* mult. */ |
| COSTS_N_INSNS (20), /* mult_addsub. */ |
| COSTS_N_INSNS (30), /* fma. */ |
| COSTS_N_INSNS (9), /* addsub. */ |
| COSTS_N_INSNS (3), /* fpconst. */ |
| COSTS_N_INSNS (3), /* neg. */ |
| COSTS_N_INSNS (6), /* compare. */ |
| COSTS_N_INSNS (4), /* widen. */ |
| COSTS_N_INSNS (4), /* narrow. */ |
| COSTS_N_INSNS (8), /* toint. */ |
| COSTS_N_INSNS (8), /* fromint. */ |
| COSTS_N_INSNS (8) /* roundint. */ |
| }, |
| /* FP DFmode */ |
| { |
| COSTS_N_INSNS (64), /* div. */ |
| COSTS_N_INSNS (16), /* mult. */ |
| COSTS_N_INSNS (25), /* mult_addsub. */ |
| COSTS_N_INSNS (30), /* fma. */ |
| COSTS_N_INSNS (9), /* addsub. */ |
| COSTS_N_INSNS (3), /* fpconst. */ |
| COSTS_N_INSNS (3), /* neg. */ |
| COSTS_N_INSNS (6), /* compare. */ |
| COSTS_N_INSNS (6), /* widen. */ |
| COSTS_N_INSNS (6), /* narrow. */ |
| COSTS_N_INSNS (8), /* toint. */ |
| COSTS_N_INSNS (8), /* fromint. */ |
| COSTS_N_INSNS (8) /* roundint. */ |
| } |
| }, |
| /* Vector */ |
| { |
| COSTS_N_INSNS (1) /* alu. */ |
| } |
| }; |
| |
| const struct cpu_cost_table cortexa5_extra_costs = |
| { |
| /* ALU */ |
| { |
| 0, /* arith. */ |
| 0, /* logical. */ |
| COSTS_N_INSNS (1), /* shift. */ |
| COSTS_N_INSNS (1), /* shift_reg. */ |
| COSTS_N_INSNS (1), /* arith_shift. */ |
| COSTS_N_INSNS (1), /* arith_shift_reg. */ |
| COSTS_N_INSNS (1), /* log_shift. */ |
| COSTS_N_INSNS (1), /* log_shift_reg. */ |
| COSTS_N_INSNS (1), /* extend. */ |
| COSTS_N_INSNS (1), /* extend_arith. */ |
| COSTS_N_INSNS (1), /* bfi. */ |
| COSTS_N_INSNS (1), /* bfx. */ |
| COSTS_N_INSNS (1), /* clz. */ |
| COSTS_N_INSNS (1), /* rev. */ |
| 0, /* non_exec. */ |
| true /* non_exec_costs_exec. */ |
| }, |
| |
| { |
| /* MULT SImode */ |
| { |
| 0, /* simple. */ |
| COSTS_N_INSNS (1), /* flag_setting. */ |
| COSTS_N_INSNS (1), /* extend. */ |
| COSTS_N_INSNS (1), /* add. */ |
| COSTS_N_INSNS (1), /* extend_add. */ |
| COSTS_N_INSNS (7) /* idiv. */ |
| }, |
| /* MULT DImode */ |
| { |
| 0, /* simple (N/A). */ |
| 0, /* flag_setting (N/A). */ |
| COSTS_N_INSNS (1), /* extend. */ |
| 0, /* add. */ |
| COSTS_N_INSNS (2), /* extend_add. */ |
| 0 /* idiv (N/A). */ |
| } |
| }, |
| /* LD/ST */ |
| { |
| COSTS_N_INSNS (1), /* load. */ |
| COSTS_N_INSNS (1), /* load_sign_extend. */ |
| COSTS_N_INSNS (6), /* ldrd. */ |
| COSTS_N_INSNS (1), /* ldm_1st. */ |
| 1, /* ldm_regs_per_insn_1st. */ |
| 2, /* ldm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (2), /* loadf. */ |
| COSTS_N_INSNS (4), /* loadd. */ |
| COSTS_N_INSNS (1), /* load_unaligned. */ |
| COSTS_N_INSNS (1), /* store. */ |
| COSTS_N_INSNS (3), /* strd. */ |
| COSTS_N_INSNS (1), /* stm_1st. */ |
| 1, /* stm_regs_per_insn_1st. */ |
| 2, /* stm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (2), /* storef. */ |
| COSTS_N_INSNS (2), /* stored. */ |
| COSTS_N_INSNS (1) /* store_unaligned. */ |
| }, |
| { |
| /* FP SFmode */ |
| { |
| COSTS_N_INSNS (15), /* div. */ |
| COSTS_N_INSNS (3), /* mult. */ |
| COSTS_N_INSNS (7), /* mult_addsub. */ |
| COSTS_N_INSNS (7), /* fma. */ |
| COSTS_N_INSNS (3), /* addsub. */ |
| COSTS_N_INSNS (3), /* fpconst. */ |
| COSTS_N_INSNS (3), /* neg. */ |
| COSTS_N_INSNS (3), /* compare. */ |
| COSTS_N_INSNS (3), /* widen. */ |
| COSTS_N_INSNS (3), /* narrow. */ |
| COSTS_N_INSNS (3), /* toint. */ |
| COSTS_N_INSNS (3), /* fromint. */ |
| COSTS_N_INSNS (3) /* roundint. */ |
| }, |
| /* FP DFmode */ |
| { |
| COSTS_N_INSNS (30), /* div. */ |
| COSTS_N_INSNS (6), /* mult. */ |
| COSTS_N_INSNS (10), /* mult_addsub. */ |
| COSTS_N_INSNS (7), /* fma. */ |
| COSTS_N_INSNS (3), /* addsub. */ |
| COSTS_N_INSNS (3), /* fpconst. */ |
| COSTS_N_INSNS (3), /* neg. */ |
| COSTS_N_INSNS (3), /* compare. */ |
| COSTS_N_INSNS (3), /* widen. */ |
| COSTS_N_INSNS (3), /* narrow. */ |
| COSTS_N_INSNS (3), /* toint. */ |
| COSTS_N_INSNS (3), /* fromint. */ |
| COSTS_N_INSNS (3) /* roundint. */ |
| } |
| }, |
| /* Vector */ |
| { |
| COSTS_N_INSNS (1) /* alu. */ |
| } |
| }; |
| |
| |
| const struct cpu_cost_table cortexa7_extra_costs = |
| { |
| /* ALU */ |
| { |
| 0, /* arith. */ |
| 0, /* logical. */ |
| COSTS_N_INSNS (1), /* shift. */ |
| COSTS_N_INSNS (1), /* shift_reg. */ |
| COSTS_N_INSNS (1), /* arith_shift. */ |
| COSTS_N_INSNS (1), /* arith_shift_reg. */ |
| COSTS_N_INSNS (1), /* log_shift. */ |
| COSTS_N_INSNS (1), /* log_shift_reg. */ |
| COSTS_N_INSNS (1), /* extend. */ |
| COSTS_N_INSNS (1), /* extend_arith. */ |
| COSTS_N_INSNS (1), /* bfi. */ |
| COSTS_N_INSNS (1), /* bfx. */ |
| COSTS_N_INSNS (1), /* clz. */ |
| COSTS_N_INSNS (1), /* rev. */ |
| 0, /* non_exec. */ |
| true /* non_exec_costs_exec. */ |
| }, |
| |
| { |
| /* MULT SImode */ |
| { |
| 0, /* simple. */ |
| COSTS_N_INSNS (1), /* flag_setting. */ |
| COSTS_N_INSNS (1), /* extend. */ |
| COSTS_N_INSNS (1), /* add. */ |
| COSTS_N_INSNS (1), /* extend_add. */ |
| COSTS_N_INSNS (7) /* idiv. */ |
| }, |
| /* MULT DImode */ |
| { |
| 0, /* simple (N/A). */ |
| 0, /* flag_setting (N/A). */ |
| COSTS_N_INSNS (1), /* extend. */ |
| 0, /* add. */ |
| COSTS_N_INSNS (2), /* extend_add. */ |
| 0 /* idiv (N/A). */ |
| } |
| }, |
| /* LD/ST */ |
| { |
| COSTS_N_INSNS (1), /* load. */ |
| COSTS_N_INSNS (1), /* load_sign_extend. */ |
| COSTS_N_INSNS (3), /* ldrd. */ |
| COSTS_N_INSNS (1), /* ldm_1st. */ |
| 1, /* ldm_regs_per_insn_1st. */ |
| 2, /* ldm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (2), /* loadf. */ |
| COSTS_N_INSNS (2), /* loadd. */ |
| COSTS_N_INSNS (1), /* load_unaligned. */ |
| COSTS_N_INSNS (1), /* store. */ |
| COSTS_N_INSNS (3), /* strd. */ |
| COSTS_N_INSNS (1), /* stm_1st. */ |
| 1, /* stm_regs_per_insn_1st. */ |
| 2, /* stm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (2), /* storef. */ |
| COSTS_N_INSNS (2), /* stored. */ |
| COSTS_N_INSNS (1) /* store_unaligned. */ |
| }, |
| { |
| /* FP SFmode */ |
| { |
| COSTS_N_INSNS (15), /* div. */ |
| COSTS_N_INSNS (3), /* mult. */ |
| COSTS_N_INSNS (7), /* mult_addsub. */ |
| COSTS_N_INSNS (7), /* fma. */ |
| COSTS_N_INSNS (3), /* addsub. */ |
| COSTS_N_INSNS (3), /* fpconst. */ |
| COSTS_N_INSNS (3), /* neg. */ |
| COSTS_N_INSNS (3), /* compare. */ |
| COSTS_N_INSNS (3), /* widen. */ |
| COSTS_N_INSNS (3), /* narrow. */ |
| COSTS_N_INSNS (3), /* toint. */ |
| COSTS_N_INSNS (3), /* fromint. */ |
| COSTS_N_INSNS (3) /* roundint. */ |
| }, |
| /* FP DFmode */ |
| { |
| COSTS_N_INSNS (30), /* div. */ |
| COSTS_N_INSNS (6), /* mult. */ |
| COSTS_N_INSNS (10), /* mult_addsub. */ |
| COSTS_N_INSNS (7), /* fma. */ |
| COSTS_N_INSNS (3), /* addsub. */ |
| COSTS_N_INSNS (3), /* fpconst. */ |
| COSTS_N_INSNS (3), /* neg. */ |
| COSTS_N_INSNS (3), /* compare. */ |
| COSTS_N_INSNS (3), /* widen. */ |
| COSTS_N_INSNS (3), /* narrow. */ |
| COSTS_N_INSNS (3), /* toint. */ |
| COSTS_N_INSNS (3), /* fromint. */ |
| COSTS_N_INSNS (3) /* roundint. */ |
| } |
| }, |
| /* Vector */ |
| { |
| COSTS_N_INSNS (1) /* alu. */ |
| } |
| }; |
| |
| const struct cpu_cost_table cortexa12_extra_costs = |
| { |
| /* ALU */ |
| { |
| 0, /* arith. */ |
| 0, /* logical. */ |
| 0, /* shift. */ |
| COSTS_N_INSNS (1), /* shift_reg. */ |
| COSTS_N_INSNS (1), /* arith_shift. */ |
| COSTS_N_INSNS (1), /* arith_shift_reg. */ |
| COSTS_N_INSNS (1), /* log_shift. */ |
| COSTS_N_INSNS (1), /* log_shift_reg. */ |
| 0, /* extend. */ |
| COSTS_N_INSNS (1), /* extend_arith. */ |
| 0, /* bfi. */ |
| COSTS_N_INSNS (1), /* bfx. */ |
| COSTS_N_INSNS (1), /* clz. */ |
| COSTS_N_INSNS (1), /* rev. */ |
| 0, /* non_exec. */ |
| true /* non_exec_costs_exec. */ |
| }, |
| /* MULT SImode */ |
| { |
| { |
| COSTS_N_INSNS (2), /* simple. */ |
| COSTS_N_INSNS (3), /* flag_setting. */ |
| COSTS_N_INSNS (2), /* extend. */ |
| COSTS_N_INSNS (3), /* add. */ |
| COSTS_N_INSNS (2), /* extend_add. */ |
| COSTS_N_INSNS (18) /* idiv. */ |
| }, |
| /* MULT DImode */ |
| { |
| 0, /* simple (N/A). */ |
| 0, /* flag_setting (N/A). */ |
| COSTS_N_INSNS (3), /* extend. */ |
| 0, /* add (N/A). */ |
| COSTS_N_INSNS (3), /* extend_add. */ |
| 0 /* idiv (N/A). */ |
| } |
| }, |
| /* LD/ST */ |
| { |
| COSTS_N_INSNS (3), /* load. */ |
| COSTS_N_INSNS (3), /* load_sign_extend. */ |
| COSTS_N_INSNS (3), /* ldrd. */ |
| COSTS_N_INSNS (3), /* ldm_1st. */ |
| 1, /* ldm_regs_per_insn_1st. */ |
| 2, /* ldm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (3), /* loadf. */ |
| COSTS_N_INSNS (3), /* loadd. */ |
| 0, /* load_unaligned. */ |
| 0, /* store. */ |
| 0, /* strd. */ |
| 0, /* stm_1st. */ |
| 1, /* stm_regs_per_insn_1st. */ |
| 2, /* stm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (2), /* storef. */ |
| COSTS_N_INSNS (2), /* stored. */ |
| 0 /* store_unaligned. */ |
| }, |
| { |
| /* FP SFmode */ |
| { |
| COSTS_N_INSNS (17), /* div. */ |
| COSTS_N_INSNS (4), /* mult. */ |
| COSTS_N_INSNS (8), /* mult_addsub. */ |
| COSTS_N_INSNS (8), /* fma. */ |
| COSTS_N_INSNS (4), /* addsub. */ |
| COSTS_N_INSNS (2), /* fpconst. */ |
| COSTS_N_INSNS (2), /* neg. */ |
| COSTS_N_INSNS (2), /* compare. */ |
| COSTS_N_INSNS (4), /* widen. */ |
| COSTS_N_INSNS (4), /* narrow. */ |
| COSTS_N_INSNS (4), /* toint. */ |
| COSTS_N_INSNS (4), /* fromint. */ |
| COSTS_N_INSNS (4) /* roundint. */ |
| }, |
| /* FP DFmode */ |
| { |
| COSTS_N_INSNS (31), /* div. */ |
| COSTS_N_INSNS (4), /* mult. */ |
| COSTS_N_INSNS (8), /* mult_addsub. */ |
| COSTS_N_INSNS (8), /* fma. */ |
| COSTS_N_INSNS (4), /* addsub. */ |
| COSTS_N_INSNS (2), /* fpconst. */ |
| COSTS_N_INSNS (2), /* neg. */ |
| COSTS_N_INSNS (2), /* compare. */ |
| COSTS_N_INSNS (4), /* widen. */ |
| COSTS_N_INSNS (4), /* narrow. */ |
| COSTS_N_INSNS (4), /* toint. */ |
| COSTS_N_INSNS (4), /* fromint. */ |
| COSTS_N_INSNS (4) /* roundint. */ |
| } |
| }, |
| /* Vector */ |
| { |
| COSTS_N_INSNS (1) /* alu. */ |
| } |
| }; |
| |
| const struct cpu_cost_table cortexa15_extra_costs = |
| { |
| /* ALU */ |
| { |
| 0, /* arith. */ |
| 0, /* logical. */ |
| 0, /* shift. */ |
| 0, /* shift_reg. */ |
| COSTS_N_INSNS (1), /* arith_shift. */ |
| COSTS_N_INSNS (1), /* arith_shift_reg. */ |
| COSTS_N_INSNS (1), /* log_shift. */ |
| COSTS_N_INSNS (1), /* log_shift_reg. */ |
| 0, /* extend. */ |
| COSTS_N_INSNS (1), /* extend_arith. */ |
| COSTS_N_INSNS (1), /* bfi. */ |
| 0, /* bfx. */ |
| 0, /* clz. */ |
| 0, /* rev. */ |
| 0, /* non_exec. */ |
| true /* non_exec_costs_exec. */ |
| }, |
| /* MULT SImode */ |
| { |
| { |
| COSTS_N_INSNS (2), /* simple. */ |
| COSTS_N_INSNS (3), /* flag_setting. */ |
| COSTS_N_INSNS (2), /* extend. */ |
| COSTS_N_INSNS (2), /* add. */ |
| COSTS_N_INSNS (2), /* extend_add. */ |
| COSTS_N_INSNS (18) /* idiv. */ |
| }, |
| /* MULT DImode */ |
| { |
| 0, /* simple (N/A). */ |
| 0, /* flag_setting (N/A). */ |
| COSTS_N_INSNS (3), /* extend. */ |
| 0, /* add (N/A). */ |
| COSTS_N_INSNS (3), /* extend_add. */ |
| 0 /* idiv (N/A). */ |
| } |
| }, |
| /* LD/ST */ |
| { |
| COSTS_N_INSNS (3), /* load. */ |
| COSTS_N_INSNS (3), /* load_sign_extend. */ |
| COSTS_N_INSNS (3), /* ldrd. */ |
| COSTS_N_INSNS (4), /* ldm_1st. */ |
| 1, /* ldm_regs_per_insn_1st. */ |
| 2, /* ldm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (4), /* loadf. */ |
| COSTS_N_INSNS (4), /* loadd. */ |
| 0, /* load_unaligned. */ |
| 0, /* store. */ |
| 0, /* strd. */ |
| COSTS_N_INSNS (1), /* stm_1st. */ |
| 1, /* stm_regs_per_insn_1st. */ |
| 2, /* stm_regs_per_insn_subsequent. */ |
| 0, /* storef. */ |
| 0, /* stored. */ |
| 0 /* store_unaligned. */ |
| }, |
| { |
| /* FP SFmode */ |
| { |
| COSTS_N_INSNS (17), /* div. */ |
| COSTS_N_INSNS (4), /* mult. */ |
| COSTS_N_INSNS (8), /* mult_addsub. */ |
| COSTS_N_INSNS (8), /* fma. */ |
| COSTS_N_INSNS (4), /* addsub. */ |
| COSTS_N_INSNS (2), /* fpconst. */ |
| COSTS_N_INSNS (2), /* neg. */ |
| COSTS_N_INSNS (5), /* compare. */ |
| COSTS_N_INSNS (4), /* widen. */ |
| COSTS_N_INSNS (4), /* narrow. */ |
| COSTS_N_INSNS (4), /* toint. */ |
| COSTS_N_INSNS (4), /* fromint. */ |
| COSTS_N_INSNS (4) /* roundint. */ |
| }, |
| /* FP DFmode */ |
| { |
| COSTS_N_INSNS (31), /* div. */ |
| COSTS_N_INSNS (4), /* mult. */ |
| COSTS_N_INSNS (8), /* mult_addsub. */ |
| COSTS_N_INSNS (8), /* fma. */ |
| COSTS_N_INSNS (4), /* addsub. */ |
| COSTS_N_INSNS (2), /* fpconst. */ |
| COSTS_N_INSNS (2), /* neg. */ |
| COSTS_N_INSNS (2), /* compare. */ |
| COSTS_N_INSNS (4), /* widen. */ |
| COSTS_N_INSNS (4), /* narrow. */ |
| COSTS_N_INSNS (4), /* toint. */ |
| COSTS_N_INSNS (4), /* fromint. */ |
| COSTS_N_INSNS (4) /* roundint. */ |
| } |
| }, |
| /* Vector */ |
| { |
| COSTS_N_INSNS (1) /* alu. */ |
| } |
| }; |
| |
| const struct cpu_cost_table v7m_extra_costs = |
| { |
| /* ALU */ |
| { |
| 0, /* arith. */ |
| 0, /* logical. */ |
| 0, /* shift. */ |
| 0, /* shift_reg. */ |
| 0, /* arith_shift. */ |
| COSTS_N_INSNS (1), /* arith_shift_reg. */ |
| 0, /* log_shift. */ |
| COSTS_N_INSNS (1), /* log_shift_reg. */ |
| 0, /* extend. */ |
| COSTS_N_INSNS (1), /* extend_arith. */ |
| 0, /* bfi. */ |
| 0, /* bfx. */ |
| 0, /* clz. */ |
| 0, /* rev. */ |
| COSTS_N_INSNS (1), /* non_exec. */ |
| false /* non_exec_costs_exec. */ |
| }, |
| { |
| /* MULT SImode */ |
| { |
| COSTS_N_INSNS (1), /* simple. */ |
| COSTS_N_INSNS (1), /* flag_setting. */ |
| COSTS_N_INSNS (2), /* extend. */ |
| COSTS_N_INSNS (1), /* add. */ |
| COSTS_N_INSNS (3), /* extend_add. */ |
| COSTS_N_INSNS (8) /* idiv. */ |
| }, |
| /* MULT DImode */ |
| { |
| 0, /* simple (N/A). */ |
| 0, /* flag_setting (N/A). */ |
| COSTS_N_INSNS (2), /* extend. */ |
| 0, /* add (N/A). */ |
| COSTS_N_INSNS (3), /* extend_add. */ |
| 0 /* idiv (N/A). */ |
| } |
| }, |
| /* LD/ST */ |
| { |
| COSTS_N_INSNS (2), /* load. */ |
| 0, /* load_sign_extend. */ |
| COSTS_N_INSNS (3), /* ldrd. */ |
| COSTS_N_INSNS (2), /* ldm_1st. */ |
| 1, /* ldm_regs_per_insn_1st. */ |
| 1, /* ldm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (2), /* loadf. */ |
| COSTS_N_INSNS (3), /* loadd. */ |
| COSTS_N_INSNS (1), /* load_unaligned. */ |
| COSTS_N_INSNS (2), /* store. */ |
| COSTS_N_INSNS (3), /* strd. */ |
| COSTS_N_INSNS (2), /* stm_1st. */ |
| 1, /* stm_regs_per_insn_1st. */ |
| 1, /* stm_regs_per_insn_subsequent. */ |
| COSTS_N_INSNS (2), /* storef. */ |
| COSTS_N_INSNS (3), /* stored. */ |
| COSTS_N_INSNS (1) /* store_unaligned. */ |
| }, |
| { |
| /* FP SFmode */ |
| { |
| COSTS_N_INSNS (7), /* div. */ |
| COSTS_N_INSNS (2), /* mult. */ |
| COSTS_N_INSNS (5), /* mult_addsub. */ |
| COSTS_N_INSNS (3), /* fma. */ |
| COSTS_N_INSNS (1), /* addsub. */ |
| 0, /* fpconst. */ |
| 0, /* neg. */ |
| 0, /* compare. */ |
| 0, /* widen. */ |
| 0, /* narrow. */ |
| 0, /* toint. */ |
| 0, /* fromint. */ |
| 0 /* roundint. */ |
| }, |
| /* FP DFmode */ |
| { |
| COSTS_N_INSNS (15), /* div. */ |
| COSTS_N_INSNS (5), /* mult. */ |
| COSTS_N_INSNS (7), /* mult_addsub. */ |
| COSTS_N_INSNS (7), /* fma. */ |
| COSTS_N_INSNS (3), /* addsub. */ |
| 0, /* fpconst. */ |
| 0, /* neg. */ |
| 0, /* compare. */ |
| 0, /* widen. */ |
| 0, /* narrow. */ |
| 0, /* toint. */ |
| 0, /* fromint. */ |
| 0 /* roundint. */ |
| } |
| }, |
| /* Vector */ |
| { |
| COSTS_N_INSNS (1) /* alu. */ |
| } |
| }; |
| |
| #define ARM_FUSE_NOTHING (0) |
| #define ARM_FUSE_MOVW_MOVT (1 << 0) |
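| /* Cores that set ARM_FUSE_MOVW_MOVT in their tuning (e.g. Cortex-A53 and |
| Cortex-A57 below) ask the scheduler to keep a pair such as |
| "movw rX, #:lower16:sym" / "movt rX, #:upper16:sym" adjacent, so that |
| hardware which can fuse the pair sees them back to back. */ |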
| |
| const struct tune_params arm_slowmul_tune = |
| { |
| arm_slowmul_rtx_costs, |
| NULL, |
| NULL, /* Sched adj cost. */ |
| 3, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| true, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_fastmul_tune = |
| { |
| arm_fastmul_rtx_costs, |
| NULL, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| true, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| /* StrongARM has early execution of branches, so a sequence that is worth |
| skipping is shorter. Set max_insns_skipped to a lower value. */ |
| |
| const struct tune_params arm_strongarm_tune = |
| { |
| arm_fastmul_rtx_costs, |
| NULL, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 3, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| true, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_xscale_tune = |
| { |
| arm_xscale_rtx_costs, |
| NULL, |
| xscale_sched_adjust_cost, |
| 2, /* Constant limit. */ |
| 3, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| true, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_9e_tune = |
| { |
| arm_9e_rtx_costs, |
| NULL, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| true, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_v6t2_tune = |
| { |
| arm_9e_rtx_costs, |
| NULL, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| /* Generic Cortex tuning. Use more specific tunings if appropriate. */ |
| const struct tune_params arm_cortex_tune = |
| { |
| arm_9e_rtx_costs, |
| &generic_extra_costs, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_cortex_a8_tune = |
| { |
| arm_9e_rtx_costs, |
| &cortexa8_extra_costs, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| true, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_cortex_a7_tune = |
| { |
| arm_9e_rtx_costs, |
| &cortexa7_extra_costs, |
| NULL, |
| 1, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| true, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_cortex_a15_tune = |
| { |
| arm_9e_rtx_costs, |
| &cortexa15_extra_costs, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 2, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| true, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| true, true, /* Prefer 32-bit encodings. */ |
| true, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_cortex_a53_tune = |
| { |
| arm_9e_rtx_costs, |
| &cortexa53_extra_costs, |
| NULL, /* Scheduler cost adjustment. */ |
| 1, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| true, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_cortex_a57_tune = |
| { |
| arm_9e_rtx_costs, |
| &cortexa57_extra_costs, |
| NULL, /* Scheduler cost adjustment. */ |
| 1, /* Constant limit. */ |
| 2, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| true, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| true, true, /* Prefer 32-bit encodings. */ |
| true, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_FULL /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_xgene1_tune = |
| { |
| arm_9e_rtx_costs, |
| &xgene1_extra_costs, |
| NULL, /* Scheduler cost adjustment. */ |
| 1, /* Constant limit. */ |
| 2, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| true, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| true, true, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 32, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| /* Branches can be dual-issued on Cortex-A5, so conditional execution is |
| less appealing. Set max_insns_skipped to a low value. */ |
| |
| const struct tune_params arm_cortex_a5_tune = |
| { |
| arm_9e_rtx_costs, |
| &cortexa5_extra_costs, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 1, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_cortex_a5_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {false, false}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| true, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_cortex_a9_tune = |
| { |
| arm_9e_rtx_costs, |
| &cortexa9_extra_costs, |
| cortex_a9_sched_adjust_cost, |
| 1, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_BENEFICIAL(4,32,32), |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_cortex_a12_tune = |
| { |
| arm_9e_rtx_costs, |
| &cortexa12_extra_costs, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 2, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| true, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| true, true, /* Prefer 32-bit encodings. */ |
| true, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_MOVW_MOVT, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| /* armv7m tuning. On Cortex-M4 cores, for example, a MOVW/MOVT pair takes |
| two cycles in total (one cycle each). An LDR from the constant pool also takes two cycles |
| to execute, but mildly increases pipelining opportunity (consecutive |
| loads/stores can be pipelined together, saving one cycle), and may also |
| improve icache utilisation. Hence we prefer the constant pool for such |
| processors. */ |
| |
| const struct tune_params arm_v7m_tune = |
| { |
| arm_9e_rtx_costs, |
| &v7m_extra_costs, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 2, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| true, /* Prefer constant pool. */ |
| arm_cortex_m_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {false, false}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| /* Cortex-M7 tuning. */ |
| |
| const struct tune_params arm_cortex_m7_tune = |
| { |
| arm_9e_rtx_costs, |
| &v7m_extra_costs, |
| NULL, /* Sched adj cost. */ |
| 0, /* Constant limit. */ |
| 1, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| true, /* Prefer constant pool. */ |
| arm_cortex_m7_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than |
| arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */ |
| const struct tune_params arm_v6m_tune = |
| { |
| arm_9e_rtx_costs, |
| NULL, |
| NULL, /* Sched adj cost. */ |
| 1, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| false, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {false, false}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| const struct tune_params arm_fa726te_tune = |
| { |
| arm_9e_rtx_costs, |
| NULL, |
| fa726te_sched_adjust_cost, |
| 1, /* Constant limit. */ |
| 5, /* Max cond insns. */ |
| ARM_PREFETCH_NOT_BENEFICIAL, |
| true, /* Prefer constant pool. */ |
| arm_default_branch_cost, |
| false, /* Prefer LDRD/STRD. */ |
| {true, true}, /* Prefer non short circuit. */ |
| &arm_default_vec_cost, /* Vectorizer costs. */ |
| false, /* Prefer Neon for 64-bits bitops. */ |
| false, false, /* Prefer 32-bit encodings. */ |
| false, /* Prefer Neon for stringops. */ |
| 8, /* Maximum insns to inline memset. */ |
| ARM_FUSE_NOTHING, /* Fuseable pairs of instructions. */ |
| ARM_SCHED_AUTOPREF_OFF /* Sched L2 autopref. */ |
| }; |
| |
| |
| /* Not all of these give usefully different compilation alternatives, |
| but there is no simple way of generalizing them. */ |
| static const struct processors all_cores[] = |
| { |
| /* ARM Cores */ |
| #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \ |
| {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \ |
| FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune}, |
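| /* For illustration only (hypothetical entry, not taken from arm-cores.def): |
| ARM_CORE("foo", foo_x, foo, 7A, 0, cortex) would expand to |
| {"foo", foo, "7A", BASE_ARCH_7A, 0 | FL_FOR_ARCH7A, &arm_cortex_tune}. */ |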
| #include "arm-cores.def" |
| #undef ARM_CORE |
| {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL} |
| }; |
| |
| static const struct processors all_architectures[] = |
| { |
| /* ARM Architectures */ |
| /* We don't specify tuning costs here as it will be figured out |
| from the core. */ |
| |
| #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \ |
| {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL}, |
| #include "arm-arches.def" |
| #undef ARM_ARCH |
| {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL} |
| }; |
| |
| |
| /* These are populated as command-line arguments are processed, or NULL |
| if not specified. */ |
| static const struct processors *arm_selected_arch; |
| static const struct processors *arm_selected_cpu; |
| static const struct processors *arm_selected_tune; |
| |
| /* The name of the preprocessor macro to define for this architecture. */ |
| |
| char arm_arch_name[] = "__ARM_ARCH_0UNK__"; |
| |
| /* Available values for -mfpu=. */ |
| |
| static const struct arm_fpu_desc all_fpus[] = |
| { |
| #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \ |
| { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO }, |
| #include "arm-fpus.def" |
| #undef ARM_FPU |
| }; |
| |
| |
| /* Supported TLS relocations. */ |
| |
| enum tls_reloc { |
| TLS_GD32, |
| TLS_LDM32, |
| TLS_LDO32, |
| TLS_IE32, |
| TLS_LE32, |
| TLS_DESCSEQ /* GNU scheme */ |
| }; |
| |
| /* The maximum number of insns to be used when loading a constant. */ |
| inline static int |
| arm_constant_limit (bool size_p) |
| { |
| return size_p ? 1 : current_tune->constant_limit; |
| } |
| |
| /* Emit an insn that's a simple single-set. Both the operands must be known |
| to be valid. */ |
| inline static rtx_insn * |
| emit_set_insn (rtx x, rtx y) |
| { |
| return emit_insn (gen_rtx_SET (VOIDmode, x, y)); |
| } |
| |
| /* Return the number of bits set in VALUE. */ |
| static unsigned |
| bit_count (unsigned long value) |
| { |
| unsigned long count = 0; |
| |
| while (value) |
| { |
| count++; |
| value &= value - 1; /* Clear the least-significant set bit. */ |
| } |
| |
| return count; |
| } |
| |
| typedef struct |
| { |
| machine_mode mode; |
| const char *name; |
| } arm_fixed_mode_set; |
| |
| /* A small helper for setting fixed-point library libfuncs. */ |
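| /* For example, (add_optab, QQmode, "add", "qq", 3) registers the libfunc |
| name "__gnu_addqq3"; a NUM_SUFFIX of zero omits the trailing digit. */ |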
| |
| static void |
| arm_set_fixed_optab_libfunc (optab optable, machine_mode mode, |
| const char *funcname, const char *modename, |
| int num_suffix) |
| { |
| char buffer[50]; |
| |
| if (num_suffix == 0) |
| sprintf (buffer, "__gnu_%s%s", funcname, modename); |
| else |
| sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix); |
| |
| set_optab_libfunc (optable, mode, buffer); |
| } |
| |
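| /* Likewise for fixed-point conversion libfuncs. For example, a conversion |
| with FUNCNAME "fract", FROMNAME "sq" and TONAME "dq" registers the name |
| "__gnu_fractsqdq2"; the trailing "2" is added only when the fixed-bit.h |
| suffix rule checked below applies. */ |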
| static void |
| arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to, |
| machine_mode from, const char *funcname, |
| const char *toname, const char *fromname) |
| { |
| char buffer[50]; |
| const char *maybe_suffix_2 = ""; |
| |
| /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */ |
| if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to) |
| && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to) |
| && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to)) |
| maybe_suffix_2 = "2"; |
| |
| sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname, |
| maybe_suffix_2); |
| |
| set_conv_libfunc (optable, to, from, buffer); |
| } |
| |
| /* Set up library functions unique to ARM. */ |
| |
| static void |
| arm_init_libfuncs (void) |
| { |
| /* For Linux, we have access to kernel support for atomic operations. */ |
| if (arm_abi == ARM_ABI_AAPCS_LINUX) |
| init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE); |
| |
| /* There are no special library functions unless we are using the |
| ARM BPABI. */ |
| if (!TARGET_BPABI) |
| return; |
| |
| /* The functions below are described in Section 4 of the "Run-Time |
| ABI for the ARM architecture", Version 1.0. */ |
| |
| /* Double-precision floating-point arithmetic. Table 2. */ |
| set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd"); |
| set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv"); |
| set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul"); |
| set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg"); |
| set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub"); |
| |
| /* Double-precision comparisons. Table 3. */ |
| set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq"); |
| set_optab_libfunc (ne_optab, DFmode, NULL); |
| set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt"); |
| set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple"); |
| set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge"); |
| set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt"); |
| set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun"); |
| |
| /* Single-precision floating-point arithmetic. Table 4. */ |
| set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd"); |
| set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv"); |
| set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul"); |
| set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg"); |
| set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub"); |
| |
| /* Single-precision comparisons. Table 5. */ |
| set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq"); |
| set_optab_libfunc (ne_optab, SFmode, NULL); |
| set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt"); |
| set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple"); |
| set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge"); |
| set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt"); |
| set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun"); |
| |
| /* Floating-point to integer conversions. Table 6. */ |
| set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz"); |
| set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz"); |
| set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz"); |
| set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz"); |
| set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz"); |
| set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz"); |
| set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz"); |
| set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz"); |
| |
| /* Conversions between floating types. Table 7. */ |
| set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f"); |
| set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d"); |
| |
| /* Integer to floating-point conversions. Table 8. */ |
| set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d"); |
| set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d"); |
| set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d"); |
| set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d"); |
| set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f"); |
| set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f"); |
| set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f"); |
| set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f"); |
| |
| /* Long long. Table 9. */ |
| set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul"); |
| set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod"); |
| set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod"); |
| set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl"); |
| set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr"); |
| set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr"); |
| set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp"); |
| set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp"); |
| |
| /* Integer (32/32->32) division. \S 4.3.1. */ |
| set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod"); |
| set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod"); |
| |
| /* The divmod functions are designed so that they can be used for |
| plain division, even though they return both the quotient and the |
| remainder. The quotient is returned in the usual location (i.e., |
| r0 for SImode, {r0, r1} for DImode), just as would be expected |
| for an ordinary division routine. Because the AAPCS calling |
| conventions specify that all of { r0, r1, r2, r3 } are |
| call-clobbered registers, there is no need to tell the compiler |
| explicitly that those registers are clobbered by these |
| routines. */ |
| set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod"); |
| set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod"); |
| |
| /* For SImode division the ABI provides div-without-mod routines, |
| which are faster. */ |
| set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv"); |
| set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv"); |
| |
| /* We don't have mod libcalls. Fortunately gcc knows how to use the |
| divmod libcalls instead. */ |
| set_optab_libfunc (smod_optab, DImode, NULL); |
| set_optab_libfunc (umod_optab, DImode, NULL); |
| set_optab_libfunc (smod_optab, SImode, NULL); |
| set_optab_libfunc (umod_optab, SImode, NULL); |
| |
| /* Half-precision float operations. The compiler handles all operations |
| with NULL libfuncs by converting to SFmode. */ |
| switch (arm_fp16_format) |
| { |
| case ARM_FP16_FORMAT_IEEE: |
| case ARM_FP16_FORMAT_ALTERNATIVE: |
| |
| /* Conversions. */ |
| set_conv_libfunc (trunc_optab, HFmode, SFmode, |
| (arm_fp16_format == ARM_FP16_FORMAT_IEEE |
| ? "__gnu_f2h_ieee" |
| : "__gnu_f2h_alternative")); |
| set_conv_libfunc (sext_optab, SFmode, HFmode, |
| (arm_fp16_format == ARM_FP16_FORMAT_IEEE |
| ? "__gnu_h2f_ieee" |
| : "__gnu_h2f_alternative")); |
| |
| /* Arithmetic. */ |
| set_optab_libfunc (add_optab, HFmode, NULL); |
| set_optab_libfunc (sdiv_optab, HFmode, NULL); |
| set_optab_libfunc (smul_optab, HFmode, NULL); |
| set_optab_libfunc (neg_optab, HFmode, NULL); |
| set_optab_libfunc (sub_optab, HFmode, NULL); |
| |
| /* Comparisons. */ |
| set_optab_libfunc (eq_optab, HFmode, NULL); |
| set_optab_libfunc (ne_optab, HFmode, NULL); |
| set_optab_libfunc (lt_optab, HFmode, NULL); |
| set_optab_libfunc (le_optab, HFmode, NULL); |
| set_optab_libfunc (ge_optab, HFmode, NULL); |
| set_optab_libfunc (gt_optab, HFmode, NULL); |
| set_optab_libfunc (unord_optab, HFmode, NULL); |
| break; |
| |
| default: |
| break; |
| } |
| |
| /* Use names prefixed with __gnu_ for fixed-point helper functions. */ |
| { |
| const arm_fixed_mode_set fixed_arith_modes[] = |
| { |
| { QQmode, "qq" }, |
| { UQQmode, "uqq" }, |
| { HQmode, "hq" }, |
| { UHQmode, "uhq" }, |
| { SQmode, "sq" }, |
| { USQmode, "usq" }, |
| { DQmode, "dq" }, |
| { UDQmode, "udq" }, |
| { TQmode, "tq" }, |
| { UTQmode, "utq" }, |
| { HAmode, "ha" }, |
| { UHAmode, "uha" }, |
| { SAmode, "sa" }, |
| { USAmode, "usa" }, |
| { DAmode, "da" }, |
| { UDAmode, "uda" }, |
| { TAmode, "ta" }, |
| { UTAmode, "uta" } |
| }; |
| const arm_fixed_mode_set fixed_conv_modes[] = |
| { |
| { QQmode, "qq" }, |
| { UQQmode, "uqq" }, |
| { HQmode, "hq" }, |
| { UHQmode, "uhq" }, |
| { SQmode, "sq" }, |
| { USQmode, "usq" }, |
| { DQmode, "dq" }, |
| { UDQmode, "udq" }, |
| { TQmode, "tq" }, |
| { UTQmode, "utq" }, |
| { HAmode, "ha" }, |
| { UHAmode, "uha" }, |
| { SAmode, "sa" }, |
| { USAmode, "usa" }, |
| { DAmode, "da" }, |
| { UDAmode, "uda" }, |
| { TAmode, "ta" }, |
| { UTAmode, "uta" }, |
| { QImode, "qi" }, |
| { HImode, "hi" }, |
| { SImode, "si" }, |
| { DImode, "di" }, |
| { TImode, "ti" }, |
| { SFmode, "sf" }, |
| { DFmode, "df" } |
| }; |
| unsigned int i, j; |
| |
| for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++) |
| { |
| arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode, |
| "add", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode, |
| "ssadd", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode, |
| "usadd", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode, |
| "sub", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode, |
| "sssub", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode, |
| "ussub", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode, |
| "mul", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode, |
| "ssmul", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode, |
| "usmul", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode, |
| "div", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode, |
| "udiv", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode, |
| "ssdiv", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode, |
| "usdiv", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode, |
| "neg", fixed_arith_modes[i].name, 2); |
| arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode, |
| "ssneg", fixed_arith_modes[i].name, 2); |
| arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode, |
| "usneg", fixed_arith_modes[i].name, 2); |
| arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode, |
| "ashl", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode, |
| "ashr", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode, |
| "lshr", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode, |
| "ssashl", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode, |
| "usashl", fixed_arith_modes[i].name, 3); |
| arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode, |
| "cmp", fixed_arith_modes[i].name, 2); |
| } |
| |
| for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++) |
| for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++) |
| { |
| if (i == j |
| || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode) |
| && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode))) |
| continue; |
| |
| arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode, |
| fixed_conv_modes[j].mode, "fract", |
| fixed_conv_modes[i].name, |
| fixed_conv_modes[j].name); |
| arm_set_fixed_conv_libfunc (satfract_optab, |
| fixed_conv_modes[i].mode, |
| fixed_conv_modes[j].mode, "satfract", |
| fixed_conv_modes[i].name, |
| fixed_conv_modes[j].name); |
| arm_set_fixed_conv_libfunc (fractuns_optab, |
| fixed_conv_modes[i].mode, |
| fixed_conv_modes[j].mode, "fractuns", |
| fixed_conv_modes[i].name, |
| fixed_conv_modes[j].name); |
| arm_set_fixed_conv_libfunc (satfractuns_optab, |
| fixed_conv_modes[i].mode, |
| fixed_conv_modes[j].mode, "satfractuns", |
| fixed_conv_modes[i].name, |
| fixed_conv_modes[j].name); |
| } |
| } |
| |
| if (TARGET_AAPCS_BASED) |
| synchronize_libfunc = init_one_libfunc ("__sync_synchronize"); |
| } |
| |
| /* On AAPCS systems, this is the "struct __va_list". */ |
| static GTY(()) tree va_list_type; |
| |
| /* Return the type to use as __builtin_va_list. */ |
| static tree |
| arm_build_builtin_va_list (void) |
| { |
| tree va_list_name; |
| tree ap_field; |
| |
| if (!TARGET_AAPCS_BASED) |
| return std_build_builtin_va_list (); |
| |
| /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type |
| defined as: |
| |
| struct __va_list |
| { |
| void *__ap; |
| }; |
| |
| The C Library ABI further reinforces this definition in \S |
| 4.1. |
| |
| We must follow this definition exactly. The structure tag |
| name is visible in C++ mangled names, and thus forms a part |
| of the ABI. The field name may be used by people who |
| #include <stdarg.h>. */ |
| /* Create the type. */ |
| va_list_type = lang_hooks.types.make_type (RECORD_TYPE); |
| /* Give it the required name. */ |
| va_list_name = build_decl (BUILTINS_LOCATION, |
| TYPE_DECL, |
| get_identifier ("__va_list"), |
| va_list_type); |
| DECL_ARTIFICIAL (va_list_name) = 1; |
| TYPE_NAME (va_list_type) = va_list_name; |
| TYPE_STUB_DECL (va_list_type) = va_list_name; |
| /* Create the __ap field. */ |
| ap_field = build_decl (BUILTINS_LOCATION, |
| FIELD_DECL, |
| get_identifier ("__ap"), |
| ptr_type_node); |
| DECL_ARTIFICIAL (ap_field) = 1; |
| DECL_FIELD_CONTEXT (ap_field) = va_list_type; |
| TYPE_FIELDS (va_list_type) = ap_field; |
| /* Compute its layout. */ |
| layout_type (va_list_type); |
| |
| return va_list_type; |
| } |
| |
| /* Return an expression of type "void *" pointing to the next |
| available argument in a variable-argument list. VALIST is the |
| user-level va_list object, of type __builtin_va_list. */ |
| static tree |
| arm_extract_valist_ptr (tree valist) |
| { |
| if (TREE_TYPE (valist) == error_mark_node) |
| return error_mark_node; |
| |
| /* On an AAPCS target, the pointer is stored within "struct |
| va_list". */ |
| if (TARGET_AAPCS_BASED) |
| { |
| tree ap_field = TYPE_FIELDS (TREE_TYPE (valist)); |
| valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field), |
| valist, ap_field, NULL_TREE); |
| } |
| |
| return valist; |
| } |
| |
| /* Implement TARGET_EXPAND_BUILTIN_VA_START. */ |
| static void |
| arm_expand_builtin_va_start (tree valist, rtx nextarg) |
| { |
| valist = arm_extract_valist_ptr (valist); |
| std_expand_builtin_va_start (valist, nextarg); |
| } |
| |
| /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ |
| static tree |
| arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, |
| gimple_seq *post_p) |
| { |
| valist = arm_extract_valist_ptr (valist); |
| return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); |
| } |
| |
| /* Fix up any incompatible options that the user has specified. */ |
| static void |
| arm_option_override (void) |
| { |
| arm_selected_arch = NULL; |
| arm_selected_cpu = NULL; |
| arm_selected_tune = NULL; |
| |
| if (global_options_set.x_arm_arch_option) |
| arm_selected_arch = &all_architectures[arm_arch_option]; |
| |
| if (global_options_set.x_arm_cpu_option) |
| { |
| arm_selected_cpu = &all_cores[(int) arm_cpu_option]; |
| arm_selected_tune = &all_cores[(int) arm_cpu_option]; |
| } |
| |
| if (global_options_set.x_arm_tune_option) |
| arm_selected_tune = &all_cores[(int) arm_tune_option]; |
| |
| #ifdef SUBTARGET_OVERRIDE_OPTIONS |
| SUBTARGET_OVERRIDE_OPTIONS; |
| #endif |
| |
| if (arm_selected_arch) |
| { |
| if (arm_selected_cpu) |
| { |
| /* Check for conflict between mcpu and march. */ |
| if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE) |
| { |
| warning (0, "switch -mcpu=%s conflicts with -march=%s switch", |
| arm_selected_cpu->name, arm_selected_arch->name); |
| /* -march wins for code generation. |
| -mcpu wins for default tuning. */ |
| if (!arm_selected_tune) |
| arm_selected_tune = arm_selected_cpu; |
| |
| arm_selected_cpu = arm_selected_arch; |
| } |
| else |
| /* -mcpu wins. */ |
| arm_selected_arch = NULL; |
| } |
| else |
| /* Pick a CPU based on the architecture. */ |
| arm_selected_cpu = arm_selected_arch; |
| } |
| |
| /* If the user did not specify a processor, choose one for them. */ |
| if (!arm_selected_cpu) |
| { |
| const struct processors * sel; |
| unsigned int sought; |
| |
| arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT]; |
| if (!arm_selected_cpu->name) |
| { |
| #ifdef SUBTARGET_CPU_DEFAULT |
| /* Use the subtarget default CPU if none was specified by |
| configure. */ |
| arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT]; |
| #endif |
| /* Default to ARM6. */ |
| if (!arm_selected_cpu->name) |
| arm_selected_cpu = &all_cores[arm6]; |
| } |
| |
| sel = arm_selected_cpu; |
| insn_flags = sel->flags; |
| |
| /* Now check to see if the user has specified any command line |
| switches that require certain abilities from the CPU. */ |
| sought = 0; |
| |
| if (TARGET_INTERWORK || TARGET_THUMB) |
| { |
| sought |= (FL_THUMB | FL_MODE32); |
| |
| /* There are no ARM processors that support both APCS-26 and |
| interworking. Therefore we force FL_MODE26 to be removed |
| from insn_flags here (if it was set), so that the search |
| below will always be able to find a compatible processor. */ |
| insn_flags &= ~FL_MODE26; |
| } |
| |
| if (sought != 0 && ((sought & insn_flags) != sought)) |
| { |
| /* Try to locate a CPU type that supports all of the abilities |
| of the default CPU, plus the extra abilities requested by |
| the user. */ |
| for (sel = all_cores; sel->name != NULL; sel++) |
| if ((sel->flags & sought) == (sought | insn_flags)) |
| break; |
| |
| if (sel->name == NULL) |
| { |
| unsigned current_bit_count = 0; |
| const struct processors * best_fit = NULL; |
| |
| /* Ideally we would like to issue an error message here |
| saying that it was not possible to find a CPU compatible |
| with the default CPU, but which also supports the command |
| line options specified by the programmer, and so they |
| ought to use the -mcpu=<name> command line option to |
| override the default CPU type. |
| |
| If we cannot find a cpu that has both the |
| characteristics of the default cpu and the given |
| command line options we scan the array again looking |
| for a best match. */ |
| for (sel = all_cores; sel->name != NULL; sel++) |
| if ((sel->flags & sought) == sought) |
| { |
| unsigned count; |
| |
| count = bit_count (sel->flags & insn_flags); |
| |
| if (count >= current_bit_count) |
| { |
| best_fit = sel; |
| current_bit_count = count; |
| } |
| } |
| |
| gcc_assert (best_fit); |
| sel = best_fit; |
| } |
| |
| arm_selected_cpu = sel; |
| } |
| } |
| |
| gcc_assert (arm_selected_cpu); |
| /* The selected cpu may be an architecture, so lookup tuning by core ID. */ |
| if (!arm_selected_tune) |
| arm_selected_tune = &all_cores[arm_selected_cpu->core]; |
| |
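| /* E.g. an architecture string of "7A" yields "__ARM_ARCH_7A__" here. */ |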
| sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch); |
| insn_flags = arm_selected_cpu->flags; |
| arm_base_arch = arm_selected_cpu->base_arch; |
| |
| arm_tune = arm_selected_tune->core; |
| tune_flags = arm_selected_tune->flags; |
| current_tune = arm_selected_tune->tune; |
| |
| /* Make sure that the processor choice does not conflict with any of the |
| other command line choices. */ |
| if (TARGET_ARM && !(insn_flags & FL_NOTM)) |
| error ("target CPU does not support ARM mode"); |
| |
| /* BPABI targets use linker tricks to allow interworking on cores |
| without thumb support. */ |
| if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI)) |
| { |
| warning (0, "target CPU does not support interworking" ); |
| target_flags &= ~MASK_INTERWORK; |
| } |
| |
| if (TARGET_THUMB && !(insn_flags & FL_THUMB)) |
| { |
| warning (0, "target CPU does not support THUMB instructions"); |
| target_flags &= ~MASK_THUMB; |
| } |
| |
| if (TARGET_APCS_FRAME && TARGET_THUMB) |
| { |
| /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */ |
| target_flags &= ~MASK_APCS_FRAME; |
| } |
| |
| /* Callee super interworking implies thumb interworking. Adding |
| this to the flags here simplifies the logic elsewhere. */ |
| if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING) |
| target_flags |= MASK_INTERWORK; |
| |
| /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done |
| from here where no function is being compiled currently. */ |
| if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM) |
| warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); |
| |
| if (TARGET_ARM && TARGET_CALLEE_INTERWORKING) |
| warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); |
| |
| if (TARGET_APCS_STACK && !TARGET_APCS_FRAME) |
| { |
| warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame"); |
| target_flags |= MASK_APCS_FRAME; |
| } |
| |
| if (TARGET_POKE_FUNCTION_NAME) |
| target_flags |= MASK_APCS_FRAME; |
| |
| if (TARGET_APCS_REENT && flag_pic) |
| error ("-fpic and -mapcs-reent are incompatible"); |
| |
| if (TARGET_APCS_REENT) |
| warning (0, "APCS reentrant code not supported. Ignored"); |
| |
| /* If this target is normally configured to use APCS frames, warn if they |
| are turned off and debugging is turned on. */ |
| if (TARGET_ARM |
| && write_symbols != NO_DEBUG |
| && !TARGET_APCS_FRAME |
| && (TARGET_DEFAULT & MASK_APCS_FRAME)) |
| warning (0, "-g with -mno-apcs-frame may not give sensible debugging"); |
| |
| if (TARGET_APCS_FLOAT) |
| warning (0, "passing floating point arguments in fp regs not yet supported"); |
| |
| /* Initialize boolean versions of the flags, for use in the arm.md file. */ |
| arm_arch3m = (insn_flags & FL_ARCH3M) != 0; |
| arm_arch4 = (insn_flags & FL_ARCH4) != 0; |
| arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0); |
| arm_arch5 = (insn_flags & FL_ARCH5) != 0; |
| arm_arch5e = (insn_flags & FL_ARCH5E) != 0; |
| arm_arch6 = (insn_flags & FL_ARCH6) != 0; |
| arm_arch6k = (insn_flags & FL_ARCH6K) != 0; |
| arm_arch_notm = (insn_flags & FL_NOTM) != 0; |
| arm_arch6m = arm_arch6 && !arm_arch_notm; |
| arm_arch7 = (insn_flags & FL_ARCH7) != 0; |
| arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; |
| arm_arch8 = (insn_flags & FL_ARCH8) != 0; |
| arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; |
| arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; |
| |
| arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; |
| arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; |
| thumb_code = TARGET_ARM == 0; |
| thumb1_code = TARGET_THUMB1 != 0; |
| arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; |
| arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; |
| arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; |
| arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0; |
| arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0; |
| arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0; |
| arm_arch_lpae = (insn_flags & FL_LPAE) != 0; |
| arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0; |
| arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; |
| arm_arch_crc = (insn_flags & FL_CRC32) != 0; |
| arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0; |
| if (arm_restrict_it == 2) |
| arm_restrict_it = arm_arch8 && TARGET_THUMB2; |
| |
| if (!TARGET_THUMB2) |
| arm_restrict_it = 0; |
| |
| /* If we are not using the default (ARM mode) section anchor offset |
| ranges, then set the correct ranges now. */ |
| if (TARGET_THUMB1) |
| { |
| /* Thumb-1 LDR instructions cannot have negative offsets. |
| Permissible positive offset ranges are 5-bit (for byte loads), |
| 6-bit (for halfword loads), or 7-bit (for word loads). |
| Empirical results suggest a 7-bit anchor range gives the best |
| overall code size. */ |
| targetm.min_anchor_offset = 0; |
| targetm.max_anchor_offset = 127; |
| } |
| else if (TARGET_THUMB2) |
| { |
| /* The minimum is set such that the total size of the block |
| for a particular anchor is 248 + 1 + 4095 bytes, which is |
| divisible by eight, ensuring natural spacing of anchors. */ |
| targetm.min_anchor_offset = -248; |
| targetm.max_anchor_offset = 4095; |
| } |
| |
| /* V5 code we generate is completely interworking capable, so we turn off |
| TARGET_INTERWORK here to avoid many tests later on. */ |
| |
| /* XXX However, we must pass the right pre-processor defines to CPP |
| or GLD can get confused. This is a hack. */ |
| if (TARGET_INTERWORK) |
| arm_cpp_interwork = 1; |
| |
| if (arm_arch5) |
| target_flags &= ~MASK_INTERWORK; |
| |
| if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN) |
| error ("iwmmxt requires an AAPCS compatible ABI for proper operation"); |
| |
| if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT) |
| error ("iwmmxt abi requires an iwmmxt capable cpu"); |
| |
| if (!global_options_set.x_arm_fpu_index) |
| { |
| const char *target_fpu_name; |
| bool ok; |
| |
| #ifdef FPUTYPE_DEFAULT |
| target_fpu_name = FPUTYPE_DEFAULT; |
| #else |
| target_fpu_name = "vfp"; |
| #endif |
| |
| ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index, |
| CL_TARGET); |
| gcc_assert (ok); |
| } |
| |
| arm_fpu_desc = &all_fpus[arm_fpu_index]; |
| |
| switch (arm_fpu_desc->model) |
| { |
| case ARM_FP_MODEL_VFP: |
| arm_fpu_attr = FPU_VFP; |
| break; |
| |
| default: |
| gcc_unreachable(); |
| } |
| |
| if (TARGET_AAPCS_BASED) |
| { |
| if (TARGET_CALLER_INTERWORKING) |
| error ("AAPCS does not support -mcaller-super-interworking"); |
| else |
| if (TARGET_CALLEE_INTERWORKING) |
| error ("AAPCS does not support -mcallee-super-interworking"); |
| } |
| |
| /* iWMMXt and NEON are incompatible. */ |
| if (TARGET_IWMMXT && TARGET_NEON) |
| error ("iWMMXt and NEON are incompatible"); |
| |
| /* iWMMXt unsupported under Thumb mode. */ |
| if (TARGET_THUMB && TARGET_IWMMXT) |
| error ("iWMMXt unsupported under Thumb mode"); |
| |
| /* __fp16 support currently assumes the core has ldrh. */ |
| if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE) |
| sorry ("__fp16 and no ldrh"); |
| |
| /* If soft-float is specified then don't use FPU. */ |
| if (TARGET_SOFT_FLOAT) |
| arm_fpu_attr = FPU_NONE; |
| |
| if (TARGET_AAPCS_BASED) |
| { |
| if (arm_abi == ARM_ABI_IWMMXT) |
| arm_pcs_default = ARM_PCS_AAPCS_IWMMXT; |
| else if (arm_float_abi == ARM_FLOAT_ABI_HARD |
| && TARGET_HARD_FLOAT |
| && TARGET_VFP) |
| arm_pcs_default = ARM_PCS_AAPCS_VFP; |
| else |
| arm_pcs_default = ARM_PCS_AAPCS; |
| } |
| else |
| { |
| if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP) |
| sorry ("-mfloat-abi=hard and VFP"); |
| |
| if (arm_abi == ARM_ABI_APCS) |
| arm_pcs_default = ARM_PCS_APCS; |
| else |
| arm_pcs_default = ARM_PCS_ATPCS; |
| } |
| |
| /* For arm2/3 there is no need to do any scheduling if we are doing |
| software floating-point. */ |
| if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0) |
| flag_schedule_insns = flag_schedule_insns_after_reload = 0; |
| |
| /* Use the cp15 method if it is available. */ |
| if (target_thread_pointer == TP_AUTO) |
| { |
| if (arm_arch6k && !TARGET_THUMB1) |
| target_thread_pointer = TP_CP15; |
| else |
| target_thread_pointer = TP_SOFT; |
| } |
| |
| if (TARGET_HARD_TP && TARGET_THUMB1) |
| error ("can not use -mtp=cp15 with 16-bit Thumb"); |
| |
| /* Override the default structure alignment for AAPCS ABI. */ |
| if (!global_options_set.x_arm_structure_size_boundary) |
| { |
| if (TARGET_AAPCS_BASED) |
| arm_structure_size_boundary = 8; |
| } |
| else |
| { |
| if (arm_structure_size_boundary != 8 |
| && arm_structure_size_boundary != 32 |
| && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64)) |
| { |
| if (ARM_DOUBLEWORD_ALIGN) |
| warning (0, |
| "structure size boundary can only be set to 8, 32 or 64"); |
| else |
| warning (0, "structure size boundary can only be set to 8 or 32"); |
| arm_structure_size_boundary |
| = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY); |
| } |
| } |
| |
| if (!TARGET_ARM && TARGET_VXWORKS_RTP && flag_pic) |
| { |
| error ("RTP PIC is incompatible with Thumb"); |
| flag_pic = 0; |
| } |
| |
| /* If stack checking is disabled, we can use r10 as the PIC register, |
| which keeps r9 available. The EABI specifies r9 as the PIC register. */ |
| if (flag_pic && TARGET_SINGLE_PIC_BASE) |
| { |
| if (TARGET_VXWORKS_RTP) |
| warning (0, "RTP PIC is incompatible with -msingle-pic-base"); |
| arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10; |
| } |
| |
| if (flag_pic && TARGET_VXWORKS_RTP) |
| arm_pic_register = 9; |
| |
| if (arm_pic_register_string != NULL) |
| { |
| int pic_register = decode_reg_name (arm_pic_register_string); |
| |
| if (!flag_pic) |
| warning (0, "-mpic-register= is useless without -fpic"); |
| |
| /* Prevent the user from choosing an obviously stupid PIC register. */ |
| else if (pic_register < 0 || call_used_regs[pic_register] |
| || pic_register == HARD_FRAME_POINTER_REGNUM |
| || pic_register == STACK_POINTER_REGNUM |
| || pic_register >= PC_REGNUM |
| || (TARGET_VXWORKS_RTP |
| && (unsigned int) pic_register != arm_pic_register)) |
| error ("unable to use '%s' for PIC register", arm_pic_register_string); |
| else |
| arm_pic_register = pic_register; |
| } |
| |
| if (TARGET_VXWORKS_RTP |
| && !global_options_set.x_arm_pic_data_is_text_relative) |
| arm_pic_data_is_text_relative = 0; |
| |
| /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */ |
| if (fix_cm3_ldrd == 2) |
| { |
| if (arm_selected_cpu->core == cortexm3) |
| fix_cm3_ldrd = 1; |
| else |
| fix_cm3_ldrd = 0; |
| } |
| |
| /* Enable -munaligned-access by default for |
| - all ARMv6 architecture-based processors |
| - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors |
| - ARMv8 architecture-based processors. |
| |
| Disable -munaligned-access by default for |
| - all pre-ARMv6 architecture-based processors |
| - ARMv6-M architecture-based processors. */ |
| |
| if (unaligned_access == 2) |
| { |
| if (arm_arch6 && (arm_arch_notm || arm_arch7)) |
| unaligned_access = 1; |
| else |
| unaligned_access = 0; |
| } |
| else if (unaligned_access == 1 |
| && !(arm_arch6 && (arm_arch_notm || arm_arch7))) |
| { |
| warning (0, "target CPU does not support unaligned accesses"); |
| unaligned_access = 0; |
| } |
| |
| if (TARGET_THUMB1 && flag_schedule_insns) |
| { |
| /* Don't warn since it's on by default in -O2. */ |
| flag_schedule_insns = 0; |
| } |
| |
| if (optimize_size) |
| { |
| /* If optimizing for size, bump the number of instructions that we |
| are prepared to conditionally execute (even on a StrongARM). */ |
| max_insns_skipped = 6; |
| |
| /* For THUMB2, we limit the conditional sequence to one IT block. */ |
| if (TARGET_THUMB2) |
| max_insns_skipped = MAX_INSN_PER_IT_BLOCK; |
| } |
| else |
| max_insns_skipped = current_tune->max_insns_skipped; |
| |
| /* Hot/Cold partitioning is not currently supported, since we can't |
| handle literal pool placement in that case. */ |
| if (flag_reorder_blocks_and_partition) |
| { |
| inform (input_location, |
| "-freorder-blocks-and-partition not supported on this architecture"); |
| flag_reorder_blocks_and_partition = 0; |
| flag_reorder_blocks = 1; |
| } |
| |
| if (flag_pic) |
| /* Hoisting PIC address calculations more aggressively provides a small, |
| but measurable, size reduction for PIC code. Therefore, we decrease |
| the bar for unrestricted expression hoisting to the cost of PIC address |
| calculation, which is 2 instructions. */ |
| maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2, |
| global_options.x_param_values, |
| global_options_set.x_param_values); |
| |
| /* ARM EABI defaults to strict volatile bitfields. */ |
| if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0 |
| && abi_version_at_least(2)) |
| flag_strict_volatile_bitfields = 1; |
| |
| /* Enable software prefetching at -O3 for CPUs that support prefetch |
| instructions, and for which we have deemed it beneficial (signified by |
| setting num_prefetch_slots to 1 or more). */ |
| if (flag_prefetch_loop_arrays < 0 |
| && HAVE_prefetch |
| && optimize >= 3 |
| && current_tune->num_prefetch_slots > 0) |
| flag_prefetch_loop_arrays = 1; |
| |
| /* Set up parameters to be used in prefetching algorithm. Do not override the |
| defaults unless we are tuning for a core we have researched values for. */ |
| if (current_tune->num_prefetch_slots > 0) |
| maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, |
| current_tune->num_prefetch_slots, |
| global_options.x_param_values, |
| global_options_set.x_param_values); |
| if (current_tune->l1_cache_line_size >= 0) |
| maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, |
| current_tune->l1_cache_line_size, |
| global_options.x_param_values, |
| global_options_set.x_param_values); |
| if (current_tune->l1_cache_size >= 0) |
| maybe_set_param_value (PARAM_L1_CACHE_SIZE, |
| current_tune->l1_cache_size, |
| global_options.x_param_values, |
| global_options_set.x_param_values); |
| |
| /* Use Neon rather than core registers to perform 64-bit operations. */ |
| prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits; |
| if (use_neon_for_64bits == 1) |
| prefer_neon_for_64bits = true; |
| |
| /* Use the alternative scheduling-pressure algorithm by default. */ |
| maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL, |
| global_options.x_param_values, |
| global_options_set.x_param_values); |
| |
| /* Look through ready list and all of queue for instructions |
| relevant for L2 auto-prefetcher. */ |
| int param_sched_autopref_queue_depth; |
| if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_OFF) |
| param_sched_autopref_queue_depth = -1; |
| else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_RANK) |
| param_sched_autopref_queue_depth = 0; |
| else if (current_tune->sched_autopref == ARM_SCHED_AUTOPREF_FULL) |
| param_sched_autopref_queue_depth = max_insn_queue_index + 1; |
| else |
| gcc_unreachable (); |
| maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH, |
| param_sched_autopref_queue_depth, |
| global_options.x_param_values, |
| global_options_set.x_param_values); |
| |
| /* Disable shrink-wrap when optimizing function for size, since it tends to |
| generate additional returns. */ |
| if (optimize_function_for_size_p (cfun) && TARGET_THUMB2) |
| flag_shrink_wrap = false; |
| /* TBD: Dwarf info for apcs frame is not handled yet. */ |
| if (TARGET_APCS_FRAME) |
| flag_shrink_wrap = false; |
| |
| /* We only support -mslow-flash-data on armv7-m targets. */ |
| if (target_slow_flash_data |
| && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em) |
| || (TARGET_THUMB1 || flag_pic || TARGET_NEON))) |
| error ("-mslow-flash-data only supports non-pic code on armv7-m targets"); |
| |
| /* Currently, for slow flash data, we just disable literal pools. */ |
| if (target_slow_flash_data) |
| arm_disable_literal_pool = true; |
| |
| /* Thumb2 inline assembly code should always use unified syntax. |
| This will apply to ARM and Thumb1 eventually. */ |
| if (TARGET_THUMB2) |
| inline_asm_unified = 1; |
| |
| /* Disable scheduling fusion by default unless the target is an armv7 |
| processor and the tuning prefers ldrd/strd. */ |
| if (flag_schedule_fusion == 2 |
| && (!arm_arch7 || !current_tune->prefer_ldrd_strd)) |
| flag_schedule_fusion = 0; |
| |
| /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn |
| - epilogue_insns - does not accurately model the corresponding insns |
| emitted in the asm file. In particular, see the comment in thumb_exit |
| 'Find out how many of the (return) argument registers we can corrupt'. |
| As a consequence, the epilogue may clobber registers without fipa-ra |
| finding out about it. Therefore, disable fipa-ra in Thumb1 mode. |
| TODO: Accurately model clobbers for epilogue_insns and reenable |
| fipa-ra. */ |
| if (TARGET_THUMB1) |
| flag_ipa_ra = 0; |
| |
| /* Register global variables with the garbage collector. */ |
| arm_add_gc_roots (); |
| } |
| |
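| /* Initialize the obstack used by the minipool (constant pool) machinery. |
| Called once from arm_option_override above. */ |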
| static void |
| arm_add_gc_roots (void) |
| { |
| gcc_obstack_init(&minipool_obstack); |
| minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0); |
| } |
| |
| /* A table of known ARM exception types. |
| For use with the interrupt function attribute. */ |
| |
| typedef struct |
| { |
| const char *const arg; |
| const unsigned long return_value; |
| } |
| isr_attribute_arg; |
| |
| static const isr_attribute_arg isr_attribute_args [] = |
| { |
| { "IRQ", ARM_FT_ISR }, |
| { "irq", ARM_FT_ISR }, |
| { "FIQ", ARM_FT_FIQ }, |
| { "fiq", ARM_FT_FIQ }, |
| { "ABORT", ARM_FT_ISR }, |
| { "abort", ARM_FT_ISR }, |
| { "UNDEF", ARM_FT_EXCEPTION }, |
| { "undef", ARM_FT_EXCEPTION }, |
| { "SWI", ARM_FT_EXCEPTION }, |
| { "swi", ARM_FT_EXCEPTION }, |
| { NULL, ARM_FT_NORMAL } |
| }; |
| |
| /* Returns the (interrupt) function type of the current |
| function, or ARM_FT_UNKNOWN if the type cannot be determined. */ |
| |
| static unsigned long |
| arm_isr_value (tree argument) |
| { |
| const isr_attribute_arg * ptr; |
| const char * arg; |
| |
| if (!arm_arch_notm) |
| return ARM_FT_NORMAL | ARM_FT_STACKALIGN; |
| |
| /* No argument - default to IRQ. */ |
| if (argument == NULL_TREE) |
| return ARM_FT_ISR; |
| |
| /* Get the value of the argument. */ |
| if (TREE_VALUE (argument) == NULL_TREE |
| || TREE_CODE (TREE_VALUE (argument)) != STRING_CST) |
| return ARM_FT_UNKNOWN; |
| |
| arg = TREE_STRING_POINTER (TREE_VALUE (argument)); |
| |
| /* Check it against the list of known arguments. */ |
| for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++) |
| if (streq (arg, ptr->arg)) |
| return ptr->return_value; |
| |
| /* An unrecognized interrupt type. */ |
| return ARM_FT_UNKNOWN; |
| } |
| |
| /* Computes the type of the current function. */ |
| |
| static unsigned long |
| arm_compute_func_type (void) |
| { |
| unsigned long type = ARM_FT_UNKNOWN; |
| tree a; |
| tree attr; |
| |
| gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL); |
| |
| /* Decide if the current function is volatile. Such functions |
| never return, and many memory cycles can be saved by not storing |
| register values that will never be needed again. This optimization |
| was added to speed up context switching in a kernel application. */ |
| if (optimize > 0 |
| && (TREE_NOTHROW (current_function_decl) |
| || !(flag_unwind_tables |
| || (flag_exceptions |
| && arm_except_unwind_info (&global_options) != UI_SJLJ))) |
| && TREE_THIS_VOLATILE (current_function_decl)) |
| type |= ARM_FT_VOLATILE; |
| |
| if (cfun->static_chain_decl != NULL) |
| type |= ARM_FT_NESTED; |
| |
| attr = DECL_ATTRIBUTES (current_function_decl); |
| |
| a = lookup_attribute ("naked", attr); |
| if (a != NULL_TREE) |
| type |= ARM_FT_NAKED; |
| |
| a = lookup_attribute ("isr", attr); |
| if (a == NULL_TREE) |
| a = lookup_attribute ("interrupt", attr); |
| |
| if (a == NULL_TREE) |
| type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL; |
| else |
| type |= arm_isr_value (TREE_VALUE (a)); |
| |
| return type; |
| } |
| |
| /* Returns the type of the current function. */ |
| |
| unsigned long |
| arm_current_func_type (void) |
| { |
| if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN) |
| cfun->machine->func_type = arm_compute_func_type (); |
| |
| return cfun->machine->func_type; |
| } |
| |
| bool |
| arm_allocate_stack_slots_for_args (void) |
| { |
| /* Naked functions should not allocate stack slots for arguments. */ |
| return !IS_NAKED (arm_current_func_type ()); |
| } |
| |
| static bool |
| arm_warn_func_return (tree decl) |
| { |
| /* Naked functions are implemented entirely in assembly, including the |
| return sequence, so suppress warnings about this. */ |
| return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE; |
| } |
| |
| |
| /* Output assembler code for a block containing the constant parts |
| of a trampoline, leaving space for the variable parts. |
| |
| On the ARM, (if r8 is the static chain regnum, and remembering that |
| referencing pc adds an offset of 8) the trampoline looks like: |
| ldr r8, [pc, #0] |
| ldr pc, [pc] |
| .word static chain value |
| .word function's address |
| XXX FIXME: When the trampoline returns, r8 will be clobbered. */ |
| |
| static void |
| arm_asm_trampoline_template (FILE *f) |
| { |
| if (TARGET_ARM) |
| { |
| asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM); |
| asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM); |
| } |
| else if (TARGET_THUMB2) |
| { |
| /* The Thumb-2 trampoline is similar to the ARM implementation. |
| Unlike 16-bit Thumb, we enter the stub in Thumb mode. */ |
| asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", |
| STATIC_CHAIN_REGNUM, PC_REGNUM); |
| asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM); |
| } |
| else |
| { |
| ASM_OUTPUT_ALIGN (f, 2); |
| fprintf (f, "\t.code\t16\n"); |
| fprintf (f, ".Ltrampoline_start:\n"); |
| asm_fprintf (f, "\tpush\t{r0, r1}\n"); |
| asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); |
| asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM); |
| asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); |
| asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM); |
| asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM); |
| } |
| assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); |
| assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); |
| } |
| |
| /* Emit RTL insns to initialize the variable parts of a trampoline. */ |
| |
| static void |
| arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) |
| { |
| rtx fnaddr, mem, a_tramp; |
| |
| emit_block_move (m_tramp, assemble_trampoline_template (), |
| GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); |
| |
| mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12); |
| emit_move_insn (mem, chain_value); |
| |
| mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16); |
| fnaddr = XEXP (DECL_RTL (fndecl), 0); |
| emit_move_insn (mem, fnaddr); |
| |
| a_tramp = XEXP (m_tramp, 0); |
| emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), |
| LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode, |
| plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode); |
| } |
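| |
| /* A sketch of the resulting 32-bit (ARM or Thumb-2) trampoline, assuming |
| the offsets used above; illustrative only: |
| |
| offset 0: the two instructions from arm_asm_trampoline_template |
| offset 8: static chain value (stored by arm_trampoline_init) |
| offset 12: address of the nested function |
| |
| The template's loads are PC-relative (the PC reads as the instruction |
| address plus 8 in ARM state and plus 4 in Thumb state), so they pick up |
| the two words stored here. The 16-bit Thumb variant is longer and puts |
| the words at offsets 12 and 16 instead. */ |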
| |
| /* Thumb trampolines should be entered in thumb mode, so set |
| the bottom bit of the address. */ |
| |
| static rtx |
| arm_trampoline_adjust_address (rtx addr) |
| { |
| if (TARGET_THUMB) |
| addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx, |
| NULL, 0, OPTAB_LIB_WIDEN); |
| return addr; |
| } |
| |
| /* Return 1 if it is possible to return using a single instruction. |
| If SIBLING is non-null, this is a test for a return before a sibling |
| call. SIBLING is the call insn, so we can examine its register usage. */ |
| |
| int |
| use_return_insn (int iscond, rtx sibling) |
| { |
| int regno; |
| unsigned int func_type; |
| unsigned long saved_int_regs; |
| unsigned HOST_WIDE_INT stack_adjust; |
| arm_stack_offsets *offsets; |
| |
| /* Never use a return instruction before reload has run. */ |
| if (!reload_completed) |
| return 0; |
| |
| func_type = arm_current_func_type (); |
| |
| /* Naked, volatile and stack alignment functions need special |
| consideration. */ |
| if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN)) |
| return 0; |
| |
| /* So do interrupt functions that use the frame pointer and Thumb |
| interrupt functions. */ |
| if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB)) |
| return 0; |
| |
| if (TARGET_LDRD && current_tune->prefer_ldrd_strd |
| && !optimize_function_for_size_p (cfun)) |
| return 0; |
| |
| offsets = arm_get_frame_offsets (); |
| stack_adjust = offsets->outgoing_args - offsets->saved_regs; |
| |
| /* As do variadic functions. */ |
| if (crtl->args.pretend_args_size |
| || cfun->machine->uses_anonymous_args |
| /* Or if the function calls __builtin_eh_return () */ |
| || crtl->calls_eh_return |
| /* Or if the function calls alloca */ |
| || cfun->calls_alloca |
| /* Or if there is a stack adjustment. However, if the stack pointer |
| is saved on the stack, we can use a pre-incrementing stack load. */ |
| || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed |
| && stack_adjust == 4))) |
| return 0; |
| |
| saved_int_regs = offsets->saved_regs_mask; |
| |
| /* Unfortunately, the insn |
| |
| ldmib sp, {..., sp, ...} |
| |
| triggers a bug on most SA-110 based devices, such that the stack |
| pointer won't be correctly restored if the instruction takes a |
| page fault. We work around this problem by popping r3 along with |
| the other registers, since that is never slower than executing |
| another instruction. |
| |
| We test for !arm_arch5 here, because code for any architecture |
| less than this could potentially be run on one of the buggy |
| chips. */ |
| if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM) |
| { |
| /* Validate that r3 is a call-clobbered register (always true in |
| the default abi) ... */ |
| if (!call_used_regs[3]) |
| return 0; |
| |
| /* ... that it isn't being used for a return value ... */ |
| if (arm_size_return_regs () >= (4 * UNITS_PER_WORD)) |
| return 0; |
| |
| /* ... or for a tail-call argument ... */ |
| if (sibling) |
| { |
| gcc_assert (CALL_P (sibling)); |
| |
| if (find_regno_fusage (sibling, USE, 3)) |
| return 0; |
| } |
| |
| /* ... and that there are no call-saved registers in r0-r2 |
| (always true in the default ABI). */ |
| if (saved_int_regs & 0x7) |
| return 0; |
| } |
| |
| /* Can't be done if interworking with Thumb, and any registers have been |
| stacked. */ |
| if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type)) |
| return 0; |
| |
| /* On StrongARM, conditional returns are expensive if they aren't |
| taken and multiple registers have been stacked. */ |
| if (iscond && arm_tune_strongarm) |
| { |
| /* Conditional return when just the LR is stored is a simple |
| conditional-load instruction, that's not expensive. */ |
| if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM)) |
| return 0; |
| |
| if (flag_pic |
| && arm_pic_register != INVALID_REGNUM |
| && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)) |
| return 0; |
| } |
| |
| /* If there are saved registers but the LR isn't saved, then we need |
| two instructions for the return. */ |
| if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM))) |
| return 0; |
| |
| /* Can't be done if any of the VFP regs are pushed, |
| since this also requires an insn. */ |
| if (TARGET_HARD_FLOAT && TARGET_VFP) |
| for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++) |
| if (df_regs_ever_live_p (regno) && !call_used_regs[regno]) |
| return 0; |
| |
| if (TARGET_REALLY_IWMMXT) |
| for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++) |
| if (df_regs_ever_live_p (regno) && ! call_used_regs[regno]) |
| return 0; |
| |
| return 1; |
| } |
| |
| /* Return TRUE if we should try to use a simple_return insn, i.e. perform |
| shrink-wrapping if possible. This is the case if we need to emit a |
| prologue, which we can test by looking at the offsets. */ |
| bool |
| use_simple_return_p (void) |
| { |
| arm_stack_offsets *offsets; |
| |
| offsets = arm_get_frame_offsets (); |
| return offsets->outgoing_args != 0; |
| } |
| |
| /* Return TRUE if int I is a valid immediate ARM constant. */ |
| |
| int |
| const_ok_for_arm (HOST_WIDE_INT i) |
| { |
| int lowbit; |
| |
| /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must |
| be all zero, or all one. */ |
| if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0 |
| && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) |
| != ((~(unsigned HOST_WIDE_INT) 0) |
| & ~(unsigned HOST_WIDE_INT) 0xffffffff))) |
| return FALSE; |
| |
| i &= (unsigned HOST_WIDE_INT) 0xffffffff; |
| |
| /* Fast return for 0 and small values. We must do this for zero, since |
| the code below can't handle that one case. */ |
| if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0) |
| return TRUE; |
| |
| /* Get the number of trailing zeros. */ |
| lowbit = ffs((int) i) - 1; |
| |
| /* Only even shifts are allowed in ARM mode so round down to the |
| nearest even number. */ |
| if (TARGET_ARM) |
| lowbit &= ~1; |
| |
| if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0) |
| return TRUE; |
| |
| if (TARGET_ARM) |
| { |
| /* Allow rotated constants in ARM mode. */ |
| if (lowbit <= 4 |
| && ((i & ~0xc000003f) == 0 |
| || (i & ~0xf000000f) == 0 |
| || (i & ~0xfc000003) == 0)) |
| return TRUE; |
| } |
| else |
| { |
| HOST_WIDE_INT v; |
| |
| /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */ |
| v = i & 0xff; |
| v |= v << 16; |
| if (i == v || i == (v | (v << 8))) |
| return TRUE; |
| |
| /* Allow repeated pattern 0xXY00XY00. */ |
| v = i & 0xff00; |
| v |= v << 16; |
| if (i == v) |
| return TRUE; |
| } |
| |
| return FALSE; |
| } |
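| |
| /* Some illustrative cases for the test above (not exhaustive): |
| |
| 0x000000ff valid in both ARM and Thumb-2 (plain 8-bit value) |
| 0x0000ff00 valid (8-bit value shifted into place) |
| 0xff000000 valid (e.g. 0xff rotated right by 8) |
| 0x000001fe valid for Thumb-2 (arbitrary shift) but not for ARM, |
| which only allows even rotations |
| 0x00ff00ff valid only as a Thumb-2 replicated pattern |
| 0x12345678 never valid; it must be synthesized or loaded instead. */ |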
| |
| /* Return true if I is a valid constant for the operation CODE. */ |
| int |
| const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code) |
| { |
| if (const_ok_for_arm (i)) |
| return 1; |
| |
| switch (code) |
| { |
| case SET: |
| /* See if we can use movw. */ |
| if (arm_arch_thumb2 && (i & 0xffff0000) == 0) |
| return 1; |
| else |
| /* Otherwise, try mvn. */ |
| return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); |
| |
| case PLUS: |
| /* See if we can use addw or subw. */ |
| if (TARGET_THUMB2 |
| && ((i & 0xfffff000) == 0 |
| || ((-i) & 0xfffff000) == 0)) |
| return 1; |
| /* else fall through. */ |
| |
| case COMPARE: |
| case EQ: |
| case NE: |
| case GT: |
| case LE: |
| case LT: |
| case GE: |
| case GEU: |
| case LTU: |
| case GTU: |
| case LEU: |
| case UNORDERED: |
| case ORDERED: |
| case UNEQ: |
| case UNGE: |
| case UNLT: |
| case UNGT: |
| case UNLE: |
| return const_ok_for_arm (ARM_SIGN_EXTEND (-i)); |
| |
| case MINUS: /* Should only occur with (MINUS I reg) => rsb */ |
| case XOR: |
| return 0; |
| |
| case IOR: |
| if (TARGET_THUMB2) |
| return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); |
| return 0; |
| |
| case AND: |
| return const_ok_for_arm (ARM_SIGN_EXTEND (~i)); |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
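| |
| /* A few illustrative cases for the tests above (a sketch; the actual |
| instruction selection happens in the machine description, not here): |
| |
| SET with 0xffffff00: not a valid immediate, but its complement 0xff |
| is, so a single mvn rd, #0xff suffices. |
| PLUS with -0x100: the negation 0x100 is valid, so sub rd, rn, #0x100. |
| AND with 0xffffff00: the complement 0xff is valid, so bic rd, rn, #0xff. */ |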
| |
| /* Return true if I is a valid di mode constant for the operation CODE. */ |
| int |
| const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code) |
| { |
| HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF; |
| HOST_WIDE_INT lo_val = i & 0xFFFFFFFF; |
| rtx hi = GEN_INT (hi_val); |
| rtx lo = GEN_INT (lo_val); |
| |
| if (TARGET_THUMB1) |
| return 0; |
| |
| switch (code) |
| { |
| case AND: |
| case IOR: |
| case XOR: |
| return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF) |
| && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF); |
| case PLUS: |
| return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode); |
| |
| default: |
| return 0; |
| } |
| } |
| |
| /* Emit a sequence of insns to handle a large constant. |
| CODE is the code of the operation required, it can be any of SET, PLUS, |
| IOR, AND, XOR, MINUS; |
| MODE is the mode in which the operation is being performed; |
| VAL is the integer to operate on; |
| SOURCE is the other operand (a register, or a null-pointer for SET); |
| SUBTARGETS means it is safe to create scratch registers if that will |
| either produce a simpler sequence, or we will want to cse the values. |
| Return value is the number of insns emitted. */ |
| |
| /* ??? Tweak this for thumb2. */ |
| int |
| arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn, |
| HOST_WIDE_INT val, rtx target, rtx source, int subtargets) |
| { |
| rtx cond; |
| |
| if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC) |
| cond = COND_EXEC_TEST (PATTERN (insn)); |
| else |
| cond = NULL_RTX; |
| |
| if (subtargets || code == SET |
| || (REG_P (target) && REG_P (source) |
| && REGNO (target) != REGNO (source))) |
| { |
| /* After arm_reorg has been called, we can't fix up expensive |
| constants by pushing them into memory so we must synthesize |
| them in-line, regardless of the cost. This is only likely to |
| be more costly on chips that have load delay slots and we are |
| compiling without running the scheduler (so no splitting |
| occurred before the final instruction emission). |
| |
| Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c |
| */ |
| if (!cfun->machine->after_arm_reorg |
| && !cond |
| && (arm_gen_constant (code, mode, NULL_RTX, val, target, source, |
| 1, 0) |
| > (arm_constant_limit (optimize_function_for_size_p (cfun)) |
| + (code != SET)))) |
| { |
| if (code == SET) |
| { |
| /* Currently SET is the only monadic value for CODE; all |
| the rest are dyadic. */ |
| if (TARGET_USE_MOVT) |
| arm_emit_movpair (target, GEN_INT (val)); |
| else |
| emit_set_insn (target, GEN_INT (val)); |
| |
| return 1; |
| } |
| else |
| { |
| rtx temp = subtargets ? gen_reg_rtx (mode) : target; |
| |
| if (TARGET_USE_MOVT) |
| arm_emit_movpair (temp, GEN_INT (val)); |
| else |
| emit_set_insn (temp, GEN_INT (val)); |
| |
| /* For MINUS, the value is subtracted from, since we never |
| have subtraction of a constant. */ |
| if (code == MINUS) |
| emit_set_insn (target, gen_rtx_MINUS (mode, temp, source)); |
| else |
| emit_set_insn (target, |
| gen_rtx_fmt_ee (code, mode, source, temp)); |
| return 2; |
| } |
| } |
| } |
| |
| return arm_gen_constant (code, mode, cond, val, target, source, subtargets, |
| 1); |
| } |
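| |
| /* A worked example of the TARGET_USE_MOVT path above (a sketch; rd is an |
| arbitrary register): splitting a SET of 0x12345678 with arm_emit_movpair |
| is expected to give the usual two-instruction pair |
| |
| movw rd, #0x5678 @ low halfword |
| movt rd, #0x1234 @ high halfword |
| |
| whereas without MOVT the value is synthesized by arm_gen_constant or |
| loaded from the literal pool. */ |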
| |
| /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into |
| ARM/THUMB2 immediates, and add up to VAL. |
| The function return value gives the number of insns required. */ |
| static int |
| optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, |
| struct four_ints *return_sequence) |
| { |
| int best_consecutive_zeros = 0; |
| int i; |
| int best_start = 0; |
| int insns1, insns2; |
| struct four_ints tmp_sequence; |
| |
| /* If we aren't targeting ARM, the best place to start is always at |
| the bottom, otherwise look more closely. */ |
| if (TARGET_ARM) |
| { |
| for (i = 0; i < 32; i += 2) |
| { |
| int consecutive_zeros = 0; |
| |
| if (!(val & (3 << i))) |
| { |
| while ((i < 32) && !(val & (3 << i))) |
| { |
| consecutive_zeros += 2; |
| i += 2; |
| } |
| if (consecutive_zeros > best_consecutive_zeros) |
| { |
| best_consecutive_zeros = consecutive_zeros; |
| best_start = i - consecutive_zeros; |
| } |
| i -= 2; |
| } |
| } |
| } |
| |
| /* So long as it won't require any more insns to do so, it's |
| desirable to emit a small constant (in bits 0...9) in the last |
| insn. This way there is more chance that it can be combined with |
| a later addressing insn to form a pre-indexed load or store |
| operation. Consider: |
| |
| *((volatile int *)0xe0000100) = 1; |
| *((volatile int *)0xe0000110) = 2; |
| |
| We want this to wind up as: |
| |
| mov rA, #0xe0000000 |
| mov rB, #1 |
| str rB, [rA, #0x100] |
| mov rB, #2 |
| str rB, [rA, #0x110] |
| |
| rather than having to synthesize both large constants from scratch. |
| |
| Therefore, we calculate how many insns would be required to emit |
| the constant starting from `best_start', and also starting from |
| zero (i.e. with bit 31 first to be output). If `best_start' doesn't |
| yield a shorter sequence, we may as well use zero. */ |
| insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start); |
| if (best_start != 0 |
| && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val)) |
| { |
| insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0); |
| if (insns2 <= insns1) |
| { |
| *return_sequence = tmp_sequence; |
| insns1 = insns2; |
| } |
| } |
| |
| return insns1; |
| } |
| |
| /* As for optimal_immediate_sequence, but starting at bit-position I. */ |
| static int |
| optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val, |
| struct four_ints *return_sequence, int i) |
| { |
| int remainder = val & 0xffffffff; |
| int insns = 0; |
| |
| /* Try and find a way of doing the job in either two or three |
| instructions. |
| |
| In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned |
| location. We start at position I. This may be the MSB, or |
| optimal_immediate_sequence may have positioned it at the largest block |
| of zeros that are aligned on a 2-bit boundary. We then fill up the temps, |
| wrapping around to the top of the word when we drop off the bottom. |
| In the worst case this code should produce no more than four insns. |
| |
| In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit |
| constants, shifted to any arbitrary location. We should always start |
| at the MSB. */ |
| do |
| { |
| int end; |
| unsigned int b1, b2, b3, b4; |
| unsigned HOST_WIDE_INT result; |
| int loc; |
| |
| gcc_assert (insns < 4); |
| |
| if (i <= 0) |
| i += 32; |
| |
| /* First, find the next normal 12/8-bit shifted/rotated immediate. */ |
| if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1))))) |
| { |
| loc = i; |
| if (i <= 12 && TARGET_THUMB2 && code == PLUS) |
| /* We can use addw/subw for the last 12 bits. */ |
| result = remainder; |
| else |
| { |
| /* Use an 8-bit shifted/rotated immediate. */ |
| end = i - 8; |
| if (end < 0) |
| end += 32; |
| result = remainder & ((0x0ff << end) |
| | ((i < end) ? (0xff >> (32 - end)) |
| : 0)); |
| i -= 8; |
| } |
| } |
| else |
| { |
| /* Arm allows rotates by a multiple of two. Thumb-2 allows |
| arbitrary shifts. */ |
| i -= TARGET_ARM ? 2 : 1; |
| continue; |
| } |
| |
| /* Next, see if we can do a better job with a thumb2 replicated |
| constant. |
| |
| We do it this way around to catch the cases like 0x01F001E0 where |
| two 8-bit immediates would work, but a replicated constant would |
| make it worse. |
| |
| TODO: 16-bit constants that don't clear all the bits, but still win. |
| TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */ |
| if (TARGET_THUMB2) |
| { |
| b1 = (remainder & 0xff000000) >> 24; |
| b2 = (remainder & 0x00ff0000) >> 16; |
| b3 = (remainder & 0x0000ff00) >> 8; |
| b4 = remainder & 0xff; |
| |
| if (loc > 24) |
| { |
| /* The 8-bit immediate already found clears b1 (and maybe b2), |
| but must leave b3 and b4 alone. */ |
| |
| /* First try to find a 32-bit replicated constant that clears |
| almost everything. We can assume that we can't do it in one, |
| or else we wouldn't be here. */ |
| unsigned int tmp = b1 & b2 & b3 & b4; |
| unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16) |
| + (tmp << 24); |
| unsigned int matching_bytes = (tmp == b1) + (tmp == b2) |
| + (tmp == b3) + (tmp == b4); |
| if (tmp |
| && (matching_bytes >= 3 |
| || (matching_bytes == 2 |
| && const_ok_for_op (remainder & ~tmp2, code)))) |
| { |
| /* At least 3 of the bytes match, and the fourth has at |
| least as many bits set, or two of the bytes match |
| and it will only require one more insn to finish. */ |
| result = tmp2; |
| i = tmp != b1 ? 32 |
| : tmp != b2 ? 24 |
| : tmp != b3 ? 16 |
| : 8; |
| } |
| |
| /* Second, try to find a 16-bit replicated constant that can |
| leave three of the bytes clear. If b2 or b4 is already |
| zero, then we can. If the 8-bit from above would not |
| clear b2 anyway, then we still win. */ |
| else if (b1 == b3 && (!b2 || !b4 |
| || (remainder & 0x00ff0000 & ~result))) |
| { |
| result = remainder & 0xff00ff00; |
| i = 24; |
| } |
| } |
| else if (loc > 16) |
| { |
| /* The 8-bit immediate already found clears b2 (and maybe b3) |
| and we don't get here unless b1 is already clear, but it will |
| leave b4 unchanged. */ |
| |
| /* If we can clear b2 and b4 at once, then we win, since the |
| 8-bits couldn't possibly reach that far. */ |
| if (b2 == b4) |
| { |
| result = remainder & 0x00ff00ff; |
| i = 16; |
| } |
| } |
| } |
| |
| return_sequence->i[insns++] = result; |
| remainder &= ~result; |
| |
| if (code == SET || code == MINUS) |
| code = PLUS; |
| } |
| while (remainder); |
| |
| return insns; |
| } |
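| |
| /* A worked example (illustrative only): for code == SET and |
| val == 0x12340000 in ARM mode, the loop above finds the two 8-bit |
| rotated immediates 0x12000000 and 0x00340000, so the value can be |
| built as |
| |
| mov rd, #0x12000000 |
| add rd, rd, #0x340000 |
| |
| (after the first insn CODE is switched from SET to PLUS, as above). */ |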
| |
| /* Emit an instruction with the indicated PATTERN. If COND is |
| non-NULL, conditionalize the execution of the instruction on COND |
| being true. */ |
| |
| static void |
| emit_constant_insn (rtx cond, rtx pattern) |
| { |
| if (cond) |
| pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern); |
| emit_insn (pattern); |
| } |
| |
| /* As above, but extra parameter GENERATE which, if clear, suppresses |
| RTL generation. */ |
| |
| static int |
| arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond, |
| HOST_WIDE_INT val, rtx target, rtx source, int subtargets, |
| int generate) |
| { |
| int can_invert = 0; |
| int can_negate = 0; |
| int final_invert = 0; |
| int i; |
| int set_sign_bit_copies = 0; |
| int clear_sign_bit_copies = 0; |
| int clear_zero_bit_copies = 0; |
| int set_zero_bit_copies = 0; |
| int insns = 0, neg_insns, inv_insns; |
| unsigned HOST_WIDE_INT temp1, temp2; |
| unsigned HOST_WIDE_INT remainder = val & 0xffffffff; |
| struct four_ints *immediates; |
| struct four_ints pos_immediates, neg_immediates, inv_immediates; |
| |
| /* Find out which operations are safe for a given CODE. Also do a quick |
| check for degenerate cases; these can occur when DImode operations |
| are split. */ |
| switch (code) |
| { |
| case SET: |
| can_invert = 1; |
| break; |
| |
| case PLUS: |
| can_negate = 1; |
| break; |
| |
| case IOR: |
| if (remainder == 0xffffffff) |
| { |
| if (generate) |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, |
| GEN_INT (ARM_SIGN_EXTEND (val)))); |
| return 1; |
| } |
| |
| if (remainder == 0) |
| { |
| if (reload_completed && rtx_equal_p (target, source)) |
| return 0; |
| |
| if (generate) |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, source)); |
| return 1; |
| } |
| break; |
| |
| case AND: |
| if (remainder == 0) |
| { |
| if (generate) |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, const0_rtx)); |
| return 1; |
| } |
| if (remainder == 0xffffffff) |
| { |
| if (reload_completed && rtx_equal_p (target, source)) |
| return 0; |
| if (generate) |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, source)); |
| return 1; |
| } |
| can_invert = 1; |
| break; |
| |
| case XOR: |
| if (remainder == 0) |
| { |
| if (reload_completed && rtx_equal_p (target, source)) |
| return 0; |
| if (generate) |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, source)); |
| return 1; |
| } |
| |
| if (remainder == 0xffffffff) |
| { |
| if (generate) |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, |
| gen_rtx_NOT (mode, source))); |
| return 1; |
| } |
| final_invert = 1; |
| break; |
| |
| case MINUS: |
| /* We treat MINUS as (val - source), since (source - val) is always |
| passed as (source + (-val)). */ |
| if (remainder == 0) |
| { |
| if (generate) |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, |
| gen_rtx_NEG (mode, source))); |
| return 1; |
| } |
| if (const_ok_for_arm (val)) |
| { |
| if (generate) |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, |
| gen_rtx_MINUS (mode, GEN_INT (val), |
| source))); |
| return 1; |
| } |
| |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* If we can do it in one insn get out quickly. */ |
| if (const_ok_for_op (val, code)) |
| { |
| if (generate) |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, |
| (source |
| ? gen_rtx_fmt_ee (code, mode, source, |
| GEN_INT (val)) |
| : GEN_INT (val)))); |
| return 1; |
| } |
| |
| /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single |
| insn. */ |
| if (code == AND && (i = exact_log2 (remainder + 1)) > 0 |
| && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode))) |
| { |
| if (generate) |
| { |
| if (mode == SImode && i == 16) |
| /* Use UXTH in preference to UBFX, since on Thumb2 it's a |
| smaller insn. */ |
| emit_constant_insn (cond, |
| gen_zero_extendhisi2 |
| (target, gen_lowpart (HImode, source))); |
| else |
| /* Extz only supports SImode, but we can coerce the operands |
| into that mode. */ |
| emit_constant_insn (cond, |
| gen_extzv_t2 (gen_lowpart (SImode, target), |
| gen_lowpart (SImode, source), |
| GEN_INT (i), const0_rtx)); |
| } |
| |
| return 1; |
| } |
| |
| /* Calculate a few attributes that may be useful for specific |
| optimizations. */ |
| /* Count number of leading zeros. */ |
| for (i = 31; i >= 0; i--) |
| { |
| if ((remainder & (1 << i)) == 0) |
| clear_sign_bit_copies++; |
| else |
| break; |
| } |
| |
| /* Count number of leading 1's. */ |
| for (i = 31; i >= 0; i--) |
| { |
| if ((remainder & (1 << i)) != 0) |
| set_sign_bit_copies++; |
| else |
| break; |
| } |
| |
| /* Count number of trailing zero's. */ |
| for (i = 0; i <= 31; i++) |
| { |
| if ((remainder & (1 << i)) == 0) |
| clear_zero_bit_copies++; |
| else |
| break; |
| } |
| |
| /* Count number of trailing 1's. */ |
| for (i = 0; i <= 31; i++) |
| { |
| if ((remainder & (1 << i)) != 0) |
| set_zero_bit_copies++; |
| else |
| break; |
| } |
| |
| switch (code) |
| { |
| case SET: |
| /* See if we can do this by sign_extending a constant that is known |
| to be negative. This is a good way of doing it, since the shift |
| may well merge into a subsequent insn. */ |
| if (set_sign_bit_copies > 1) |
| { |
| if (const_ok_for_arm |
| (temp1 = ARM_SIGN_EXTEND (remainder |
| << (set_sign_bit_copies - 1)))) |
| { |
| if (generate) |
| { |
| rtx new_src = subtargets ? gen_reg_rtx (mode) : target; |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, new_src, |
| GEN_INT (temp1))); |
| emit_constant_insn (cond, |
| gen_ashrsi3 (target, new_src, |
| GEN_INT (set_sign_bit_copies - 1))); |
| } |
| return 2; |
| } |
| /* For an inverted constant, we will need to set the low bits, |
| these will be shifted out of harm's way. */ |
| temp1 |= (1 << (set_sign_bit_copies - 1)) - 1; |
| if (const_ok_for_arm (~temp1)) |
| { |
| if (generate) |
| { |
| rtx new_src = subtargets ? gen_reg_rtx (mode) : target; |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, new_src, |
| GEN_INT (temp1))); |
| emit_constant_insn (cond, |
| gen_ashrsi3 (target, new_src, |
| GEN_INT (set_sign_bit_copies - 1))); |
| } |
| return 2; |
| } |
| } |
| |
| /* See if we can calculate the value as the difference between two |
| valid immediates. */ |
| if (clear_sign_bit_copies + clear_zero_bit_copies <= 16) |
| { |
| int topshift = clear_sign_bit_copies & ~1; |
| |
| temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift)) |
| & (0xff000000 >> topshift)); |
| |
| /* If temp1 is zero, then that means the 9 most significant |
| bits of remainder were 1 and we've caused it to overflow. |
| When topshift is 0 we don't need to do anything since we |
| can borrow from 'bit 32'. */ |
| if (temp1 == 0 && topshift != 0) |
| temp1 = 0x80000000 >> (topshift - 1); |
| |
| temp2 = ARM_SIGN_EXTEND (temp1 - remainder); |
| |
| if (const_ok_for_arm (temp2)) |
| { |
| if (generate) |
| { |
| rtx new_src = subtargets ? gen_reg_rtx (mode) : target; |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, new_src, |
| GEN_INT (temp1))); |
| emit_constant_insn (cond, |
| gen_addsi3 (target, new_src, |
| GEN_INT (-temp2))); |
| } |
| |
| return 2; |
| } |
| } |
| |
| /* See if we can generate this by setting the bottom (or the top) |
| 16 bits, and then shifting these into the other half of the |
| word. We only look for the simplest cases, to do more would cost |
| too much. Be careful, however, not to generate this when the |
| alternative would take fewer insns. */ |
| if (val & 0xffff0000) |
| { |
| temp1 = remainder & 0xffff0000; |
| temp2 = remainder & 0x0000ffff; |
| |
| /* Overlaps outside this range are best done using other methods. */ |
| for (i = 9; i < 24; i++) |
| { |
| if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder) |
| && !const_ok_for_arm (temp2)) |
| { |
| rtx new_src = (subtargets |
| ? (generate ? gen_reg_rtx (mode) : NULL_RTX) |
| : target); |
| insns = arm_gen_constant (code, mode, cond, temp2, new_src, |
| source, subtargets, generate); |
| source = new_src; |
| if (generate) |
| emit_constant_insn |
| (cond, |
| gen_rtx_SET |
| (VOIDmode, target, |
| gen_rtx_IOR (mode, |
| gen_rtx_ASHIFT (mode, source, |
| GEN_INT (i)), |
| source))); |
| return insns + 1; |
| } |
| } |
| |
| /* Don't duplicate cases already considered. */ |
| for (i = 17; i < 24; i++) |
| { |
| if (((temp1 | (temp1 >> i)) == remainder) |
| && !const_ok_for_arm (temp1)) |
| { |
| rtx new_src = (subtargets |
| ? (generate ? gen_reg_rtx (mode) : NULL_RTX) |
| : target); |
| insns = arm_gen_constant (code, mode, cond, temp1, new_src, |
| source, subtargets, generate); |
| source = new_src; |
| if (generate) |
| emit_constant_insn |
| (cond, |
| gen_rtx_SET (VOIDmode, target, |
| gen_rtx_IOR |
| (mode, |
| gen_rtx_LSHIFTRT (mode, source, |
| GEN_INT (i)), |
| source))); |
| return insns + 1; |
| } |
| } |
| } |
| break; |
| |
| case IOR: |
| case XOR: |
| /* If we have IOR or XOR, and the constant can be loaded in a |
| single instruction, and we can find a temporary to put it in, |
| then this can be done in two instructions instead of 3-4. */ |
| if (subtargets |
| /* TARGET can't be NULL if SUBTARGETS is 0 */ |
| || (reload_completed && !reg_mentioned_p (target, source))) |
| { |
| if (const_ok_for_arm (ARM_SIGN_EXTEND (~val))) |
| { |
| if (generate) |
| { |
| rtx sub = subtargets ? gen_reg_rtx (mode) : target; |
| |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, sub, |
| GEN_INT (val))); |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, |
| gen_rtx_fmt_ee (code, mode, |
| source, sub))); |
| } |
| return 2; |
| } |
| } |
| |
| if (code == XOR) |
| break; |
| |
| /* Convert |
| x = y | constant (which is composed of set_sign_bit_copies leading 1s |
| followed by 0s, e.g. 0xfff00000) |
| to |
| x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies). |
| |
| This can be done in 2 instructions by using shifts with mov or mvn. |
| e.g. for |
| x = x | 0xfff00000; |
| we generate: |
| mvn r0, r0, asl #12 |
| mvn r0, r0, lsr #12 */ |
| if (set_sign_bit_copies > 8 |
| && (val & (-1 << (32 - set_sign_bit_copies))) == val) |
| { |
| if (generate) |
| { |
| rtx sub = subtargets ? gen_reg_rtx (mode) : target; |
| rtx shift = GEN_INT (set_sign_bit_copies); |
| |
| emit_constant_insn |
| (cond, |
| gen_rtx_SET (VOIDmode, sub, |
| gen_rtx_NOT (mode, |
| gen_rtx_ASHIFT (mode, |
| source, |
| shift)))); |
| emit_constant_insn |
| (cond, |
| gen_rtx_SET (VOIDmode, target, |
| gen_rtx_NOT (mode, |
| gen_rtx_LSHIFTRT (mode, sub, |
| shift)))); |
| } |
| return 2; |
| } |
| |
| /* Convert |
| x = y | constant (which has set_zero_bit_copies number of trailing ones). |
| to |
| x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies). |
| |
| e.g. for r0 = r0 | 0xfff we generate: |
| mvn r0, r0, lsr #12 |
| mvn r0, r0, asl #12 |
| |
| */ |
| if (set_zero_bit_copies > 8 |
| && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder) |
| { |
| if (generate) |
| { |
| rtx sub = subtargets ? gen_reg_rtx (mode) : target; |
| rtx shift = GEN_INT (set_zero_bit_copies); |
| |
| emit_constant_insn |
| (cond, |
| gen_rtx_SET (VOIDmode, sub, |
| gen_rtx_NOT (mode, |
| gen_rtx_LSHIFTRT (mode, |
| source, |
| shift)))); |
| emit_constant_insn |
| (cond, |
| gen_rtx_SET (VOIDmode, target, |
| gen_rtx_NOT (mode, |
| gen_rtx_ASHIFT (mode, sub, |
| shift)))); |
| } |
| return 2; |
| } |
| |
| /* This will never be reached for Thumb2 because orn is a valid |
| instruction. This is for Thumb1 and the ARM 32 bit cases. |
| |
| x = y | constant (such that ~constant is a valid constant) |
| Transform this to |
| x = ~(~y & ~constant). |
| */ |
| if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val))) |
| { |
| if (generate) |
| { |
| rtx sub = subtargets ? gen_reg_rtx (mode) : target; |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, sub, |
| gen_rtx_NOT (mode, source))); |
| source = sub; |
| if (subtargets) |
| sub = gen_reg_rtx (mode); |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, sub, |
| gen_rtx_AND (mode, source, |
| GEN_INT (temp1)))); |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, target, |
| gen_rtx_NOT (mode, sub))); |
| } |
| return 3; |
| } |
| break; |
| |
| case AND: |
| /* See if two shifts will do 2 or more insns' worth of work. */ |
| if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24) |
| { |
| HOST_WIDE_INT shift_mask = ((0xffffffff |
| << (32 - clear_sign_bit_copies)) |
| & 0xffffffff); |
| |
| if ((remainder | shift_mask) != 0xffffffff) |
| { |
| HOST_WIDE_INT new_val |
| = ARM_SIGN_EXTEND (remainder | shift_mask); |
| |
| if (generate) |
| { |
| rtx new_src = subtargets ? gen_reg_rtx (mode) : target; |
| insns = arm_gen_constant (AND, SImode, cond, new_val, |
| new_src, source, subtargets, 1); |
| source = new_src; |
| } |
| else |
| { |
| rtx targ = subtargets ? NULL_RTX : target; |
| insns = arm_gen_constant (AND, mode, cond, new_val, |
| targ, source, subtargets, 0); |
| } |
| } |
| |
| if (generate) |
| { |
| rtx new_src = subtargets ? gen_reg_rtx (mode) : target; |
| rtx shift = GEN_INT (clear_sign_bit_copies); |
| |
| emit_insn (gen_ashlsi3 (new_src, source, shift)); |
| emit_insn (gen_lshrsi3 (target, new_src, shift)); |
| } |
| |
| return insns + 2; |
| } |
| |
| if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24) |
| { |
| HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1; |
| |
| if ((remainder | shift_mask) != 0xffffffff) |
| { |
| HOST_WIDE_INT new_val |
| = ARM_SIGN_EXTEND (remainder | shift_mask); |
| if (generate) |
| { |
| rtx new_src = subtargets ? gen_reg_rtx (mode) : target; |
| |
| insns = arm_gen_constant (AND, mode, cond, new_val, |
| new_src, source, subtargets, 1); |
| source = new_src; |
| } |
| else |
| { |
| rtx targ = subtargets ? NULL_RTX : target; |
| |
| insns = arm_gen_constant (AND, mode, cond, new_val, |
| targ, source, subtargets, 0); |
| } |
| } |
| |
| if (generate) |
| { |
| rtx new_src = subtargets ? gen_reg_rtx (mode) : target; |
| rtx shift = GEN_INT (clear_zero_bit_copies); |
| |
| emit_insn (gen_lshrsi3 (new_src, source, shift)); |
| emit_insn (gen_ashlsi3 (target, new_src, shift)); |
| } |
| |
| return insns + 2; |
| } |
| |
| break; |
| |
| default: |
| break; |
| } |
| |
| /* Calculate what the instruction sequences would be if we generated it |
| normally, negated, or inverted. */ |
| if (code == AND) |
| /* AND cannot be split into multiple insns, so invert and use BIC. */ |
| insns = 99; |
| else |
| insns = optimal_immediate_sequence (code, remainder, &pos_immediates); |
| |
| if (can_negate) |
| neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff, |
| &neg_immediates); |
| else |
| neg_insns = 99; |
| |
| if (can_invert || final_invert) |
| inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff, |
| &inv_immediates); |
| else |
| inv_insns = 99; |
| |
| immediates = &pos_immediates; |
| |
| /* Is the negated immediate sequence more efficient? */ |
| if (neg_insns < insns && neg_insns <= inv_insns) |
| { |
| insns = neg_insns; |
| immediates = &neg_immediates; |
| } |
| else |
| can_negate = 0; |
| |
| /* Is the inverted immediate sequence more efficient? |
| We must allow for an extra NOT instruction for XOR operations, although |
| there is some chance that the final 'mvn' will get optimized later. */ |
| if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns)) |
| { |
| insns = inv_insns; |
| immediates = &inv_immediates; |
| } |
| else |
| { |
| can_invert = 0; |
| final_invert = 0; |
| } |
| |
| /* Now output the chosen sequence as instructions. */ |
| if (generate) |
| { |
| for (i = 0; i < insns; i++) |
| { |
| rtx new_src, temp1_rtx; |
| |
| temp1 = immediates->i[i]; |
| |
| if (code == SET || code == MINUS) |
| new_src = (subtargets ? gen_reg_rtx (mode) : target); |
| else if ((final_invert || i < (insns - 1)) && subtargets) |
| new_src = gen_reg_rtx (mode); |
| else |
| new_src = target; |
| |
| if (can_invert) |
| temp1 = ~temp1; |
| else if (can_negate) |
| temp1 = -temp1; |
| |
| temp1 = trunc_int_for_mode (temp1, mode); |
| temp1_rtx = GEN_INT (temp1); |
| |
| if (code == SET) |
| ; |
| else if (code == MINUS) |
| temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source); |
| else |
| temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx); |
| |
| emit_constant_insn (cond, |
| gen_rtx_SET (VOIDmode, new_src, |
| temp1_rtx)); |
| source = new_src; |
| |
| if (code == SET) |
| { |
| can_negate = can_invert; |
| can_invert = 0; |
| code = PLUS; |
| } |
| else if (code == MINUS) |
| code = PLUS; |
| } |
| } |
| |
| if (final_invert) |
| { |
| if (generate) |
| emit_constant_insn (cond, gen_rtx_SET (VOIDmode, target, |
| gen_rtx_NOT (mode, source))); |
| insns++; |
| } |
| |
| return insns; |
| } |
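| |
| /* A worked example (illustrative, ARM mode without MOVT): a SET of |
| 0x00fffff0 has no single-instruction form, but the "difference of two |
| valid immediates" case above notices 0x00fffff0 == 0x01000000 - 0x10 |
| and emits something like |
| |
| mov rd, #0x1000000 |
| sub rd, rd, #16 |
| |
| More generally the tail of the function compares the plain, negated and |
| inverted immediate sequences and picks whichever needs fewest insns. */ |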
| |
| /* Canonicalize a comparison so that we are more likely to recognize it. |
| This can be done for a few constant compares, where we can make the |
| immediate value easier to load. */ |
| |
| static void |
| arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, |
| bool op0_preserve_value) |
| { |
| machine_mode mode; |
| unsigned HOST_WIDE_INT i, maxval; |
| |
| mode = GET_MODE (*op0); |
| if (mode == VOIDmode) |
| mode = GET_MODE (*op1); |
| |
| maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1; |
| |
| /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode |
| we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either |
| reversed or (for constant OP1) adjusted to GE/LT. Similarly |
| for GTU/LEU in Thumb mode. */ |
| if (mode == DImode) |
| { |
| |
| if (*code == GT || *code == LE |
| || (!TARGET_ARM && (*code == GTU || *code == LEU))) |
| { |
| /* Missing comparison. First try to use an available |
| comparison. */ |
| if (CONST_INT_P (*op1)) |
| { |
| i = INTVAL (*op1); |
| switch (*code) |
| { |
| case GT: |
| case LE: |
| if (i != maxval |
| && arm_const_double_by_immediates (GEN_INT (i + 1))) |
| { |
| *op1 = GEN_INT (i + 1); |
| *code = *code == GT ? GE : LT; |
| return; |
| } |
| break; |
| case GTU: |
| case LEU: |
| if (i != ~((unsigned HOST_WIDE_INT) 0) |
| && arm_const_double_by_immediates (GEN_INT (i + 1))) |
| { |
| *op1 = GEN_INT (i + 1); |
| *code = *code == GTU ? GEU : LTU; |
| return; |
| } |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* If that did not work, reverse the condition. */ |
| if (!op0_preserve_value) |
| { |
| std::swap (*op0, *op1); |
| *code = (int)swap_condition ((enum rtx_code)*code); |
| } |
| } |
| return; |
| } |
| |
| /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing |
| with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)), |
| to facilitate possible combining with a cmp into 'ands'. */ |
| if (mode == SImode |
| && GET_CODE (*op0) == ZERO_EXTEND |
| && GET_CODE (XEXP (*op0, 0)) == SUBREG |
| && GET_MODE (XEXP (*op0, 0)) == QImode |
| && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode |
| && subreg_lowpart_p (XEXP (*op0, 0)) |
| && *op1 == const0_rtx) |
| *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)), |
| GEN_INT (255)); |
| |
| /* Comparisons smaller than DImode. Only adjust comparisons against |
| an out-of-range constant. */ |
| if (!CONST_INT_P (*op1) |
| || const_ok_for_arm (INTVAL (*op1)) |
| || const_ok_for_arm (- INTVAL (*op1))) |
| return; |
| |
| i = INTVAL (*op1); |
| |
| switch (*code) |
| { |
| case EQ: |
| case NE: |
| return; |
| |
| case GT: |
| case LE: |
| if (i != maxval |
| && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) |
| { |
| *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1)); |
| *code = *code == GT ? GE : LT; |
| return; |
| } |
| break; |
| |
| case GE: |
| case LT: |
| if (i != ~maxval |
| && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) |
| { |
| *op1 = GEN_INT (i - 1); |
| *code = *code == GE ? GT : LE; |
| return; |
| } |
| break; |
| |
| case GTU: |
| case LEU: |
| if (i != ~((unsigned HOST_WIDE_INT) 0) |
| && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1)))) |
| { |
| *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1)); |
| *code = *code == GTU ? GEU : LTU; |
| return; |
| } |
| break; |
| |
| case GEU: |
| case LTU: |
| if (i != 0 |
| && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1)))) |
| { |
| *op1 = GEN_INT (i - 1); |
| *code = *code == GEU ? GTU : LEU; |
| return; |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
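| |
| /* A worked example (illustrative): for a signed SImode comparison |
| (GT x 0x00ffffff), neither 0x00ffffff nor its negation is a valid |
| immediate, but 0x01000000 is, so the code above rewrites the test as |
| (GE x 0x01000000) and the comparison can use a cmp with a valid |
| immediate. */ |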
| |
| |
| /* Define how to find the value returned by a function. */ |
| |
| static rtx |
| arm_function_value(const_tree type, const_tree func, |
| bool outgoing ATTRIBUTE_UNUSED) |
| { |
| machine_mode mode; |
| int unsignedp ATTRIBUTE_UNUSED; |
| rtx r ATTRIBUTE_UNUSED; |
| |
| mode = TYPE_MODE (type); |
| |
| if (TARGET_AAPCS_BASED) |
| return aapcs_allocate_return_reg (mode, type, func); |
| |
| /* Promote integer types. */ |
| if (INTEGRAL_TYPE_P (type)) |
| mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1); |
| |
| /* Promote small structs returned in a register to full-word size |
| for big-endian AAPCS. */ |
| if (arm_return_in_msb (type)) |
| { |
| HOST_WIDE_INT size = int_size_in_bytes (type); |
| if (size % UNITS_PER_WORD != 0) |
| { |
| size += UNITS_PER_WORD - size % UNITS_PER_WORD; |
| mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); |
| } |
| } |
| |
| return arm_libcall_value_1 (mode); |
| } |
| |
| /* libcall hashtable helpers. */ |
| |
| struct libcall_hasher : typed_noop_remove <rtx_def> |
| { |
| typedef rtx_def value_type; |
| typedef rtx_def compare_type; |
| static inline hashval_t hash (const value_type *); |
| static inline bool equal (const value_type *, const compare_type *); |
| static inline void remove (value_type *); |
| }; |
| |
| inline bool |
| libcall_hasher::equal (const value_type *p1, const compare_type *p2) |
| { |
| return rtx_equal_p (p1, p2); |
| } |
| |
| inline hashval_t |
| libcall_hasher::hash (const value_type *p1) |
| { |
| return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE); |
| } |
| |
| typedef hash_table<libcall_hasher> libcall_table_type; |
| |
| static void |
| add_libcall (libcall_table_type *htab, rtx libcall) |
| { |
| *htab->find_slot (libcall, INSERT) = libcall; |
| } |
| |
| static bool |
| arm_libcall_uses_aapcs_base (const_rtx libcall) |
| { |
| static bool init_done = false; |
| static libcall_table_type *libcall_htab = NULL; |
| |
| if (!init_done) |
| { |
| init_done = true; |
| |
| libcall_htab = new libcall_table_type (31); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (sfloat_optab, SFmode, SImode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (sfloat_optab, DFmode, SImode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (sfloat_optab, SFmode, DImode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (sfloat_optab, DFmode, DImode)); |
| |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (ufloat_optab, SFmode, SImode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (ufloat_optab, DFmode, SImode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (ufloat_optab, SFmode, DImode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (ufloat_optab, DFmode, DImode)); |
| |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (sext_optab, SFmode, HFmode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (trunc_optab, HFmode, SFmode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (sfix_optab, SImode, DFmode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (ufix_optab, SImode, DFmode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (sfix_optab, DImode, DFmode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (ufix_optab, DImode, DFmode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (sfix_optab, DImode, SFmode)); |
| add_libcall (libcall_htab, |
| convert_optab_libfunc (ufix_optab, DImode, SFmode)); |
| |
| /* Values from double-precision helper functions are returned in core |
| registers if the selected core only supports single-precision |
| arithmetic, even if we are using the hard-float ABI. The same is |
| true for single-precision helpers, but we will never be using the |
| hard-float ABI on a CPU which doesn't support single-precision |
| operations in hardware. */ |
| add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode)); |
| add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode)); |
| add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode)); |
| add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode)); |
| add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode)); |
| add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode)); |
| add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode)); |
| add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode)); |
| add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode)); |
| add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode)); |
| add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode)); |
| add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode, |
| SFmode)); |
| add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode, |
| DFmode)); |
| } |
| |
| return libcall && libcall_htab->find (libcall) != NULL; |
| } |
| |
| static rtx |
| arm_libcall_value_1 (machine_mode mode) |
| { |
| if (TARGET_AAPCS_BASED) |
| return aapcs_libcall_value (mode); |
| else if (TARGET_IWMMXT_ABI |
| && arm_vector_mode_supported_p (mode)) |
| return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM); |
| else |
| return gen_rtx_REG (mode, ARG_REGISTER (1)); |
| } |
| |
| /* Define how to find the value returned by a library function |
| assuming the value has mode MODE. */ |
| |
| static rtx |
| arm_libcall_value (machine_mode mode, const_rtx libcall) |
| { |
| if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS |
| && GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| /* The following libcalls return their result in integer registers, |
| even though they return a floating point value. */ |
| if (arm_libcall_uses_aapcs_base (libcall)) |
| return gen_rtx_REG (mode, ARG_REGISTER(1)); |
| |
| } |
| |
| return arm_libcall_value_1 (mode); |
| } |
| |
| /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */ |
| |
| static bool |
| arm_function_value_regno_p (const unsigned int regno) |
| { |
| if (regno == ARG_REGISTER (1) |
| || (TARGET_32BIT |
| && TARGET_AAPCS_BASED |
| && TARGET_VFP |
| && TARGET_HARD_FLOAT |
| && regno == FIRST_VFP_REGNUM) |
| || (TARGET_IWMMXT_ABI |
| && regno == FIRST_IWMMXT_REGNUM)) |
| return true; |
| |
| return false; |
| } |
| |
| /* Determine the amount of memory needed to store the possible return |
| registers of an untyped call. */ |
| int |
| arm_apply_result_size (void) |
| { |
| int size = 16; |
| |
| if (TARGET_32BIT) |
| { |
| if (TARGET_HARD_FLOAT_ABI && TARGET_VFP) |
| size += 32; |
| if (TARGET_IWMMXT_ABI) |
| size += 8; |
| } |
| |
| return size; |
| } |
| |
| /* Decide whether TYPE should be returned in memory (true) |
| or in a register (false). FNTYPE is the type of the function making |
| the call. */ |
| static bool |
| arm_return_in_memory (const_tree type, const_tree fntype) |
| { |
| HOST_WIDE_INT size; |
| |
| size = int_size_in_bytes (type); /* Negative if not fixed size. */ |
| |
| if (TARGET_AAPCS_BASED) |
| { |
| /* Simple, non-aggregate types (i.e. not including vectors and |
| complex) are always returned in a register (or registers). |
| We don't care about which register here, so we can short-cut |
| some of the detail. */ |
| if (!AGGREGATE_TYPE_P (type) |
| && TREE_CODE (type) != VECTOR_TYPE |
| && TREE_CODE (type) != COMPLEX_TYPE) |
| return false; |
| |
| /* Any return value that is no larger than one word can be |
| returned in r0. */ |
| if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD) |
| return false; |
| |
| /* Check any available co-processors to see if they accept the |
| type as a register candidate (VFP, for example, can return |
| some aggregates in consecutive registers). These aren't |
| available if the call is variadic. */ |
| if (aapcs_select_return_coproc (type, fntype) >= 0) |
| return false; |
| |
| /* Vector values should be returned using ARM registers, not |
| memory (unless they're over 16 bytes, which will break since |
| we only have four call-clobbered registers to play with). */ |
| if (TREE_CODE (type) == VECTOR_TYPE) |
| return (size < 0 || size > (4 * UNITS_PER_WORD)); |
| |
| /* The rest go in memory. */ |
| return true; |
| } |
| |
| if (TREE_CODE (type) == VECTOR_TYPE) |
| return (size < 0 || size > (4 * UNITS_PER_WORD)); |
| |
| if (!AGGREGATE_TYPE_P (type) && |
| (TREE_CODE (type) != VECTOR_TYPE)) |
| /* All simple types are returned in registers. */ |
| return false; |
| |
| if (arm_abi != ARM_ABI_APCS) |
| { |
| /* ATPCS and later return aggregate types in memory only if they are |
| larger than a word (or are variable size). */ |
| return (size < 0 || size > UNITS_PER_WORD); |
| } |
| |
| /* For the arm-wince targets we choose to be compatible with Microsoft's |
| ARM and Thumb compilers, which always return aggregates in memory. */ |
| #ifndef ARM_WINCE |
| /* All structures/unions bigger than one word are returned in memory. |
| Also catch the case where int_size_in_bytes returns -1. In this case |
| the aggregate is either huge or of variable size, and in either case |
| we will want to return it via memory and not in a register. */ |
| if (size < 0 || size > UNITS_PER_WORD) |
| return true; |
| |
| if (TREE_CODE (type) == RECORD_TYPE) |
| { |
| tree field; |
| |
| /* For a struct the APCS says that we only return in a register |
| if the type is 'integer like' and every addressable element |
| has an offset of zero. For practical purposes this means |
| that the structure can have at most one non bit-field element |
| and that this element must be the first one in the structure. */ |
| |
| /* Find the first field, ignoring non FIELD_DECL things which will |
| have been created by C++. */ |
| for (field = TYPE_FIELDS (type); |
| field && TREE_CODE (field) != FIELD_DECL; |
| field = DECL_CHAIN (field)) |
| continue; |
| |
| if (field == NULL) |
| return false; /* An empty structure. Allowed by an extension to ANSI C. */ |
| |
| /* Check that the first field is valid for returning in a register. */ |
| |
| /* ... Floats are not allowed */ |
| if (FLOAT_TYPE_P (TREE_TYPE (field))) |
| return true; |
| |
| /* ... Aggregates that are not themselves valid for returning in |
| a register are not allowed. */ |
| if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) |
| return true; |
| |
| /* Now check the remaining fields, if any. Only bitfields are allowed, |
| since they are not addressable. */ |
| for (field = DECL_CHAIN (field); |
| field; |
| field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) != FIELD_DECL) |
| continue; |
| |
| if (!DECL_BIT_FIELD_TYPE (field)) |
| return true; |
| } |
| |
| return false; |
| } |
| |
| if (TREE_CODE (type) == UNION_TYPE) |
| { |
| tree field; |
| |
| /* Unions can be returned in registers if every element is |
| integral, or can be returned in an integer register. */ |
| for (field = TYPE_FIELDS (type); |
| field; |
| field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) != FIELD_DECL) |
| continue; |
| |
| if (FLOAT_TYPE_P (TREE_TYPE (field))) |
| return true; |
| |
| if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) |
| return true; |
| } |
| |
| return false; |
| } |
| #endif /* not ARM_WINCE */ |
| |
| /* Return all other types in memory. */ |
| return true; |
| } |
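| |
| /* Illustrative consequences of the rules above (a sketch, assuming default |
| options): under AAPCS, struct { int a; } fits in one word and comes back |
| in r0, while struct { int a, b; } is larger than a word and goes in |
| memory unless a co-processor claims it; with the VFP (hard-float) |
| variant, for example, struct { float x, y; } is returned in VFP |
| registers. Under the old APCS rules, struct { float f; } is returned in |
| memory because its first field is a float. */ |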
| |
| const struct pcs_attribute_arg |
| { |
| const char *arg; |
| enum arm_pcs value; |
| } pcs_attribute_args[] = |
| { |
| {"aapcs", ARM_PCS_AAPCS}, |
| {"aapcs-vfp", ARM_PCS_AAPCS_VFP}, |
| #if 0 |
| /* We could recognize these, but changes would be needed elsewhere |
| * to implement them. */ |
| {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT}, |
| {"atpcs", ARM_PCS_ATPCS}, |
| {"apcs", ARM_PCS_APCS}, |
| #endif |
| {NULL, ARM_PCS_UNKNOWN} |
| }; |
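| |
| /* For reference, the attribute parsed below is applied to a function type, |
| e.g. (an illustrative declaration, not taken from this file): |
| |
| double f (double) __attribute__ ((pcs ("aapcs"))); |
| |
| which requests the base variant even when the default is aapcs-vfp. */ |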
| |
| static enum arm_pcs |
| arm_pcs_from_attribute (tree attr) |
| { |
| const struct pcs_attribute_arg *ptr; |
| const char *arg; |
| |
| /* Get the value of the argument. */ |
| if (TREE_VALUE (attr) == NULL_TREE |
| || TREE_CODE (TREE_VALUE (attr)) != STRING_CST) |
| return ARM_PCS_UNKNOWN; |
| |
| arg = TREE_STRING_POINTER (TREE_VALUE (attr)); |
| |
| /* Check it against the list of known arguments. */ |
| for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++) |
| if (streq (arg, ptr->arg)) |
| return ptr->value; |
| |
| /* An unrecognized PCS name. */ |
| return ARM_PCS_UNKNOWN; |
| } |
| |
| /* Get the PCS variant to use for this call. TYPE is the function's type |
| specification, DECL is the specific declaration. DECL may be null if |
| the call could be indirect or if this is a library call. */ |
| static enum arm_pcs |
| arm_get_pcs_model (const_tree type, const_tree decl) |
| { |
| bool user_convention = false; |
| enum arm_pcs user_pcs = arm_pcs_default; |
| tree attr; |
| |
| gcc_assert (type); |
| |
| attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type)); |
| if (attr) |
| { |
| user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr)); |
| user_convention = true; |
| } |
| |
| if (TARGET_AAPCS_BASED) |
| { |
| /* Detect varargs functions. These always use the base rules |
| (no argument is ever a candidate for a co-processor |
| register). */ |
| bool base_rules = stdarg_p (type); |
| |
| if (user_convention) |
| { |
| if (user_pcs > ARM_PCS_AAPCS_LOCAL) |
| sorry ("non-AAPCS derived PCS variant"); |
| else if (base_rules && user_pcs != ARM_PCS_AAPCS) |
| error ("variadic functions must use the base AAPCS variant"); |
| } |
| |
| if (base_rules) |
| return ARM_PCS_AAPCS; |
| else if (user_convention) |
| return user_pcs; |
| else if (decl && flag_unit_at_a_time) |
| { |
| /* Local functions never leak outside this compilation unit, |
| so we are free to use whatever conventions are |
| appropriate. */ |
| /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */ |
| cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl)); |
| if (i && i->local) |
| return ARM_PCS_AAPCS_LOCAL; |
| } |
| } |
| else if (user_convention && user_pcs != arm_pcs_default) |
| sorry ("PCS variant"); |
| |
| /* For everything else we use the target's default. */ |
| return arm_pcs_default; |
| } |
| |
| |
| static void |
| aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, |
| const_tree fntype ATTRIBUTE_UNUSED, |
| rtx libcall ATTRIBUTE_UNUSED, |
| const_tree fndecl ATTRIBUTE_UNUSED) |
| { |
| /* Record the unallocated VFP registers. */ |
| pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1; |
| pcum->aapcs_vfp_reg_alloc = 0; |
| } |
| |
| /* Walk down the type tree of TYPE counting consecutive base elements. |
| If *MODEP is VOIDmode, then set it to the first valid floating point |
| type. If a non-floating point type is found, or if a floating point |
| type that doesn't match a non-VOIDmode *MODEP is found, then return -1, |
| otherwise return the count in the sub-tree. */ |
| static int |
| aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep) |
| { |
| machine_mode mode; |
| HOST_WIDE_INT size; |
| |
| switch (TREE_CODE (type)) |
| { |
| case REAL_TYPE: |
| mode = TYPE_MODE (type); |
| if (mode != DFmode && mode != SFmode) |
| return -1; |
| |
| if (*modep == VOIDmode) |
| *modep = mode; |
| |
| if (*modep == mode) |
| return 1; |
| |
| break; |
| |
| case COMPLEX_TYPE: |
| mode = TYPE_MODE (TREE_TYPE (type)); |
| if (mode != DFmode && mode != SFmode) |
| return -1; |
| |
| if (*modep == VOIDmode) |
| *modep = mode; |
| |
| if (*modep == mode) |
| return 2; |
| |
| break; |
| |
| case VECTOR_TYPE: |
| /* Use V2SImode and V4SImode as representatives of all 64-bit |
| and 128-bit vector types, whether or not those modes are |
| supported with the present options. */ |
| size = int_size_in_bytes (type); |
| switch (size) |
| { |
| case 8: |
| mode = V2SImode; |
| break; |
| case 16: |
| mode = V4SImode; |
| break; |
| default: |
| return -1; |
| } |
| |
| if (*modep == VOIDmode) |
| *modep = mode; |
| |
| /* Vector modes are considered to be opaque: two vectors are |
| equivalent for the purposes of being homogeneous aggregates |
| if they are the same size. */ |
| if (*modep == mode) |
| return 1; |
| |
| break; |
| |
| case ARRAY_TYPE: |
| { |
| int count; |
| tree index = TYPE_DOMAIN (type); |
| |
| /* Can't handle incomplete types nor sizes that are not |
| fixed. */ |
| if (!COMPLETE_TYPE_P (type) |
| || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) |
| return -1; |
| |
| count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep); |
| if (count == -1 |
| || !index |
| || !TYPE_MAX_VALUE (index) |
| || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) |
| || !TYPE_MIN_VALUE (index) |
| || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) |
| || count < 0) |
| return -1; |
| |
| count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) |
| - tree_to_uhwi (TYPE_MIN_VALUE (index))); |
| |
| /* There must be no padding. */ |
| if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) |
| return -1; |
| |
| return count; |
| } |
| |
| case RECORD_TYPE: |
| { |
| int count = 0; |
| int sub_count; |
| tree field; |
| |
| /* Can't handle incomplete types nor sizes that are not |
| fixed. */ |
| if (!COMPLETE_TYPE_P (type) |
| || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) |
| return -1; |
| |
| for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) != FIELD_DECL) |
| continue; |
| |
| sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); |
| if (sub_count < 0) |
| return -1; |
| count += sub_count; |
| } |
| |
| /* There must be no padding. */ |
| if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) |
| return -1; |
| |
| return count; |
| } |
| |
| case UNION_TYPE: |
| case QUAL_UNION_TYPE: |
| { |
| /* These aren't very interesting except in a degenerate case. */ |
| int count = 0; |
| int sub_count; |
| tree field; |
| |
| /* Can't handle incomplete types nor sizes that are not |
| fixed. */ |
| if (!COMPLETE_TYPE_P (type) |
| || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST) |
| return -1; |
| |
| for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) != FIELD_DECL) |
| continue; |
| |
| sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); |
| if (sub_count < 0) |
| return -1; |
| count = count > sub_count ? count : sub_count; |
| } |
| |
| /* There must be no padding. */ |
| if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep))) |
| return -1; |
| |
| return count; |
| } |
| |
| default: |
| break; |
| } |
| |
| return -1; |
| } |
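| |
| /* Editor's worked examples for the classifier above (illustrative |
| only, not compiled): */ |
| #if 0 |
| struct hfa { float a, b, c; }; /* count 3, *modep == SFmode */ |
| struct arr { double d[4]; }; /* count 4, *modep == DFmode */ |
| struct mix { double d; float f; }; /* rejected: mixed base modes */ |
| struct bad { float f; int i; }; /* rejected: non-FP member */ |
| #endif |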
| |
| /* Return true if PCS_VARIANT should use VFP registers. */ |
| static bool |
| use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) |
| { |
| if (pcs_variant == ARM_PCS_AAPCS_VFP) |
| { |
| static bool seen_thumb1_vfp = false; |
| |
| if (TARGET_THUMB1 && !seen_thumb1_vfp) |
| { |
| sorry ("Thumb-1 hard-float VFP ABI"); |
| /* sorry() is not immediately fatal, so only display this once. */ |
| seen_thumb1_vfp = true; |
| } |
| |
| return true; |
| } |
| |
| if (pcs_variant != ARM_PCS_AAPCS_LOCAL) |
| return false; |
| |
| return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT |
| && (TARGET_VFP_DOUBLE || !is_double)); |
| } |
| |
| /* Return true if an argument whose type is TYPE, or mode is MODE, is |
| suitable for passing or returning in VFP registers for the PCS |
| variant selected. If it is, then *BASE_MODE is updated to contain |
| a machine mode describing each element of the argument's type and |
| *COUNT to hold the number of such elements. */ |
| static bool |
| aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, |
| machine_mode mode, const_tree type, |
| machine_mode *base_mode, int *count) |
| { |
| machine_mode new_mode = VOIDmode; |
| |
| /* If we have the type information, prefer that to working things |
| out from the mode. */ |
| if (type) |
| { |
| int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); |
| |
| if (ag_count > 0 && ag_count <= 4) |
| *count = ag_count; |
| else |
| return false; |
| } |
| else if (GET_MODE_CLASS (mode) == MODE_FLOAT |
| || GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
| || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) |
| { |
| *count = 1; |
| new_mode = mode; |
| } |
| else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) |
| { |
| *count = 2; |
| new_mode = (mode == DCmode ? DFmode : SFmode); |
| } |
| else |
| return false; |
| |
| |
| if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1)) |
| return false; |
| |
| *base_mode = new_mode; |
| return true; |
| } |
| |
| static bool |
| aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant, |
| machine_mode mode, const_tree type) |
| { |
| int count ATTRIBUTE_UNUSED; |
| machine_mode ag_mode ATTRIBUTE_UNUSED; |
| |
| if (!use_vfp_abi (pcs_variant, false)) |
| return false; |
| return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, |
| &ag_mode, &count); |
| } |
| |
| static bool |
| aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode, |
| const_tree type) |
| { |
| if (!use_vfp_abi (pcum->pcs_variant, false)) |
| return false; |
| |
| return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type, |
| &pcum->aapcs_vfp_rmode, |
| &pcum->aapcs_vfp_rcount); |
| } |
| |
| static bool |
| aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode, |
| const_tree type ATTRIBUTE_UNUSED) |
| { |
| int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode); |
| unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1; |
| int regno; |
| |
| for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift) |
| if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask) |
| { |
| pcum->aapcs_vfp_reg_alloc = mask << regno; |
| if (mode == BLKmode |
| || (mode == TImode && ! TARGET_NEON) |
| || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode)) |
| { |
| int i; |
| int rcount = pcum->aapcs_vfp_rcount; |
| int rshift = shift; |
| machine_mode rmode = pcum->aapcs_vfp_rmode; |
| rtx par; |
| if (!TARGET_NEON) |
| { |
| /* Avoid using unsupported vector modes. */ |
| if (rmode == V2SImode) |
| rmode = DImode; |
| else if (rmode == V4SImode) |
| { |
| rmode = DImode; |
| rcount *= 2; |
| rshift /= 2; |
| } |
| } |
| par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount)); |
| for (i = 0; i < rcount; i++) |
| { |
| rtx tmp = gen_rtx_REG (rmode, |
| FIRST_VFP_REGNUM + regno + i * rshift); |
| tmp = gen_rtx_EXPR_LIST |
| (VOIDmode, tmp, |
| GEN_INT (i * GET_MODE_SIZE (rmode))); |
| XVECEXP (par, 0, i) = tmp; |
| } |
| |
| pcum->aapcs_reg = par; |
| } |
| else |
| pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno); |
| return true; |
| } |
| return false; |
| } |
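| |
| /* Editor's stand-alone sketch of the bit-mask search used above |
| (illustrative only; the bound 16 stands in for NUM_VFP_ARG_REGS and |
| register numbers here are relative to the first VFP argument reg): */ |
| #if 0 |
| static int |
| find_free_vfp_block (unsigned free_mask, int shift, int rcount) |
| { |
| unsigned mask = (1u << (shift * rcount)) - 1; |
| int regno; |
| for (regno = 0; regno < 16; regno += shift) |
| if (((free_mask >> regno) & mask) == mask) |
| return regno; /* e.g. free_mask 0xfffe, shift 1, rcount 3 |
| -> regno 1, i.e. s1..s3 */ |
| return -1; |
| } |
| #endif |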
| |
| static rtx |
| aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant, |
| machine_mode mode, |
| const_tree type) |
| { |
| if (!use_vfp_abi (pcs_variant, false)) |
| return NULL; |
| |
| if (mode == BLKmode || (mode == TImode && !TARGET_NEON)) |
| { |
| int count; |
| machine_mode ag_mode; |
| int i; |
| rtx par; |
| int shift; |
| |
| aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, |
| &ag_mode, &count); |
| |
| if (!TARGET_NEON) |
| { |
| if (ag_mode == V2SImode) |
| ag_mode = DImode; |
| else if (ag_mode == V4SImode) |
| { |
| ag_mode = DImode; |
| count *= 2; |
| } |
| } |
| shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode); |
| par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); |
| for (i = 0; i < count; i++) |
| { |
| rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift); |
| tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, |
| GEN_INT (i * GET_MODE_SIZE (ag_mode))); |
| XVECEXP (par, 0, i) = tmp; |
| } |
| |
| return par; |
| } |
| |
| return gen_rtx_REG (mode, FIRST_VFP_REGNUM); |
| } |
| |
| static void |
| aapcs_vfp_advance (CUMULATIVE_ARGS *pcum, |
| machine_mode mode ATTRIBUTE_UNUSED, |
| const_tree type ATTRIBUTE_UNUSED) |
| { |
| pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc; |
| pcum->aapcs_vfp_reg_alloc = 0; |
| return; |
| } |
| |
| #define AAPCS_CP(X) \ |
| { \ |
| aapcs_ ## X ## _cum_init, \ |
| aapcs_ ## X ## _is_call_candidate, \ |
| aapcs_ ## X ## _allocate, \ |
| aapcs_ ## X ## _is_return_candidate, \ |
| aapcs_ ## X ## _allocate_return_reg, \ |
| aapcs_ ## X ## _advance \ |
| } |
| |
| /* Table of co-processors that can be used to pass arguments in |
| registers. Ideally no argument should be a candidate for more than |
| one co-processor table entry, but the table is processed in order |
| and stops after the first match. If that entry then fails to put |
| the argument into a co-processor register, the argument will go on |
| the stack. */ |
| static struct |
| { |
| /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */ |
| void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree); |
| |
| /* Return true if an argument of mode MODE (or type TYPE if MODE is |
| BLKmode) is a candidate for this co-processor's registers; this |
| function should ignore any position-dependent state in |
| CUMULATIVE_ARGS and only use call-type dependent information. */ |
| bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree); |
| |
| /* Return true if the argument does get a co-processor register; it |
| should set aapcs_reg to an RTX of the register allocated as is |
| required for a return from FUNCTION_ARG. */ |
| bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree); |
| |
| /* Return true if a result of mode MODE (or type TYPE if MODE is |
| BLKmode) can be returned in this co-processor's registers. */ |
| bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree); |
| |
| /* Allocate and return an RTX element to hold the return type of a |
| call; this routine must not fail and will only be called if |
| is_return_candidate returned true with the same parameters. */ |
| rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree); |
| |
| /* Finish processing this argument and prepare to start processing |
| the next one. */ |
| void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree); |
| } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] = |
| { |
| AAPCS_CP(vfp) |
| }; |
| |
| #undef AAPCS_CP |
| |
| static int |
| aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode, |
| const_tree type) |
| { |
| int i; |
| |
| for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) |
| if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) |
| return i; |
| |
| return -1; |
| } |
| |
| static int |
| aapcs_select_return_coproc (const_tree type, const_tree fntype) |
| { |
| /* We aren't passed a decl, so we can't check that a call is local. |
| However, it isn't clear that that would be a win anyway, since it |
| might limit some tail-calling opportunities. */ |
| enum arm_pcs pcs_variant; |
| |
| if (fntype) |
| { |
| const_tree fndecl = NULL_TREE; |
| |
| if (TREE_CODE (fntype) == FUNCTION_DECL) |
| { |
| fndecl = fntype; |
| fntype = TREE_TYPE (fntype); |
| } |
| |
| pcs_variant = arm_get_pcs_model (fntype, fndecl); |
| } |
| else |
| pcs_variant = arm_pcs_default; |
| |
| if (pcs_variant != ARM_PCS_AAPCS) |
| { |
| int i; |
| |
| for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) |
| if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, |
| TYPE_MODE (type), |
| type)) |
| return i; |
| } |
| return -1; |
| } |
| |
| static rtx |
| aapcs_allocate_return_reg (machine_mode mode, const_tree type, |
| const_tree fntype) |
| { |
| /* We aren't passed a decl, so we can't check that a call is local. |
| However, it isn't clear that that would be a win anyway, since it |
| might limit some tail-calling opportunities. */ |
| enum arm_pcs pcs_variant; |
| int unsignedp ATTRIBUTE_UNUSED; |
| |
| if (fntype) |
| { |
| const_tree fndecl = NULL_TREE; |
| |
| if (TREE_CODE (fntype) == FUNCTION_DECL) |
| { |
| fndecl = fntype; |
| fntype = TREE_TYPE (fntype); |
| } |
| |
| pcs_variant = arm_get_pcs_model (fntype, fndecl); |
| } |
| else |
| pcs_variant = arm_pcs_default; |
| |
| /* Promote integer types. */ |
| if (type && INTEGRAL_TYPE_P (type)) |
| mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1); |
| |
| if (pcs_variant != ARM_PCS_AAPCS) |
| { |
| int i; |
| |
| for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) |
| if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode, |
| type)) |
| return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant, |
| mode, type); |
| } |
| |
| /* Promote small structs returned in a register to full-word size |
| for big-endian AAPCS. */ |
| if (type && arm_return_in_msb (type)) |
| { |
| HOST_WIDE_INT size = int_size_in_bytes (type); |
| if (size % UNITS_PER_WORD != 0) |
| { |
| size += UNITS_PER_WORD - size % UNITS_PER_WORD; |
| mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); |
| } |
| } |
| |
| return gen_rtx_REG (mode, R0_REGNUM); |
| } |
| |
| static rtx |
| aapcs_libcall_value (machine_mode mode) |
| { |
| if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode) |
| && GET_MODE_SIZE (mode) <= 4) |
| mode = SImode; |
| |
| return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE); |
| } |
| |
| /* Lay out a function argument using the AAPCS rules. The rule |
| numbers referred to here are those in the AAPCS. */ |
| static void |
| aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode, |
| const_tree type, bool named) |
| { |
| int nregs, nregs2; |
| int ncrn; |
| |
| /* We only need to do this once per argument. */ |
| if (pcum->aapcs_arg_processed) |
| return; |
| |
| pcum->aapcs_arg_processed = true; |
| |
| /* Special case: if named is false then we are handling an incoming |
| anonymous argument which is on the stack. */ |
| if (!named) |
| return; |
| |
| /* Is this a potential co-processor register candidate? */ |
| if (pcum->pcs_variant != ARM_PCS_AAPCS) |
| { |
| int slot = aapcs_select_call_coproc (pcum, mode, type); |
| pcum->aapcs_cprc_slot = slot; |
| |
| /* We don't have to apply any of the rules from part B of the |
| preparation phase, these are handled elsewhere in the |
| compiler. */ |
| |
| if (slot >= 0) |
| { |
| /* A Co-processor register candidate goes either in its own |
| class of registers or on the stack. */ |
| if (!pcum->aapcs_cprc_failed[slot]) |
| { |
| /* C1.cp - Try to allocate the argument to co-processor |
| registers. */ |
| if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type)) |
| return; |
| |
| /* C2.cp - Put the argument on the stack and note that we |
| can't assign any more candidates in this slot. We also |
| need to note that we have allocated stack space, so that |
| we won't later try to split a non-cprc candidate between |
| core registers and the stack. */ |
| pcum->aapcs_cprc_failed[slot] = true; |
| pcum->can_split = false; |
| } |
| |
| /* We didn't get a register, so this argument goes on the |
| stack. */ |
| gcc_assert (pcum->can_split == false); |
| return; |
| } |
| } |
| |
| /* C3 - For double-word aligned arguments, round the NCRN up to the |
| next even number. */ |
| ncrn = pcum->aapcs_ncrn; |
| if ((ncrn & 1) && arm_needs_doubleword_align (mode, type)) |
| ncrn++; |
| |
| nregs = ARM_NUM_REGS2 (mode, type); |
| |
| /* Sigh, this test should really assert that nregs > 0, but a GCC |
| extension allows empty structs and then gives them a size of zero; it |
| then allows such a structure to be passed by value. For some of |
| the code below we have to pretend that such an argument has |
| non-zero size so that we 'locate' it correctly either in |
| registers or on the stack. */ |
| gcc_assert (nregs >= 0); |
| |
| nregs2 = nregs ? nregs : 1; |
| |
| /* C4 - Argument fits entirely in core registers. */ |
| if (ncrn + nregs2 <= NUM_ARG_REGS) |
| { |
| pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); |
| pcum->aapcs_next_ncrn = ncrn + nregs; |
| return; |
| } |
| |
| /* C5 - Some core registers left and there are no arguments already |
| on the stack: split this argument between the remaining core |
| registers and the stack. */ |
| if (ncrn < NUM_ARG_REGS && pcum->can_split) |
| { |
| pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); |
| pcum->aapcs_next_ncrn = NUM_ARG_REGS; |
| pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD; |
| return; |
| } |
| |
| /* C6 - NCRN is set to 4. */ |
| pcum->aapcs_next_ncrn = NUM_ARG_REGS; |
| |
| /* C7,C8 - argument goes on the stack. We have nothing to do here. */ |
| return; |
| } |
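| |
| /* Editor's worked example of rules C3-C6 above under the base AAPCS |
| (illustrative only): */ |
| #if 0 |
| void f (int a, double b, int c, int d); |
| /* a -> r0 (ncrn 0 -> 1) |
| b -> r2:r3 (C3 rounds ncrn 1 up to 2; ncrn -> 4) |
| c -> stack (C5 fails: no core registers left) |
| d -> stack */ |
| #endif |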
| |
| /* Initialize a variable CUM of type CUMULATIVE_ARGS |
| for a call to a function whose data type is FNTYPE. |
| For a library call, FNTYPE is NULL. */ |
| void |
| arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype, |
| rtx libname, |
| tree fndecl ATTRIBUTE_UNUSED) |
| { |
| /* Determine the calling convention (PCS variant) to use. */ |
| if (fntype) |
| pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl); |
| else |
| pcum->pcs_variant = arm_pcs_default; |
| |
| if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) |
| { |
| if (arm_libcall_uses_aapcs_base (libname)) |
| pcum->pcs_variant = ARM_PCS_AAPCS; |
| |
| pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0; |
| pcum->aapcs_reg = NULL_RTX; |
| pcum->aapcs_partial = 0; |
| pcum->aapcs_arg_processed = false; |
| pcum->aapcs_cprc_slot = -1; |
| pcum->can_split = true; |
| |
| if (pcum->pcs_variant != ARM_PCS_AAPCS) |
| { |
| int i; |
| |
| for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) |
| { |
| pcum->aapcs_cprc_failed[i] = false; |
| aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl); |
| } |
| } |
| return; |
| } |
| |
| /* Legacy ABIs */ |
| |
| /* On the ARM, the offset starts at 0. */ |
| pcum->nregs = 0; |
| pcum->iwmmxt_nregs = 0; |
| pcum->can_split = true; |
| |
| /* Varargs vectors are treated the same as long long. |
| named_count avoids having to change the way arm handles 'named'. */ |
| pcum->named_count = 0; |
| pcum->nargs = 0; |
| |
| if (TARGET_REALLY_IWMMXT && fntype) |
| { |
| tree fn_arg; |
| |
| for (fn_arg = TYPE_ARG_TYPES (fntype); |
| fn_arg; |
| fn_arg = TREE_CHAIN (fn_arg)) |
| pcum->named_count += 1; |
| |
| if (! pcum->named_count) |
| pcum->named_count = INT_MAX; |
| } |
| } |
| |
| /* Return true if mode/type need doubleword alignment. */ |
| static bool |
| arm_needs_doubleword_align (machine_mode mode, const_tree type) |
| { |
| if (!type) |
| return PARM_BOUNDARY < GET_MODE_ALIGNMENT (mode); |
| |
| /* Scalar and vector types: Use natural alignment, i.e. of base type. */ |
| if (!AGGREGATE_TYPE_P (type)) |
| return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY; |
| |
| /* Array types: Use member alignment of element type. */ |
| if (TREE_CODE (type) == ARRAY_TYPE) |
| return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY; |
| |
| /* Record/aggregate types: Use greatest member alignment of any member. */ |
| for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| if (DECL_ALIGN (field) > PARM_BOUNDARY) |
| return true; |
| |
| return false; |
| } |
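| |
| /* Editor's examples for the alignment test above (illustrative only, |
| assuming PARM_BOUNDARY == 32): */ |
| #if 0 |
| long long ll; /* 64-bit alignment: needs an even register pair */ |
| struct s1 { int a, b; }; /* only 32-bit members: no extra alignment */ |
| struct s2 { long long a; }; /* 64-bit-aligned member: needs alignment */ |
| #endif |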
| |
| |
| /* Determine where to put an argument to a function. |
| Value is zero to push the argument on the stack, |
| or a hard register in which to store the argument. |
| |
| MODE is the argument's machine mode. |
| TYPE is the data type of the argument (as a tree). |
| This is null for libcalls where that information may |
| not be available. |
| CUM is a variable of type CUMULATIVE_ARGS which gives info about |
| the preceding args and about the function being called. |
| NAMED is nonzero if this argument is a named parameter |
| (otherwise it is an extra parameter matching an ellipsis). |
| |
| On the ARM, normally the first 16 bytes are passed in registers r0-r3; all |
| other arguments are passed on the stack. If (NAMED == 0) (which happens |
| only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is |
| defined), say it is passed on the stack (function_prologue will |
| indeed make it pass on the stack if necessary). */ |
| |
| static rtx |
| arm_function_arg (cumulative_args_t pcum_v, machine_mode mode, |
| const_tree type, bool named) |
| { |
| CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); |
| int nregs; |
| |
| /* Handle the special case quickly. Pick an arbitrary value for op2 of |
| a call insn (op3 of a call_value insn). */ |
| if (mode == VOIDmode) |
| return const0_rtx; |
| |
| if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) |
| { |
| aapcs_layout_arg (pcum, mode, type, named); |
| return pcum->aapcs_reg; |
| } |
| |
| /* Varargs vectors are treated the same as long long. |
| named_count avoids having to change the way arm handles 'named'. */ |
| if (TARGET_IWMMXT_ABI |
| && arm_vector_mode_supported_p (mode) |
| && pcum->named_count > pcum->nargs + 1) |
| { |
| if (pcum->iwmmxt_nregs <= 9) |
| return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM); |
| else |
| { |
| pcum->can_split = false; |
| return NULL_RTX; |
| } |
| } |
| |
| /* Put doubleword aligned quantities in even register pairs. */ |
| if (pcum->nregs & 1 |
| && ARM_DOUBLEWORD_ALIGN |
| && arm_needs_doubleword_align (mode, type)) |
| pcum->nregs++; |
| |
| /* Only allow splitting an arg between regs and memory if all preceding |
| args were allocated to regs. For args passed by reference we only count |
| the reference pointer. */ |
| if (pcum->can_split) |
| nregs = 1; |
| else |
| nregs = ARM_NUM_REGS2 (mode, type); |
| |
| if (!named || pcum->nregs + nregs > NUM_ARG_REGS) |
| return NULL_RTX; |
| |
| return gen_rtx_REG (mode, pcum->nregs); |
| } |
| |
| static unsigned int |
| arm_function_arg_boundary (machine_mode mode, const_tree type) |
| { |
| return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type) |
| ? DOUBLEWORD_ALIGNMENT |
| : PARM_BOUNDARY); |
| } |
| |
| static int |
| arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode, |
| tree type, bool named) |
| { |
| CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); |
| int nregs = pcum->nregs; |
| |
| if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) |
| { |
| aapcs_layout_arg (pcum, mode, type, named); |
| return pcum->aapcs_partial; |
| } |
| |
| if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode)) |
| return 0; |
| |
| if (NUM_ARG_REGS > nregs |
| && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type)) |
| && pcum->can_split) |
| return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; |
| |
| return 0; |
| } |
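| |
| /* Editor's illustration of a register/stack split reported by the |
| hook above (illustrative only, base AAPCS): */ |
| #if 0 |
| struct three { int x, y, z; }; |
| void h (int a, int b, struct three s); |
| /* a -> r0, b -> r1; s occupies r2-r3 plus 4 bytes of stack, so |
| arm_arg_partial_bytes reports 8 for s. */ |
| #endif |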
| |
| /* Update the data in PCUM to advance over an argument |
| of mode MODE and data type TYPE. |
| (TYPE is null for libcalls where that information may not be available.) */ |
| |
| static void |
| arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode, |
| const_tree type, bool named) |
| { |
| CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); |
| |
| if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) |
| { |
| aapcs_layout_arg (pcum, mode, type, named); |
| |
| if (pcum->aapcs_cprc_slot >= 0) |
| { |
| aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode, |
| type); |
| pcum->aapcs_cprc_slot = -1; |
| } |
| |
| /* Generic stuff. */ |
| pcum->aapcs_arg_processed = false; |
| pcum->aapcs_ncrn = pcum->aapcs_next_ncrn; |
| pcum->aapcs_reg = NULL_RTX; |
| pcum->aapcs_partial = 0; |
| } |
| else |
| { |
| pcum->nargs += 1; |
| if (arm_vector_mode_supported_p (mode) |
| && pcum->named_count > pcum->nargs |
| && TARGET_IWMMXT_ABI) |
| pcum->iwmmxt_nregs += 1; |
| else |
| pcum->nregs += ARM_NUM_REGS2 (mode, type); |
| } |
| } |
| |
| /* Variable sized types are passed by reference. This is a GCC |
| extension to the ARM ABI. */ |
| |
| static bool |
| arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED, |
| machine_mode mode ATTRIBUTE_UNUSED, |
| const_tree type, bool named ATTRIBUTE_UNUSED) |
| { |
| return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST; |
| } |
| |
| /* Encode the current state of the #pragma [no_]long_calls. */ |
| typedef enum |
| { |
| OFF, /* No #pragma [no_]long_calls is in effect. */ |
| LONG, /* #pragma long_calls is in effect. */ |
| SHORT /* #pragma no_long_calls is in effect. */ |
| } arm_pragma_enum; |
| |
| static arm_pragma_enum arm_pragma_long_calls = OFF; |
| |
| void |
| arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) |
| { |
| arm_pragma_long_calls = LONG; |
| } |
| |
| void |
| arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) |
| { |
| arm_pragma_long_calls = SHORT; |
| } |
| |
| void |
| arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED) |
| { |
| arm_pragma_long_calls = OFF; |
| } |
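| |
| /* Editor's sketch of the source-level effect of these pragmas |
| (illustrative only): */ |
| #if 0 |
| #pragma long_calls |
| void far_func (void); /* type gets the long_call attribute */ |
| #pragma no_long_calls |
| void near_func (void); /* type gets the short_call attribute */ |
| #pragma long_calls_off |
| void plain_func (void); /* neither attribute is added */ |
| #endif |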
| |
| /* Handle an attribute requiring a FUNCTION_DECL; |
| arguments as in struct attribute_spec.handler. */ |
| static tree |
| arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED, |
| int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) |
| { |
| if (TREE_CODE (*node) != FUNCTION_DECL) |
| { |
| warning (OPT_Wattributes, "%qE attribute only applies to functions", |
| name); |
| *no_add_attrs = true; |
| } |
| |
| return NULL_TREE; |
| } |
| |
| /* Handle an "interrupt" or "isr" attribute; |
| arguments as in struct attribute_spec.handler. */ |
| static tree |
| arm_handle_isr_attribute (tree *node, tree name, tree args, int flags, |
| bool *no_add_attrs) |
| { |
| if (DECL_P (*node)) |
| { |
| if (TREE_CODE (*node) != FUNCTION_DECL) |
| { |
| warning (OPT_Wattributes, "%qE attribute only applies to functions", |
| name); |
| *no_add_attrs = true; |
| } |
| /* FIXME: the argument, if any, is checked for type attributes; |
| should it be checked for decl ones? */ |
| } |
| else |
| { |
| if (TREE_CODE (*node) == FUNCTION_TYPE |
| || TREE_CODE (*node) == METHOD_TYPE) |
| { |
| if (arm_isr_value (args) == ARM_FT_UNKNOWN) |
| { |
| warning (OPT_Wattributes, "%qE attribute ignored", |
| name); |
| *no_add_attrs = true; |
| } |
| } |
| else if (TREE_CODE (*node) == POINTER_TYPE |
| && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE |
| || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE) |
| && arm_isr_value (args) != ARM_FT_UNKNOWN) |
| { |
| *node = build_variant_type_copy (*node); |
| TREE_TYPE (*node) = build_type_attribute_variant |
| (TREE_TYPE (*node), |
| tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node)))); |
| *no_add_attrs = true; |
| } |
| else |
| { |
| /* Possibly pass this attribute on from the type to a decl. */ |
| if (flags & ((int) ATTR_FLAG_DECL_NEXT |
| | (int) ATTR_FLAG_FUNCTION_NEXT |
| | (int) ATTR_FLAG_ARRAY_NEXT)) |
| { |
| *no_add_attrs = true; |
| return tree_cons (name, args, NULL_TREE); |
| } |
| else |
| { |
| warning (OPT_Wattributes, "%qE attribute ignored", |
| name); |
| } |
| } |
| } |
| |
| return NULL_TREE; |
| } |
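| |
| /* Editor's examples of declarations the handler above accepts or |
| warns about (illustrative only): */ |
| #if 0 |
| void irq_handler (void) __attribute__ ((interrupt ("IRQ"))); |
| void fiq_handler (void) __attribute__ ((isr ("FIQ"))); |
| void odd_handler (void) __attribute__ ((isr ("BOGUS"))); /* unknown |
| type: attribute ignored with a warning */ |
| #endif |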
| |
| /* Handle a "pcs" attribute; arguments as in struct |
| attribute_spec.handler. */ |
| static tree |
| arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, |
| int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) |
| { |
| if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN) |
| { |
| warning (OPT_Wattributes, "%qE attribute ignored", name); |
| *no_add_attrs = true; |
| } |
| return NULL_TREE; |
| } |
| |
| #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
| /* Handle the "notshared" attribute. This attribute is another way of |
| requesting hidden visibility. ARM's compiler supports |
| "__declspec(notshared)"; we support the same thing via an |
| attribute. */ |
| |
| static tree |
| arm_handle_notshared_attribute (tree *node, |
| tree name ATTRIBUTE_UNUSED, |
| tree args ATTRIBUTE_UNUSED, |
| int flags ATTRIBUTE_UNUSED, |
| bool *no_add_attrs) |
| { |
| tree decl = TYPE_NAME (*node); |
| |
| if (decl) |
| { |
| DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; |
| DECL_VISIBILITY_SPECIFIED (decl) = 1; |
| *no_add_attrs = false; |
| } |
| return NULL_TREE; |
| } |
| #endif |
| |
| /* Return 0 if the attributes for two types are incompatible, 1 if they |
| are compatible, and 2 if they are nearly compatible (which causes a |
| warning to be generated). */ |
| static int |
| arm_comp_type_attributes (const_tree type1, const_tree type2) |
| { |
| int l1, l2, s1, s2; |
| |
| /* Check for mismatch of non-default calling convention. */ |
| if (TREE_CODE (type1) != FUNCTION_TYPE) |
| return 1; |
| |
| /* Check for mismatched call attributes. */ |
| l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL; |
| l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL; |
| s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL; |
| s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL; |
| |
| /* Only bother to check if an attribute is defined. */ |
| if (l1 | l2 | s1 | s2) |
| { |
| /* If one type has an attribute, the other must have the same attribute. */ |
| if ((l1 != l2) || (s1 != s2)) |
| return 0; |
| |
| /* Disallow mixed attributes. */ |
| if ((l1 & s2) || (l2 & s1)) |
| return 0; |
| } |
| |
| /* Check for mismatched ISR attribute. */ |
| l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL; |
| if (! l1) |
| l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL; |
| l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL; |
| if (! l2) |
| l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL; |
| if (l1 != l2) |
| return 0; |
| |
| return 1; |
| } |
| |
| /* Assigns default attributes to newly defined type. This is used to |
| set short_call/long_call attributes for function types of |
| functions defined inside corresponding #pragma scopes. */ |
| static void |
| arm_set_default_type_attributes (tree type) |
| { |
| /* Add __attribute__ ((long_call)) to all functions, when |
| inside #pragma long_calls or __attribute__ ((short_call)), |
| when inside #pragma no_long_calls. */ |
| if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) |
| { |
| tree type_attr_list, attr_name; |
| type_attr_list = TYPE_ATTRIBUTES (type); |
| |
| if (arm_pragma_long_calls == LONG) |
| attr_name = get_identifier ("long_call"); |
| else if (arm_pragma_long_calls == SHORT) |
| attr_name = get_identifier ("short_call"); |
| else |
| return; |
| |
| type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list); |
| TYPE_ATTRIBUTES (type) = type_attr_list; |
| } |
| } |
| |
| /* Return true if DECL is known to be linked into section SECTION. */ |
| |
| static bool |
| arm_function_in_section_p (tree decl, section *section) |
| { |
| /* We can only be certain about the prevailing symbol definition. */ |
| if (!decl_binds_to_current_def_p (decl)) |
| return false; |
| |
| /* If DECL_SECTION_NAME is set, assume it is trustworthy. */ |
| if (!DECL_SECTION_NAME (decl)) |
| { |
| /* Make sure that we will not create a unique section for DECL. */ |
| if (flag_function_sections || DECL_COMDAT_GROUP (decl)) |
| return false; |
| } |
| |
| return function_section (decl) == section; |
| } |
| |
| /* Return nonzero if a 32-bit "long_call" should be generated for |
| a call from the current function to DECL. We generate a long_call |
| if the function: |
| |
| a. has an __attribute__ ((long_call)) |
| or b. is within the scope of a #pragma long_calls |
| or c. the -mlong-calls command line switch has been specified |
| |
| However we do not generate a long call if the function: |
| |
| d. has an __attribute__ ((short_call)) |
| or e. is inside the scope of a #pragma no_long_calls |
| or f. is defined in the same section as the current function. */ |
| |
| bool |
| arm_is_long_call_p (tree decl) |
| { |
| tree attrs; |
| |
| if (!decl) |
| return TARGET_LONG_CALLS; |
| |
| attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); |
| if (lookup_attribute ("short_call", attrs)) |
| return false; |
| |
| /* For "f", be conservative, and only cater for cases in which the |
| whole of the current function is placed in the same section. */ |
| if (!flag_reorder_blocks_and_partition |
| && TREE_CODE (decl) == FUNCTION_DECL |
| && arm_function_in_section_p (decl, current_function_section ())) |
| return false; |
| |
| if (lookup_attribute ("long_call", attrs)) |
| return true; |
| |
| return TARGET_LONG_CALLS; |
| } |
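| |
| /* Editor's examples for cases (a) and (d) above (illustrative only): */ |
| #if 0 |
| void remote (void) __attribute__ ((long_call)); /* case a: long call */ |
| void nearby (void) __attribute__ ((short_call)); /* case d: short call */ |
| #endif |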
| |
| /* Return nonzero if it is ok to make a tail-call to DECL. */ |
| static bool |
| arm_function_ok_for_sibcall (tree decl, tree exp) |
| { |
| unsigned long func_type; |
| |
| if (cfun->machine->sibcall_blocked) |
| return false; |
| |
| /* Never tailcall something if we are generating code for Thumb-1. */ |
| if (TARGET_THUMB1) |
| return false; |
| |
| /* The PIC register is live on entry to VxWorks PLT entries, so we |
| must make the call before restoring the PIC register. */ |
| if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl)) |
| return false; |
| |
| /* If we are interworking and the function is not declared static |
| then we can't tail-call it unless we know that it exists in this |
| compilation unit (since it might be a Thumb routine). */ |
| if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl) |
| && !TREE_ASM_WRITTEN (decl)) |
| return false; |
| |
| func_type = arm_current_func_type (); |
| /* Never tailcall from an ISR routine - it needs a special exit sequence. */ |
| if (IS_INTERRUPT (func_type)) |
| return false; |
| |
| if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) |
| { |
| /* Check that the return value locations are the same. For |
| example that we aren't returning a value from the sibling in |
| a VFP register but then need to transfer it to a core |
| register. */ |
| rtx a, b; |
| tree decl_or_type = decl; |
| |
| /* If it is an indirect function pointer, get the function type. */ |
| if (!decl) |
| decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); |
| |
| a = arm_function_value (TREE_TYPE (exp), decl_or_type, false); |
| b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), |
| cfun->decl, false); |
| if (!rtx_equal_p (a, b)) |
| return false; |
| } |
| |
| /* Never tailcall if function may be called with a misaligned SP. */ |
| if (IS_STACKALIGN (func_type)) |
| return false; |
| |
| /* The AAPCS says that, on bare-metal, calls to unresolved weak |
| references should become a NOP. Don't convert such calls into |
| sibling calls. */ |
| if (TARGET_AAPCS_BASED |
| && arm_abi == ARM_ABI_AAPCS |
| && decl |
| && DECL_WEAK (decl)) |
| return false; |
| |
| /* Everything else is ok. */ |
| return true; |
| } |
| |
| |
| /* Addressing mode support functions. */ |
| |
| /* Return nonzero if X is a legitimate immediate operand when compiling |
| for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */ |
| int |
| legitimate_pic_operand_p (rtx x) |
| { |
| if (GET_CODE (x) == SYMBOL_REF |
| || (GET_CODE (x) == CONST |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) |
| return 0; |
| |
| return 1; |
| } |
| |
| /* Record that the current function needs a PIC register. Initialize |
| cfun->machine->pic_reg if we have not already done so. */ |
| |
| static void |
| require_pic_register (void) |
| { |
| /* A lot of the logic here is made obscure by the fact that this |
| routine gets called as part of the rtx cost estimation process. |
| We don't want those calls to affect any assumptions about the real |
| function; and further, we can't call entry_of_function() until we |
| start the real expansion process. */ |
| if (!crtl->uses_pic_offset_table) |
| { |
| gcc_assert (can_create_pseudo_p ()); |
| if (arm_pic_register != INVALID_REGNUM |
| && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM)) |
| { |
| if (!cfun->machine->pic_reg) |
| cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register); |
| |
| /* Play games to avoid marking the function as needing pic |
| if we are being called as part of the cost-estimation |
| process. */ |
| if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) |
| crtl->uses_pic_offset_table = 1; |
| } |
| else |
| { |
| rtx_insn *seq, *insn; |
| |
| if (!cfun->machine->pic_reg) |
| cfun->machine->pic_reg = gen_reg_rtx (Pmode); |
| |
| /* Play games to avoid marking the function as needing pic |
| if we are being called as part of the cost-estimation |
| process. */ |
| if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) |
| { |
| crtl->uses_pic_offset_table = 1; |
| start_sequence (); |
| |
| if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM |
| && arm_pic_register > LAST_LO_REGNUM) |
| emit_move_insn (cfun->machine->pic_reg, |
| gen_rtx_REG (Pmode, arm_pic_register)); |
| else |
| arm_load_pic_register (0UL); |
| |
| seq = get_insns (); |
| end_sequence (); |
| |
| for (insn = seq; insn; insn = NEXT_INSN (insn)) |
| if (INSN_P (insn)) |
| INSN_LOCATION (insn) = prologue_location; |
| |
| /* We can be called during expansion of PHI nodes, where |
| we can't yet emit instructions directly in the final |
| insn stream. Queue the insns on the entry edge, they will |
| be committed after everything else is expanded. */ |
| insert_insn_on_edge (seq, |
| single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); |
| } |
| } |
| } |
| } |
| |
| rtx |
| legitimize_pic_address (rtx orig, machine_mode mode, rtx reg) |
| { |
| if (GET_CODE (orig) == SYMBOL_REF |
| || GET_CODE (orig) == LABEL_REF) |
| { |
| rtx insn; |
| |
| if (reg == 0) |
| { |
| gcc_assert (can_create_pseudo_p ()); |
| reg = gen_reg_rtx (Pmode); |
| } |
| |
| /* VxWorks does not impose a fixed gap between segments; the run-time |
| gap can be different from the object-file gap. We therefore can't |
| use GOTOFF unless we are absolutely sure that the symbol is in the |
| same segment as the GOT. Unfortunately, the flexibility of linker |
| scripts means that we can't be sure of that in general, so assume |
| that GOTOFF is never valid on VxWorks. */ |
| /* References to weak symbols cannot be resolved locally: they |
| may be overridden by a non-weak definition at link time. */ |
| if ((GET_CODE (orig) == LABEL_REF |
| || (GET_CODE (orig) == SYMBOL_REF |
| && SYMBOL_REF_LOCAL_P (orig) |
| && (SYMBOL_REF_DECL (orig) |
| ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1))) |
| && NEED_GOT_RELOC |
| && arm_pic_data_is_text_relative) |
| insn = arm_pic_static_addr (orig, reg); |
| else |
| { |
| rtx pat; |
| rtx mem; |
| |
| /* If this function doesn't have a pic register, create one now. */ |
| require_pic_register (); |
| |
| pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig); |
| |
| /* Make the MEM as close to a constant as possible. */ |
| mem = SET_SRC (pat); |
| gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem)); |
| MEM_READONLY_P (mem) = 1; |
| MEM_NOTRAP_P (mem) = 1; |
| |
| insn = emit_insn (pat); |
| } |
| |
| /* Put a REG_EQUAL note on this insn, so that it can be optimized |
| by loop. */ |
| set_unique_reg_note (insn, REG_EQUAL, orig); |
| |
| return reg; |
| } |
| else if (GET_CODE (orig) == CONST) |
| { |
| rtx base, offset; |
| |
| if (GET_CODE (XEXP (orig, 0)) == PLUS |
| && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg) |
| return orig; |
| |
| /* Handle the case where we have: const (UNSPEC_TLS). */ |
| if (GET_CODE (XEXP (orig, 0)) == UNSPEC |
| && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS) |
| return orig; |
| |
| /* Handle the case where we have: |
| const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a |
| CONST_INT. */ |
| if (GET_CODE (XEXP (orig, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC |
| && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS) |
| { |
| gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1))); |
| return orig; |
| } |
| |
| if (reg == 0) |
| { |
| gcc_assert (can_create_pseudo_p ()); |
| reg = gen_reg_rtx (Pmode); |
| } |
| |
| gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS); |
| |
| base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); |
| offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, |
| base == reg ? 0 : reg); |
| |
| if (CONST_INT_P (offset)) |
| { |
| /* The base register doesn't really matter, we only want to |
| test the index for the appropriate mode. */ |
| if (!arm_legitimate_index_p (mode, offset, SET, 0)) |
| { |
| gcc_assert (can_create_pseudo_p ()); |
| offset = force_reg (Pmode, offset); |
| } |
| |
| if (CONST_INT_P (offset)) |
| return plus_constant (Pmode, base, INTVAL (offset)); |
| } |
| |
| if (GET_MODE_SIZE (mode) > 4 |
| && (GET_MODE_CLASS (mode) == MODE_INT |
| || TARGET_SOFT_FLOAT)) |
| { |
| emit_insn (gen_addsi3 (reg, base, offset)); |
| return reg; |
| } |
| |
| return gen_rtx_PLUS (Pmode, base, offset); |
| } |
| |
| return orig; |
| } |
| |
| |
| /* Find a spare register to use during the prolog of a function. */ |
| |
| static int |
| thumb_find_work_register (unsigned long pushed_regs_mask) |
| { |
| int reg; |
| |
| /* Check the argument registers first as these are call-used. The |
| register allocation order means that sometimes r3 might be used |
| but earlier argument registers might not, so check them all. */ |
| for (reg = LAST_ARG_REGNUM; reg >= 0; reg --) |
| if (!df_regs_ever_live_p (reg)) |
| return reg; |
| |
| /* Before going on to check the call-saved registers we can try a couple |
| more ways of deducing that r3 is available. The first is when we are |
| pushing anonymous arguments onto the stack and we have less than 4 |
| registers worth of fixed arguments(*). In this case r3 will be part of |
| the variable argument list and so we can be sure that it will be |
| pushed right at the start of the function. Hence it will be available |
| for the rest of the prologue. |
| (*): i.e. crtl->args.pretend_args_size is greater than 0. */ |
| if (cfun->machine->uses_anonymous_args |
| && crtl->args.pretend_args_size > 0) |
| return LAST_ARG_REGNUM; |
| |
| /* The other case is when we have fixed arguments but less than 4 registers |
| worth. In this case r3 might be used in the body of the function, but |
| it is not being used to convey an argument into the function. In theory |
| we could just check crtl->args.size to see how many bytes are |
| being passed in argument registers, but it seems that it is unreliable. |
| Sometimes it will have the value 0 when in fact arguments are being |
| passed. (See testcase execute/20021111-1.c for an example). So we also |
| check the args_info.nregs field as well. The problem with this field is |
| that it makes no allowances for arguments that are passed to the |
| function but which are not used. Hence we could miss an opportunity |
| when a function has an unused argument in r3. But it is better to be |
| safe than to be sorry. */ |
| if (! cfun->machine->uses_anonymous_args |
| && crtl->args.size >= 0 |
| && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD) |
| && (TARGET_AAPCS_BASED |
| ? crtl->args.info.aapcs_ncrn < 4 |
| : crtl->args.info.nregs < 4)) |
| return LAST_ARG_REGNUM; |
| |
| /* Otherwise look for a call-saved register that is going to be pushed. */ |
| for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --) |
| if (pushed_regs_mask & (1 << reg)) |
| return reg; |
| |
| if (TARGET_THUMB2) |
| { |
| /* Thumb-2 can use high regs. */ |
| for (reg = FIRST_HI_REGNUM; reg < 15; reg ++) |
| if (pushed_regs_mask & (1 << reg)) |
| return reg; |
| } |
| /* Something went wrong - thumb_compute_save_reg_mask() |
| should have arranged for a suitable register to be pushed. */ |
| gcc_unreachable (); |
| } |
| |
| static GTY(()) int pic_labelno; |
| |
| /* Generate code to load the PIC register. In thumb mode SCRATCH is a |
| low register. */ |
| |
| void |
| arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED) |
| { |
| rtx l1, labelno, pic_tmp, pic_rtx, pic_reg; |
| |
| if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE) |
| return; |
| |
| gcc_assert (flag_pic); |
| |
| pic_reg = cfun->machine->pic_reg; |
| if (TARGET_VXWORKS_RTP) |
| { |
| pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); |
| pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); |
| emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); |
| |
| emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg))); |
| |
| pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); |
| emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp)); |
| } |
| else |
| { |
| /* We use an UNSPEC rather than a LABEL_REF because this label |
| never appears in the code stream. */ |
| |
| labelno = GEN_INT (pic_labelno++); |
| l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); |
| l1 = gen_rtx_CONST (VOIDmode, l1); |
| |
| /* On the ARM the PC register contains 'dot + 8' at the time of the |
| addition, on the Thumb it is 'dot + 4'. */ |
| pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4); |
| pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx), |
| UNSPEC_GOTSYM_OFF); |
| pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); |
| |
| if (TARGET_32BIT) |
| { |
| emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); |
| } |
| else /* TARGET_THUMB1 */ |
| { |
| if (arm_pic_register != INVALID_REGNUM |
| && REGNO (pic_reg) > LAST_LO_REGNUM) |
| { |
| /* We will have pushed the pic register, so we should always be |
| able to find a work register. */ |
| pic_tmp = gen_rtx_REG (SImode, |
| thumb_find_work_register (saved_regs)); |
| emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx)); |
| emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp)); |
| emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno)); |
| } |
| else if (arm_pic_register != INVALID_REGNUM |
| && arm_pic_register > LAST_LO_REGNUM |
| && REGNO (pic_reg) <= LAST_LO_REGNUM) |
| { |
| emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); |
| emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg); |
| emit_use (gen_rtx_REG (Pmode, arm_pic_register)); |
| } |
| else |
| emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno)); |
| } |
| } |
| |
| /* Need to emit this whether or not we obey regdecls, |
| since setjmp/longjmp can cause life info to screw up. */ |
| emit_use (pic_reg); |
| } |
| |
| /* Generate code to load the address of a static var when flag_pic is set. */ |
| static rtx |
| arm_pic_static_addr (rtx orig, rtx reg) |
| { |
| rtx l1, labelno, offset_rtx, insn; |
| |
| gcc_assert (flag_pic); |
| |
| /* We use an UNSPEC rather than a LABEL_REF because this label |
| never appears in the code stream. */ |
| labelno = GEN_INT (pic_labelno++); |
| l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); |
| l1 = gen_rtx_CONST (VOIDmode, l1); |
| |
| /* On the ARM the PC register contains 'dot + 8' at the time of the |
| addition, on the Thumb it is 'dot + 4'. */ |
| offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4); |
| offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx), |
| UNSPEC_SYMBOL_OFFSET); |
| offset_rtx = gen_rtx_CONST (Pmode, offset_rtx); |
| |
| insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno)); |
| return insn; |
| } |
| |
| /* Return nonzero if X is valid as an ARM state addressing register. */ |
| static int |
| arm_address_register_rtx_p (rtx x, int strict_p) |
| { |
| int regno; |
| |
| if (!REG_P (x)) |
| return 0; |
| |
| regno = REGNO (x); |
| |
| if (strict_p) |
| return ARM_REGNO_OK_FOR_BASE_P (regno); |
| |
| return (regno <= LAST_ARM_REGNUM |
| || regno >= FIRST_PSEUDO_REGISTER |
| || regno == FRAME_POINTER_REGNUM |
| || regno == ARG_POINTER_REGNUM); |
| } |
| |
| /* Return TRUE if this rtx is the difference of a symbol and a label, |
| and will reduce to a PC-relative relocation in the object file. |
| Expressions like this can be left alone when generating PIC, rather |
| than forced through the GOT. */ |
| static int |
| pcrel_constant_p (rtx x) |
| { |
| if (GET_CODE (x) == MINUS) |
| return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1)); |
| |
| return FALSE; |
| } |
| |
| /* Return true if X will surely end up in an index register after next |
| splitting pass. */ |
| static bool |
| will_be_in_index_register (const_rtx x) |
| { |
| /* arm.md: calculate_pic_address will split this into a register. */ |
| return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM); |
| } |
| |
| /* Return nonzero if X is a valid ARM state address operand. */ |
| int |
| arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer, |
| int strict_p) |
| { |
| bool use_ldrd; |
| enum rtx_code code = GET_CODE (x); |
| |
| if (arm_address_register_rtx_p (x, strict_p)) |
| return 1; |
| |
| use_ldrd = (TARGET_LDRD |
| && (mode == DImode |
| || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); |
| |
| if (code == POST_INC || code == PRE_DEC |
| || ((code == PRE_INC || code == POST_DEC) |
| && (use_ldrd || GET_MODE_SIZE (mode) <= 4))) |
| return arm_address_register_rtx_p (XEXP (x, 0), strict_p); |
| |
| else if ((code == POST_MODIFY || code == PRE_MODIFY) |
| && arm_address_register_rtx_p (XEXP (x, 0), strict_p) |
| && GET_CODE (XEXP (x, 1)) == PLUS |
| && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) |
| { |
| rtx addend = XEXP (XEXP (x, 1), 1); |
| |
| /* Don't allow ldrd post increment by register because it's hard |
| to fixup invalid register choices. */ |
| if (use_ldrd |
| && GET_CODE (x) == POST_MODIFY |
| && REG_P (addend)) |
| return 0; |
| |
| return ((use_ldrd || GET_MODE_SIZE (mode) <= 4) |
| && arm_legitimate_index_p (mode, addend, outer, strict_p)); |
| } |
| |
| /* After reload constants split into minipools will have addresses |
| from a LABEL_REF. */ |
| else if (reload_completed |
| && (code == LABEL_REF |
| || (code == CONST |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1))))) |
| return 1; |
| |
| else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))) |
| return 0; |
| |
| else if (code == PLUS) |
| { |
| rtx xop0 = XEXP (x, 0); |
| rtx xop1 = XEXP (x, 1); |
| |
| return ((arm_address_register_rtx_p (xop0, strict_p) |
| && ((CONST_INT_P (xop1) |
| && arm_legitimate_index_p (mode, xop1, outer, strict_p)) |
| || (!strict_p && will_be_in_index_register (xop1)))) |
| || (arm_address_register_rtx_p (xop1, strict_p) |
| && arm_legitimate_index_p (mode, xop0, outer, strict_p))); |
| } |
| |
| #if 0 |
| /* Reload currently can't handle MINUS, so disable this for now */ |
| else if (GET_CODE (x) == MINUS) |
| { |
| rtx xop0 = XEXP (x, 0); |
| rtx xop1 = XEXP (x, 1); |
| |
| return (arm_address_register_rtx_p (xop0, strict_p) |
| && arm_legitimate_index_p (mode, xop1, outer, strict_p)); |
| } |
| #endif |
| |
| else if (GET_MODE_CLASS (mode) != MODE_FLOAT |
| && code == SYMBOL_REF |
| && CONSTANT_POOL_ADDRESS_P (x) |
| && ! (flag_pic |
| && symbol_mentioned_p (get_pool_constant (x)) |
| && ! pcrel_constant_p (get_pool_constant (x)))) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* Return nonzero if X is a valid Thumb-2 address operand. */ |
| static int |
| thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p) |
| { |
| bool use_ldrd; |
| enum rtx_code code = GET_CODE (x); |
| |
| if (arm_address_register_rtx_p (x, strict_p)) |
| return 1; |
| |
| use_ldrd = (TARGET_LDRD |
| && (mode == DImode |
| || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP)))); |
| |
| if (code == POST_INC || code == PRE_DEC |
| || ((code == PRE_INC || code == POST_DEC) |
| && (use_ldrd || GET_MODE_SIZE (mode) <= 4))) |
| return arm_address_register_rtx_p (XEXP (x, 0), strict_p); |
| |
| else if ((code == POST_MODIFY || code == PRE_MODIFY) |
| && arm_address_register_rtx_p (XEXP (x, 0), strict_p) |
| && GET_CODE (XEXP (x, 1)) == PLUS |
| && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0))) |
| { |
| /* Thumb-2 only has autoincrement by constant. */ |
| rtx addend = XEXP (XEXP (x, 1), 1); |
| HOST_WIDE_INT offset; |
| |
| if (!CONST_INT_P (addend)) |
| return 0; |
| |
| offset = INTVAL (addend); |
| if (GET_MODE_SIZE (mode) <= 4) |
| return (offset > -256 && offset < 256); |
| |
| return (use_ldrd && offset > -1024 && offset < 1024 |
| && (offset & 3) == 0); |
| } |
| |
| /* After reload constants split into minipools will have addresses |
| from a LABEL_REF. */ |
| else if (reload_completed |
| && (code == LABEL_REF |
| || (code == CONST |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1))))) |
| return 1; |
| |
| else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode))) |
| return 0; |
| |
| else if (code == PLUS) |
| { |
| rtx xop0 = XEXP (x, 0); |
| rtx xop1 = XEXP (x, 1); |
| |
| return ((arm_address_register_rtx_p (xop0, strict_p) |
| && (thumb2_legitimate_index_p (mode, xop1, strict_p) |
| || (!strict_p && will_be_in_index_register (xop1)))) |
| || (arm_address_register_rtx_p (xop1, strict_p) |
| && thumb2_legitimate_index_p (mode, xop0, strict_p))); |
| } |
| |
| /* Normally we can assign constant values to target registers without |
| the help of the constant pool. But there are cases where we have to |
| use the constant pool, such as: |
| 1) assigning a label to a register; |
| 2) sign-extending an 8-bit value to 32 bits and then assigning it |
| to a register. |
| |
| A constant pool access of the form |
| (set (reg r0) (mem (symbol_ref (".LC0")))) |
| will cause the use of the literal pool (later, in arm_reorg). |
| So here we mark such a form as invalid, and the compiler will then |
| adjust it into: |
| (set (reg r0) (symbol_ref (".LC0"))) |
| (set (reg r0) (mem (reg r0))). |
| No extra register is required, and (mem (reg r0)) won't cause the |
| use of literal pools. */ |
| else if (arm_disable_literal_pool && code == SYMBOL_REF |
| && CONSTANT_POOL_ADDRESS_P (x)) |
| return 0; |
| |
| else if (GET_MODE_CLASS (mode) != MODE_FLOAT |
| && code == SYMBOL_REF |
| && CONSTANT_POOL_ADDRESS_P (x) |
| && ! (flag_pic |
| && symbol_mentioned_p (get_pool_constant (x)) |
| && ! pcrel_constant_p (get_pool_constant (x)))) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* Return nonzero if INDEX is valid for an address index operand in |
| ARM state. */ |
| static int |
| arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer, |
| int strict_p) |
| { |
| HOST_WIDE_INT range; |
| enum rtx_code code = GET_CODE (index); |
| |
| /* Standard coprocessor addressing modes. */ |
| if (TARGET_HARD_FLOAT |
| && TARGET_VFP |
| && (mode == SFmode || mode == DFmode)) |
| return (code == CONST_INT && INTVAL (index) < 1024 |
| && INTVAL (index) > -1024 |
| && (INTVAL (index) & 3) == 0); |
| |
| /* For quad modes, we restrict the constant offset to be slightly less |
| than what the instruction format permits. We do this because for |
| quad mode moves, we will actually decompose them into two separate |
| double-mode reads or writes. INDEX must therefore be a valid |
| (double-mode) offset and so should INDEX+8. */ |
| if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) |
| return (code == CONST_INT |
| && INTVAL (index) < 1016 |
| && INTVAL (index) > -1024 |
| && (INTVAL (index) & 3) == 0); |
| |
| /* We have no such constraint on double mode offsets, so we permit the |
| full range of the instruction format. */ |
| if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) |
| return (code == CONST_INT |
| && INTVAL (index) < 1024 |
| && INTVAL (index) > -1024 |
| && (INTVAL (index) & 3) == 0); |
| |
| if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) |
| return (code == CONST_INT |
| && INTVAL (index) < 1024 |
| && INTVAL (index) > -1024 |
| && (INTVAL (index) & 3) == 0); |
| |
| if (arm_address_register_rtx_p (index, strict_p) |
| && (GET_MODE_SIZE (mode) <= 4)) |
| return 1; |
| |
| if (mode == DImode || mode == DFmode) |
| { |
| if (code == CONST_INT) |
| { |
| HOST_WIDE_INT val = INTVAL (index); |
| |
| if (TARGET_LDRD) |
| return val > -256 && val < 256; |
| else |
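	    /* Without ldrd the value is accessed as two separate words, so
	       the offset of the second word (VAL + 4) must also fit the
	       12-bit ldr range; hence the asymmetric upper bound of 4092.  */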
| return val > -4096 && val < 4092; |
| } |
| |
| return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p); |
| } |
| |
| if (GET_MODE_SIZE (mode) <= 4 |
| && ! (arm_arch4 |
| && (mode == HImode |
| || mode == HFmode |
| || (mode == QImode && outer == SIGN_EXTEND)))) |
| { |
| if (code == MULT) |
| { |
| rtx xiop0 = XEXP (index, 0); |
| rtx xiop1 = XEXP (index, 1); |
| |
| return ((arm_address_register_rtx_p (xiop0, strict_p) |
| && power_of_two_operand (xiop1, SImode)) |
| || (arm_address_register_rtx_p (xiop1, strict_p) |
| && power_of_two_operand (xiop0, SImode))); |
| } |
| else if (code == LSHIFTRT || code == ASHIFTRT |
| || code == ASHIFT || code == ROTATERT) |
| { |
| rtx op = XEXP (index, 1); |
| |
| return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) |
| && CONST_INT_P (op) |
| && INTVAL (op) > 0 |
| && INTVAL (op) <= 31); |
| } |
| } |
| |
| /* For ARM v4 we may be doing a sign-extend operation during the |
| load. */ |
| if (arm_arch4) |
| { |
| if (mode == HImode |
| || mode == HFmode |
| || (outer == SIGN_EXTEND && mode == QImode)) |
| range = 256; |
| else |
| range = 4096; |
| } |
| else |
| range = (mode == HImode || mode == HFmode) ? 4095 : 4096; |
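  /* Note that the comparisons below are strict, so the effective range is
     +/-(range - 1), e.g. +/-255 when RANGE is 256 and +/-4094 for the
     pre-v4 HImode fallback.  */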
| |
| return (code == CONST_INT |
| && INTVAL (index) < range |
| && INTVAL (index) > -range); |
| } |
| |
/* Return true if OP is a valid index scaling factor for a Thumb-2 address
   index operand, i.e. 1, 2, 4 or 8.  */
| static bool |
| thumb2_index_mul_operand (rtx op) |
| { |
| HOST_WIDE_INT val; |
| |
| if (!CONST_INT_P (op)) |
| return false; |
| |
  val = INTVAL (op);
| return (val == 1 || val == 2 || val == 4 || val == 8); |
| } |
| |
| /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */ |
| static int |
| thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p) |
| { |
| enum rtx_code code = GET_CODE (index); |
| |
| /* ??? Combine arm and thumb2 coprocessor addressing modes. */ |
| /* Standard coprocessor addressing modes. */ |
| if (TARGET_HARD_FLOAT |
| && TARGET_VFP |
| && (mode == SFmode || mode == DFmode)) |
| return (code == CONST_INT && INTVAL (index) < 1024 |
	    /* Thumb-2 allows only a > -256 index range for its core register
	       load/stores.  Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
| && INTVAL (index) > -256 |
| && (INTVAL (index) & 3) == 0); |
| |
| if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) |
| { |
| /* For DImode assume values will usually live in core regs |
| and only allow LDRD addressing modes. */ |
| if (!TARGET_LDRD || mode != DImode) |
| return (code == CONST_INT |
| && INTVAL (index) < 1024 |
| && INTVAL (index) > -1024 |
| && (INTVAL (index) & 3) == 0); |
| } |
| |
| /* For quad modes, we restrict the constant offset to be slightly less |
| than what the instruction format permits. We do this because for |
| quad mode moves, we will actually decompose them into two separate |
| double-mode reads or writes. INDEX must therefore be a valid |
| (double-mode) offset and so should INDEX+8. */ |
| if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) |
| return (code == CONST_INT |
| && INTVAL (index) < 1016 |
| && INTVAL (index) > -1024 |
| && (INTVAL (index) & 3) == 0); |
| |
| /* We have no such constraint on double mode offsets, so we permit the |
| full range of the instruction format. */ |
| if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) |
| return (code == CONST_INT |
| && INTVAL (index) < 1024 |
| && INTVAL (index) > -1024 |
| && (INTVAL (index) & 3) == 0); |
| |
| if (arm_address_register_rtx_p (index, strict_p) |
| && (GET_MODE_SIZE (mode) <= 4)) |
| return 1; |
| |
| if (mode == DImode || mode == DFmode) |
| { |
| if (code == CONST_INT) |
| { |
| HOST_WIDE_INT val = INTVAL (index); |
| /* ??? Can we assume ldrd for thumb2? */ |
| /* Thumb-2 ldrd only has reg+const addressing modes. */ |
| /* ldrd supports offsets of +-1020. |
| However the ldr fallback does not. */ |
| return val > -256 && val < 256 && (val & 3) == 0; |
| } |
| else |
| return 0; |
| } |
| |
| if (code == MULT) |
| { |
| rtx xiop0 = XEXP (index, 0); |
| rtx xiop1 = XEXP (index, 1); |
| |
| return ((arm_address_register_rtx_p (xiop0, strict_p) |
| && thumb2_index_mul_operand (xiop1)) |
| || (arm_address_register_rtx_p (xiop1, strict_p) |
| && thumb2_index_mul_operand (xiop0))); |
| } |
| else if (code == ASHIFT) |
| { |
| rtx op = XEXP (index, 1); |
| |
| return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) |
| && CONST_INT_P (op) |
| && INTVAL (op) > 0 |
| && INTVAL (op) <= 3); |
| } |
| |
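  /* Otherwise this is a plain core-register offset: Thumb-2 has a 12-bit
     positive immediate form but only an 8-bit negative one, hence the
     asymmetric (-256, 4096) range below.  */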
| return (code == CONST_INT |
| && INTVAL (index) < 4096 |
| && INTVAL (index) > -256); |
| } |
| |
| /* Return nonzero if X is valid as a 16-bit Thumb state base register. */ |
| static int |
| thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p) |
| { |
| int regno; |
| |
| if (!REG_P (x)) |
| return 0; |
| |
| regno = REGNO (x); |
| |
| if (strict_p) |
| return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode); |
| |
| return (regno <= LAST_LO_REGNUM |
| || regno > LAST_VIRTUAL_REGISTER |
| || regno == FRAME_POINTER_REGNUM |
| || (GET_MODE_SIZE (mode) >= 4 |
| && (regno == STACK_POINTER_REGNUM |
| || regno >= FIRST_PSEUDO_REGISTER |
| || x == hard_frame_pointer_rtx |
| || x == arg_pointer_rtx))); |
| } |
| |
| /* Return nonzero if x is a legitimate index register. This is the case |
| for any base register that can access a QImode object. */ |
| inline static int |
| thumb1_index_register_rtx_p (rtx x, int strict_p) |
| { |
| return thumb1_base_register_rtx_p (x, QImode, strict_p); |
| } |
| |
| /* Return nonzero if x is a legitimate 16-bit Thumb-state address. |
| |
| The AP may be eliminated to either the SP or the FP, so we use the |
| least common denominator, e.g. SImode, and offsets from 0 to 64. |
| |
| ??? Verify whether the above is the right approach. |
| |
| ??? Also, the FP may be eliminated to the SP, so perhaps that |
| needs special handling also. |
| |
| ??? Look at how the mips16 port solves this problem. It probably uses |
| better ways to solve some of these problems. |
| |
   Although it is not incorrect to do so, we don't accept QImode and
   HImode addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack-based ones won't produce impossible code.  */
| int |
| thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p) |
| { |
| /* ??? Not clear if this is right. Experiment. */ |
| if (GET_MODE_SIZE (mode) < 4 |
| && !(reload_in_progress || reload_completed) |
| && (reg_mentioned_p (frame_pointer_rtx, x) |
| || reg_mentioned_p (arg_pointer_rtx, x) |
| || reg_mentioned_p (virtual_incoming_args_rtx, x) |
| || reg_mentioned_p (virtual_outgoing_args_rtx, x) |
| || reg_mentioned_p (virtual_stack_dynamic_rtx, x) |
| || reg_mentioned_p (virtual_stack_vars_rtx, x))) |
| return 0; |
| |
| /* Accept any base register. SP only in SImode or larger. */ |
| else if (thumb1_base_register_rtx_p (x, mode, strict_p)) |
| return 1; |
| |
| /* This is PC relative data before arm_reorg runs. */ |
| else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x) |
| && GET_CODE (x) == SYMBOL_REF |
| && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic) |
| return 1; |
| |
| /* This is PC relative data after arm_reorg runs. */ |
| else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode) |
| && reload_completed |
| && (GET_CODE (x) == LABEL_REF |
| || (GET_CODE (x) == CONST |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1))))) |
| return 1; |
| |
| /* Post-inc indexing only supported for SImode and larger. */ |
| else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4 |
| && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)) |
| return 1; |
| |
| else if (GET_CODE (x) == PLUS) |
| { |
| /* REG+REG address can be any two index registers. */ |
| /* We disallow FRAME+REG addressing since we know that FRAME |
| will be replaced with STACK, and SP relative addressing only |
| permits SP+OFFSET. */ |
| if (GET_MODE_SIZE (mode) <= 4 |
| && XEXP (x, 0) != frame_pointer_rtx |
| && XEXP (x, 1) != frame_pointer_rtx |
| && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) |
| && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p) |
| || (!strict_p && will_be_in_index_register (XEXP (x, 1))))) |
| return 1; |
| |
      /* REG+const has a 5- to 7-bit offset for non-SP registers.  */
| else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) |
| || XEXP (x, 0) == arg_pointer_rtx) |
| && CONST_INT_P (XEXP (x, 1)) |
| && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) |
| return 1; |
| |
      /* REG+const has a 10-bit offset for SP, but only SImode and
	 larger are supported.  */
| /* ??? Should probably check for DI/DFmode overflow here |
| just like GO_IF_LEGITIMATE_OFFSET does. */ |
| else if (REG_P (XEXP (x, 0)) |
| && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM |
| && GET_MODE_SIZE (mode) >= 4 |
| && CONST_INT_P (XEXP (x, 1)) |
| && INTVAL (XEXP (x, 1)) >= 0 |
| && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024 |
| && (INTVAL (XEXP (x, 1)) & 3) == 0) |
| return 1; |
| |
| else if (REG_P (XEXP (x, 0)) |
| && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM |
| || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM |
| || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER |
| && REGNO (XEXP (x, 0)) |
| <= LAST_VIRTUAL_POINTER_REGISTER)) |
| && GET_MODE_SIZE (mode) >= 4 |
| && CONST_INT_P (XEXP (x, 1)) |
| && (INTVAL (XEXP (x, 1)) & 3) == 0) |
| return 1; |
| } |
| |
| else if (GET_MODE_CLASS (mode) != MODE_FLOAT |
| && GET_MODE_SIZE (mode) == 4 |
| && GET_CODE (x) == SYMBOL_REF |
| && CONSTANT_POOL_ADDRESS_P (x) |
| && ! (flag_pic |
| && symbol_mentioned_p (get_pool_constant (x)) |
| && ! pcrel_constant_p (get_pool_constant (x)))) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* Return nonzero if VAL can be used as an offset in a Thumb-state address |
| instruction of mode MODE. */ |
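/* For example (illustrative only): with SImode (size 4) the accepted
   offsets are 0, 4, ..., 124; with HImode the even values 0, 2, ..., 62;
   and with QImode any value in [0, 31].  */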
| int |
| thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val) |
| { |
| switch (GET_MODE_SIZE (mode)) |
| { |
| case 1: |
| return val >= 0 && val < 32; |
| |
| case 2: |
| return val >= 0 && val < 64 && (val & 1) == 0; |
| |
| default: |
| return (val >= 0 |
| && (val + GET_MODE_SIZE (mode)) <= 128 |
| && (val & 3) == 0); |
| } |
| } |
| |
| bool |
| arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p) |
| { |
| if (TARGET_ARM) |
| return arm_legitimate_address_outer_p (mode, x, SET, strict_p); |
| else if (TARGET_THUMB2) |
| return thumb2_legitimate_address_p (mode, x, strict_p); |
| else /* if (TARGET_THUMB1) */ |
| return thumb1_legitimate_address_p (mode, x, strict_p); |
| } |
| |
| /* Worker function for TARGET_PREFERRED_RELOAD_CLASS. |
| |
| Given an rtx X being reloaded into a reg required to be |
| in class CLASS, return the class of reg to actually use. |
| In general this is just CLASS, but for the Thumb core registers and |
| immediate constants we prefer a LO_REGS class or a subset. */ |
| |
| static reg_class_t |
| arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass) |
| { |
| if (TARGET_32BIT) |
| return rclass; |
| else |
| { |
| if (rclass == GENERAL_REGS) |
| return LO_REGS; |
| else |
| return rclass; |
| } |
| } |
| |
| /* Build the SYMBOL_REF for __tls_get_addr. */ |
| |
| static GTY(()) rtx tls_get_addr_libfunc; |
| |
| static rtx |
| get_tls_get_addr (void) |
| { |
| if (!tls_get_addr_libfunc) |
| tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); |
| return tls_get_addr_libfunc; |
| } |
| |
| rtx |
| arm_load_tp (rtx target) |
| { |
| if (!target) |
| target = gen_reg_rtx (SImode); |
| |
| if (TARGET_HARD_TP) |
| { |
| /* Can return in any reg. */ |
| emit_insn (gen_load_tp_hard (target)); |
| } |
| else |
| { |
      /* Always returned in r0.  Immediately copy the result into a pseudo;
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */
| |
| rtx tmp; |
| |
| emit_insn (gen_load_tp_soft ()); |
| |
| tmp = gen_rtx_REG (SImode, R0_REGNUM); |
| emit_move_insn (target, tmp); |
| } |
| return target; |
| } |
| |
| static rtx |
| load_tls_operand (rtx x, rtx reg) |
| { |
| rtx tmp; |
| |
| if (reg == NULL_RTX) |
| reg = gen_reg_rtx (SImode); |
| |
| tmp = gen_rtx_CONST (SImode, x); |
| |
| emit_move_insn (reg, tmp); |
| |
| return reg; |
| } |
| |
| static rtx |
| arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc) |
| { |
| rtx insns, label, labelno, sum; |
| |
| gcc_assert (reloc != TLS_DESCSEQ); |
| start_sequence (); |
| |
| labelno = GEN_INT (pic_labelno++); |
| label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); |
| label = gen_rtx_CONST (VOIDmode, label); |
| |
| sum = gen_rtx_UNSPEC (Pmode, |
| gen_rtvec (4, x, GEN_INT (reloc), label, |
| GEN_INT (TARGET_ARM ? 8 : 4)), |
| UNSPEC_TLS); |
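  /* The final 8 (ARM) or 4 (Thumb) is the PC bias: a read of the PC yields
     the address of the current instruction plus 8 in ARM state and plus 4
     in Thumb state, and the offset stored for the TLS relocation has to
     compensate for that read-ahead in the pic_add_dot_plus_eight/four
     sequence below.  */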
| reg = load_tls_operand (sum, reg); |
| |
| if (TARGET_ARM) |
| emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); |
| else |
| emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); |
| |
| *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, |
| LCT_PURE, /* LCT_CONST? */ |
| Pmode, 1, reg, Pmode); |
| |
| insns = get_insns (); |
| end_sequence (); |
| |
| return insns; |
| } |
| |
| static rtx |
| arm_tls_descseq_addr (rtx x, rtx reg) |
| { |
| rtx labelno = GEN_INT (pic_labelno++); |
| rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); |
| rtx sum = gen_rtx_UNSPEC (Pmode, |
| gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ), |
| gen_rtx_CONST (VOIDmode, label), |
| GEN_INT (!TARGET_ARM)), |
| UNSPEC_TLS); |
| rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM)); |
| |
| emit_insn (gen_tlscall (x, labelno)); |
| if (!reg) |
| reg = gen_reg_rtx (SImode); |
| else |
| gcc_assert (REGNO (reg) != R0_REGNUM); |
| |
| emit_move_insn (reg, reg0); |
| |
| return reg; |
| } |
| |
| rtx |
| legitimize_tls_address (rtx x, rtx reg) |
| { |
| rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend; |
| unsigned int model = SYMBOL_REF_TLS_MODEL (x); |
| |
| switch (model) |
| { |
| case TLS_MODEL_GLOBAL_DYNAMIC: |
| if (TARGET_GNU2_TLS) |
| { |
| reg = arm_tls_descseq_addr (x, reg); |
| |
| tp = arm_load_tp (NULL_RTX); |
| |
| dest = gen_rtx_PLUS (Pmode, tp, reg); |
| } |
| else |
| { |
| /* Original scheme */ |
| insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32); |
| dest = gen_reg_rtx (Pmode); |
| emit_libcall_block (insns, dest, ret, x); |
| } |
| return dest; |
| |
| case TLS_MODEL_LOCAL_DYNAMIC: |
| if (TARGET_GNU2_TLS) |
| { |
| reg = arm_tls_descseq_addr (x, reg); |
| |
| tp = arm_load_tp (NULL_RTX); |
| |
| dest = gen_rtx_PLUS (Pmode, tp, reg); |
| } |
| else |
| { |
| insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32); |
| |
| /* Attach a unique REG_EQUIV, to allow the RTL optimizers to |
| share the LDM result with other LD model accesses. */ |
| eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx), |
| UNSPEC_TLS); |
| dest = gen_reg_rtx (Pmode); |
| emit_libcall_block (insns, dest, ret, eqv); |
| |
| /* Load the addend. */ |
| addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x, |
| GEN_INT (TLS_LDO32)), |
| UNSPEC_TLS); |
| addend = force_reg (SImode, gen_rtx_CONST (SImode, addend)); |
| dest = gen_rtx_PLUS (Pmode, dest, addend); |
| } |
| return dest; |
| |
| case TLS_MODEL_INITIAL_EXEC: |
| labelno = GEN_INT (pic_labelno++); |
| label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); |
| label = gen_rtx_CONST (VOIDmode, label); |
| sum = gen_rtx_UNSPEC (Pmode, |
| gen_rtvec (4, x, GEN_INT (TLS_IE32), label, |
| GEN_INT (TARGET_ARM ? 8 : 4)), |
| UNSPEC_TLS); |
| reg = load_tls_operand (sum, reg); |
| |
| if (TARGET_ARM) |
| emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno)); |
| else if (TARGET_THUMB2) |
| emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno)); |
| else |
| { |
| emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); |
| emit_move_insn (reg, gen_const_mem (SImode, reg)); |
| } |
| |
| tp = arm_load_tp (NULL_RTX); |
| |
| return gen_rtx_PLUS (Pmode, tp, reg); |
| |
| case TLS_MODEL_LOCAL_EXEC: |
| tp = arm_load_tp (NULL_RTX); |
| |
| reg = gen_rtx_UNSPEC (Pmode, |
| gen_rtvec (2, x, GEN_INT (TLS_LE32)), |
| UNSPEC_TLS); |
| reg = force_reg (SImode, gen_rtx_CONST (SImode, reg)); |
| |
| return gen_rtx_PLUS (Pmode, tp, reg); |
| |
| default: |
| abort (); |
| } |
| } |
| |
| /* Try machine-dependent ways of modifying an illegitimate address |
| to be legitimate. If we find one, return the new, valid address. */ |
| rtx |
| arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode) |
| { |
| if (arm_tls_referenced_p (x)) |
| { |
| rtx addend = NULL; |
| |
| if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) |
| { |
| addend = XEXP (XEXP (x, 0), 1); |
| x = XEXP (XEXP (x, 0), 0); |
| } |
| |
| if (GET_CODE (x) != SYMBOL_REF) |
| return x; |
| |
| gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0); |
| |
| x = legitimize_tls_address (x, NULL_RTX); |
| |
| if (addend) |
| { |
| x = gen_rtx_PLUS (SImode, x, addend); |
| orig_x = x; |
| } |
| else |
| return x; |
| } |
| |
| if (!TARGET_ARM) |
| { |
| /* TODO: legitimize_address for Thumb2. */ |
| if (TARGET_THUMB2) |
| return x; |
| return thumb_legitimize_address (x, orig_x, mode); |
| } |
| |
| if (GET_CODE (x) == PLUS) |
| { |
| rtx xop0 = XEXP (x, 0); |
| rtx xop1 = XEXP (x, 1); |
| |
| if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0)) |
| xop0 = force_reg (SImode, xop0); |
| |
| if (CONSTANT_P (xop1) && !CONST_INT_P (xop1) |
| && !symbol_mentioned_p (xop1)) |
| xop1 = force_reg (SImode, xop1); |
| |
| if (ARM_BASE_REGISTER_RTX_P (xop0) |
| && CONST_INT_P (xop1)) |
| { |
| HOST_WIDE_INT n, low_n; |
| rtx base_reg, val; |
| n = INTVAL (xop1); |
| |
| /* VFP addressing modes actually allow greater offsets, but for |
| now we just stick with the lowest common denominator. */ |
| if (mode == DImode |
| || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode)) |
| { |
| low_n = n & 0x0f; |
| n &= ~0x0f; |
| if (low_n > 4) |
| { |
| n += 16; |
| low_n -= 16; |
| } |
| } |
| else |
| { |
| low_n = ((mode) == TImode ? 0 |
| : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff)); |
| n -= low_n; |
| } |
| |
| base_reg = gen_reg_rtx (SImode); |
| val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX); |
| emit_move_insn (base_reg, val); |
| x = plus_constant (Pmode, base_reg, low_n); |
| } |
| else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1)) |
| x = gen_rtx_PLUS (SImode, xop0, xop1); |
| } |
| |
| /* XXX We don't allow MINUS any more -- see comment in |
| arm_legitimate_address_outer_p (). */ |
| else if (GET_CODE (x) == MINUS) |
| { |
| rtx xop0 = XEXP (x, 0); |
| rtx xop1 = XEXP (x, 1); |
| |
| if (CONSTANT_P (xop0)) |
| xop0 = force_reg (SImode, xop0); |
| |
| if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1)) |
| xop1 = force_reg (SImode, xop1); |
| |
| if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1)) |
| x = gen_rtx_MINUS (SImode, xop0, xop1); |
| } |
| |
  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses, which often allows the base register to be
     factorized across multiple adjacent memory references, and might even
     allow the minipool to be avoided entirely.  */
| else if (CONST_INT_P (x) && optimize > 0) |
| { |
| unsigned int bits; |
| HOST_WIDE_INT mask, base, index; |
| rtx base_reg; |
| |
      /* ldr and ldrb can use a 12-bit index, while ldrsb and the rest can
	 only use an 8-bit index.  So let's use a 12-bit index for SImode
	 only and hope that arm_gen_constant will enable ldrb to use more
	 bits.  */
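      /* A worked example (illustrative only): for SImode and x = 0x0fff0004,
	 bits = 12, so base = 0x0fff0000 and index = 4.  Since base has 12
	 bits set, more than (32 - bits) / 2 = 10, we flip to a negative
	 index: base becomes 0x0fff0fff and index becomes 4 - 0xfff = -4091,
	 which still sums to the original address.  */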
| bits = (mode == SImode) ? 12 : 8; |
| mask = (1 << bits) - 1; |
| base = INTVAL (x) & ~mask; |
| index = INTVAL (x) & mask; |
| if (bit_count (base & 0xffffffff) > (32 - bits)/2) |
| { |
| /* It'll most probably be more efficient to generate the base |
| with more bits set and use a negative index instead. */ |
| base |= mask; |
| index -= mask; |
| } |
| base_reg = force_reg (SImode, GEN_INT (base)); |
| x = plus_constant (Pmode, base_reg, index); |
| } |
| |
| if (flag_pic) |
| { |
| /* We need to find and carefully transform any SYMBOL and LABEL |
| references; so go back to the original address expression. */ |
| rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX); |
| |
| if (new_x != orig_x) |
| x = new_x; |
| } |
| |
| return x; |
| } |
| |
| |
| /* Try machine-dependent ways of modifying an illegitimate Thumb address |
| to be legitimate. If we find one, return the new, valid address. */ |
| rtx |
| thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode) |
| { |
| if (GET_CODE (x) == PLUS |
| && CONST_INT_P (XEXP (x, 1)) |
| && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode) |
| || INTVAL (XEXP (x, 1)) < 0)) |
| { |
| rtx xop0 = XEXP (x, 0); |
| rtx xop1 = XEXP (x, 1); |
| HOST_WIDE_INT offset = INTVAL (xop1); |
| |
| /* Try and fold the offset into a biasing of the base register and |
| then offsetting that. Don't do this when optimizing for space |
| since it can cause too many CSEs. */ |
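      /* Illustrative arithmetic only: for SImode (size 4) and offset 260,
	 delta = 260 - (256 - 4) = 8, so the address is rewritten as
	 (xop0 + 252) + 8, leaving a final offset that fits the Thumb
	 reg+offset range.  */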
| if (optimize_size && offset >= 0 |
| && offset < 256 + 31 * GET_MODE_SIZE (mode)) |
| { |
| HOST_WIDE_INT delta; |
| |
| if (offset >= 256) |
| delta = offset - (256 - GET_MODE_SIZE (mode)); |
| else if (offset < 32 * GET_MODE_SIZE (mode) + 8) |
| delta = 31 * GET_MODE_SIZE (mode); |
| else |
| delta = offset & (~31 * GET_MODE_SIZE (mode)); |
| |
| xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta), |
| NULL_RTX); |
| x = plus_constant (Pmode, xop0, delta); |
| } |
| else if (offset < 0 && offset > -256) |
	/* Small negative offsets are best done with a subtract before the
	   dereference, since forcing these into a register normally takes
	   two instructions.  */
| x = force_operand (x, NULL_RTX); |
| else |
| { |
| /* For the remaining cases, force the constant into a register. */ |
| xop1 = force_reg (SImode, xop1); |
| x = gen_rtx_PLUS (SImode, xop0, xop1); |
| } |
| } |
| else if (GET_CODE (x) == PLUS |
| && s_register_operand (XEXP (x, 1), SImode) |
| && !s_register_operand (XEXP (x, 0), SImode)) |
| { |
| rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX); |
| |
| x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1)); |
| } |
| |
| if (flag_pic) |
| { |
| /* We need to find and carefully transform any SYMBOL and LABEL |
| references; so go back to the original address expression. */ |
| rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX); |
| |
| if (new_x != orig_x) |
| x = new_x; |
| } |
| |
| return x; |
| } |
| |
| bool |
| arm_legitimize_reload_address (rtx *p, |
| machine_mode mode, |
| int opnum, int type, |
| int ind_levels ATTRIBUTE_UNUSED) |
| { |
| /* We must recognize output that we have already generated ourselves. */ |
| if (GET_CODE (*p) == PLUS |
| && GET_CODE (XEXP (*p, 0)) == PLUS |
| && REG_P (XEXP (XEXP (*p, 0), 0)) |
| && CONST_INT_P (XEXP (XEXP (*p, 0), 1)) |
| && CONST_INT_P (XEXP (*p, 1))) |
| { |
| push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, |
| MODE_BASE_REG_CLASS (mode), GET_MODE (*p), |
| VOIDmode, 0, 0, opnum, (enum reload_type) type); |
| return true; |
| } |
| |
| if (GET_CODE (*p) == PLUS |
| && REG_P (XEXP (*p, 0)) |
| && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0))) |
| /* If the base register is equivalent to a constant, let the generic |
| code handle it. Otherwise we will run into problems if a future |
| reload pass decides to rematerialize the constant. */ |
| && !reg_equiv_constant (ORIGINAL_REGNO (XEXP (*p, 0))) |
| && CONST_INT_P (XEXP (*p, 1))) |
| { |
| HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); |
| HOST_WIDE_INT low, high; |
| |
| /* Detect coprocessor load/stores. */ |
| bool coproc_p = ((TARGET_HARD_FLOAT |
| && TARGET_VFP |
| && (mode == SFmode || mode == DFmode)) |
| || (TARGET_REALLY_IWMMXT |
| && VALID_IWMMXT_REG_MODE (mode)) |
| || (TARGET_NEON |
| && (VALID_NEON_DREG_MODE (mode) |
| || VALID_NEON_QREG_MODE (mode)))); |
| |
      /* For some cases, bail out when the lower two bits of the offset are
	 nonzero (i.e. it is unaligned).  */
| if ((val & 0x3) != 0 |
| /* Coprocessor load/store indexes are 8-bits + '00' appended. */ |
| && (coproc_p |
| /* For DI, and DF under soft-float: */ |
| || ((mode == DImode || mode == DFmode) |
		       /* Without ldrd, we use stm/ldm, which does not
			  fare well with unaligned offsets.  */
| && (! TARGET_LDRD |
| /* Thumb-2 ldrd/strd is [-1020,+1020] in steps of 4. */ |
| || TARGET_THUMB2)))) |
| return false; |
| |
      /* When breaking down a [reg+index] reload address into
	 [(reg+high)+low], where the (reg+high) part gets turned into a
	 reload add insn, we try to decompose the index into high/low
	 values in a way that often also leads to better reload CSE.
| For example: |
| ldr r0, [r2, #4100] // Offset too large |
| ldr r1, [r2, #4104] // Offset too large |
| |
| is best reloaded as: |
| add t1, r2, #4096 |
| ldr r0, [t1, #4] |
| add t2, r2, #4096 |
| ldr r1, [t2, #8] |
| |
| which post-reload CSE can simplify in most cases to eliminate the |
| second add instruction: |
| add t1, r2, #4096 |
| ldr r0, [t1, #4] |
| ldr r1, [t1, #8] |
| |
| The idea here is that we want to split out the bits of the constant |
| as a mask, rather than as subtracting the maximum offset that the |
| respective type of load/store used can handle. |
| |
     A negative low part can still be useful even when the overall offset
     is positive; sometimes it leads to an immediate that can be
     constructed with fewer instructions.
| For example: |
| ldr r0, [r2, #0x3FFFFC] |
| |
| This is best reloaded as: |
| add t1, r2, #0x400000 |
| ldr r0, [t1, #-4] |
| |
     The trick for spotting this for a load insn with N bits of offset
     (i.e. bits N-1:0) is to look at bit N; if it is set, then choose a
     negative offset that will make bit N and all the bits below it
     become zero in the remainder part.
| |
| The SIGN_MAG_LOW_ADDR_BITS macro below implements this, with respect |
| to sign-magnitude addressing (i.e. separate +- bit, or 1's complement), |
| used in most cases of ARM load/store instructions. */ |
| |
| #define SIGN_MAG_LOW_ADDR_BITS(VAL, N) \ |
| (((VAL) & ((1 << (N)) - 1)) \ |
| ? (((VAL) & ((1 << ((N) + 1)) - 1)) ^ (1 << (N))) - (1 << (N)) \ |
| : 0) |
| |
| if (coproc_p) |
| { |
| low = SIGN_MAG_LOW_ADDR_BITS (val, 10); |
| |
| /* NEON quad-word load/stores are made of two double-word accesses, |
| so the valid index range is reduced by 8. Treat as 9-bit range if |
| we go over it. */ |
| if (TARGET_NEON && VALID_NEON_QREG_MODE (mode) && low >= 1016) |
| low = SIGN_MAG_LOW_ADDR_BITS (val, 9); |
| } |
| else if (GET_MODE_SIZE (mode) == 8) |
| { |
| if (TARGET_LDRD) |
| low = (TARGET_THUMB2 |
| ? SIGN_MAG_LOW_ADDR_BITS (val, 10) |
| : SIGN_MAG_LOW_ADDR_BITS (val, 8)); |
| else |
| /* For pre-ARMv5TE (without ldrd), we use ldm/stm(db/da/ib) |
| to access doublewords. The supported load/store offsets are |
| -8, -4, and 4, which we try to produce here. */ |
| low = ((val & 0xf) ^ 0x8) - 0x8; |
| } |
| else if (GET_MODE_SIZE (mode) < 8) |
| { |
| /* NEON element load/stores do not have an offset. */ |
| if (TARGET_NEON_FP16 && mode == HFmode) |
| return false; |
| |
| if (TARGET_THUMB2) |
| { |
| /* Thumb-2 has an asymmetrical index range of (-256,4096). |
| Try the wider 12-bit range first, and re-try if the result |
| is out of range. */ |
| low = SIGN_MAG_LOW_ADDR_BITS (val, 12); |
| if (low < -255) |
| low = SIGN_MAG_LOW_ADDR_BITS (val, 8); |
| } |
| else |
| { |
| if (mode == HImode || mode == HFmode) |
| { |
| if (arm_arch4) |
| low = SIGN_MAG_LOW_ADDR_BITS (val, 8); |
| else |
| { |
| /* The storehi/movhi_bytes fallbacks can use only |
| [-4094,+4094] of the full ldrb/strb index range. */ |
| low = SIGN_MAG_LOW_ADDR_BITS (val, 12); |
| if (low == 4095 || low == -4095) |
| return false; |
| } |
| } |
| else |
| low = SIGN_MAG_LOW_ADDR_BITS (val, 12); |
| } |
| } |
| else |
| return false; |
| |
| high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff) |
| ^ (unsigned HOST_WIDE_INT) 0x80000000) |
| - (unsigned HOST_WIDE_INT) 0x80000000); |
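      /* The expression above reduces (val - low) to its 32-bit signed
	 equivalent (sign-extending from bit 31), so that HIGH is canonical
	 even on 64-bit hosts.  */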
      /* Check for overflow or zero.  */
| if (low == 0 || high == 0 || (high + low != val)) |
| return false; |
| |
| /* Reload the high part into a base reg; leave the low part |
| in the mem. |
| Note that replacing this gen_rtx_PLUS with plus_constant is |
| wrong in this case because we rely on the |
| (plus (plus reg c1) c2) structure being preserved so that |
| XEXP (*p, 0) in push_reload below uses the correct term. */ |
| *p = gen_rtx_PLUS (GET_MODE (*p), |
| gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0), |
| GEN_INT (high)), |
| GEN_INT (low)); |
| push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, |
| MODE_BASE_REG_CLASS (mode), GET_MODE (*p), |
| VOIDmode, 0, 0, opnum, (enum reload_type) type); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| rtx |
| thumb_legitimize_reload_address (rtx *x_p, |
| machine_mode mode, |
| int opnum, int type, |
| int ind_levels ATTRIBUTE_UNUSED) |
| { |
| rtx x = *x_p; |
| |
| if (GET_CODE (x) == PLUS |
| && GET_MODE_SIZE (mode) < 4 |
| && REG_P (XEXP (x, 0)) |
| && XEXP (x, 0) == stack_pointer_rtx |
| && CONST_INT_P (XEXP (x, 1)) |
| && !thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1)))) |
| { |
| rtx orig_x = x; |
| |
| x = copy_rtx (x); |
| push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), |
| Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); |
| return x; |
| } |
| |
| /* If both registers are hi-regs, then it's better to reload the |
| entire expression rather than each register individually. That |
| only requires one reload register rather than two. */ |
| if (GET_CODE (x) == PLUS |
| && REG_P (XEXP (x, 0)) |
| && REG_P (XEXP (x, 1)) |
| && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 0), mode) |
| && !REG_MODE_OK_FOR_REG_BASE_P (XEXP (x, 1), mode)) |
| { |
| rtx orig_x = x; |
| |
| x = copy_rtx (x); |
| push_reload (orig_x, NULL_RTX, x_p, NULL, MODE_BASE_REG_CLASS (mode), |
| Pmode, VOIDmode, 0, 0, opnum, (enum reload_type) type); |
| return x; |
| } |
| |
| return NULL; |
| } |
| |
| /* Return TRUE if X contains any TLS symbol references. */ |
| |
| bool |
| arm_tls_referenced_p (rtx x) |
| { |
| if (! TARGET_HAVE_TLS) |
| return false; |
| |
| subrtx_iterator::array_type array; |
| FOR_EACH_SUBRTX (iter, array, x, ALL) |
| { |
| const_rtx x = *iter; |
| if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0) |
| return true; |
| |
| /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are |
| TLS offsets, not real symbol references. */ |
| if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) |
| iter.skip_subrtxes (); |
| } |
| return false; |
| } |
| |
| /* Implement TARGET_LEGITIMATE_CONSTANT_P. |
| |
| On the ARM, allow any integer (invalid ones are removed later by insn |
| patterns), nice doubles and symbol_refs which refer to the function's |
| constant pool XXX. |
| |
   When generating PIC, allow anything.  */
| |
| static bool |
| arm_legitimate_constant_p_1 (machine_mode, rtx x) |
| { |
| return flag_pic || !label_mentioned_p (x); |
| } |
| |
| static bool |
| thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) |
| { |
| return (CONST_INT_P (x) |
| || CONST_DOUBLE_P (x) |
| || CONSTANT_ADDRESS_P (x) |
| || flag_pic); |
| } |
| |
| static bool |
| arm_legitimate_constant_p (machine_mode mode, rtx x) |
| { |
| return (!arm_cannot_force_const_mem (mode, x) |
| && (TARGET_32BIT |
| ? arm_legitimate_constant_p_1 (mode, x) |
| : thumb_legitimate_constant_p (mode, x))); |
| } |
| |
| /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ |
| |
| static bool |
| arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) |
| { |
| rtx base, offset; |
| |
| if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P) |
| { |
| split_const (x, &base, &offset); |
| if (GET_CODE (base) == SYMBOL_REF |
| && !offset_within_block_p (base, INTVAL (offset))) |
| return true; |
| } |
| return arm_tls_referenced_p (x); |
| } |
| |
| #define REG_OR_SUBREG_REG(X) \ |
| (REG_P (X) \ |
| || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X)))) |
| |
| #define REG_OR_SUBREG_RTX(X) \ |
| (REG_P (X) ? (X) : SUBREG_REG (X)) |
| |
| static inline int |
| thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) |
| { |
| machine_mode mode = GET_MODE (x); |
| int total, words; |
| |
| switch (code) |
| { |
| case ASHIFT: |
| case ASHIFTRT: |
| case LSHIFTRT: |
| case ROTATERT: |
| return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2); |
| |
| case PLUS: |
| case MINUS: |
| case COMPARE: |
| case NEG: |
| case NOT: |
| return COSTS_N_INSNS (1); |
| |
| case MULT: |
| if (CONST_INT_P (XEXP (x, 1))) |
| { |
| int cycles = 0; |
| unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); |
| |
| while (i) |
| { |
| i >>= 2; |
| cycles++; |
| } |
| return COSTS_N_INSNS (2) + cycles; |
| } |
| return COSTS_N_INSNS (1) + 16; |
| |
| case SET: |
| /* A SET doesn't have a mode, so let's look at the SET_DEST to get |
| the mode. */ |
| words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x)))); |
| return (COSTS_N_INSNS (words) |
| + 4 * ((MEM_P (SET_SRC (x))) |
| + MEM_P (SET_DEST (x)))); |
| |
| case CONST_INT: |
| if (outer == SET) |
| { |
| if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) |
| return 0; |
| if (thumb_shiftable_const (INTVAL (x))) |
| return COSTS_N_INSNS (2); |
| return COSTS_N_INSNS (3); |
| } |
| else if ((outer == PLUS || outer == COMPARE) |
| && INTVAL (x) < 256 && INTVAL (x) > -256) |
| return 0; |
| else if ((outer == IOR || outer == XOR || outer == AND) |
| && INTVAL (x) < 256 && INTVAL (x) >= -256) |
| return COSTS_N_INSNS (1); |
| else if (outer == AND) |
| { |
| int i; |
| /* This duplicates the tests in the andsi3 expander. */ |
| for (i = 9; i <= 31; i++) |
| if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) |
| || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) |
| return COSTS_N_INSNS (2); |
| } |
| else if (outer == ASHIFT || outer == ASHIFTRT |
| || outer == LSHIFTRT) |
| return 0; |
| return COSTS_N_INSNS (2); |
| |
| case CONST: |
| case CONST_DOUBLE: |
| case LABEL_REF: |
| case SYMBOL_REF: |
| return COSTS_N_INSNS (3); |
| |
| case UDIV: |
| case UMOD: |
| case DIV: |
| case MOD: |
| return 100; |
| |
| case TRUNCATE: |
| return 99; |
| |
| case AND: |
| case XOR: |
| case IOR: |
| /* XXX guess. */ |
| return 8; |
| |
| case MEM: |
| /* XXX another guess. */ |
| /* Memory costs quite a lot for the first word, but subsequent words |
| load at the equivalent of a single insn each. */ |
| return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) |
| + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) |
| ? 4 : 0)); |
| |
| case IF_THEN_ELSE: |
| /* XXX a guess. */ |
| if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) |
| return 14; |
| return 2; |
| |
| case SIGN_EXTEND: |
| case ZERO_EXTEND: |
| total = mode == DImode ? COSTS_N_INSNS (1) : 0; |
| total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code); |
| |
| if (mode == SImode) |
| return total; |
| |
| if (arm_arch6) |
| return total + COSTS_N_INSNS (1); |
| |
| /* Assume a two-shift sequence. Increase the cost slightly so |
| we prefer actual shifts over an extend operation. */ |
| return total + 1 + COSTS_N_INSNS (2); |
| |
| default: |
| return 99; |
| } |
| } |
| |
| static inline bool |
| arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed) |
| { |
| machine_mode mode = GET_MODE (x); |
| enum rtx_code subcode; |
| rtx operand; |
| enum rtx_code code = GET_CODE (x); |
| *total = 0; |
| |
| switch (code) |
| { |
| case MEM: |
| /* Memory costs quite a lot for the first word, but subsequent words |
| load at the equivalent of a single insn each. */ |
| *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); |
| return true; |
| |
| case DIV: |
| case MOD: |
| case UDIV: |
| case UMOD: |
| if (TARGET_HARD_FLOAT && mode == SFmode) |
| *total = COSTS_N_INSNS (2); |
| else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE) |
| *total = COSTS_N_INSNS (4); |
| else |
| *total = COSTS_N_INSNS (20); |
| return false; |
| |
| case ROTATE: |
| if (REG_P (XEXP (x, 1))) |
| *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */ |
| else if (!CONST_INT_P (XEXP (x, 1))) |
| *total = rtx_cost (XEXP (x, 1), code, 1, speed); |
| |
| /* Fall through */ |
| case ROTATERT: |
| if (mode != SImode) |
| { |
| *total += COSTS_N_INSNS (4); |
| return true; |
| } |
| |
| /* Fall through */ |
| case ASHIFT: case LSHIFTRT: case ASHIFTRT: |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| if (mode == DImode) |
| { |
| *total += COSTS_N_INSNS (3); |
| return true; |
| } |
| |
| *total += COSTS_N_INSNS (1); |
| /* Increase the cost of complex shifts because they aren't any faster, |
| and reduce dual issue opportunities. */ |
| if (arm_tune_cortex_a9 |
| && outer != SET && !CONST_INT_P (XEXP (x, 1))) |
| ++*total; |
| |
| return true; |
| |
| case MINUS: |
| if (mode == DImode) |
| { |
| *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
| if (CONST_INT_P (XEXP (x, 0)) |
| && const_ok_for_arm (INTVAL (XEXP (x, 0)))) |
| { |
| *total += rtx_cost (XEXP (x, 1), code, 1, speed); |
| return true; |
| } |
| |
| if (CONST_INT_P (XEXP (x, 1)) |
| && const_ok_for_arm (INTVAL (XEXP (x, 1)))) |
| { |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| if (TARGET_HARD_FLOAT |
| && (mode == SFmode |
| || (mode == DFmode && !TARGET_VFP_SINGLE))) |
| { |
| *total = COSTS_N_INSNS (1); |
| if (CONST_DOUBLE_P (XEXP (x, 0)) |
| && arm_const_double_rtx (XEXP (x, 0))) |
| { |
| *total += rtx_cost (XEXP (x, 1), code, 1, speed); |
| return true; |
| } |
| |
| if (CONST_DOUBLE_P (XEXP (x, 1)) |
| && arm_const_double_rtx (XEXP (x, 1))) |
| { |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| } |
| |
| return false; |
| } |
| *total = COSTS_N_INSNS (20); |
| return false; |
| } |
| |
| *total = COSTS_N_INSNS (1); |
| if (CONST_INT_P (XEXP (x, 0)) |
| && const_ok_for_arm (INTVAL (XEXP (x, 0)))) |
| { |
| *total += rtx_cost (XEXP (x, 1), code, 1, speed); |
| return true; |
| } |
| |
| subcode = GET_CODE (XEXP (x, 1)); |
| if (subcode == ASHIFT || subcode == ASHIFTRT |
| || subcode == LSHIFTRT |
| || subcode == ROTATE || subcode == ROTATERT) |
| { |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed); |
| return true; |
| } |
| |
| /* A shift as a part of RSB costs no more than RSB itself. */ |
| if (GET_CODE (XEXP (x, 0)) == MULT |
| && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) |
| { |
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed); |
| *total += rtx_cost (XEXP (x, 1), code, 1, speed); |
| return true; |
| } |
| |
| if (subcode == MULT |
| && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)) |
| { |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed); |
| return true; |
| } |
| |
| if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE |
| || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE) |
| { |
| *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed); |
| if (REG_P (XEXP (XEXP (x, 1), 0)) |
| && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM) |
| *total += COSTS_N_INSNS (1); |
| |
| return true; |
| } |
| |
| /* Fall through */ |
| |
| case PLUS: |
| if (code == PLUS && arm_arch6 && mode == SImode |
| && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND |
| || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) |
| { |
| *total = COSTS_N_INSNS (1); |
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)), |
| 0, speed); |
| *total += rtx_cost (XEXP (x, 1), code, 1, speed); |
| return true; |
| } |
| |
      /* MLA: All arguments must be registers.  We filter out
	 multiplication by a power of two, so that we fall through to
	 the code below.  */
| if (GET_CODE (XEXP (x, 0)) == MULT |
| && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) |
| { |
| /* The cost comes from the cost of the multiply. */ |
| return false; |
| } |
| |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| if (TARGET_HARD_FLOAT |
| && (mode == SFmode |
| || (mode == DFmode && !TARGET_VFP_SINGLE))) |
| { |
| *total = COSTS_N_INSNS (1); |
| if (CONST_DOUBLE_P (XEXP (x, 1)) |
| && arm_const_double_rtx (XEXP (x, 1))) |
| { |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| *total = COSTS_N_INSNS (20); |
| return false; |
| } |
| |
| if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE |
| || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) |
| { |
| *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed); |
| if (REG_P (XEXP (XEXP (x, 0), 0)) |
| && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM) |
| *total += COSTS_N_INSNS (1); |
| return true; |
| } |
| |
| /* Fall through */ |
| |
| case AND: case XOR: case IOR: |
| |
      /* Normally the frame registers will be split into reg+const during
	 reload, so it is a bad idea to combine them with other instructions,
	 since then they might not be moved outside of loops.  As a compromise
	 we allow integration with ops that have a constant as their second
	 operand.  */
| if (REG_OR_SUBREG_REG (XEXP (x, 0)) |
| && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))) |
| && !CONST_INT_P (XEXP (x, 1))) |
| *total = COSTS_N_INSNS (1); |
| |
| if (mode == DImode) |
| { |
| *total += COSTS_N_INSNS (2); |
| if (CONST_INT_P (XEXP (x, 1)) |
| && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) |
| { |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| *total += COSTS_N_INSNS (1); |
| if (CONST_INT_P (XEXP (x, 1)) |
| && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) |
| { |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| } |
| subcode = GET_CODE (XEXP (x, 0)); |
| if (subcode == ASHIFT || subcode == ASHIFTRT |
| || subcode == LSHIFTRT |
| || subcode == ROTATE || subcode == ROTATERT) |
| { |
| *total += rtx_cost (XEXP (x, 1), code, 1, speed); |
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); |
| return true; |
| } |
| |
| if (subcode == MULT |
| && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) |
| { |
| *total += rtx_cost (XEXP (x, 1), code, 1, speed); |
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); |
| return true; |
| } |
| |
| if (subcode == UMIN || subcode == UMAX |
| || subcode == SMIN || subcode == SMAX) |
| { |
| *total = COSTS_N_INSNS (3); |
| return true; |
| } |
| |
| return false; |
| |
| case MULT: |
| /* This should have been handled by the CPU specific routines. */ |
| gcc_unreachable (); |
| |
| case TRUNCATE: |
| if (arm_arch3m && mode == SImode |
| && GET_CODE (XEXP (x, 0)) == LSHIFTRT |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
| && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) |
| == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))) |
| && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND |
| || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND)) |
| { |
| *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed); |
| return true; |
| } |
| *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */ |
| return false; |
| |
| case NEG: |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| if (TARGET_HARD_FLOAT |
| && (mode == SFmode |
| || (mode == DFmode && !TARGET_VFP_SINGLE))) |
| { |
| *total = COSTS_N_INSNS (1); |
| return false; |
| } |
| *total = COSTS_N_INSNS (2); |
| return false; |
| } |
| |
| /* Fall through */ |
| case NOT: |
      *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
| if (mode == SImode && code == NOT) |
| { |
| subcode = GET_CODE (XEXP (x, 0)); |
| if (subcode == ASHIFT || subcode == ASHIFTRT |
| || subcode == LSHIFTRT |
| || subcode == ROTATE || subcode == ROTATERT |
| || (subcode == MULT |
| && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))) |
| { |
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); |
| /* Register shifts cost an extra cycle. */ |
| if (!CONST_INT_P (XEXP (XEXP (x, 0), 1))) |
| *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1), |
| subcode, 1, speed); |
| return true; |
| } |
| } |
| |
| return false; |
| |
| case IF_THEN_ELSE: |
| if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) |
| { |
| *total = COSTS_N_INSNS (4); |
| return true; |
| } |
| |
| operand = XEXP (x, 0); |
| |
| if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE |
| || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE) |
| && REG_P (XEXP (operand, 0)) |
| && REGNO (XEXP (operand, 0)) == CC_REGNUM)) |
| *total += COSTS_N_INSNS (1); |
| *total += (rtx_cost (XEXP (x, 1), code, 1, speed) |
| + rtx_cost (XEXP (x, 2), code, 2, speed)); |
| return true; |
| |
| case NE: |
| if (mode == SImode && XEXP (x, 1) == const0_rtx) |
| { |
| *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| } |
| goto scc_insn; |
| |
| case GE: |
| if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM) |
| && mode == SImode && XEXP (x, 1) == const0_rtx) |
| { |
| *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| } |
| goto scc_insn; |
| |
| case LT: |
| if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM) |
| && mode == SImode && XEXP (x, 1) == const0_rtx) |
| { |
| *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| } |
| goto scc_insn; |
| |
| case EQ: |
| case GT: |
| case LE: |
| case GEU: |
| case LTU: |
| case GTU: |
| case LEU: |
| case UNORDERED: |
| case ORDERED: |
| case UNEQ: |
| case UNGE: |
| case UNLT: |
| case UNGT: |
| case UNLE: |
| scc_insn: |
      /* SCC insns.  If the comparison has already been performed, they
	 cost 2 instructions; otherwise they need an additional comparison
	 before them.  */
| *total = COSTS_N_INSNS (2); |
| if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM) |
| { |
| return true; |
| } |
| |
| /* Fall through */ |
| case COMPARE: |
| if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM) |
| { |
| *total = 0; |
| return true; |
| } |
| |
| *total += COSTS_N_INSNS (1); |
| if (CONST_INT_P (XEXP (x, 1)) |
| && const_ok_for_op (INTVAL (XEXP (x, 1)), code)) |
| { |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| } |
| |
| subcode = GET_CODE (XEXP (x, 0)); |
| if (subcode == ASHIFT || subcode == ASHIFTRT |
| || subcode == LSHIFTRT |
| || subcode == ROTATE || subcode == ROTATERT) |
| { |
| *total += rtx_cost (XEXP (x, 1), code, 1, speed); |
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); |
| return true; |
| } |
| |
| if (subcode == MULT |
| && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) |
| { |
| *total += rtx_cost (XEXP (x, 1), code, 1, speed); |
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed); |
| return true; |
| } |
| |
| return false; |
| |
| case UMIN: |
| case UMAX: |
| case SMIN: |
| case SMAX: |
| *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed); |
| if (!CONST_INT_P (XEXP (x, 1)) |
| || !const_ok_for_arm (INTVAL (XEXP (x, 1)))) |
| *total += rtx_cost (XEXP (x, 1), code, 1, speed); |
| return true; |
| |
| case ABS: |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| if (TARGET_HARD_FLOAT |
| && (mode == SFmode |
| || (mode == DFmode && !TARGET_VFP_SINGLE))) |
| { |
| *total = COSTS_N_INSNS (1); |
| return false; |
| } |
| *total = COSTS_N_INSNS (20); |
| return false; |
| } |
| *total = COSTS_N_INSNS (1); |
| if (mode == DImode) |
| *total += COSTS_N_INSNS (3); |
| return false; |
| |
| case SIGN_EXTEND: |
| case ZERO_EXTEND: |
| *total = 0; |
| if (GET_MODE_CLASS (mode) == MODE_INT) |
| { |
| rtx op = XEXP (x, 0); |
| machine_mode opmode = GET_MODE (op); |
| |
| if (mode == DImode) |
| *total += COSTS_N_INSNS (1); |
| |
| if (opmode != SImode) |
| { |
| if (MEM_P (op)) |
| { |
| /* If !arm_arch4, we use one of the extendhisi2_mem |
| or movhi_bytes patterns for HImode. For a QImode |
| sign extension, we first zero-extend from memory |
| and then perform a shift sequence. */ |
| if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND)) |
| *total += COSTS_N_INSNS (2); |
| } |
| else if (arm_arch6) |
| *total += COSTS_N_INSNS (1); |
| |
| /* We don't have the necessary insn, so we need to perform some |
| other operation. */ |
| else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode) |
| /* An and with constant 255. */ |
| *total += COSTS_N_INSNS (1); |
| else |
| /* A shift sequence. Increase costs slightly to avoid |
| combining two shifts into an extend operation. */ |
| *total += COSTS_N_INSNS (2) + 1; |
| } |
| |
| return false; |
| } |
| |
| switch (GET_MODE (XEXP (x, 0))) |
| { |
| case V8QImode: |
| case V4HImode: |
| case V2SImode: |
| case V4QImode: |
| case V2HImode: |
| *total = COSTS_N_INSNS (1); |
| return false; |
| |
| default: |
| gcc_unreachable (); |
| } |
| gcc_unreachable (); |
| |
| case ZERO_EXTRACT: |
| case SIGN_EXTRACT: |
| *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| |
| case CONST_INT: |
| if (const_ok_for_arm (INTVAL (x)) |
| || const_ok_for_arm (~INTVAL (x))) |
| *total = COSTS_N_INSNS (1); |
| else |
| *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX, |
| INTVAL (x), NULL_RTX, |
| NULL_RTX, 0, 0)); |
| return true; |
| |
| case CONST: |
| case LABEL_REF: |
| case SYMBOL_REF: |
| *total = COSTS_N_INSNS (3); |
| return true; |
| |
| case HIGH: |
| *total = COSTS_N_INSNS (1); |
| return true; |
| |
| case LO_SUM: |
| *total = COSTS_N_INSNS (1); |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| |
| case CONST_DOUBLE: |
| if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x) |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| *total = COSTS_N_INSNS (1); |
| else |
| *total = COSTS_N_INSNS (4); |
| return true; |
| |
| case SET: |
| /* The vec_extract patterns accept memory operands that require an |
| address reload. Account for the cost of that reload to give the |
| auto-inc-dec pass an incentive to try to replace them. */ |
| if (TARGET_NEON && MEM_P (SET_DEST (x)) |
| && GET_CODE (SET_SRC (x)) == VEC_SELECT) |
| { |
| *total = rtx_cost (SET_DEST (x), code, 0, speed); |
| if (!neon_vector_mem_operand (SET_DEST (x), 2, true)) |
| *total += COSTS_N_INSNS (1); |
| return true; |
| } |
| /* Likewise for the vec_set patterns. */ |
| if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE |
| && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE |
| && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0))) |
| { |
| rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0); |
| *total = rtx_cost (mem, code, 0, speed); |
| if (!neon_vector_mem_operand (mem, 2, true)) |
| *total += COSTS_N_INSNS (1); |
| return true; |
| } |
| return false; |
| |
| case UNSPEC: |
| /* We cost this as high as our memory costs to allow this to |
| be hoisted from loops. */ |
| if (XINT (x, 1) == UNSPEC_PIC_UNIFIED) |
| { |
| *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode)); |
| } |
| return true; |
| |
| case CONST_VECTOR: |
| if (TARGET_NEON |
| && TARGET_HARD_FLOAT |
| && outer == SET |
| && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) |
| && neon_immediate_valid_for_move (x, mode, NULL, NULL)) |
| *total = COSTS_N_INSNS (1); |
| else |
| *total = COSTS_N_INSNS (4); |
| return true; |
| |
| default: |
| *total = COSTS_N_INSNS (4); |
| return false; |
| } |
| } |
| |
| /* Estimates the size cost of thumb1 instructions. |
   For now most of the code is copied from thumb1_rtx_costs.  We need more
   fine-grained tuning when we have more related test cases.  */
| static inline int |
| thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) |
| { |
| machine_mode mode = GET_MODE (x); |
| int words; |
| |
| switch (code) |
| { |
| case ASHIFT: |
| case ASHIFTRT: |
| case LSHIFTRT: |
| case ROTATERT: |
| return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2); |
| |
| case PLUS: |
| case MINUS: |
      /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
	 shiftsub1 patterns defined by RTL expansion, especially for the
	 expansion of multiplication.  */
| if ((GET_CODE (XEXP (x, 0)) == MULT |
	   && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
| || (GET_CODE (XEXP (x, 1)) == MULT |
| && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))) |
| return COSTS_N_INSNS (2); |
      /* Deliberately fall through for a normal RTX.  */
| case COMPARE: |
| case NEG: |
| case NOT: |
| return COSTS_N_INSNS (1); |
| |
| case MULT: |
| if (CONST_INT_P (XEXP (x, 1))) |
| { |
	  /* The Thumb-1 mul instruction cannot operate on a constant; we
	     must load the constant into a register first.  */
| int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET); |
| /* For the targets which have a very small and high-latency multiply |
| unit, we prefer to synthesize the mult with up to 5 instructions, |
| giving a good balance between size and performance. */ |
| if (arm_arch6m && arm_m_profile_small_mul) |
| return COSTS_N_INSNS (5); |
| else |
| return COSTS_N_INSNS (1) + const_size; |
| } |
| return COSTS_N_INSNS (1); |
| |
| case SET: |
| /* A SET doesn't have a mode, so let's look at the SET_DEST to get |
| the mode. */ |
| words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x)))); |
| return COSTS_N_INSNS (words) |
| + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x)) |
| || satisfies_constraint_K (SET_SRC (x)) |
| /* thumb1_movdi_insn. */ |
| || ((words > 1) && MEM_P (SET_SRC (x)))); |
| |
| case CONST_INT: |
| if (outer == SET) |
| { |
| if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) |
| return COSTS_N_INSNS (1); |
| /* See split "TARGET_THUMB1 && satisfies_constraint_J". */ |
| if (INTVAL (x) >= -255 && INTVAL (x) <= -1) |
| return COSTS_N_INSNS (2); |
| /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ |
| if (thumb_shiftable_const (INTVAL (x))) |
| return COSTS_N_INSNS (2); |
| return COSTS_N_INSNS (3); |
| } |
| else if ((outer == PLUS || outer == COMPARE) |
| && INTVAL (x) < 256 && INTVAL (x) > -256) |
| return 0; |
| else if ((outer == IOR || outer == XOR || outer == AND) |
| && INTVAL (x) < 256 && INTVAL (x) >= -256) |
| return COSTS_N_INSNS (1); |
| else if (outer == AND) |
| { |
| int i; |
| /* This duplicates the tests in the andsi3 expander. */ |
| for (i = 9; i <= 31; i++) |
| if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) |
| || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) |
| return COSTS_N_INSNS (2); |
| } |
| else if (outer == ASHIFT || outer == ASHIFTRT |
| || outer == LSHIFTRT) |
| return 0; |
| return COSTS_N_INSNS (2); |
| |
| case CONST: |
| case CONST_DOUBLE: |
| case LABEL_REF: |
| case SYMBOL_REF: |
| return COSTS_N_INSNS (3); |
| |
| case UDIV: |
| case UMOD: |
| case DIV: |
| case MOD: |
| return 100; |
| |
| case TRUNCATE: |
| return 99; |
| |
| case AND: |
| case XOR: |
| case IOR: |
| return COSTS_N_INSNS (1); |
| |
| case MEM: |
| return (COSTS_N_INSNS (1) |
| + COSTS_N_INSNS (1) |
| * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) |
| + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) |
| ? COSTS_N_INSNS (1) : 0)); |
| |
| case IF_THEN_ELSE: |
| /* XXX a guess. */ |
| if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) |
| return 14; |
| return 2; |
| |
| case ZERO_EXTEND: |
| /* XXX still guessing. */ |
| switch (GET_MODE (XEXP (x, 0))) |
| { |
| case QImode: |
| return (1 + (mode == DImode ? 4 : 0) |
| + (MEM_P (XEXP (x, 0)) ? 10 : 0)); |
| |
| case HImode: |
| return (4 + (mode == DImode ? 4 : 0) |
| + (MEM_P (XEXP (x, 0)) ? 10 : 0)); |
| |
| case SImode: |
| return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0)); |
| |
| default: |
| return 99; |
| } |
| |
| default: |
| return 99; |
| } |
| } |
| |
| /* RTX costs when optimizing for size. */ |
| static bool |
| arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
| int *total) |
| { |
| machine_mode mode = GET_MODE (x); |
| if (TARGET_THUMB1) |
| { |
| *total = thumb1_size_rtx_costs (x, code, outer_code); |
| return true; |
| } |
| |
| /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */ |
| switch (code) |
| { |
| case MEM: |
      /* A memory access costs 1 insn if the mode is small or the address is
	 a single register; otherwise it costs one insn per word.  */
| if (REG_P (XEXP (x, 0))) |
| *total = COSTS_N_INSNS (1); |
| else if (flag_pic |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) |
| /* This will be split into two instructions. |
| See arm.md:calculate_pic_address. */ |
| *total = COSTS_N_INSNS (2); |
| else |
| *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
| return true; |
| |
| case DIV: |
| case MOD: |
| case UDIV: |
| case UMOD: |
| /* Needs a libcall, so it costs about this. */ |
| *total = COSTS_N_INSNS (2); |
| return false; |
| |
| case ROTATE: |
| if (mode == SImode && REG_P (XEXP (x, 1))) |
| { |
| *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false); |
| return true; |
| } |
| /* Fall through */ |
| case ROTATERT: |
| case ASHIFT: |
| case LSHIFTRT: |
| case ASHIFTRT: |
| if (mode == DImode && CONST_INT_P (XEXP (x, 1))) |
| { |
| *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false); |
| return true; |
| } |
| else if (mode == SImode) |
| { |
| *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false); |
| /* Slightly disparage register shifts, but not by much. */ |
| if (!CONST_INT_P (XEXP (x, 1))) |
| *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false); |
| return true; |
| } |
| |
| /* Needs a libcall. */ |
| *total = COSTS_N_INSNS (2); |
| return false; |
| |
| case MINUS: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| { |
| *total = COSTS_N_INSNS (1); |
| return false; |
| } |
| |
| if (mode == SImode) |
| { |
| enum rtx_code subcode0 = GET_CODE (XEXP (x, 0)); |
| enum rtx_code subcode1 = GET_CODE (XEXP (x, 1)); |
| |
| if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT |
| || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT |
| || subcode1 == ROTATE || subcode1 == ROTATERT |
| || subcode1 == ASHIFT || subcode1 == LSHIFTRT |
| || subcode1 == ASHIFTRT) |
| { |
| /* It's just the cost of the two operands. */ |
| *total = 0; |
| return false; |
| } |
| |
| *total = COSTS_N_INSNS (1); |
| return false; |
| } |
| |
| *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
| return false; |
| |
| case PLUS: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| { |
| *total = COSTS_N_INSNS (1); |
| return false; |
| } |
| |
| /* A shift as a part of ADD costs nothing. */ |
| if (GET_CODE (XEXP (x, 0)) == MULT |
| && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)) |
| { |
| *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1); |
| *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false); |
| *total += rtx_cost (XEXP (x, 1), code, 1, false); |
| return true; |
| } |
| |
| /* Fall through */ |
| case AND: case XOR: case IOR: |
| if (mode == SImode) |
| { |
| enum rtx_code subcode = GET_CODE (XEXP (x, 0)); |
| |
| if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT |
| || subcode == LSHIFTRT || subcode == ASHIFTRT |
| || (code == AND && subcode == NOT)) |
| { |
| /* It's just the cost of the two operands. */ |
| *total = 0; |
| return false; |
| } |
| } |
| |
| *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
| return false; |
| |
| case MULT: |
| *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
| return false; |
| |
| case NEG: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| { |
| *total = COSTS_N_INSNS (1); |
| return false; |
| } |
| |
| /* Fall through */ |
| case NOT: |
| *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
| |
| return false; |
| |
| case IF_THEN_ELSE: |
| *total = 0; |
| return false; |
| |
| case COMPARE: |
| if (cc_register (XEXP (x, 0), VOIDmode)) |
	*total = 0;
| else |
| *total = COSTS_N_INSNS (1); |
| return false; |
| |
| case ABS: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| *total = COSTS_N_INSNS (1); |
| else |
| *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); |
| return false; |
| |
| case SIGN_EXTEND: |
| case ZERO_EXTEND: |
| return arm_rtx_costs_1 (x, outer_code, total, 0); |
| |
| case CONST_INT: |
| if (const_ok_for_arm (INTVAL (x))) |
| /* A multiplication by a constant requires another instruction |
| to load the constant to a register. */ |
| *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT) |
| ? 1 : 0); |
| else if (const_ok_for_arm (~INTVAL (x))) |
| *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1); |
| else if (const_ok_for_arm (-INTVAL (x))) |
| { |
| if (outer_code == COMPARE || outer_code == PLUS |
| || outer_code == MINUS) |
| *total = 0; |
| else |
| *total = COSTS_N_INSNS (1); |
| } |
| else |
| *total = COSTS_N_INSNS (2); |
| return true; |
| |
| case CONST: |
| case LABEL_REF: |
| case SYMBOL_REF: |
| *total = COSTS_N_INSNS (2); |
| return true; |
| |
| case CONST_DOUBLE: |
| *total = COSTS_N_INSNS (4); |
| return true; |
| |
| case CONST_VECTOR: |
| if (TARGET_NEON |
| && TARGET_HARD_FLOAT |
| && outer_code == SET |
| && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) |
| && neon_immediate_valid_for_move (x, mode, NULL, NULL)) |
| *total = COSTS_N_INSNS (1); |
| else |
| *total = COSTS_N_INSNS (4); |
| return true; |
| |
| case HIGH: |
| case LO_SUM: |
| /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the |
| cost of these slightly. */ |
| *total = COSTS_N_INSNS (1) + 1; |
| return true; |
| |
| case SET: |
| return false; |
| |
| default: |
| if (mode != VOIDmode) |
| *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
| else |
	*total = COSTS_N_INSNS (4); /* Who knows? */
| return false; |
| } |
| } |
| |
| /* Helper function for arm_rtx_costs. If the operand is a valid shift |
| operand, then return the operand that is being shifted. If the shift |
| is not by a constant, then set SHIFT_REG to point to the operand. |
| Return NULL if OP is not a shifter operand. */ |
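/* For example (illustrative): for (mult (reg) (const_int 4)) this returns
   the register, since a multiply by a power of two is the canonical form of
   a left shift; for (ashift (reg A) (reg B)) it returns A and sets
   *SHIFT_REG to B.  */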
| static rtx |
| shifter_op_p (rtx op, rtx *shift_reg) |
| { |
| enum rtx_code code = GET_CODE (op); |
| |
| if (code == MULT && CONST_INT_P (XEXP (op, 1)) |
| && exact_log2 (INTVAL (XEXP (op, 1))) > 0) |
| return XEXP (op, 0); |
| else if (code == ROTATE && CONST_INT_P (XEXP (op, 1))) |
| return XEXP (op, 0); |
| else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT |
| || code == ASHIFTRT) |
| { |
| if (!CONST_INT_P (XEXP (op, 1))) |
| *shift_reg = XEXP (op, 1); |
| return XEXP (op, 0); |
| } |
| |
| return NULL; |
| } |
| |
| static bool |
| arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) |
| { |
| const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost; |
| gcc_assert (GET_CODE (x) == UNSPEC); |
| |
| switch (XINT (x, 1)) |
| { |
| case UNSPEC_UNALIGNED_LOAD: |
| /* We can only do unaligned loads into the integer unit, and we can't |
| use LDM or LDRD. */ |
| *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x))); |
| if (speed_p) |
| *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load |
| + extra_cost->ldst.load_unaligned); |
| |
| #ifdef NOT_YET |
| *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x), |
| ADDR_SPACE_GENERIC, speed_p); |
| #endif |
| return true; |
| |
| case UNSPEC_UNALIGNED_STORE: |
| *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x))); |
| if (speed_p) |
| *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store |
| + extra_cost->ldst.store_unaligned); |
| |
| *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p); |
| #ifdef NOT_YET |
| *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x), |
| ADDR_SPACE_GENERIC, speed_p); |
| #endif |
| return true; |
| |
| case UNSPEC_VRINTZ: |
| case UNSPEC_VRINTP: |
| case UNSPEC_VRINTM: |
| case UNSPEC_VRINTR: |
| case UNSPEC_VRINTX: |
| case UNSPEC_VRINTA: |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint; |
| |
| return true; |
| default: |
| *cost = COSTS_N_INSNS (2); |
| break; |
| } |
| return false; |
| } |
| |
| /* Cost of a libcall. We assume one insn per argument, an amount for the |
| call (one insn for -Os) and then one for processing the result. */ |
| #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2)) |
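/* For example (illustrative): LIBCALL_COST (2) evaluates to
   COSTS_N_INSNS (20) when optimizing for speed and to COSTS_N_INSNS (4)
   when optimizing for size.  */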
| |
| #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \ |
| do \ |
| { \ |
| shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \ |
| if (shift_op != NULL \ |
| && arm_rtx_shift_left_p (XEXP (x, IDX))) \ |
| { \ |
| if (shift_reg) \ |
| { \ |
| if (speed_p) \ |
| *cost += extra_cost->alu.arith_shift_reg; \ |
| *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \ |
| } \ |
| else if (speed_p) \ |
| *cost += extra_cost->alu.arith_shift; \ |
| \ |
| *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \ |
| + rtx_cost (XEXP (x, 1 - IDX), \ |
| OP, 1, speed_p)); \ |
| return true; \ |
| } \ |
| } \ |
| while (0); |
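/* The macro above handles narrow-mode PLUS and MINUS: if operand IDX of X
   is a left shift (or the equivalent multiply), the whole operation is
   costed as a single arithmetic-with-shift instruction and the enclosing
   function returns early.  See the uses in the MINUS and PLUS cases
   below.  */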
| |
| /* RTX costs. Make an estimate of the cost of executing the operation |
   X, which is contained within an operation with code OUTER_CODE.
| SPEED_P indicates whether the cost desired is the performance cost, |
| or the size cost. The estimate is stored in COST and the return |
| value is TRUE if the cost calculation is final, or FALSE if the |
| caller should recurse through the operands of X to add additional |
| costs. |
| |
| We currently make no attempt to model the size savings of Thumb-2 |
| 16-bit instructions. At the normal points in compilation where |
| this code is called we have no measure of whether the condition |
| flags are live or not, and thus no realistic way to determine what |
| the size will eventually be. */ |
| static bool |
| arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
| const struct cpu_cost_table *extra_cost, |
| int *cost, bool speed_p) |
| { |
| machine_mode mode = GET_MODE (x); |
| |
| if (TARGET_THUMB1) |
| { |
| if (speed_p) |
| *cost = thumb1_rtx_costs (x, code, outer_code); |
| else |
| *cost = thumb1_size_rtx_costs (x, code, outer_code); |
| return true; |
| } |
| |
| switch (code) |
| { |
| case SET: |
| *cost = 0; |
| /* SET RTXs don't have a mode so we get it from the destination. */ |
| mode = GET_MODE (SET_DEST (x)); |
| |
| if (REG_P (SET_SRC (x)) |
| && REG_P (SET_DEST (x))) |
| { |
| /* Assume that most copies can be done with a single insn, |
| unless we don't have HW FP, in which case everything |
| larger than word mode will require two insns. */ |
| *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT |
| && GET_MODE_SIZE (mode) > 4) |
| || mode == DImode) |
| ? 2 : 1); |
| /* Conditional register moves can be encoded |
| in 16 bits in Thumb mode. */ |
| if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC) |
| *cost >>= 1; |
| |
| return true; |
| } |
| |
| if (CONST_INT_P (SET_SRC (x))) |
| { |
| /* Handle CONST_INT here, since the value doesn't have a mode |
| and we would otherwise be unable to work out the true cost. */ |
| *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p); |
| outer_code = SET; |
| /* Slightly lower the cost of setting a core reg to a constant. |
| This helps break up chains and allows for better scheduling. */ |
| if (REG_P (SET_DEST (x)) |
| && REGNO (SET_DEST (x)) <= LR_REGNUM) |
| *cost -= 1; |
| x = SET_SRC (x); |
| /* Immediate moves with an immediate in the range [0, 255] can be |
| encoded in 16 bits in Thumb mode. */ |
| if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode |
	    && INTVAL (x) >= 0 && INTVAL (x) <= 255)
| *cost >>= 1; |
| goto const_int_cost; |
| } |
| |
| return false; |
| |
| case MEM: |
| /* A memory access costs 1 insn if the mode is small, or the address is |
| a single register, otherwise it costs one insn per word. */ |
| if (REG_P (XEXP (x, 0))) |
| *cost = COSTS_N_INSNS (1); |
| else if (flag_pic |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) |
| /* This will be split into two instructions. |
| See arm.md:calculate_pic_address. */ |
| *cost = COSTS_N_INSNS (2); |
| else |
| *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
| |
| /* For speed optimizations, add the costs of the address and |
| accessing memory. */ |
| if (speed_p) |
| #ifdef NOT_YET |
| *cost += (extra_cost->ldst.load |
| + arm_address_cost (XEXP (x, 0), mode, |
| ADDR_SPACE_GENERIC, speed_p)); |
| #else |
| *cost += extra_cost->ldst.load; |
| #endif |
| return true; |
| |
| case PARALLEL: |
| { |
| /* Calculations of LDM costs are complex. We assume an initial cost |
| (ldm_1st) which will load the number of registers mentioned in |
| ldm_regs_per_insn_1st registers; then each additional |
| ldm_regs_per_insn_subsequent registers cost one more insn. The |
| formula for N regs is thus: |
| |
| ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0) |
| + ldm_regs_per_insn_subsequent - 1) |
| / ldm_regs_per_insn_subsequent). |
| |
| Additional costs may also be added for addressing. A similar |
| formula is used for STM. */ |
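
	/* Worked example (illustrative values): with
	   ldm_regs_per_insn_1st == 3 and ldm_regs_per_insn_subsequent == 2,
	   a 7-register LDM adds COSTS_N_INSNS ((MAX (7 - 3, 0) + 2 - 1) / 2)
	   = COSTS_N_INSNS (2) on top of the initial LDM cost.  */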
| |
| bool is_ldm = load_multiple_operation (x, SImode); |
| bool is_stm = store_multiple_operation (x, SImode); |
| |
| *cost = COSTS_N_INSNS (1); |
| |
| if (is_ldm || is_stm) |
| { |
| if (speed_p) |
| { |
| HOST_WIDE_INT nregs = XVECLEN (x, 0); |
| HOST_WIDE_INT regs_per_insn_1st = is_ldm |
| ? extra_cost->ldst.ldm_regs_per_insn_1st |
| : extra_cost->ldst.stm_regs_per_insn_1st; |
| HOST_WIDE_INT regs_per_insn_sub = is_ldm |
| ? extra_cost->ldst.ldm_regs_per_insn_subsequent |
| : extra_cost->ldst.stm_regs_per_insn_subsequent; |
| |
| *cost += regs_per_insn_1st |
| + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0)) |
| + regs_per_insn_sub - 1) |
| / regs_per_insn_sub); |
| return true; |
| } |
| |
| } |
| return false; |
| } |
| case DIV: |
| case UDIV: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| *cost = COSTS_N_INSNS (speed_p |
| ? extra_cost->fp[mode != SFmode].div : 1); |
| else if (mode == SImode && TARGET_IDIV) |
| *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1); |
| else |
| *cost = LIBCALL_COST (2); |
| return false; /* All arguments must be in registers. */ |
| |
| case MOD: |
| case UMOD: |
| *cost = LIBCALL_COST (2); |
| return false; /* All arguments must be in registers. */ |
| |
| case ROTATE: |
| if (mode == SImode && REG_P (XEXP (x, 1))) |
| { |
| *cost = (COSTS_N_INSNS (2) |
| + rtx_cost (XEXP (x, 0), code, 0, speed_p)); |
| if (speed_p) |
| *cost += extra_cost->alu.shift_reg; |
| return true; |
| } |
| /* Fall through */ |
| case ROTATERT: |
| case ASHIFT: |
| case LSHIFTRT: |
| case ASHIFTRT: |
| if (mode == DImode && CONST_INT_P (XEXP (x, 1))) |
| { |
| *cost = (COSTS_N_INSNS (3) |
| + rtx_cost (XEXP (x, 0), code, 0, speed_p)); |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.shift; |
| return true; |
| } |
| else if (mode == SImode) |
| { |
| *cost = (COSTS_N_INSNS (1) |
| + rtx_cost (XEXP (x, 0), code, 0, speed_p)); |
| /* Slightly disparage register shifts at -Os, but not by much. */ |
| if (!CONST_INT_P (XEXP (x, 1))) |
	    *cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
		      + rtx_cost (XEXP (x, 1), code, 1, speed_p));
| return true; |
| } |
| else if (GET_MODE_CLASS (mode) == MODE_INT |
| && GET_MODE_SIZE (mode) < 4) |
| { |
| if (code == ASHIFT) |
| { |
| *cost = (COSTS_N_INSNS (1) |
| + rtx_cost (XEXP (x, 0), code, 0, speed_p)); |
| /* Slightly disparage register shifts at -Os, but not by |
| much. */ |
| if (!CONST_INT_P (XEXP (x, 1))) |
		*cost += ((speed_p ? extra_cost->alu.shift_reg : 1)
			  + rtx_cost (XEXP (x, 1), code, 1, speed_p));
| } |
| else if (code == LSHIFTRT || code == ASHIFTRT) |
| { |
| if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1))) |
| { |
| /* Can use SBFX/UBFX. */ |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.bfx; |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| } |
| else |
| { |
| *cost = COSTS_N_INSNS (2); |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| if (speed_p) |
| { |
| if (CONST_INT_P (XEXP (x, 1))) |
| *cost += 2 * extra_cost->alu.shift; |
| else |
| *cost += (extra_cost->alu.shift |
| + extra_cost->alu.shift_reg); |
| } |
| else |
| /* Slightly disparage register shifts. */ |
| *cost += !CONST_INT_P (XEXP (x, 1)); |
| } |
| } |
| else /* Rotates. */ |
| { |
| *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1))); |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| if (speed_p) |
| { |
| if (CONST_INT_P (XEXP (x, 1))) |
| *cost += (2 * extra_cost->alu.shift |
| + extra_cost->alu.log_shift); |
| else |
| *cost += (extra_cost->alu.shift |
| + extra_cost->alu.shift_reg |
| + extra_cost->alu.log_shift_reg); |
| } |
| } |
| return true; |
| } |
| |
| *cost = LIBCALL_COST (2); |
| return false; |
| |
| case BSWAP: |
| if (arm_arch6) |
| { |
| if (mode == SImode) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.rev; |
| |
| return false; |
| } |
| } |
| else |
| { |
| /* No rev instruction available. Look at arm_legacy_rev |
| and thumb_legacy_rev for the form of RTL used then. */ |
| if (TARGET_THUMB) |
| { |
| *cost = COSTS_N_INSNS (10); |
| |
| if (speed_p) |
| { |
| *cost += 6 * extra_cost->alu.shift; |
| *cost += 3 * extra_cost->alu.logical; |
| } |
| } |
| else |
| { |
| *cost = COSTS_N_INSNS (5); |
| |
| if (speed_p) |
| { |
| *cost += 2 * extra_cost->alu.shift; |
| *cost += extra_cost->alu.arith_shift; |
| *cost += 2 * extra_cost->alu.logical; |
| } |
| } |
| return true; |
| } |
| return false; |
| |
| case MINUS: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (GET_CODE (XEXP (x, 0)) == MULT |
| || GET_CODE (XEXP (x, 1)) == MULT) |
| { |
| rtx mul_op0, mul_op1, sub_op; |
| |
| if (speed_p) |
| *cost += extra_cost->fp[mode != SFmode].mult_addsub; |
| |
| if (GET_CODE (XEXP (x, 0)) == MULT) |
| { |
| mul_op0 = XEXP (XEXP (x, 0), 0); |
| mul_op1 = XEXP (XEXP (x, 0), 1); |
| sub_op = XEXP (x, 1); |
| } |
| else |
| { |
| mul_op0 = XEXP (XEXP (x, 1), 0); |
| mul_op1 = XEXP (XEXP (x, 1), 1); |
| sub_op = XEXP (x, 0); |
| } |
| |
| /* The first operand of the multiply may be optionally |
| negated. */ |
| if (GET_CODE (mul_op0) == NEG) |
| mul_op0 = XEXP (mul_op0, 0); |
| |
| *cost += (rtx_cost (mul_op0, code, 0, speed_p) |
| + rtx_cost (mul_op1, code, 0, speed_p) |
| + rtx_cost (sub_op, code, 0, speed_p)); |
| |
| return true; |
| } |
| |
| if (speed_p) |
| *cost += extra_cost->fp[mode != SFmode].addsub; |
| return false; |
| } |
| |
| if (mode == SImode) |
| { |
| rtx shift_by_reg = NULL; |
| rtx shift_op; |
| rtx non_shift_op; |
| |
| *cost = COSTS_N_INSNS (1); |
| |
| shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg); |
| if (shift_op == NULL) |
| { |
| shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg); |
| non_shift_op = XEXP (x, 0); |
| } |
| else |
| non_shift_op = XEXP (x, 1); |
| |
| if (shift_op != NULL) |
| { |
| if (shift_by_reg != NULL) |
| { |
| if (speed_p) |
| *cost += extra_cost->alu.arith_shift_reg; |
| *cost += rtx_cost (shift_by_reg, code, 0, speed_p); |
| } |
| else if (speed_p) |
| *cost += extra_cost->alu.arith_shift; |
| |
| *cost += (rtx_cost (shift_op, code, 0, speed_p) |
| + rtx_cost (non_shift_op, code, 0, speed_p)); |
| return true; |
| } |
| |
| if (arm_arch_thumb2 |
| && GET_CODE (XEXP (x, 1)) == MULT) |
| { |
| /* MLS. */ |
| if (speed_p) |
| *cost += extra_cost->mult[0].add; |
| *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p) |
| + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p) |
| + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p)); |
| return true; |
| } |
| |
| if (CONST_INT_P (XEXP (x, 0))) |
| { |
| int insns = arm_gen_constant (MINUS, SImode, NULL_RTX, |
| INTVAL (XEXP (x, 0)), NULL_RTX, |
| NULL_RTX, 1, 0); |
| *cost = COSTS_N_INSNS (insns); |
| if (speed_p) |
| *cost += insns * extra_cost->alu.arith; |
| *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p); |
| return true; |
| } |
| else if (speed_p) |
| *cost += extra_cost->alu.arith; |
| |
| return false; |
| } |
| |
| if (GET_MODE_CLASS (mode) == MODE_INT |
| && GET_MODE_SIZE (mode) < 4) |
| { |
| rtx shift_op, shift_reg; |
| shift_reg = NULL; |
| |
| /* We check both sides of the MINUS for shifter operands since, |
| unlike PLUS, it's not commutative. */ |
| |
| HANDLE_NARROW_SHIFT_ARITH (MINUS, 0) |
| HANDLE_NARROW_SHIFT_ARITH (MINUS, 1) |
| |
| /* Slightly disparage, as we might need to widen the result. */ |
| *cost = 1 + COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.arith; |
| |
| if (CONST_INT_P (XEXP (x, 0))) |
| { |
| *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| if (mode == DImode) |
| { |
| *cost = COSTS_N_INSNS (2); |
| |
| if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) |
| { |
| rtx op1 = XEXP (x, 1); |
| |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.arith; |
| |
| if (GET_CODE (op1) == ZERO_EXTEND) |
| *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p); |
| else |
| *cost += rtx_cost (op1, MINUS, 1, speed_p); |
| *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, |
| 0, speed_p); |
| return true; |
| } |
| else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) |
| { |
| if (speed_p) |
| *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift; |
| *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND, |
| 0, speed_p) |
| + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p)); |
| return true; |
| } |
| else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND |
| || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND) |
| { |
| if (speed_p) |
| *cost += (extra_cost->alu.arith |
| + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND |
| ? extra_cost->alu.arith |
| : extra_cost->alu.arith_shift)); |
| *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p) |
| + rtx_cost (XEXP (XEXP (x, 1), 0), |
| GET_CODE (XEXP (x, 1)), 0, speed_p)); |
| return true; |
| } |
| |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.arith; |
| return false; |
| } |
| |
| /* Vector mode? */ |
| |
| *cost = LIBCALL_COST (2); |
| return false; |
| |
| case PLUS: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (GET_CODE (XEXP (x, 0)) == MULT) |
| { |
| rtx mul_op0, mul_op1, add_op; |
| |
| if (speed_p) |
| *cost += extra_cost->fp[mode != SFmode].mult_addsub; |
| |
| mul_op0 = XEXP (XEXP (x, 0), 0); |
| mul_op1 = XEXP (XEXP (x, 0), 1); |
| add_op = XEXP (x, 1); |
| |
| *cost += (rtx_cost (mul_op0, code, 0, speed_p) |
| + rtx_cost (mul_op1, code, 0, speed_p) |
| + rtx_cost (add_op, code, 0, speed_p)); |
| |
| return true; |
| } |
| |
| if (speed_p) |
| *cost += extra_cost->fp[mode != SFmode].addsub; |
| return false; |
| } |
| else if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| *cost = LIBCALL_COST (2); |
| return false; |
| } |
| |
| /* Narrow modes can be synthesized in SImode, but the range |
| of useful sub-operations is limited. Check for shift operations |
| on one of the operands. Only left shifts can be used in the |
| narrow modes. */ |
| if (GET_MODE_CLASS (mode) == MODE_INT |
| && GET_MODE_SIZE (mode) < 4) |
| { |
| rtx shift_op, shift_reg; |
| shift_reg = NULL; |
| |
| HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) |
| |
| if (CONST_INT_P (XEXP (x, 1))) |
| { |
| int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, |
| INTVAL (XEXP (x, 1)), NULL_RTX, |
| NULL_RTX, 1, 0); |
| *cost = COSTS_N_INSNS (insns); |
| if (speed_p) |
| *cost += insns * extra_cost->alu.arith; |
| /* Slightly penalize a narrow operation as the result may |
| need widening. */ |
| *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p); |
| return true; |
| } |
| |
| /* Slightly penalize a narrow operation as the result may |
| need widening. */ |
| *cost = 1 + COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.arith; |
| |
| return false; |
| } |
| |
| if (mode == SImode) |
| { |
| rtx shift_op, shift_reg; |
| |
| *cost = COSTS_N_INSNS (1); |
| if (TARGET_INT_SIMD |
| && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND |
| || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) |
| { |
| /* UXTA[BH] or SXTA[BH]. */ |
| if (speed_p) |
| *cost += extra_cost->alu.extend_arith; |
| *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0, |
| speed_p) |
| + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p)); |
| return true; |
| } |
| |
| shift_reg = NULL; |
| shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); |
| if (shift_op != NULL) |
| { |
| if (shift_reg) |
| { |
| if (speed_p) |
| *cost += extra_cost->alu.arith_shift_reg; |
| *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); |
| } |
| else if (speed_p) |
| *cost += extra_cost->alu.arith_shift; |
| |
| *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) |
| + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); |
| return true; |
| } |
| if (GET_CODE (XEXP (x, 0)) == MULT) |
| { |
| rtx mul_op = XEXP (x, 0); |
| |
| *cost = COSTS_N_INSNS (1); |
| |
| if (TARGET_DSP_MULTIPLY |
| && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND |
| && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND |
| || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT |
| && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1)) |
| && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16))) |
| || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT |
| && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1)) |
| && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16 |
| && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND |
| || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT |
| && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1)) |
| && (INTVAL (XEXP (XEXP (mul_op, 1), 1)) |
| == 16)))))) |
| { |
| /* SMLA[BT][BT]. */ |
| if (speed_p) |
| *cost += extra_cost->mult[0].extend_add; |
| *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), |
| SIGN_EXTEND, 0, speed_p) |
| + rtx_cost (XEXP (XEXP (mul_op, 1), 0), |
| SIGN_EXTEND, 0, speed_p) |
| + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); |
| return true; |
| } |
| |
| if (speed_p) |
| *cost += extra_cost->mult[0].add; |
| *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p) |
| + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p) |
| + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); |
| return true; |
| } |
| if (CONST_INT_P (XEXP (x, 1))) |
| { |
| int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, |
| INTVAL (XEXP (x, 1)), NULL_RTX, |
| NULL_RTX, 1, 0); |
| *cost = COSTS_N_INSNS (insns); |
| if (speed_p) |
| *cost += insns * extra_cost->alu.arith; |
| *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p); |
| return true; |
| } |
| else if (speed_p) |
| *cost += extra_cost->alu.arith; |
| |
| return false; |
| } |
| |
| if (mode == DImode) |
| { |
| if (arm_arch3m |
| && GET_CODE (XEXP (x, 0)) == MULT |
| && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND |
| && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND) |
| || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND |
| && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND))) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->mult[1].extend_add; |
| *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), |
| ZERO_EXTEND, 0, speed_p) |
| + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), |
| ZERO_EXTEND, 0, speed_p) |
| + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); |
| return true; |
| } |
| |
| *cost = COSTS_N_INSNS (2); |
| |
| if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND |
| || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) |
| { |
| if (speed_p) |
| *cost += (extra_cost->alu.arith |
| + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND |
| ? extra_cost->alu.arith |
| : extra_cost->alu.arith_shift)); |
| |
| *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0, |
| speed_p) |
| + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p)); |
| return true; |
| } |
| |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.arith; |
| return false; |
| } |
| |
| /* Vector mode? */ |
| *cost = LIBCALL_COST (2); |
| return false; |
| case IOR: |
| if (mode == SImode && arm_arch6 && aarch_rev16_p (x)) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.rev; |
| |
| return true; |
| } |
| /* Fall through. */ |
| case AND: case XOR: |
| if (mode == SImode) |
| { |
| enum rtx_code subcode = GET_CODE (XEXP (x, 0)); |
| rtx op0 = XEXP (x, 0); |
| rtx shift_op, shift_reg; |
| |
| *cost = COSTS_N_INSNS (1); |
| |
| if (subcode == NOT |
| && (code == AND |
| || (code == IOR && TARGET_THUMB2))) |
| op0 = XEXP (op0, 0); |
| |
| shift_reg = NULL; |
| shift_op = shifter_op_p (op0, &shift_reg); |
| if (shift_op != NULL) |
| { |
| if (shift_reg) |
| { |
| if (speed_p) |
| *cost += extra_cost->alu.log_shift_reg; |
| *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); |
| } |
| else if (speed_p) |
| *cost += extra_cost->alu.log_shift; |
| |
| *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) |
| + rtx_cost (XEXP (x, 1), code, 1, speed_p)); |
| return true; |
| } |
| |
| if (CONST_INT_P (XEXP (x, 1))) |
| { |
| int insns = arm_gen_constant (code, SImode, NULL_RTX, |
| INTVAL (XEXP (x, 1)), NULL_RTX, |
| NULL_RTX, 1, 0); |
| |
| *cost = COSTS_N_INSNS (insns); |
| if (speed_p) |
| *cost += insns * extra_cost->alu.logical; |
| *cost += rtx_cost (op0, code, 0, speed_p); |
| return true; |
| } |
| |
| if (speed_p) |
| *cost += extra_cost->alu.logical; |
| *cost += (rtx_cost (op0, code, 0, speed_p) |
| + rtx_cost (XEXP (x, 1), code, 1, speed_p)); |
| return true; |
| } |
| |
| if (mode == DImode) |
| { |
| rtx op0 = XEXP (x, 0); |
| enum rtx_code subcode = GET_CODE (op0); |
| |
| *cost = COSTS_N_INSNS (2); |
| |
| if (subcode == NOT |
| && (code == AND |
| || (code == IOR && TARGET_THUMB2))) |
| op0 = XEXP (op0, 0); |
| |
| if (GET_CODE (op0) == ZERO_EXTEND) |
| { |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.logical; |
| |
| *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p) |
| + rtx_cost (XEXP (x, 1), code, 0, speed_p)); |
| return true; |
| } |
| else if (GET_CODE (op0) == SIGN_EXTEND) |
| { |
| if (speed_p) |
| *cost += extra_cost->alu.logical + extra_cost->alu.log_shift; |
| |
| *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p) |
| + rtx_cost (XEXP (x, 1), code, 0, speed_p)); |
| return true; |
| } |
| |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.logical; |
| |
| return true; |
| } |
| /* Vector mode? */ |
| |
| *cost = LIBCALL_COST (2); |
| return false; |
| |
| case MULT: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| { |
| rtx op0 = XEXP (x, 0); |
| |
| *cost = COSTS_N_INSNS (1); |
| |
| if (GET_CODE (op0) == NEG && !flag_rounding_math) |
| op0 = XEXP (op0, 0); |
| |
| if (speed_p) |
| *cost += extra_cost->fp[mode != SFmode].mult; |
| |
| *cost += (rtx_cost (op0, MULT, 0, speed_p) |
| + rtx_cost (XEXP (x, 1), MULT, 1, speed_p)); |
| return true; |
| } |
| else if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| *cost = LIBCALL_COST (2); |
| return false; |
| } |
| |
| if (mode == SImode) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (TARGET_DSP_MULTIPLY |
| && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND |
| && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND |
| || (GET_CODE (XEXP (x, 1)) == ASHIFTRT |
| && CONST_INT_P (XEXP (XEXP (x, 1), 1)) |
| && INTVAL (XEXP (XEXP (x, 1), 1)) == 16))) |
| || (GET_CODE (XEXP (x, 0)) == ASHIFTRT |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
| && INTVAL (XEXP (XEXP (x, 0), 1)) == 16 |
| && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND |
| || (GET_CODE (XEXP (x, 1)) == ASHIFTRT |
| && CONST_INT_P (XEXP (XEXP (x, 1), 1)) |
| && (INTVAL (XEXP (XEXP (x, 1), 1)) |
| == 16)))))) |
| { |
| /* SMUL[TB][TB]. */ |
| if (speed_p) |
| *cost += extra_cost->mult[0].extend; |
| *cost += rtx_cost (XEXP (XEXP (x, 0), 0), |
| SIGN_EXTEND, 0, speed_p); |
| *cost += rtx_cost (XEXP (XEXP (x, 1), 0), |
| SIGN_EXTEND, 1, speed_p); |
| return true; |
| } |
| if (speed_p) |
| *cost += extra_cost->mult[0].simple; |
| return false; |
| } |
| |
| if (mode == DImode) |
| { |
| if (arm_arch3m |
| && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND |
| && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) |
| || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND |
| && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->mult[1].extend; |
| *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), |
| ZERO_EXTEND, 0, speed_p) |
| + rtx_cost (XEXP (XEXP (x, 1), 0), |
| ZERO_EXTEND, 0, speed_p)); |
| return true; |
| } |
| |
| *cost = LIBCALL_COST (2); |
| return false; |
| } |
| |
| /* Vector mode? */ |
| *cost = LIBCALL_COST (2); |
| return false; |
| |
| case NEG: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| { |
| if (GET_CODE (XEXP (x, 0)) == MULT) |
| { |
| /* VNMUL. */ |
| *cost = rtx_cost (XEXP (x, 0), NEG, 0, speed_p); |
| return true; |
| } |
| |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[mode != SFmode].neg; |
| |
| return false; |
| } |
| else if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| *cost = LIBCALL_COST (1); |
| return false; |
| } |
| |
| if (mode == SImode) |
| { |
| if (GET_CODE (XEXP (x, 0)) == ABS) |
| { |
| *cost = COSTS_N_INSNS (2); |
| /* Assume the non-flag-changing variant. */ |
| if (speed_p) |
| *cost += (extra_cost->alu.log_shift |
| + extra_cost->alu.arith_shift); |
| *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p); |
| return true; |
| } |
| |
| if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE |
| || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) |
| { |
| *cost = COSTS_N_INSNS (2); |
| /* No extra cost for MOV imm and MVN imm. */ |
| /* If the comparison op is using the flags, there's no further |
| cost, otherwise we need to add the cost of the comparison. */ |
| if (!(REG_P (XEXP (XEXP (x, 0), 0)) |
| && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM |
| && XEXP (XEXP (x, 0), 1) == const0_rtx)) |
| { |
| *cost += (COSTS_N_INSNS (1) |
| + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0, |
| speed_p) |
| + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1, |
| speed_p)); |
| if (speed_p) |
| *cost += extra_cost->alu.arith; |
| } |
| return true; |
| } |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.arith; |
| return false; |
| } |
| |
| if (GET_MODE_CLASS (mode) == MODE_INT |
| && GET_MODE_SIZE (mode) < 4) |
| { |
| /* Slightly disparage, as we might need an extend operation. */ |
| *cost = 1 + COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.arith; |
| return false; |
| } |
| |
| if (mode == DImode) |
| { |
| *cost = COSTS_N_INSNS (2); |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.arith; |
| return false; |
| } |
| |
| /* Vector mode? */ |
| *cost = LIBCALL_COST (1); |
| return false; |
| |
| case NOT: |
| if (mode == SImode) |
| { |
| rtx shift_op; |
| rtx shift_reg = NULL; |
| |
| *cost = COSTS_N_INSNS (1); |
| shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); |
| |
| if (shift_op) |
| { |
| if (shift_reg != NULL) |
| { |
| if (speed_p) |
| *cost += extra_cost->alu.log_shift_reg; |
| *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); |
| } |
| else if (speed_p) |
| *cost += extra_cost->alu.log_shift; |
| *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p); |
| return true; |
| } |
| |
| if (speed_p) |
| *cost += extra_cost->alu.logical; |
| return false; |
| } |
| if (mode == DImode) |
| { |
| *cost = COSTS_N_INSNS (2); |
| return false; |
| } |
| |
| /* Vector mode? */ |
| |
| *cost += LIBCALL_COST (1); |
| return false; |
| |
| case IF_THEN_ELSE: |
| { |
| if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) |
| { |
| *cost = COSTS_N_INSNS (4); |
| return true; |
| } |
| int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p); |
| int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p); |
| |
| *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p); |
| /* Assume that if one arm of the if_then_else is a register, |
| that it will be tied with the result and eliminate the |
| conditional insn. */ |
| if (REG_P (XEXP (x, 1))) |
| *cost += op2cost; |
| else if (REG_P (XEXP (x, 2))) |
| *cost += op1cost; |
| else |
| { |
| if (speed_p) |
| { |
| if (extra_cost->alu.non_exec_costs_exec) |
| *cost += op1cost + op2cost + extra_cost->alu.non_exec; |
| else |
| *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec; |
| } |
| else |
| *cost += op1cost + op2cost; |
| } |
| } |
| return true; |
| |
| case COMPARE: |
| if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx) |
| *cost = 0; |
| else |
| { |
| machine_mode op0mode; |
| /* We'll mostly assume that the cost of a compare is the cost of the |
| LHS. However, there are some notable exceptions. */ |
| |
| /* Floating point compares are never done as side-effects. */ |
| op0mode = GET_MODE (XEXP (x, 0)); |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT |
| && (op0mode == SFmode || !TARGET_VFP_SINGLE)) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[op0mode != SFmode].compare; |
| |
| if (XEXP (x, 1) == CONST0_RTX (op0mode)) |
| { |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| return true; |
| } |
| |
| return false; |
| } |
| else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT) |
| { |
| *cost = LIBCALL_COST (2); |
| return false; |
| } |
| |
| /* DImode compares normally take two insns. */ |
| if (op0mode == DImode) |
| { |
| *cost = COSTS_N_INSNS (2); |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.arith; |
| return false; |
| } |
| |
| if (op0mode == SImode) |
| { |
| rtx shift_op; |
| rtx shift_reg; |
| |
| if (XEXP (x, 1) == const0_rtx |
| && !(REG_P (XEXP (x, 0)) |
| || (GET_CODE (XEXP (x, 0)) == SUBREG |
| && REG_P (SUBREG_REG (XEXP (x, 0)))))) |
| { |
| *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p); |
| |
| /* Multiply operations that set the flags are often |
| significantly more expensive. */ |
| if (speed_p |
| && GET_CODE (XEXP (x, 0)) == MULT |
| && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode)) |
| *cost += extra_cost->mult[0].flag_setting; |
| |
| if (speed_p |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
| && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0), |
| 0), 1), mode)) |
| *cost += extra_cost->mult[0].flag_setting; |
| return true; |
| } |
| |
| shift_reg = NULL; |
| shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); |
| if (shift_op != NULL) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (shift_reg != NULL) |
| { |
| *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); |
| if (speed_p) |
| *cost += extra_cost->alu.arith_shift_reg; |
| } |
| else if (speed_p) |
| *cost += extra_cost->alu.arith_shift; |
| *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) |
| + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p)); |
| return true; |
| } |
| |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.arith; |
| if (CONST_INT_P (XEXP (x, 1)) |
| && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE)) |
| { |
| *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p); |
| return true; |
| } |
| return false; |
| } |
| |
| /* Vector mode? */ |
| |
| *cost = LIBCALL_COST (2); |
| return false; |
| } |
| return true; |
| |
| case EQ: |
| case NE: |
| case LT: |
| case LE: |
| case GT: |
| case GE: |
| case LTU: |
| case LEU: |
| case GEU: |
| case GTU: |
| case ORDERED: |
| case UNORDERED: |
| case UNEQ: |
| case UNLE: |
| case UNLT: |
| case UNGE: |
| case UNGT: |
| case LTGT: |
| if (outer_code == SET) |
| { |
| /* Is it a store-flag operation? */ |
| if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM |
| && XEXP (x, 1) == const0_rtx) |
| { |
| /* Thumb also needs an IT insn. */ |
| *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2); |
| return true; |
| } |
| if (XEXP (x, 1) == const0_rtx) |
| { |
| switch (code) |
| { |
| case LT: |
| /* LSR Rd, Rn, #31. */ |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.shift; |
| break; |
| |
| case EQ: |
| /* RSBS T1, Rn, #0 |
| ADC Rd, Rn, T1. */ |
| |
| case NE: |
| /* SUBS T1, Rn, #1 |
| SBC Rd, Rn, T1. */ |
| *cost = COSTS_N_INSNS (2); |
| break; |
| |
| case LE: |
| /* RSBS T1, Rn, Rn, LSR #31 |
| ADC Rd, Rn, T1. */ |
| *cost = COSTS_N_INSNS (2); |
| if (speed_p) |
| *cost += extra_cost->alu.arith_shift; |
| break; |
| |
| case GT: |
| /* RSB Rd, Rn, Rn, ASR #1 |
| LSR Rd, Rd, #31. */ |
| *cost = COSTS_N_INSNS (2); |
| if (speed_p) |
| *cost += (extra_cost->alu.arith_shift |
| + extra_cost->alu.shift); |
| break; |
| |
| case GE: |
| /* ASR Rd, Rn, #31 |
| ADD Rd, Rn, #1. */ |
| *cost = COSTS_N_INSNS (2); |
| if (speed_p) |
| *cost += extra_cost->alu.shift; |
| break; |
| |
| default: |
| /* Remaining cases are either meaningless or would take |
| three insns anyway. */ |
| *cost = COSTS_N_INSNS (3); |
| break; |
| } |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| return true; |
| } |
| else |
| { |
| *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3); |
| if (CONST_INT_P (XEXP (x, 1)) |
| && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE)) |
| { |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| return true; |
| } |
| |
| return false; |
| } |
| } |
| /* Not directly inside a set. If it involves the condition code |
| register it must be the condition for a branch, cond_exec or |
| I_T_E operation. Since the comparison is performed elsewhere |
| this is just the control part which has no additional |
| cost. */ |
| else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM |
| && XEXP (x, 1) == const0_rtx) |
| { |
| *cost = 0; |
| return true; |
| } |
| return false; |
| |
| case ABS: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[mode != SFmode].neg; |
| |
| return false; |
| } |
| else if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| *cost = LIBCALL_COST (1); |
| return false; |
| } |
| |
| if (mode == SImode) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift; |
| return false; |
| } |
| /* Vector mode? */ |
| *cost = LIBCALL_COST (1); |
| return false; |
| |
| case SIGN_EXTEND: |
| if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode) |
| && MEM_P (XEXP (x, 0))) |
| { |
| *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| |
| if (mode == DImode) |
| *cost += COSTS_N_INSNS (1); |
| |
| if (!speed_p) |
| return true; |
| |
| if (GET_MODE (XEXP (x, 0)) == SImode) |
| *cost += extra_cost->ldst.load; |
| else |
| *cost += extra_cost->ldst.load_sign_extend; |
| |
| if (mode == DImode) |
| *cost += extra_cost->alu.shift; |
| |
| return true; |
| } |
| |
| /* Widening from less than 32-bits requires an extend operation. */ |
| if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6) |
| { |
| /* We have SXTB/SXTH. */ |
| *cost = COSTS_N_INSNS (1); |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| if (speed_p) |
| *cost += extra_cost->alu.extend; |
| } |
| else if (GET_MODE (XEXP (x, 0)) != SImode) |
| { |
| /* Needs two shifts. */ |
| *cost = COSTS_N_INSNS (2); |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.shift; |
| } |
| |
| /* Widening beyond 32-bits requires one more insn. */ |
| if (mode == DImode) |
| { |
| *cost += COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.shift; |
| } |
| |
| return true; |
| |
| case ZERO_EXTEND: |
| if ((arm_arch4 |
| || GET_MODE (XEXP (x, 0)) == SImode |
| || GET_MODE (XEXP (x, 0)) == QImode) |
| && MEM_P (XEXP (x, 0))) |
| { |
| *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| |
| if (mode == DImode) |
| *cost += COSTS_N_INSNS (1); /* No speed penalty. */ |
| |
| return true; |
| } |
| |
| /* Widening from less than 32-bits requires an extend operation. */ |
| if (GET_MODE (XEXP (x, 0)) == QImode) |
| { |
| /* UXTB can be a shorter instruction in Thumb2, but it might |
| be slower than the AND Rd, Rn, #255 alternative. When |
| optimizing for speed it should never be slower to use |
| AND, and we don't really model 16-bit vs 32-bit insns |
| here. */ |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.logical; |
| } |
| else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6) |
| { |
| /* We have UXTB/UXTH. */ |
| *cost = COSTS_N_INSNS (1); |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| if (speed_p) |
| *cost += extra_cost->alu.extend; |
| } |
| else if (GET_MODE (XEXP (x, 0)) != SImode) |
| { |
| /* Needs two shifts. It's marginally preferable to use |
| shifts rather than two BIC instructions as the second |
| shift may merge with a subsequent insn as a shifter |
| op. */ |
| *cost = COSTS_N_INSNS (2); |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.shift; |
| } |
| else /* GET_MODE (XEXP (x, 0)) == SImode. */ |
| *cost = COSTS_N_INSNS (1); |
| |
| /* Widening beyond 32-bits requires one more insn. */ |
| if (mode == DImode) |
| { |
| *cost += COSTS_N_INSNS (1); /* No speed penalty. */ |
| } |
| |
| return true; |
| |
| case CONST_INT: |
| *cost = 0; |
| /* CONST_INT has no mode, so we cannot tell for sure how many |
| insns are really going to be needed. The best we can do is |
| look at the value passed. If it fits in SImode, then assume |
| that's the mode it will be used for. Otherwise assume it |
| will be used in DImode. */ |
| if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode)) |
| mode = SImode; |
| else |
| mode = DImode; |
| |
| /* Avoid blowing up in arm_gen_constant (). */ |
| if (!(outer_code == PLUS |
| || outer_code == AND |
| || outer_code == IOR |
| || outer_code == XOR |
| || outer_code == MINUS)) |
| outer_code = SET; |
| |
| const_int_cost: |
| if (mode == SImode) |
| { |
| *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL, |
| INTVAL (x), NULL, NULL, |
| 0, 0)); |
| /* Extra costs? */ |
| } |
| else |
| { |
| *cost += COSTS_N_INSNS (arm_gen_constant |
| (outer_code, SImode, NULL, |
| trunc_int_for_mode (INTVAL (x), SImode), |
| NULL, NULL, 0, 0) |
| + arm_gen_constant (outer_code, SImode, NULL, |
| INTVAL (x) >> 32, NULL, |
| NULL, 0, 0)); |
| /* Extra costs? */ |
| } |
| |
| return true; |
| |
| case CONST: |
| case LABEL_REF: |
| case SYMBOL_REF: |
| if (speed_p) |
| { |
| if (arm_arch_thumb2 && !flag_pic) |
| *cost = COSTS_N_INSNS (2); |
| else |
| *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load; |
| } |
| else |
| *cost = COSTS_N_INSNS (2); |
| |
| if (flag_pic) |
| { |
| *cost += COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.arith; |
| } |
| |
| return true; |
| |
| case CONST_FIXED: |
| *cost = COSTS_N_INSNS (4); |
| /* Fixme. */ |
| return true; |
| |
| case CONST_DOUBLE: |
| if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT |
| && (mode == SFmode || !TARGET_VFP_SINGLE)) |
| { |
| if (vfp3_const_double_rtx (x)) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[mode == DFmode].fpconst; |
| return true; |
| } |
| |
| if (speed_p) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (mode == DFmode) |
| *cost += extra_cost->ldst.loadd; |
| else |
| *cost += extra_cost->ldst.loadf; |
| } |
| else |
| *cost = COSTS_N_INSNS (2 + (mode == DFmode)); |
| |
| return true; |
| } |
| *cost = COSTS_N_INSNS (4); |
| return true; |
| |
| case CONST_VECTOR: |
| /* Fixme. */ |
| if (TARGET_NEON |
| && TARGET_HARD_FLOAT |
| && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) |
| && neon_immediate_valid_for_move (x, mode, NULL, NULL)) |
| *cost = COSTS_N_INSNS (1); |
| else |
| *cost = COSTS_N_INSNS (4); |
| return true; |
| |
| case HIGH: |
| case LO_SUM: |
| *cost = COSTS_N_INSNS (1); |
| /* When optimizing for size, we prefer constant pool entries to |
| MOVW/MOVT pairs, so bump the cost of these slightly. */ |
| if (!speed_p) |
| *cost += 1; |
| return true; |
| |
| case CLZ: |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.clz; |
| return false; |
| |
| case SMIN: |
| if (XEXP (x, 1) == const0_rtx) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.log_shift; |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| return true; |
| } |
| /* Fall through. */ |
| case SMAX: |
| case UMIN: |
| case UMAX: |
| *cost = COSTS_N_INSNS (2); |
| return false; |
| |
| case TRUNCATE: |
| if (GET_CODE (XEXP (x, 0)) == ASHIFTRT |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
| && INTVAL (XEXP (XEXP (x, 0), 1)) == 32 |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
| && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND |
| && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND) |
| || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND |
| && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) |
| == ZERO_EXTEND)))) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->mult[1].extend; |
| *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0, |
| speed_p) |
| + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND, |
| 0, speed_p)); |
| return true; |
| } |
| *cost = LIBCALL_COST (1); |
| return false; |
| |
| case UNSPEC: |
| return arm_unspec_cost (x, outer_code, speed_p, cost); |
| |
| case PC: |
| /* Reading the PC is like reading any other register. Writing it |
| is more expensive, but we take that into account elsewhere. */ |
| *cost = 0; |
| return true; |
| |
| case ZERO_EXTRACT: |
| /* TODO: Simple zero_extract of bottom bits using AND. */ |
| /* Fall through. */ |
| case SIGN_EXTRACT: |
| if (arm_arch6 |
| && mode == SImode |
| && CONST_INT_P (XEXP (x, 1)) |
| && CONST_INT_P (XEXP (x, 2))) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->alu.bfx; |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| return true; |
| } |
| /* Without UBFX/SBFX, need to resort to shift operations. */ |
| *cost = COSTS_N_INSNS (2); |
| if (speed_p) |
| *cost += 2 * extra_cost->alu.shift; |
| *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p); |
| return true; |
| |
| case FLOAT_EXTEND: |
| if (TARGET_HARD_FLOAT) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[mode == DFmode].widen; |
| if (!TARGET_FPU_ARMV8 |
| && GET_MODE (XEXP (x, 0)) == HFmode) |
| { |
| /* Pre v8, widening HF->DF is a two-step process, first |
| widening to SFmode. */ |
| *cost += COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[0].widen; |
| } |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| return true; |
| } |
| |
| *cost = LIBCALL_COST (1); |
| return false; |
| |
| case FLOAT_TRUNCATE: |
| if (TARGET_HARD_FLOAT) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[mode == DFmode].narrow; |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| return true; |
| /* Vector modes? */ |
| } |
| *cost = LIBCALL_COST (1); |
| return false; |
| |
| case FMA: |
| if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA) |
| { |
| rtx op0 = XEXP (x, 0); |
| rtx op1 = XEXP (x, 1); |
| rtx op2 = XEXP (x, 2); |
| |
| *cost = COSTS_N_INSNS (1); |
| |
| /* vfms or vfnma. */ |
| if (GET_CODE (op0) == NEG) |
| op0 = XEXP (op0, 0); |
| |
| /* vfnms or vfnma. */ |
| if (GET_CODE (op2) == NEG) |
| op2 = XEXP (op2, 0); |
| |
| *cost += rtx_cost (op0, FMA, 0, speed_p); |
| *cost += rtx_cost (op1, FMA, 1, speed_p); |
| *cost += rtx_cost (op2, FMA, 2, speed_p); |
| |
| if (speed_p) |
	    *cost += extra_cost->fp[mode == DFmode].fma;
| |
| return true; |
| } |
| |
| *cost = LIBCALL_COST (3); |
| return false; |
| |
| case FIX: |
| case UNSIGNED_FIX: |
| if (TARGET_HARD_FLOAT) |
| { |
| if (GET_MODE_CLASS (mode) == MODE_INT) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint; |
	      /* Strip off the 'cost' of rounding towards zero.  */
| if (GET_CODE (XEXP (x, 0)) == FIX) |
| *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p); |
| else |
| *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p); |
| /* ??? Increase the cost to deal with transferring from |
| FP -> CORE registers? */ |
| return true; |
| } |
| else if (GET_MODE_CLASS (mode) == MODE_FLOAT |
| && TARGET_FPU_ARMV8) |
| { |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[mode == DFmode].roundint; |
| return false; |
| } |
| /* Vector costs? */ |
| } |
| *cost = LIBCALL_COST (1); |
| return false; |
| |
| case FLOAT: |
| case UNSIGNED_FLOAT: |
| if (TARGET_HARD_FLOAT) |
| { |
| /* ??? Increase the cost to deal with transferring from CORE |
| -> FP registers? */ |
| *cost = COSTS_N_INSNS (1); |
| if (speed_p) |
| *cost += extra_cost->fp[mode == DFmode].fromint; |
| return false; |
| } |
| *cost = LIBCALL_COST (1); |
| return false; |
| |
| case CALL: |
| *cost = COSTS_N_INSNS (1); |
| return true; |
| |
| case ASM_OPERANDS: |
| { |
| /* Just a guess. Guess number of instructions in the asm |
| plus one insn per input. Always a minimum of COSTS_N_INSNS (1) |
| though (see PR60663). */ |
| int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x))); |
| int num_operands = ASM_OPERANDS_INPUT_LENGTH (x); |
| |
| *cost = COSTS_N_INSNS (asm_length + num_operands); |
| return true; |
| } |
| default: |
| if (mode != VOIDmode) |
| *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
| else |
| *cost = COSTS_N_INSNS (4); /* Who knows? */ |
| return false; |
| } |
| } |
| |
| #undef HANDLE_NARROW_SHIFT_ARITH |
| |
/* RTX costs entry point.  Dispatch the calculation to the appropriate cost
   model, depending on the tuning target and on whether we are optimizing
   for speed or for size.  */
| static bool |
| arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, |
| int *total, bool speed) |
| { |
| bool result; |
| |
| if (TARGET_OLD_RTX_COSTS |
| || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS)) |
| { |
| /* Old way. (Deprecated.) */ |
| if (!speed) |
| result = arm_size_rtx_costs (x, (enum rtx_code) code, |
| (enum rtx_code) outer_code, total); |
| else |
| result = current_tune->rtx_costs (x, (enum rtx_code) code, |
| (enum rtx_code) outer_code, total, |
| speed); |
| } |
| else |
| { |
| /* New way. */ |
| if (current_tune->insn_extra_cost) |
| result = arm_new_rtx_costs (x, (enum rtx_code) code, |
| (enum rtx_code) outer_code, |
| current_tune->insn_extra_cost, |
| total, speed); |
      /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
	 && current_tune->insn_extra_cost == NULL.  */
| else |
| result = arm_new_rtx_costs (x, (enum rtx_code) code, |
| (enum rtx_code) outer_code, |
| &generic_extra_costs, total, speed); |
| } |
| |
| if (dump_file && (dump_flags & TDF_DETAILS)) |
| { |
| print_rtl_single (dump_file, x); |
| fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold", |
| *total, result ? "final" : "partial"); |
| } |
| return result; |
| } |
| |
| /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not |
| supported on any "slowmul" cores, so it can be ignored. */ |
| |
| static bool |
| arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
| int *total, bool speed) |
| { |
| machine_mode mode = GET_MODE (x); |
| |
| if (TARGET_THUMB) |
| { |
| *total = thumb1_rtx_costs (x, code, outer_code); |
| return true; |
| } |
| |
| switch (code) |
| { |
| case MULT: |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT |
| || mode == DImode) |
| { |
| *total = COSTS_N_INSNS (20); |
| return false; |
| } |
| |
| if (CONST_INT_P (XEXP (x, 1))) |
| { |
| unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) |
| & (unsigned HOST_WIDE_INT) 0xffffffff); |
| int cost, const_ok = const_ok_for_arm (i); |
| int j, booth_unit_size; |
| |
| /* Tune as appropriate. */ |
| cost = const_ok ? 4 : 8; |
| booth_unit_size = 2; |
| for (j = 0; i && j < 32; j += booth_unit_size) |
| { |
| i >>= booth_unit_size; |
| cost++; |
| } |
| |
| *total = COSTS_N_INSNS (cost); |
| *total += rtx_cost (XEXP (x, 0), code, 0, speed); |
| return true; |
| } |
| |
| *total = COSTS_N_INSNS (20); |
| return false; |
| |
| default: |
      return arm_rtx_costs_1 (x, outer_code, total, speed);
| } |
| } |
| |
| |
| /* RTX cost for cores with a fast multiply unit (M variants). */ |
| |
| static bool |
| arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
| int *total, bool speed) |
| { |
| machine_mode mode = GET_MODE (x); |
| |
| if (TARGET_THUMB1) |
| { |
| *total = thumb1_rtx_costs (x, code, outer_code); |
| return true; |
| } |
| |
| /* ??? should thumb2 use different costs? */ |
| switch (code) |
| { |
| case MULT: |
| /* There is no point basing this on the tuning, since it is always the |
| fast variant if it exists at all. */ |
| if (mode == DImode |
| && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) |
| && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND |
| || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) |
| { |
	  *total = COSTS_N_INSNS (2);
| return false; |
| } |
| |
| |
| if (mode == DImode) |
| { |
| *total = COSTS_N_INSNS (5); |
| return false; |
| } |
| |
| if (CONST_INT_P (XEXP (x, 1))) |
| { |
| unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1)) |
| & (unsigned HOST_WIDE_INT) 0xffffffff); |
| int cost, const_ok = const_ok_for_arm (i); |
| int j, booth_unit_size; |
| |
| /* Tune as appropriate. */ |
| cost = const_ok ? 4 : 8; |
| booth_unit_size = 8; |
| for (j = 0; i && j < 32; j += booth_unit_size) |
| { |
| i >>= booth_unit_size; |
| cost++; |
| } |
| |
	  *total = COSTS_N_INSNS (cost);
| return false; |
| } |
| |
| if (mode == SImode) |
| { |
| *total = COSTS_N_INSNS (4); |
| return false; |
| } |
| |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| if (TARGET_HARD_FLOAT |
| && (mode == SFmode |
| || (mode == DFmode && !TARGET_VFP_SINGLE))) |
| { |
| *total = COSTS_N_INSNS (1); |
| return false; |
| } |
| } |
| |
| /* Requires a lib call. */ |
| *total = COSTS_N_INSNS (20); |
| return false; |
| |
| default: |
| return arm_rtx_costs_1 (x, outer_code, total, speed); |
| } |
| } |
| |
| |
| /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores, |
| so it can be ignored. */ |
| |
| static bool |
| arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
| int *total, bool speed) |
| { |
| machine_mode mode = GET_MODE (x); |
| |
| if (TARGET_THUMB) |
| { |
| *total = thumb1_rtx_costs (x, code, outer_code); |
| return true; |
| } |
| |
| switch (code) |
| { |
| case COMPARE: |
| if (GET_CODE (XEXP (x, 0)) != MULT) |
| return arm_rtx_costs_1 (x, outer_code, total, speed); |
| |
| /* A COMPARE of a MULT is slow on XScale; the muls instruction |
| will stall until the multiplication is complete. */ |
| *total = COSTS_N_INSNS (3); |
| return false; |
| |
| case MULT: |
| /* There is no point basing this on the tuning, since it is always the |
| fast variant if it exists at all. */ |
| if (mode == DImode |
| && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) |
| && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND |
| || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) |
| { |
| *total = COSTS_N_INSNS (2); |
| return false; |
| } |
| |
| |
| if (mode == DImode) |
| { |
| *total = COSTS_N_INSNS (5); |
| return false; |
| } |
| |
| if (CONST_INT_P (XEXP (x, 1))) |
| { |
| /* If operand 1 is a constant we can more accurately |
| calculate the cost of the multiply. The multiplier can |
| retire 15 bits on the first cycle and a further 12 on the |
| second. We do, of course, have to load the constant into |
| a register first. */ |
| unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1)); |
| /* There's a general overhead of one cycle. */ |
| int cost = 1; |
| unsigned HOST_WIDE_INT masked_const; |
| |
| if (i & 0x80000000) |
| i = ~i; |
| |
| i &= (unsigned HOST_WIDE_INT) 0xffffffff; |
| |
| masked_const = i & 0xffff8000; |
| if (masked_const != 0) |
| { |
| cost++; |
| masked_const = i & 0xf8000000; |
| if (masked_const != 0) |
| cost++; |
| } |
| *total = COSTS_N_INSNS (cost); |
| return false; |
| } |
| |
| if (mode == SImode) |
| { |
| *total = COSTS_N_INSNS (3); |
| return false; |
| } |
| |
| /* Requires a lib call. */ |
| *total = COSTS_N_INSNS (20); |
| return false; |
| |
| default: |
| return arm_rtx_costs_1 (x, outer_code, total, speed); |
| } |
| } |
| |
| |
| /* RTX costs for 9e (and later) cores. */ |
| |
| static bool |
| arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
| int *total, bool speed) |
| { |
| machine_mode mode = GET_MODE (x); |
| |
| if (TARGET_THUMB1) |
| { |
| switch (code) |
| { |
| case MULT: |
| /* Small multiply: 32 cycles for an integer multiply inst. */ |
| if (arm_arch6m && arm_m_profile_small_mul) |
| *total = COSTS_N_INSNS (32); |
| else |
| *total = COSTS_N_INSNS (3); |
| return true; |
| |
| default: |
| *total = thumb1_rtx_costs (x, code, outer_code); |
| return true; |
| } |
| } |
| |
| switch (code) |
| { |
| case MULT: |
| /* There is no point basing this on the tuning, since it is always the |
| fast variant if it exists at all. */ |
| if (mode == DImode |
| && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1))) |
| && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND |
| || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) |
| { |
| *total = COSTS_N_INSNS (2); |
| return false; |
| } |
| |
| |
| if (mode == DImode) |
| { |
| *total = COSTS_N_INSNS (5); |
| return false; |
| } |
| |
| if (mode == SImode) |
| { |
| *total = COSTS_N_INSNS (2); |
| return false; |
| } |
| |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| { |
| if (TARGET_HARD_FLOAT |
| && (mode == SFmode |
| || (mode == DFmode && !TARGET_VFP_SINGLE))) |
| { |
| *total = COSTS_N_INSNS (1); |
| return false; |
| } |
| } |
| |
| *total = COSTS_N_INSNS (20); |
| return false; |
| |
| default: |
| return arm_rtx_costs_1 (x, outer_code, total, speed); |
| } |
| } |
| |
| /* All address computations that can be done are free, but rtx cost returns |
| the same for practically all of them. So we weight the different types |
| of address here in the order (most pref first): |
| PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */ |
| static inline int |
| arm_arm_address_cost (rtx x) |
| { |
| enum rtx_code c = GET_CODE (x); |
| |
| if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC) |
| return 0; |
| if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) |
| return 10; |
| |
| if (c == PLUS) |
| { |
| if (CONST_INT_P (XEXP (x, 1))) |
| return 2; |
| |
| if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1))) |
| return 3; |
| |
| return 4; |
| } |
| |
| return 6; |
| } |
| |
| static inline int |
| arm_thumb_address_cost (rtx x) |
| { |
| enum rtx_code c = GET_CODE (x); |
| |
| if (c == REG) |
| return 1; |
| if (c == PLUS |
| && REG_P (XEXP (x, 0)) |
| && CONST_INT_P (XEXP (x, 1))) |
| return 1; |
| |
| return 2; |
| } |
| |
| static int |
| arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED, |
| addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) |
| { |
| return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x); |
| } |
| |
| /* Adjust cost hook for XScale. */ |
| static bool |
| xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost) |
| { |
| /* Some true dependencies can have a higher cost depending |
| on precisely how certain input operands are used. */ |
| if (REG_NOTE_KIND (link) == 0 |
| && recog_memoized (insn) >= 0 |
| && recog_memoized (dep) >= 0) |
| { |
| int shift_opnum = get_attr_shift (insn); |
| enum attr_type attr_type = get_attr_type (dep); |
| |
| /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted |
| operand for INSN. If we have a shifted input operand and the |
| instruction we depend on is another ALU instruction, then we may |
| have to account for an additional stall. */ |
| if (shift_opnum != 0 |
| && (attr_type == TYPE_ALU_SHIFT_IMM |
| || attr_type == TYPE_ALUS_SHIFT_IMM |
| || attr_type == TYPE_LOGIC_SHIFT_IMM |
| || attr_type == TYPE_LOGICS_SHIFT_IMM |
| || attr_type == TYPE_ALU_SHIFT_REG |
| || attr_type == TYPE_ALUS_SHIFT_REG |
| || attr_type == TYPE_LOGIC_SHIFT_REG |
| || attr_type == TYPE_LOGICS_SHIFT_REG |
| || attr_type == TYPE_MOV_SHIFT |
| || attr_type == TYPE_MVN_SHIFT |
| || attr_type == TYPE_MOV_SHIFT_REG |
| || attr_type == TYPE_MVN_SHIFT_REG)) |
| { |
| rtx shifted_operand; |
| int opno; |
| |
| /* Get the shifted operand. */ |
| extract_insn (insn); |
| shifted_operand = recog_data.operand[shift_opnum]; |
| |
| /* Iterate over all the operands in DEP. If we write an operand |
| that overlaps with SHIFTED_OPERAND, then we have to increase |
| the cost of this dependency. */ |
| extract_insn (dep); |
| preprocess_constraints (dep); |
| for (opno = 0; opno < recog_data.n_operands; opno++) |
| { |
| /* We can ignore strict inputs. */ |
| if (recog_data.operand_type[opno] == OP_IN) |
| continue; |
| |
| if (reg_overlap_mentioned_p (recog_data.operand[opno], |
| shifted_operand)) |
| { |
| *cost = 2; |
| return false; |
| } |
| } |
| } |
| } |
| return true; |
| } |
| |
| /* Adjust cost hook for Cortex A9. */ |
| static bool |
| cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost) |
| { |
| switch (REG_NOTE_KIND (link)) |
| { |
| case REG_DEP_ANTI: |
| *cost = 0; |
| return false; |
| |
| case REG_DEP_TRUE: |
| case REG_DEP_OUTPUT: |
| if (recog_memoized (insn) >= 0 |
| && recog_memoized (dep) >= 0) |
| { |
| if (GET_CODE (PATTERN (insn)) == SET) |
| { |
| if (GET_MODE_CLASS |
| (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT |
| || GET_MODE_CLASS |
| (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT) |
| { |
| enum attr_type attr_type_insn = get_attr_type (insn); |
| enum attr_type attr_type_dep = get_attr_type (dep); |
| |
| /* By default all dependencies of the form |
| s0 = s0 <op> s1 |
| s0 = s0 <op> s2 |
| have an extra latency of 1 cycle because |
| of the input and output dependency in this |
| case. However, this gets modeled as a true |
| dependency, hence all these checks. */ |
| if (REG_P (SET_DEST (PATTERN (insn))) |
| && REG_P (SET_DEST (PATTERN (dep))) |
| && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)), |
| SET_DEST (PATTERN (dep)))) |
| { |
| /* FMACS is a special case where the dependent |
| instruction can be issued 3 cycles before |
| the normal latency in case of an output |
| dependency. */ |
| if ((attr_type_insn == TYPE_FMACS |
| || attr_type_insn == TYPE_FMACD) |
| && (attr_type_dep == TYPE_FMACS |
| || attr_type_dep == TYPE_FMACD)) |
| { |
| if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) |
| *cost = insn_default_latency (dep) - 3; |
| else |
| *cost = insn_default_latency (dep); |
| return false; |
| } |
| else |
| { |
| if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) |
| *cost = insn_default_latency (dep) + 1; |
| else |
| *cost = insn_default_latency (dep); |
| } |
| return false; |
| } |
| } |
| } |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| return true; |
| } |
| |
| /* Adjust cost hook for FA726TE. */ |
| static bool |
| fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost) |
| { |
| /* For FA726TE, a true dependency on the CPSR (i.e. a flag-setting insn |
| followed by a predicated one) has a penalty of 3. */ |
| if (REG_NOTE_KIND (link) == REG_DEP_TRUE |
| && recog_memoized (insn) >= 0 |
| && recog_memoized (dep) >= 0 |
| && get_attr_conds (dep) == CONDS_SET) |
| { |
| /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */ |
| if (get_attr_conds (insn) == CONDS_USE |
| && get_attr_type (insn) != TYPE_BRANCH) |
| { |
| *cost = 3; |
| return false; |
| } |
| |
| if (GET_CODE (PATTERN (insn)) == COND_EXEC |
| || get_attr_conds (insn) == CONDS_USE) |
| { |
| *cost = 0; |
| return false; |
| } |
| } |
| |
| return true; |
| } |
| |
| /* Implement TARGET_REGISTER_MOVE_COST. |
| |
| Moves between VFP_REGS and GENERAL_REGS are a single insn, but |
| such a move is typically more expensive than a single memory access. |
| We set the cost to less than two memory accesses so that floating |
| point to integer conversion does not go through memory. */ |
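| /* (Given that arm_memory_move_cost below returns 10 for 32-bit targets, |
| the value of 15 used here is indeed below the 20 that two memory |
| accesses would cost.) */ |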
| |
| int |
| arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, |
| reg_class_t from, reg_class_t to) |
| { |
| if (TARGET_32BIT) |
| { |
| if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to)) |
| || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to))) |
| return 15; |
| else if ((from == IWMMXT_REGS && to != IWMMXT_REGS) |
| || (from != IWMMXT_REGS && to == IWMMXT_REGS)) |
| return 4; |
| else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS) |
| return 20; |
| else |
| return 2; |
| } |
| else |
| { |
| if (from == HI_REGS || to == HI_REGS) |
| return 4; |
| else |
| return 2; |
| } |
| } |
| |
| /* Implement TARGET_MEMORY_MOVE_COST. */ |
| |
| int |
| arm_memory_move_cost (machine_mode mode, reg_class_t rclass, |
| bool in ATTRIBUTE_UNUSED) |
| { |
| if (TARGET_32BIT) |
| return 10; |
| else |
| { |
| if (GET_MODE_SIZE (mode) < 4) |
| return 8; |
| else |
| return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2)); |
| } |
| } |
| |
| /* Vectorizer cost model implementation. */ |
| |
| /* Implement targetm.vectorize.builtin_vectorization_cost. */ |
| static int |
| arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, |
| tree vectype, |
| int misalign ATTRIBUTE_UNUSED) |
| { |
| unsigned elements; |
| |
| switch (type_of_cost) |
| { |
| case scalar_stmt: |
| return current_tune->vec_costs->scalar_stmt_cost; |
| |
| case scalar_load: |
| return current_tune->vec_costs->scalar_load_cost; |
| |
| case scalar_store: |
| return current_tune->vec_costs->scalar_store_cost; |
| |
| case vector_stmt: |
| return current_tune->vec_costs->vec_stmt_cost; |
| |
| case vector_load: |
| return current_tune->vec_costs->vec_align_load_cost; |
| |
| case vector_store: |
| return current_tune->vec_costs->vec_store_cost; |
| |
| case vec_to_scalar: |
| return current_tune->vec_costs->vec_to_scalar_cost; |
| |
| case scalar_to_vec: |
| return current_tune->vec_costs->scalar_to_vec_cost; |
| |
| case unaligned_load: |
| return current_tune->vec_costs->vec_unalign_load_cost; |
| |
| case unaligned_store: |
| return current_tune->vec_costs->vec_unalign_store_cost; |
| |
| case cond_branch_taken: |
| return current_tune->vec_costs->cond_taken_branch_cost; |
| |
| case cond_branch_not_taken: |
| return current_tune->vec_costs->cond_not_taken_branch_cost; |
| |
| case vec_perm: |
| case vec_promote_demote: |
| return current_tune->vec_costs->vec_stmt_cost; |
| |
| case vec_construct: |
| elements = TYPE_VECTOR_SUBPARTS (vectype); |
| return elements / 2 + 1; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Implement targetm.vectorize.add_stmt_cost. */ |
| |
| static unsigned |
| arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, |
| struct _stmt_vec_info *stmt_info, int misalign, |
| enum vect_cost_model_location where) |
| { |
| unsigned *cost = (unsigned *) data; |
| unsigned retval = 0; |
| |
| if (flag_vect_cost_model) |
| { |
| tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE; |
| int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign); |
| |
| /* Statements in an inner loop relative to the loop being |
| vectorized are weighted more heavily. The value here is |
| arbitrary and could potentially be improved with analysis. */ |
| if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) |
| count *= 50; /* FIXME. */ |
| |
| retval = (unsigned) (count * stmt_cost); |
| cost[where] += retval; |
| } |
| |
| return retval; |
| } |
| |
| /* Return true if and only if this insn can dual-issue only as older. */ |
| static bool |
| cortexa7_older_only (rtx_insn *insn) |
| { |
| if (recog_memoized (insn) < 0) |
| return false; |
| |
| switch (get_attr_type (insn)) |
| { |
| case TYPE_ALU_DSP_REG: |
| case TYPE_ALU_SREG: |
| case TYPE_ALUS_SREG: |
| case TYPE_LOGIC_REG: |
| case TYPE_LOGICS_REG: |
| case TYPE_ADC_REG: |
| case TYPE_ADCS_REG: |
| case TYPE_ADR: |
| case TYPE_BFM: |
| case TYPE_REV: |
| case TYPE_MVN_REG: |
| case TYPE_SHIFT_IMM: |
| case TYPE_SHIFT_REG: |
| case TYPE_LOAD_BYTE: |
| case TYPE_LOAD1: |
| case TYPE_STORE1: |
| case TYPE_FFARITHS: |
| case TYPE_FADDS: |
| case TYPE_FFARITHD: |
| case TYPE_FADDD: |
| case TYPE_FMOV: |
| case TYPE_F_CVT: |
| case TYPE_FCMPS: |
| case TYPE_FCMPD: |
| case TYPE_FCONSTS: |
| case TYPE_FCONSTD: |
| case TYPE_FMULS: |
| case TYPE_FMACS: |
| case TYPE_FMULD: |
| case TYPE_FMACD: |
| case TYPE_FDIVS: |
| case TYPE_FDIVD: |
| case TYPE_F_MRC: |
| case TYPE_F_MRRC: |
| case TYPE_F_FLAG: |
| case TYPE_F_LOADS: |
| case TYPE_F_STORES: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| /* Return true if and only if this insn can dual-issue as younger. */ |
| static bool |
| cortexa7_younger (FILE *file, int verbose, rtx_insn *insn) |
| { |
| if (recog_memoized (insn) < 0) |
| { |
| if (verbose > 5) |
| fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn)); |
| return false; |
| } |
| |
| switch (get_attr_type (insn)) |
| { |
| case TYPE_ALU_IMM: |
| case TYPE_ALUS_IMM: |
| case TYPE_LOGIC_IMM: |
| case TYPE_LOGICS_IMM: |
| case TYPE_EXTEND: |
| case TYPE_MVN_IMM: |
| case TYPE_MOV_IMM: |
| case TYPE_MOV_REG: |
| case TYPE_MOV_SHIFT: |
| case TYPE_MOV_SHIFT_REG: |
| case TYPE_BRANCH: |
| case TYPE_CALL: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| |
| /* Look for an instruction that can dual issue only as an older |
| instruction, and move it in front of any instructions that can |
| dual-issue as younger, while preserving the relative order of all |
| other instructions in the ready list. This is a heuristic to help |
| dual-issue in later cycles, by postponing issue of more flexible |
| instructions. This heuristic may affect dual issue opportunities |
| in the current cycle. */ |
| static void |
| cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready, |
| int *n_readyp, int clock) |
| { |
| int i; |
| int first_older_only = -1, first_younger = -1; |
| |
| if (verbose > 5) |
| fprintf (file, |
| ";; sched_reorder for cycle %d with %d insns in ready list\n", |
| clock, |
| *n_readyp); |
| |
| /* Traverse the ready list from the head (the instruction to issue |
| first), looking for the first instruction that can issue as |
| younger and the first instruction that can dual-issue only as |
| older. */ |
| for (i = *n_readyp - 1; i >= 0; i--) |
| { |
| rtx_insn *insn = ready[i]; |
| if (cortexa7_older_only (insn)) |
| { |
| first_older_only = i; |
| if (verbose > 5) |
| fprintf (file, ";; reorder older found %d\n", INSN_UID (insn)); |
| break; |
| } |
| else if (cortexa7_younger (file, verbose, insn) && first_younger == -1) |
| first_younger = i; |
| } |
| |
| /* Nothing to reorder because either no younger insn was found, or the |
| insn that can dual-issue only as older already appears before any |
| insn that can dual-issue as younger. */ |
| if (first_younger == -1) |
| { |
| if (verbose > 5) |
| fprintf (file, ";; sched_reorder nothing to reorder as no younger\n"); |
| return; |
| } |
| |
| /* Nothing to reorder because no older-only insn in the ready list. */ |
| if (first_older_only == -1) |
| { |
| if (verbose > 5) |
| fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n"); |
| return; |
| } |
| |
| /* Move first_older_only insn before first_younger. */ |
| if (verbose > 5) |
| fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n", |
| INSN_UID (ready[first_older_only]), |
| INSN_UID (ready[first_younger])); |
| rtx_insn *first_older_only_insn = ready[first_older_only]; |
| for (i = first_older_only; i < first_younger; i++) |
| { |
| ready[i] = ready[i+1]; |
| } |
| |
| ready[i] = first_older_only_insn; |
| return; |
| } |
| |
| /* Implement TARGET_SCHED_REORDER. */ |
| static int |
| arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp, |
| int clock) |
| { |
| switch (arm_tune) |
| { |
| case cortexa7: |
| cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock); |
| break; |
| default: |
| /* Do nothing for other cores. */ |
| break; |
| } |
| |
| return arm_issue_rate (); |
| } |
| |
| /* This function implements the target macro TARGET_SCHED_ADJUST_COST. |
| It corrects the value of COST based on the relationship between |
| INSN and DEP through the dependence LINK. It returns the new |
| value. There is a per-core adjust_cost hook to adjust scheduler costs |
| and the per-core hook can choose to completely override the generic |
| adjust_cost function. Only put bits of code into arm_adjust_cost that |
| are common across all cores. */ |
| static int |
| arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost) |
| { |
| rtx i_pat, d_pat; |
| |
| /* When generating Thumb-1 code, we want to place flag-setting operations |
| close to a conditional branch which depends on them, so that we can |
| omit the comparison. */ |
| if (TARGET_THUMB1 |
| && REG_NOTE_KIND (link) == 0 |
| && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn |
| && recog_memoized (dep) >= 0 |
| && get_attr_conds (dep) == CONDS_SET) |
| return 0; |
| |
| if (current_tune->sched_adjust_cost != NULL) |
| { |
| if (!current_tune->sched_adjust_cost (insn, link, dep, &cost)) |
| return cost; |
| } |
| |
| /* XXX Is this strictly true? */ |
| if (REG_NOTE_KIND (link) == REG_DEP_ANTI |
| || REG_NOTE_KIND (link) == REG_DEP_OUTPUT) |
| return 0; |
| |
| /* Call insns don't incur a stall, even if they follow a load. */ |
| if (REG_NOTE_KIND (link) == 0 |
| && CALL_P (insn)) |
| return 1; |
| |
| if ((i_pat = single_set (insn)) != NULL |
| && MEM_P (SET_SRC (i_pat)) |
| && (d_pat = single_set (dep)) != NULL |
| && MEM_P (SET_DEST (d_pat))) |
| { |
| rtx src_mem = XEXP (SET_SRC (i_pat), 0); |
| /* This is a load after a store, there is no conflict if the load reads |
| from a cached area. Assume that loads from the stack, and from the |
| constant pool are cached, and that others will miss. This is a |
| hack. */ |
| |
| if ((GET_CODE (src_mem) == SYMBOL_REF |
| && CONSTANT_POOL_ADDRESS_P (src_mem)) |
| || reg_mentioned_p (stack_pointer_rtx, src_mem) |
| || reg_mentioned_p (frame_pointer_rtx, src_mem) |
| || reg_mentioned_p (hard_frame_pointer_rtx, src_mem)) |
| return 1; |
| } |
| |
| return cost; |
| } |
| |
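| /* Return the maximum number of insns we are prepared to conditionally |
| execute (currently the value of max_insns_skipped). */ |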
| int |
| arm_max_conditional_execute (void) |
| { |
| return max_insns_skipped; |
| } |
| |
| static int |
| arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) |
| { |
| if (TARGET_32BIT) |
| return (TARGET_THUMB2 && !speed_p) ? 1 : 4; |
| else |
| return (optimize > 0) ? 2 : 0; |
| } |
| |
| static int |
| arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p) |
| { |
| return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); |
| } |
| |
| /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles" |
| on Cortex-M4, where P varies from 1 to 3 according to some criteria), since |
| sequences of non-executed instructions in IT blocks probably take the same |
| amount of time as executed instructions (and the IT instruction itself takes |
| space in icache). This function was experimentally determined to give good |
| results on a popular embedded benchmark. */ |
| |
| static int |
| arm_cortex_m_branch_cost (bool speed_p, bool predictable_p) |
| { |
| return (TARGET_32BIT && speed_p) ? 1 |
| : arm_default_branch_cost (speed_p, predictable_p); |
| } |
| |
| static int |
| arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p) |
| { |
| return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); |
| } |
| |
| static bool fp_consts_inited = false; |
| |
| static REAL_VALUE_TYPE value_fp0; |
| |
| static void |
| init_fp_table (void) |
| { |
| REAL_VALUE_TYPE r; |
| |
| r = REAL_VALUE_ATOF ("0", DFmode); |
| value_fp0 = r; |
| fp_consts_inited = true; |
| } |
| |
| /* Return TRUE if rtx X is a valid immediate FP constant. */ |
| int |
| arm_const_double_rtx (rtx x) |
| { |
| REAL_VALUE_TYPE r; |
| |
| if (!fp_consts_inited) |
| init_fp_table (); |
| |
| REAL_VALUE_FROM_CONST_DOUBLE (r, x); |
| if (REAL_VALUE_MINUS_ZERO (r)) |
| return 0; |
| |
| if (REAL_VALUES_EQUAL (r, value_fp0)) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* VFPv3 has a fairly wide range of representable immediates, formed from |
| "quarter-precision" floating-point values. These can be evaluated using this |
| formula (with ^ for exponentiation): |
| |
| -1^s * n * 2^-r |
| |
| Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that |
| 16 <= n <= 31 and 0 <= r <= 7. |
| |
| These values are mapped onto an 8-bit integer ABCDEFGH s.t. |
| |
| - A (most-significant) is the sign bit. |
| - BCD are the exponent (encoded as r XOR 3). |
| - EFGH are the mantissa (encoded as n - 16). |
| */ |
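| /* As a worked example of the encoding above: 1.0 = -1^0 * 16 * 2^-4, |
| so s = 0, n = 16 and r = 4, giving the 8-bit index |
| (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70. */ |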
| |
| /* Return an integer index for a VFPv3 immediate operand X suitable for the |
| fconst[sd] instruction, or -1 if X isn't suitable. */ |
| static int |
| vfp3_const_double_index (rtx x) |
| { |
| REAL_VALUE_TYPE r, m; |
| int sign, exponent; |
| unsigned HOST_WIDE_INT mantissa, mant_hi; |
| unsigned HOST_WIDE_INT mask; |
| int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1; |
| bool fail; |
| |
| if (!TARGET_VFP3 || !CONST_DOUBLE_P (x)) |
| return -1; |
| |
| REAL_VALUE_FROM_CONST_DOUBLE (r, x); |
| |
| /* We can't represent these things, so detect them first. */ |
| if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r)) |
| return -1; |
| |
| /* Extract sign, exponent and mantissa. */ |
| sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0; |
| r = real_value_abs (&r); |
| exponent = REAL_EXP (&r); |
| /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the |
| highest (sign) bit, with a fixed binary point at bit point_pos. |
| WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 |
| bits for the mantissa, this may fail (low bits would be lost). */ |
| real_ldexp (&m, &r, point_pos - exponent); |
| wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2); |
| mantissa = w.elt (0); |
| mant_hi = w.elt (1); |
| |
| /* If there are bits set in the low part of the mantissa, we can't |
| represent this value. */ |
| if (mantissa != 0) |
| return -1; |
| |
| /* Now make it so that mantissa contains the most-significant bits, and move |
| the point_pos to indicate that the least-significant bits have been |
| discarded. */ |
| point_pos -= HOST_BITS_PER_WIDE_INT; |
| mantissa = mant_hi; |
| |
| /* We can permit four significant bits of mantissa only, plus a high bit |
| which is always 1. */ |
| mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1; |
| if ((mantissa & mask) != 0) |
| return -1; |
| |
| /* Now we know the mantissa is in range, chop off the unneeded bits. */ |
| mantissa >>= point_pos - 5; |
| |
| /* The mantissa may be zero. Disallow that case. (It's possible to load the |
| floating-point immediate zero with Neon using an integer-zero load, but |
| that case is handled elsewhere.) */ |
| if (mantissa == 0) |
| return -1; |
| |
| gcc_assert (mantissa >= 16 && mantissa <= 31); |
| |
| /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where |
| normalized significands are in the range [1, 2). (Our mantissa is shifted |
| left 4 places at this point relative to normalized IEEE754 values). GCC |
| internally uses [0.5, 1) (see real.c), so the exponent returned from |
| REAL_EXP must be altered. */ |
| exponent = 5 - exponent; |
| |
| if (exponent < 0 || exponent > 7) |
| return -1; |
| |
| /* Sign, mantissa and exponent are now in the correct form to plug into the |
| formula described in the comment above. */ |
| return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16); |
| } |
| |
| /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */ |
| int |
| vfp3_const_double_rtx (rtx x) |
| { |
| if (!TARGET_VFP3) |
| return 0; |
| |
| return vfp3_const_double_index (x) != -1; |
| } |
| |
| /* Recognize immediates which can be used in various Neon instructions. Legal |
| immediates are described by the following table (for VMVN variants, the |
| bitwise inverse of the constant shown is recognized. In either case, VMOV |
| is output and the correct instruction to use for a given constant is chosen |
| by the assembler). The constant shown is replicated across all elements of |
| the destination vector. |
| |
| insn elems variant constant (binary) |
| ---- ----- ------- ----------------- |
| vmov i32 0 00000000 00000000 00000000 abcdefgh |
| vmov i32 1 00000000 00000000 abcdefgh 00000000 |
| vmov i32 2 00000000 abcdefgh 00000000 00000000 |
| vmov i32 3 abcdefgh 00000000 00000000 00000000 |
| vmov i16 4 00000000 abcdefgh |
| vmov i16 5 abcdefgh 00000000 |
| vmvn i32 6 00000000 00000000 00000000 abcdefgh |
| vmvn i32 7 00000000 00000000 abcdefgh 00000000 |
| vmvn i32 8 00000000 abcdefgh 00000000 00000000 |
| vmvn i32 9 abcdefgh 00000000 00000000 00000000 |
| vmvn i16 10 00000000 abcdefgh |
| vmvn i16 11 abcdefgh 00000000 |
| vmov i32 12 00000000 00000000 abcdefgh 11111111 |
| vmvn i32 13 00000000 00000000 abcdefgh 11111111 |
| vmov i32 14 00000000 abcdefgh 11111111 11111111 |
| vmvn i32 15 00000000 abcdefgh 11111111 11111111 |
| vmov i8 16 abcdefgh |
| vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd |
| eeeeeeee ffffffff gggggggg hhhhhhhh |
| vmov f32 18 aBbbbbbc defgh000 00000000 00000000 |
| vmov f32 19 00000000 00000000 00000000 00000000 |
| |
| For case 18, B = !b. Representable values are exactly those accepted by |
| vfp3_const_double_index, but are output as floating-point numbers rather |
| than indices. |
| |
| For case 19, we will change it to vmov.i32 when assembling. |
| |
| Variants 0-5 (inclusive) may also be used as immediates for the second |
| operand of VORR/VBIC instructions. |
| |
| The INVERSE argument causes the bitwise inverse of the given operand to be |
| recognized instead (used for recognizing legal immediates for the VAND/VORN |
| pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is |
| *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be |
| output, rather than the real insns vbic/vorr). |
| |
| INVERSE makes no difference to the recognition of float vectors. |
| |
| The return value is the variant of immediate as shown in the above table, or |
| -1 if the given value doesn't match any of the listed patterns. |
| */ |
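| /* For example, a vector of 32-bit elements all equal to 0x00004200 |
| matches variant 1 above and is reported with an element width of 32. */ |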
| static int |
| neon_valid_immediate (rtx op, machine_mode mode, int inverse, |
| rtx *modconst, int *elementwidth) |
| { |
| #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \ |
| matches = 1; \ |
| for (i = 0; i < idx; i += (STRIDE)) \ |
| if (!(TEST)) \ |
| matches = 0; \ |
| if (matches) \ |
| { \ |
| immtype = (CLASS); \ |
| elsize = (ELSIZE); \ |
| break; \ |
| } |
| |
| unsigned int i, elsize = 0, idx = 0, n_elts; |
| unsigned int innersize; |
| unsigned char bytes[16]; |
| int immtype = -1, matches; |
| unsigned int invmask = inverse ? 0xff : 0; |
| bool vector = GET_CODE (op) == CONST_VECTOR; |
| |
| if (vector) |
| { |
| n_elts = CONST_VECTOR_NUNITS (op); |
| innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); |
| } |
| else |
| { |
| n_elts = 1; |
| if (mode == VOIDmode) |
| mode = DImode; |
| innersize = GET_MODE_SIZE (mode); |
| } |
| |
| /* Vectors of float constants. */ |
| if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) |
| { |
| rtx el0 = CONST_VECTOR_ELT (op, 0); |
| REAL_VALUE_TYPE r0; |
| |
| if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0))) |
| return -1; |
| |
| REAL_VALUE_FROM_CONST_DOUBLE (r0, el0); |
| |
| for (i = 1; i < n_elts; i++) |
| { |
| rtx elt = CONST_VECTOR_ELT (op, i); |
| REAL_VALUE_TYPE re; |
| |
| REAL_VALUE_FROM_CONST_DOUBLE (re, elt); |
| |
| if (!REAL_VALUES_EQUAL (r0, re)) |
| return -1; |
| } |
| |
| if (modconst) |
| *modconst = CONST_VECTOR_ELT (op, 0); |
| |
| if (elementwidth) |
| *elementwidth = 0; |
| |
| if (el0 == CONST0_RTX (GET_MODE (el0))) |
| return 19; |
| else |
| return 18; |
| } |
| |
| /* Splat vector constant out into a byte vector. */ |
| for (i = 0; i < n_elts; i++) |
| { |
| rtx el = vector ? CONST_VECTOR_ELT (op, i) : op; |
| unsigned HOST_WIDE_INT elpart; |
| unsigned int part, parts; |
| |
| if (CONST_INT_P (el)) |
| { |
| elpart = INTVAL (el); |
| parts = 1; |
| } |
| else if (CONST_DOUBLE_P (el)) |
| { |
| elpart = CONST_DOUBLE_LOW (el); |
| parts = 2; |
| } |
| else |
| gcc_unreachable (); |
| |
| for (part = 0; part < parts; part++) |
| { |
| unsigned int byte; |
| for (byte = 0; byte < innersize; byte++) |
| { |
| bytes[idx++] = (elpart & 0xff) ^ invmask; |
| elpart >>= BITS_PER_UNIT; |
| } |
| if (CONST_DOUBLE_P (el)) |
| elpart = CONST_DOUBLE_HIGH (el); |
| } |
| } |
| |
| /* Sanity check. */ |
| gcc_assert (idx == GET_MODE_SIZE (mode)); |
| |
| do |
| { |
| CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 |
| && bytes[i + 2] == 0 && bytes[i + 3] == 0); |
| |
| CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] |
| && bytes[i + 2] == 0 && bytes[i + 3] == 0); |
| |
| CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 |
| && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); |
| |
| CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 |
| && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]); |
| |
| CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0); |
| |
| CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]); |
| |
| CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff |
| && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); |
| |
| CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] |
| && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); |
| |
| CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff |
| && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); |
| |
| CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff |
| && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]); |
| |
| CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff); |
| |
| CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]); |
| |
| CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] |
| && bytes[i + 2] == 0 && bytes[i + 3] == 0); |
| |
| CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] |
| && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); |
| |
| CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff |
| && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); |
| |
| CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0 |
| && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); |
| |
| CHECK (1, 8, 16, bytes[i] == bytes[0]); |
| |
| CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) |
| && bytes[i] == bytes[(i + 8) % idx]); |
| } |
| while (0); |
| |
| if (immtype == -1) |
| return -1; |
| |
| if (elementwidth) |
| *elementwidth = elsize; |
| |
| if (modconst) |
| { |
| unsigned HOST_WIDE_INT imm = 0; |
| |
| /* Un-invert bytes of recognized vector, if necessary. */ |
| if (invmask != 0) |
| for (i = 0; i < idx; i++) |
| bytes[i] ^= invmask; |
| |
| if (immtype == 17) |
| { |
| /* FIXME: Broken on 32-bit H_W_I hosts. */ |
| gcc_assert (sizeof (HOST_WIDE_INT) == 8); |
| |
| for (i = 0; i < 8; i++) |
| imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) |
| << (i * BITS_PER_UNIT); |
| |
| *modconst = GEN_INT (imm); |
| } |
| else |
| { |
| unsigned HOST_WIDE_INT imm = 0; |
| |
| for (i = 0; i < elsize / BITS_PER_UNIT; i++) |
| imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); |
| |
| *modconst = GEN_INT (imm); |
| } |
| } |
| |
| return immtype; |
| #undef CHECK |
| } |
| |
| /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly, |
| VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for |
| float elements), and a modified constant (whatever should be output for a |
| VMOV) in *MODCONST. */ |
| |
| int |
| neon_immediate_valid_for_move (rtx op, machine_mode mode, |
| rtx *modconst, int *elementwidth) |
| { |
| rtx tmpconst; |
| int tmpwidth; |
| int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth); |
| |
| if (retval == -1) |
| return 0; |
| |
| if (modconst) |
| *modconst = tmpconst; |
| |
| if (elementwidth) |
| *elementwidth = tmpwidth; |
| |
| return 1; |
| } |
| |
| /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If |
| the immediate is valid, write a constant suitable for using as an operand |
| to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to |
| *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */ |
| |
| int |
| neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse, |
| rtx *modconst, int *elementwidth) |
| { |
| rtx tmpconst; |
| int tmpwidth; |
| int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth); |
| |
| if (retval < 0 || retval > 5) |
| return 0; |
| |
| if (modconst) |
| *modconst = tmpconst; |
| |
| if (elementwidth) |
| *elementwidth = tmpwidth; |
| |
| return 1; |
| } |
| |
| /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If |
| the immediate is valid, write a constant suitable for using as an operand |
| to VSHR/VSHL to *MODCONST and the corresponding element width to |
| *ELEMENTWIDTH. ISLEFTSHIFT distinguishes left shifts from right shifts, |
| because the two have different immediate ranges. */ |
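| /* For example, for a vector of 32-bit elements a left-shift immediate |
| must lie in the range 0..31, while a right-shift immediate must lie |
| in the range 1..32. */ |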
| |
| int |
| neon_immediate_valid_for_shift (rtx op, machine_mode mode, |
| rtx *modconst, int *elementwidth, |
| bool isleftshift) |
| { |
| unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode)); |
| unsigned int n_elts = CONST_VECTOR_NUNITS (op), i; |
| unsigned HOST_WIDE_INT last_elt = 0; |
| unsigned HOST_WIDE_INT maxshift; |
| |
| /* Check that every element of the vector is the same constant. */ |
| for (i = 0; i < n_elts; i++) |
| { |
| rtx el = CONST_VECTOR_ELT (op, i); |
| unsigned HOST_WIDE_INT elpart; |
| |
| if (CONST_INT_P (el)) |
| elpart = INTVAL (el); |
| else if (CONST_DOUBLE_P (el)) |
| return 0; |
| else |
| gcc_unreachable (); |
| |
| if (i != 0 && elpart != last_elt) |
| return 0; |
| |
| last_elt = elpart; |
| } |
| |
| /* The permitted shift range is bounded by the element size in bits. */ |
| maxshift = innersize * 8; |
| |
| if (isleftshift) |
| { |
| /* Left shift immediate value can be from 0 to <size>-1. */ |
| if (last_elt >= maxshift) |
| return 0; |
| } |
| else |
| { |
| /* Right shift immediate value can be from 1 to <size>. */ |
| if (last_elt == 0 || last_elt > maxshift) |
| return 0; |
| } |
| |
| if (elementwidth) |
| *elementwidth = innersize * 8; |
| |
| if (modconst) |
| *modconst = CONST_VECTOR_ELT (op, 0); |
| |
| return 1; |
| } |
| |
| /* Return a string suitable for output of Neon immediate logic operation |
| MNEM. */ |
| |
| char * |
| neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode, |
| int inverse, int quad) |
| { |
| int width, is_valid; |
| static char templ[40]; |
| |
| is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width); |
| |
| gcc_assert (is_valid != 0); |
| |
| if (quad) |
| sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width); |
| else |
| sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width); |
| |
| return templ; |
| } |
| |
| /* Return a string suitable for output of Neon immediate shift operation |
| (VSHR or VSHL) MNEM. */ |
| |
| char * |
| neon_output_shift_immediate (const char *mnem, char sign, rtx *op2, |
| machine_mode mode, int quad, |
| bool isleftshift) |
| { |
| int width, is_valid; |
| static char templ[40]; |
| |
| is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift); |
| gcc_assert (is_valid != 0); |
| |
| if (quad) |
| sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width); |
| else |
| sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width); |
| |
| return templ; |
| } |
| |
| /* Output a sequence of pairwise operations to implement a reduction. |
| NOTE: We do "too much work" here, because pairwise operations work on two |
| registers-worth of operands in one go. Unfortunately we can't exploit those |
| extra calculations to do the full operation in fewer steps, I don't think. |
| Although all vector elements of the result but the first are ignored, we |
| actually calculate the same result in each of the elements. An alternative |
| such as initially loading a vector with zero to use as each of the second |
| operands would use up an additional register and take an extra instruction, |
| for no particular gain. */ |
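| /* For example, reducing a vector of four elements emits two pairwise |
| operations: the first narrows four partial results to two, and the |
| second combines those two and leaves the full reduction in OP0. */ |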
| |
| void |
| neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode, |
| rtx (*reduc) (rtx, rtx, rtx)) |
| { |
| machine_mode inner = GET_MODE_INNER (mode); |
| unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner); |
| rtx tmpsum = op1; |
| |
| for (i = parts / 2; i >= 1; i /= 2) |
| { |
| rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode); |
| emit_insn (reduc (dest, tmpsum, tmpsum)); |
| tmpsum = dest; |
| } |
| } |
| |
| /* If VALS is a vector constant that can be loaded into a register |
| using VDUP, generate instructions to do so and return an RTX to |
| assign to the register. Otherwise return NULL_RTX. */ |
| |
| static rtx |
| neon_vdup_constant (rtx vals) |
| { |
| machine_mode mode = GET_MODE (vals); |
| machine_mode inner_mode = GET_MODE_INNER (mode); |
| int n_elts = GET_MODE_NUNITS (mode); |
| bool all_same = true; |
| rtx x; |
| int i; |
| |
| if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4) |
| return NULL_RTX; |
| |
| for (i = 0; i < n_elts; ++i) |
| { |
| x = XVECEXP (vals, 0, i); |
| if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) |
| all_same = false; |
| } |
| |
| if (!all_same) |
| /* The elements are not all the same. We could handle repeating |
| patterns of a mode larger than INNER_MODE here (e.g. int8x8_t |
| {0, C, 0, C, 0, C, 0, C} which can be loaded using |
| vdup.i16). */ |
| return NULL_RTX; |
| |
| /* We can load this constant by using VDUP and a constant in a |
| single ARM register. This will be cheaper than a vector |
| load. */ |
| |
| x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); |
| return gen_rtx_VEC_DUPLICATE (mode, x); |
| } |
| |
| /* Generate code to load VALS, which is a PARALLEL containing only |
| constants (for vec_init) or CONST_VECTOR, efficiently into a |
| register. Returns an RTX to copy into the register, or NULL_RTX |
| for a PARALLEL that can not be converted into a CONST_VECTOR. */ |
| |
| rtx |
| neon_make_constant (rtx vals) |
| { |
| machine_mode mode = GET_MODE (vals); |
| rtx target; |
| rtx const_vec = NULL_RTX; |
| int n_elts = GET_MODE_NUNITS (mode); |
| int n_const = 0; |
| int i; |
| |
| if (GET_CODE (vals) == CONST_VECTOR) |
| const_vec = vals; |
| else if (GET_CODE (vals) == PARALLEL) |
| { |
| /* A CONST_VECTOR must contain only CONST_INTs and |
| CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). |
| Only store valid constants in a CONST_VECTOR. */ |
| for (i = 0; i < n_elts; ++i) |
| { |
| rtx x = XVECEXP (vals, 0, i); |
| if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) |
| n_const++; |
| } |
| if (n_const == n_elts) |
| const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); |
| } |
| else |
| gcc_unreachable (); |
| |
| if (const_vec != NULL |
| && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL)) |
| /* Load using VMOV. On Cortex-A8 this takes one cycle. */ |
| return const_vec; |
| else if ((target = neon_vdup_constant (vals)) != NULL_RTX) |
| /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON |
| pipeline cycle; creating the constant takes one or two ARM |
| pipeline cycles. */ |
| return target; |
| else if (const_vec != NULL_RTX) |
| /* Load from constant pool. On Cortex-A8 this takes two cycles |
| (for either double or quad vectors). We can not take advantage |
| of single-cycle VLD1 because we need a PC-relative addressing |
| mode. */ |
| return const_vec; |
| else |
| /* A PARALLEL containing something not valid inside CONST_VECTOR. |
| We can not construct an initializer. */ |
| return NULL_RTX; |
| } |
| |
| /* Initialize vector TARGET to VALS. */ |
| |
| void |
| neon_expand_vector_init (rtx target, rtx vals) |
| { |
| machine_mode mode = GET_MODE (target); |
| machine_mode inner_mode = GET_MODE_INNER (mode); |
| int n_elts = GET_MODE_NUNITS (mode); |
| int n_var = 0, one_var = -1; |
| bool all_same = true; |
| rtx x, mem; |
| int i; |
| |
| for (i = 0; i < n_elts; ++i) |
| { |
| x = XVECEXP (vals, 0, i); |
| if (!CONSTANT_P (x)) |
| ++n_var, one_var = i; |
| |
| if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) |
| all_same = false; |
| } |
| |
| if (n_var == 0) |
| { |
| rtx constant = neon_make_constant (vals); |
| if (constant != NULL_RTX) |
| { |
| emit_move_insn (target, constant); |
| return; |
| } |
| } |
| |
| /* Splat a single non-constant element if we can. */ |
| if (all_same && GET_MODE_SIZE (inner_mode) <= 4) |
| { |
| x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); |
| emit_insn (gen_rtx_SET (VOIDmode, target, |
| gen_rtx_VEC_DUPLICATE (mode, x))); |
| return; |
| } |
| |
| /* One field is non-constant. Load constant then overwrite varying |
| field. This is more efficient than using the stack. */ |
| if (n_var == 1) |
| { |
| rtx copy = copy_rtx (vals); |
| rtx index = GEN_INT (one_var); |
| |
| /* Load constant part of vector, substitute neighboring value for |
| varying element. */ |
| XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); |
| neon_expand_vector_init (target, copy); |
| |
| /* Insert variable. */ |
| x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); |
| switch (mode) |
| { |
| case V8QImode: |
| emit_insn (gen_neon_vset_lanev8qi (target, x, target, index)); |
| break; |
| case V16QImode: |
| emit_insn (gen_neon_vset_lanev16qi (target, x, target, index)); |
| break; |
| case V4HImode: |
| emit_insn (gen_neon_vset_lanev4hi (target, x, target, index)); |
| break; |
| case V8HImode: |
| emit_insn (gen_neon_vset_lanev8hi (target, x, target, index)); |
| break; |
| case V2SImode: |
| emit_insn (gen_neon_vset_lanev2si (target, x, target, index)); |
| break; |
| case V4SImode: |
| emit_insn (gen_neon_vset_lanev4si (target, x, target, index)); |
| break; |
| case V2SFmode: |
| emit_insn (gen_neon_vset_lanev2sf (target, x, target, index)); |
| break; |
| case V4SFmode: |
| emit_insn (gen_neon_vset_lanev4sf (target, x, target, index)); |
| break; |
| case V2DImode: |
| emit_insn (gen_neon_vset_lanev2di (target, x, target, index)); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| return; |
| } |
| |
| /* Construct the vector in memory one field at a time |
| and load the whole vector. */ |
| mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); |
| for (i = 0; i < n_elts; i++) |
| emit_move_insn (adjust_address_nv (mem, inner_mode, |
| i * GET_MODE_SIZE (inner_mode)), |
| XVECEXP (vals, 0, i)); |
| emit_move_insn (target, mem); |
| } |
| |
| /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise |
| ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so |
| reported source locations are bogus. */ |
| |
| static void |
| bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, |
| const char *err) |
| { |
| HOST_WIDE_INT lane; |
| |
| gcc_assert (CONST_INT_P (operand)); |
| |
| lane = INTVAL (operand); |
| |
| if (lane < low || lane >= high) |
| error (err); |
| } |
| |
| /* Bounds-check lanes. */ |
| |
| void |
| neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) |
| { |
| bounds_check (operand, low, high, "lane out of range"); |
| } |
| |
| /* Bounds-check constants. */ |
| |
| void |
| neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) |
| { |
| bounds_check (operand, low, high, "constant out of range"); |
| } |
| |
| HOST_WIDE_INT |
| neon_element_bits (machine_mode mode) |
| { |
| if (mode == DImode) |
| return GET_MODE_BITSIZE (mode); |
| else |
| return GET_MODE_BITSIZE (GET_MODE_INNER (mode)); |
| } |
| |
| |
| /* Predicates for `match_operand' and `match_operator'. */ |
| |
| /* Return TRUE if OP is a valid coprocessor memory address pattern. |
| WB is true if full writeback address modes are allowed and is false |
| if limited writeback address modes (POST_INC and PRE_DEC) are |
| allowed. */ |
| |
| int |
| arm_coproc_mem_operand (rtx op, bool wb) |
| { |
| rtx ind; |
| |
| /* Reject eliminable registers. */ |
| if (! (reload_in_progress || reload_completed || lra_in_progress) |
| && ( reg_mentioned_p (frame_pointer_rtx, op) |
| || reg_mentioned_p (arg_pointer_rtx, op) |
| || reg_mentioned_p (virtual_incoming_args_rtx, op) |
| || reg_mentioned_p (virtual_outgoing_args_rtx, op) |
| || reg_mentioned_p (virtual_stack_dynamic_rtx, op) |
| || reg_mentioned_p (virtual_stack_vars_rtx, op))) |
| return FALSE; |
| |
| /* Constants are converted into offsets from labels. */ |
| if (!MEM_P (op)) |
| return FALSE; |
| |
| ind = XEXP (op, 0); |
| |
| if (reload_completed |
| && (GET_CODE (ind) == LABEL_REF |
| || (GET_CODE (ind) == CONST |
| && GET_CODE (XEXP (ind, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF |
| && CONST_INT_P (XEXP (XEXP (ind, 0), 1))))) |
| return TRUE; |
| |
| /* Match: (mem (reg)). */ |
| if (REG_P (ind)) |
| return arm_address_register_rtx_p (ind, 0); |
| |
| /* Autoincrement addressing modes. POST_INC and PRE_DEC are |
| acceptable in any case (subject to verification by |
| arm_address_register_rtx_p). We need WB to be true to accept |
| PRE_INC and POST_DEC. */ |
| if (GET_CODE (ind) == POST_INC |
| || GET_CODE (ind) == PRE_DEC |
| || (wb |
| && (GET_CODE (ind) == PRE_INC |
| || GET_CODE (ind) == POST_DEC))) |
| return arm_address_register_rtx_p (XEXP (ind, 0), 0); |
| |
| if (wb |
| && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY) |
| && arm_address_register_rtx_p (XEXP (ind, 0), 0) |
| && GET_CODE (XEXP (ind, 1)) == PLUS |
| && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0))) |
| ind = XEXP (ind, 1); |
| |
| /* Match: |
| (plus (reg) |
| (const)). */ |
| if (GET_CODE (ind) == PLUS |
| && REG_P (XEXP (ind, 0)) |
| && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) |
| && CONST_INT_P (XEXP (ind, 1)) |
| && INTVAL (XEXP (ind, 1)) > -1024 |
| && INTVAL (XEXP (ind, 1)) < 1024 |
| && (INTVAL (XEXP (ind, 1)) & 3) == 0) |
| return TRUE; |
| |
| return FALSE; |
| } |
| |
| /* Return TRUE if OP is a memory operand which we can load or store a vector |
| to/from. TYPE is one of the following values: |
| 0 - Vector load/store (vldr) |
| 1 - Core registers (ldm) |
| 2 - Element/structure loads (vld1) |
| */ |
| int |
| neon_vector_mem_operand (rtx op, int type, bool strict) |
| { |
| rtx ind; |
| |
| /* Reject eliminable registers. */ |
| if (! (reload_in_progress || reload_completed) |
| && ( reg_mentioned_p (frame_pointer_rtx, op) |
| || reg_mentioned_p (arg_pointer_rtx, op) |
| || reg_mentioned_p (virtual_incoming_args_rtx, op) |
| || reg_mentioned_p (virtual_outgoing_args_rtx, op) |
| || reg_mentioned_p (virtual_stack_dynamic_rtx, op) |
| || reg_mentioned_p (virtual_stack_vars_rtx, op))) |
| return !strict; |
| |
| /* Constants are converted into offsets from labels. */ |
| if (!MEM_P (op)) |
| return FALSE; |
| |
| ind = XEXP (op, 0); |
| |
| if (reload_completed |
| && (GET_CODE (ind) == LABEL_REF |
| || (GET_CODE (ind) == CONST |
| && GET_CODE (XEXP (ind, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF |
| && CONST_INT_P (XEXP (XEXP (ind, 0), 1))))) |
| return TRUE; |
| |
| /* Match: (mem (reg)). */ |
| if (REG_P (ind)) |
| return arm_address_register_rtx_p (ind, 0); |
| |
| /* Allow post-increment with Neon registers. */ |
| if ((type != 1 && GET_CODE (ind) == POST_INC) |
| || (type == 0 && GET_CODE (ind) == PRE_DEC)) |
| return arm_address_register_rtx_p (XEXP (ind, 0), 0); |
| |
| /* Allow post-increment by register for VLDn. */ |
| if (type == 2 && GET_CODE (ind) == POST_MODIFY |
| && GET_CODE (XEXP (ind, 1)) == PLUS |
| && REG_P (XEXP (XEXP (ind, 1), 1))) |
| return true; |
| |
| /* Match: |
| (plus (reg) |
| (const)). */ |
| if (type == 0 |
| && GET_CODE (ind) == PLUS |
| && REG_P (XEXP (ind, 0)) |
| && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) |
| && CONST_INT_P (XEXP (ind, 1)) |
| && INTVAL (XEXP (ind, 1)) > -1024 |
| /* For quad modes, we restrict the constant offset to be slightly less |
| than what the instruction format permits. We have no such constraint |
| on double mode offsets. (This must match arm_legitimate_index_p.) */ |
| && (INTVAL (XEXP (ind, 1)) |
| < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024)) |
| && (INTVAL (XEXP (ind, 1)) & 3) == 0) |
| return TRUE; |
| |
| return FALSE; |
| } |
| |
| /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct |
| type. */ |
| int |
| neon_struct_mem_operand (rtx op) |
| { |
| rtx ind; |
| |
| /* Reject eliminable registers. */ |
| if (! (reload_in_progress || reload_completed) |
| && ( reg_mentioned_p (frame_pointer_rtx, op) |
| || reg_mentioned_p (arg_pointer_rtx, op) |
| || reg_mentioned_p (virtual_incoming_args_rtx, op) |
| || reg_mentioned_p (virtual_outgoing_args_rtx, op) |
| || reg_mentioned_p (virtual_stack_dynamic_rtx, op) |
| || reg_mentioned_p (virtual_stack_vars_rtx, op))) |
| return FALSE; |
| |
| /* Constants are converted into offsets from labels. */ |
| if (!MEM_P (op)) |
| return FALSE; |
| |
| ind = XEXP (op, 0); |
| |
| if (reload_completed |
| && (GET_CODE (ind) == LABEL_REF |
| || (GET_CODE (ind) == CONST |
| && GET_CODE (XEXP (ind, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF |
| && CONST_INT_P (XEXP (XEXP (ind, 0), 1))))) |
| return TRUE; |
| |
| /* Match: (mem (reg)). */ |
| if (REG_P (ind)) |
| return arm_address_register_rtx_p (ind, 0); |
| |
| /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */ |
| if (GET_CODE (ind) == POST_INC |
| || GET_CODE (ind) == PRE_DEC) |
| return arm_address_register_rtx_p (XEXP (ind, 0), 0); |
| |
| return FALSE; |
| } |
| |
| /* Return true if X is a register that will be eliminated later on. */ |
| int |
| arm_eliminable_register (rtx x) |
| { |
| return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM |
| || REGNO (x) == ARG_POINTER_REGNUM |
| || (REGNO (x) >= FIRST_VIRTUAL_REGISTER |
| && REGNO (x) <= LAST_VIRTUAL_REGISTER)); |
| } |
| |
| /* Return GENERAL_REGS if a scratch register is required to reload X |
| to/from coprocessor registers. Otherwise return NO_REGS. */ |
| |
| enum reg_class |
| coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb) |
| { |
| if (mode == HFmode) |
| { |
| if (!TARGET_NEON_FP16) |
| return GENERAL_REGS; |
| if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true)) |
| return NO_REGS; |
| return GENERAL_REGS; |
| } |
| |
| /* The neon move patterns handle all legitimate vector and struct |
| addresses. */ |
| if (TARGET_NEON |
| && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR) |
| && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT |
| || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT |
| || VALID_NEON_STRUCT_MODE (mode))) |
| return NO_REGS; |
| |
| if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode)) |
| return NO_REGS; |
| |
| return GENERAL_REGS; |
| } |
| |
| /* Values which must be returned in the most-significant end of the return |
| register. */ |
| |
| static bool |
| arm_return_in_msb (const_tree valtype) |
| { |
| return (TARGET_AAPCS_BASED |
| && BYTES_BIG_ENDIAN |
| && (AGGREGATE_TYPE_P (valtype) |
| || TREE_CODE (valtype) == COMPLEX_TYPE |
| || FIXED_POINT_TYPE_P (valtype))); |
| } |
| |
| /* Return TRUE if X references a SYMBOL_REF. */ |
| int |
| symbol_mentioned_p (rtx x) |
| { |
| const char * fmt; |
| int i; |
| |
| if (GET_CODE (x) == SYMBOL_REF) |
| return 1; |
| |
| /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they |
| are constant offsets, not symbols. */ |
| if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) |
| return 0; |
| |
| fmt = GET_RTX_FORMAT (GET_CODE (x)); |
| |
| for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'E') |
| { |
| int j; |
| |
| for (j = XVECLEN (x, i) - 1; j >= 0; j--) |
| if (symbol_mentioned_p (XVECEXP (x, i, j))) |
| return 1; |
| } |
| else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i))) |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| /* Return TRUE if X references a LABEL_REF. */ |
| int |
| label_mentioned_p (rtx x) |
| { |
| const char * fmt; |
| int i; |
| |
| if (GET_CODE (x) == LABEL_REF) |
| return 1; |
| |
| /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing |
| instruction, but they are constant offsets, not symbols. */ |
| if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) |
| return 0; |
| |
| fmt = GET_RTX_FORMAT (GET_CODE (x)); |
| for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) |
| { |
| if (fmt[i] == 'E') |
| { |
| int j; |
| |
| for (j = XVECLEN (x, i) - 1; j >= 0; j--) |
| if (label_mentioned_p (XVECEXP (x, i, j))) |
| return 1; |
| } |
| else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i))) |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
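| /* Return TRUE if X is an UNSPEC_TLS reference, possibly wrapped in a |
| CONST; i.e. X mentions a TLS symbol. */ |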
| int |
| tls_mentioned_p (rtx x) |
| { |
| switch (GET_CODE (x)) |
| { |
| case CONST: |
| return tls_mentioned_p (XEXP (x, 0)); |
| |
| case UNSPEC: |
| if (XINT (x, 1) == UNSPEC_TLS) |
| return 1; |
| |
| default: |
| return 0; |
| } |
| } |
| |
| /* Must not copy any rtx that uses a pc-relative address. */ |
| |
| static bool |
| arm_cannot_copy_insn_p (rtx_insn *insn) |
| { |
| /* The tls call insn cannot be copied, as it is paired with a data |
| word. */ |
| if (recog_memoized (insn) == CODE_FOR_tlscall) |
| return true; |
| |
| subrtx_iterator::array_type array; |
| FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) |
| { |
| const_rtx x = *iter; |
| if (GET_CODE (x) == UNSPEC |
| && (XINT (x, 1) == UNSPEC_PIC_BASE |
| || XINT (x, 1) == UNSPEC_PIC_UNIFIED)) |
| return true; |
| } |
| return false; |
| } |
| |
| enum rtx_code |
| minmax_code (rtx x) |
| { |
| enum rtx_code code = GET_CODE (x); |
| |
| switch (code) |
| { |
| case SMAX: |
| return GE; |
| case SMIN: |
| return LE; |
| case UMIN: |
| return LEU; |
| case UMAX: |
| return GEU; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Match pair of min/max operators that can be implemented via usat/ssat. */ |
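| /* For example, the bounds [0, 255] match an 8-bit usat (*MASK == 8, |
| *SIGNED_SAT false), while the bounds [-128, 127] match an 8-bit ssat |
| (*MASK == 8, *SIGNED_SAT true). */ |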
| |
| bool |
| arm_sat_operator_match (rtx lo_bound, rtx hi_bound, |
| int *mask, bool *signed_sat) |
| { |
| /* The high bound must be a power of two minus one. */ |
| int log = exact_log2 (INTVAL (hi_bound) + 1); |
| if (log == -1) |
| return false; |
| |
| /* The low bound is either zero (for usat) or one less than the |
| negation of the high bound (for ssat). */ |
| if (INTVAL (lo_bound) == 0) |
| { |
| if (mask) |
| *mask = log; |
| if (signed_sat) |
| *signed_sat = false; |
| |
| return true; |
| } |
| |
| if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1) |
| { |
| if (mask) |
| *mask = log + 1; |
| if (signed_sat) |
| *signed_sat = true; |
| |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Return 1 if memory locations are adjacent. */ |
| int |
| adjacent_mem_locations (rtx a, rtx b) |
| { |
| /* We don't guarantee to preserve the order of these memory refs. */ |
| if (volatile_refs_p (a) || volatile_refs_p (b)) |
| return 0; |
| |
| if ((REG_P (XEXP (a, 0)) |
| || (GET_CODE (XEXP (a, 0)) == PLUS |
| && CONST_INT_P (XEXP (XEXP (a, 0), 1)))) |
| && (REG_P (XEXP (b, 0)) |
| || (GET_CODE (XEXP (b, 0)) == PLUS |
| && CONST_INT_P (XEXP (XEXP (b, 0), 1))))) |
| { |
| HOST_WIDE_INT val0 = 0, val1 = 0; |
| rtx reg0, reg1; |
| int val_diff; |
| |
| if (GET_CODE (XEXP (a, 0)) == PLUS) |
| { |
| reg0 = XEXP (XEXP (a, 0), 0); |
| val0 = INTVAL (XEXP (XEXP (a, 0), 1)); |
| } |
| else |
| reg0 = XEXP (a, 0); |
| |
| if (GET_CODE (XEXP (b, 0)) == PLUS) |
| { |
| reg1 = XEXP (XEXP (b, 0), 0); |
| val1 = INTVAL (XEXP (XEXP (b, 0), 1)); |
| } |
| else |
| reg1 = XEXP (b, 0); |
| |
| /* Don't accept any offset that will require multiple |
| instructions to handle, since this would cause the |
| arith_adjacentmem pattern to output an overlong sequence. */ |
| if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS)) |
| return 0; |
| |
| /* Don't allow an eliminable register: register elimination can make |
| the offset too large. */ |
| if (arm_eliminable_register (reg0)) |
| return 0; |
| |
| val_diff = val1 - val0; |
| |
| if (arm_ld_sched) |
| { |
/* If the target has load delay slots, then there's no benefit
to using an ldm instruction unless the offset is zero or four
and we are optimizing for size.  */
| return (optimize_size && (REGNO (reg0) == REGNO (reg1)) |
| && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4) |
| && (val_diff == 4 || val_diff == -4)); |
| } |
| |
| return ((REGNO (reg0) == REGNO (reg1)) |
| && (val_diff == 4 || val_diff == -4)); |
| } |
| |
| return 0; |
| } |
| |
| /* Return true if OP is a valid load or store multiple operation. LOAD is true |
| for load operations, false for store operations. CONSECUTIVE is true |
| if the register numbers in the operation must be consecutive in the register |
bank.  RETURN_PC is true if the value is to be loaded into the PC.
| The pattern we are trying to match for load is: |
| [(SET (R_d0) (MEM (PLUS (addr) (offset)))) |
| (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>)))) |
| : |
| : |
| (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>)))) |
| ] |
| where |
| 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))). |
| 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn). |
| 3. If consecutive is TRUE, then for kth register being loaded, |
| REGNO (R_dk) = REGNO (R_d0) + k. |
| The pattern for store is similar. */ |
| bool |
| ldm_stm_operation_p (rtx op, bool load, machine_mode mode, |
| bool consecutive, bool return_pc) |
| { |
| HOST_WIDE_INT count = XVECLEN (op, 0); |
| rtx reg, mem, addr; |
| unsigned regno; |
| unsigned first_regno; |
| HOST_WIDE_INT i = 1, base = 0, offset = 0; |
| rtx elt; |
| bool addr_reg_in_reglist = false; |
| bool update = false; |
| int reg_increment; |
| int offset_adj; |
| int regs_per_val; |
| |
| /* If not in SImode, then registers must be consecutive |
| (e.g., VLDM instructions for DFmode). */ |
| gcc_assert ((mode == SImode) || consecutive); |
| /* Setting return_pc for stores is illegal. */ |
| gcc_assert (!return_pc || load); |
| |
| /* Set up the increments and the regs per val based on the mode. */ |
| reg_increment = GET_MODE_SIZE (mode); |
| regs_per_val = reg_increment / 4; |
| offset_adj = return_pc ? 1 : 0; |
| |
| if (count <= 1 |
| || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET |
| || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))) |
| return false; |
| |
| /* Check if this is a write-back. */ |
| elt = XVECEXP (op, 0, offset_adj); |
| if (GET_CODE (SET_SRC (elt)) == PLUS) |
| { |
| i++; |
| base = 1; |
| update = true; |
| |
| /* The offset adjustment must be the number of registers being |
| popped times the size of a single register. */ |
| if (!REG_P (SET_DEST (elt)) |
| || !REG_P (XEXP (SET_SRC (elt), 0)) |
| || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0))) |
| || !CONST_INT_P (XEXP (SET_SRC (elt), 1)) |
| || INTVAL (XEXP (SET_SRC (elt), 1)) != |
| ((count - 1 - offset_adj) * reg_increment)) |
| return false; |
| } |
| |
| i = i + offset_adj; |
| base = base + offset_adj; |
| /* Perform a quick check so we don't blow up below. If only one reg is loaded, |
| success depends on the type: VLDM can do just one reg, |
| LDM must do at least two. */ |
| if ((count <= i) && (mode == SImode)) |
| return false; |
| |
| elt = XVECEXP (op, 0, i - 1); |
| if (GET_CODE (elt) != SET) |
| return false; |
| |
| if (load) |
| { |
| reg = SET_DEST (elt); |
| mem = SET_SRC (elt); |
| } |
| else |
| { |
| reg = SET_SRC (elt); |
| mem = SET_DEST (elt); |
| } |
| |
| if (!REG_P (reg) || !MEM_P (mem)) |
| return false; |
| |
| regno = REGNO (reg); |
| first_regno = regno; |
| addr = XEXP (mem, 0); |
| if (GET_CODE (addr) == PLUS) |
| { |
| if (!CONST_INT_P (XEXP (addr, 1))) |
| return false; |
| |
| offset = INTVAL (XEXP (addr, 1)); |
| addr = XEXP (addr, 0); |
| } |
| |
| if (!REG_P (addr)) |
| return false; |
| |
| /* Don't allow SP to be loaded unless it is also the base register. It |
| guarantees that SP is reset correctly when an LDM instruction |
| is interrupted. Otherwise, we might end up with a corrupt stack. */ |
| if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM)) |
| return false; |
| |
| for (; i < count; i++) |
| { |
| elt = XVECEXP (op, 0, i); |
| if (GET_CODE (elt) != SET) |
| return false; |
| |
| if (load) |
| { |
| reg = SET_DEST (elt); |
| mem = SET_SRC (elt); |
| } |
| else |
| { |
| reg = SET_SRC (elt); |
| mem = SET_DEST (elt); |
| } |
| |
| if (!REG_P (reg) |
| || GET_MODE (reg) != mode |
| || REGNO (reg) <= regno |
| || (consecutive |
| && (REGNO (reg) != |
| (unsigned int) (first_regno + regs_per_val * (i - base)))) |
| /* Don't allow SP to be loaded unless it is also the base register. It |
| guarantees that SP is reset correctly when an LDM instruction |
| is interrupted. Otherwise, we might end up with a corrupt stack. */ |
| || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM)) |
| || !MEM_P (mem) |
| || GET_MODE (mem) != mode |
| || ((GET_CODE (XEXP (mem, 0)) != PLUS |
| || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr) |
| || !CONST_INT_P (XEXP (XEXP (mem, 0), 1)) |
| || (INTVAL (XEXP (XEXP (mem, 0), 1)) != |
| offset + (i - base) * reg_increment)) |
| && (!REG_P (XEXP (mem, 0)) |
| || offset + (i - base) * reg_increment != 0))) |
| return false; |
| |
| regno = REGNO (reg); |
| if (regno == REGNO (addr)) |
| addr_reg_in_reglist = true; |
| } |
| |
| if (load) |
| { |
| if (update && addr_reg_in_reglist) |
| return false; |
| |
/* For Thumb-1, the address register is always modified, either by
write-back or by an explicit load.  If the pattern does not describe an update,
| then the address register must be in the list of loaded registers. */ |
| if (TARGET_THUMB1) |
| return update || addr_reg_in_reglist; |
| } |
| |
| return true; |
| } |
| |
| /* Return true iff it would be profitable to turn a sequence of NOPS loads |
| or stores (depending on IS_STORE) into a load-multiple or store-multiple |
| instruction. ADD_OFFSET is nonzero if the base address register needs |
| to be modified with an add instruction before we can use it. */ |
| |
| static bool |
| multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED, |
| int nops, HOST_WIDE_INT add_offset) |
| { |
| /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm |
| if the offset isn't small enough. The reason 2 ldrs are faster |
| is because these ARMs are able to do more than one cache access |
| in a single cycle. The ARM9 and StrongARM have Harvard caches, |
| whilst the ARM8 has a double bandwidth cache. This means that |
| these cores can do both an instruction fetch and a data fetch in |
| a single cycle, so the trick of calculating the address into a |
| scratch register (one of the result regs) and then doing a load |
| multiple actually becomes slower (and no smaller in code size). |
| That is the transformation |
| |
| ldr rd1, [rbase + offset] |
| ldr rd2, [rbase + offset + 4] |
| |
| to |
| |
| add rd1, rbase, offset |
| ldmia rd1, {rd1, rd2} |
| |
| produces worse code -- '3 cycles + any stalls on rd2' instead of |
| '2 cycles + any stalls on rd2'. On ARMs with only one cache |
| access per cycle, the first sequence could never complete in less |
| than 6 cycles, whereas the ldm sequence would only take 5 and |
| would make better use of sequential accesses if not hitting the |
| cache. |
| |
| We cheat here and test 'arm_ld_sched' which we currently know to |
| only be true for the ARM8, ARM9 and StrongARM. If this ever |
| changes, then the test below needs to be reworked. */ |
| if (nops == 2 && arm_ld_sched && add_offset != 0) |
| return false; |
| |
| /* XScale has load-store double instructions, but they have stricter |
| alignment requirements than load-store multiple, so we cannot |
| use them. |
| |
| For XScale ldm requires 2 + NREGS cycles to complete and blocks |
| the pipeline until completion. |
| |
| NREGS CYCLES |
| 1 3 |
| 2 4 |
| 3 5 |
| 4 6 |
| |
| An ldr instruction takes 1-3 cycles, but does not block the |
| pipeline. |
| |
| NREGS CYCLES |
| 1 1-3 |
| 2 2-6 |
| 3 3-9 |
| 4 4-12 |
| |
| Best case ldr will always win. However, the more ldr instructions |
| we issue, the less likely we are to be able to schedule them well. |
| Using ldr instructions also increases code size. |
| |
| As a compromise, we use ldr for counts of 1 or 2 regs, and ldm |
| for counts of 3 or 4 regs. */ |
| if (nops <= 2 && arm_tune_xscale && !optimize_size) |
| return false; |
| return true; |
| } |
| |
| /* Subroutine of load_multiple_sequence and store_multiple_sequence. |
| Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute |
an array ORDER describing the sequence in which to access the offsets
so that they form an ascending order.  In this sequence, each
| offset must be larger by exactly 4 than the previous one. ORDER[0] |
| must have been filled in with the lowest offset by the caller. |
| If UNSORTED_REGS is nonnull, it is an array of register numbers that |
| we use to verify that ORDER produces an ascending order of registers. |
| Return true if it was possible to construct such an order, false if |
| not. */ |
| |
| static bool |
| compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order, |
| int *unsorted_regs) |
| { |
| int i; |
| for (i = 1; i < nops; i++) |
| { |
| int j; |
| |
| order[i] = order[i - 1]; |
| for (j = 0; j < nops; j++) |
| if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4) |
| { |
| /* We must find exactly one offset that is higher than the |
| previous one by 4. */ |
| if (order[i] != order[i - 1]) |
| return false; |
| order[i] = j; |
| } |
| if (order[i] == order[i - 1]) |
| return false; |
| /* The register numbers must be ascending. */ |
| if (unsorted_regs != NULL |
| && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]]) |
| return false; |
| } |
| return true; |
| } |
| |
| /* Used to determine in a peephole whether a sequence of load |
| instructions can be changed into a load-multiple instruction. |
| NOPS is the number of separate load instructions we are examining. The |
| first NOPS entries in OPERANDS are the destination registers, the |
| next NOPS entries are memory operands. If this function is |
| successful, *BASE is set to the common base register of the memory |
| accesses; *LOAD_OFFSET is set to the first memory location's offset |
| from that base register. |
| REGS is an array filled in with the destination register numbers. |
SAVED_ORDER (if nonnull) is an array filled in with an order that maps
insn numbers to an ascending order of loads.  If CHECK_REGS is true,
| the sequence of registers in REGS matches the loads from ascending memory |
| locations, and the function verifies that the register numbers are |
| themselves ascending. If CHECK_REGS is false, the register numbers |
| are stored in the order they are found in the operands. */ |
| static int |
| load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order, |
| int *base, HOST_WIDE_INT *load_offset, bool check_regs) |
| { |
| int unsorted_regs[MAX_LDM_STM_OPS]; |
| HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; |
| int order[MAX_LDM_STM_OPS]; |
| rtx base_reg_rtx = NULL; |
| int base_reg = -1; |
| int i, ldm_case; |
| |
| /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be |
| easily extended if required. */ |
| gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); |
| |
| memset (order, 0, MAX_LDM_STM_OPS * sizeof (int)); |
| |
| /* Loop over the operands and check that the memory references are |
| suitable (i.e. immediate offsets from the same base register). At |
| the same time, extract the target register, and the memory |
| offsets. */ |
| for (i = 0; i < nops; i++) |
| { |
| rtx reg; |
| rtx offset; |
| |
| /* Convert a subreg of a mem into the mem itself. */ |
| if (GET_CODE (operands[nops + i]) == SUBREG) |
| operands[nops + i] = alter_subreg (operands + (nops + i), true); |
| |
| gcc_assert (MEM_P (operands[nops + i])); |
| |
| /* Don't reorder volatile memory references; it doesn't seem worth |
| looking for the case where the order is ok anyway. */ |
| if (MEM_VOLATILE_P (operands[nops + i])) |
| return 0; |
| |
| offset = const0_rtx; |
| |
| if ((REG_P (reg = XEXP (operands[nops + i], 0)) |
| || (GET_CODE (reg) == SUBREG |
| && REG_P (reg = SUBREG_REG (reg)))) |
| || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS |
| && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0))) |
| || (GET_CODE (reg) == SUBREG |
| && REG_P (reg = SUBREG_REG (reg)))) |
| && (CONST_INT_P (offset |
| = XEXP (XEXP (operands[nops + i], 0), 1))))) |
| { |
| if (i == 0) |
| { |
| base_reg = REGNO (reg); |
| base_reg_rtx = reg; |
| if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM) |
| return 0; |
| } |
| else if (base_reg != (int) REGNO (reg)) |
| /* Not addressed from the same base register. */ |
| return 0; |
| |
| unsorted_regs[i] = (REG_P (operands[i]) |
| ? REGNO (operands[i]) |
| : REGNO (SUBREG_REG (operands[i]))); |
| |
| /* If it isn't an integer register, or if it overwrites the |
| base register but isn't the last insn in the list, then |
| we can't do this. */ |
| if (unsorted_regs[i] < 0 |
| || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) |
| || unsorted_regs[i] > 14 |
| || (i != nops - 1 && unsorted_regs[i] == base_reg)) |
| return 0; |
| |
| /* Don't allow SP to be loaded unless it is also the base |
| register. It guarantees that SP is reset correctly when |
| an LDM instruction is interrupted. Otherwise, we might |
| end up with a corrupt stack. */ |
| if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM) |
| return 0; |
| |
| unsorted_offsets[i] = INTVAL (offset); |
| if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) |
| order[0] = i; |
| } |
| else |
| /* Not a suitable memory address. */ |
| return 0; |
| } |
| |
| /* All the useful information has now been extracted from the |
| operands into unsorted_regs and unsorted_offsets; additionally, |
| order[0] has been set to the lowest offset in the list. Sort |
| the offsets into order, verifying that they are adjacent, and |
| check that the register numbers are ascending. */ |
| if (!compute_offset_order (nops, unsorted_offsets, order, |
| check_regs ? unsorted_regs : NULL)) |
| return 0; |
| |
| if (saved_order) |
| memcpy (saved_order, order, sizeof order); |
| |
| if (base) |
| { |
| *base = base_reg; |
| |
| for (i = 0; i < nops; i++) |
| regs[i] = unsorted_regs[check_regs ? order[i] : i]; |
| |
| *load_offset = unsorted_offsets[order[0]]; |
| } |
| |
| if (TARGET_THUMB1 |
| && !peep2_reg_dead_p (nops, base_reg_rtx)) |
| return 0; |
| |
| if (unsorted_offsets[order[0]] == 0) |
| ldm_case = 1; /* ldmia */ |
| else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) |
| ldm_case = 2; /* ldmib */ |
| else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) |
| ldm_case = 3; /* ldmda */ |
| else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4) |
| ldm_case = 4; /* ldmdb */ |
| else if (const_ok_for_arm (unsorted_offsets[order[0]]) |
| || const_ok_for_arm (-unsorted_offsets[order[0]])) |
| ldm_case = 5; |
| else |
| return 0; |
| |
| if (!multiple_operation_profitable_p (false, nops, |
| ldm_case == 5 |
| ? unsorted_offsets[order[0]] : 0)) |
| return 0; |
| |
| return ldm_case; |
| } |
| |
| /* Used to determine in a peephole whether a sequence of store instructions can |
| be changed into a store-multiple instruction. |
| NOPS is the number of separate store instructions we are examining. |
| NOPS_TOTAL is the total number of instructions recognized by the peephole |
| pattern. |
| The first NOPS entries in OPERANDS are the source registers, the next |
| NOPS entries are memory operands. If this function is successful, *BASE is |
| set to the common base register of the memory accesses; *LOAD_OFFSET is set |
| to the first memory location's offset from that base register. REGS is an |
| array filled in with the source register numbers, REG_RTXS (if nonnull) is |
| likewise filled with the corresponding rtx's. |
SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
| numbers to an ascending order of stores. |
| If CHECK_REGS is true, the sequence of registers in *REGS matches the stores |
| from ascending memory locations, and the function verifies that the register |
| numbers are themselves ascending. If CHECK_REGS is false, the register |
| numbers are stored in the order they are found in the operands. */ |
| static int |
| store_multiple_sequence (rtx *operands, int nops, int nops_total, |
| int *regs, rtx *reg_rtxs, int *saved_order, int *base, |
| HOST_WIDE_INT *load_offset, bool check_regs) |
| { |
| int unsorted_regs[MAX_LDM_STM_OPS]; |
| rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS]; |
| HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; |
| int order[MAX_LDM_STM_OPS]; |
| int base_reg = -1; |
| rtx base_reg_rtx = NULL; |
| int i, stm_case; |
| |
| /* Write back of base register is currently only supported for Thumb 1. */ |
| int base_writeback = TARGET_THUMB1; |
| |
| /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be |
| easily extended if required. */ |
| gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); |
| |
| memset (order, 0, MAX_LDM_STM_OPS * sizeof (int)); |
| |
| /* Loop over the operands and check that the memory references are |
| suitable (i.e. immediate offsets from the same base register). At |
| the same time, extract the target register, and the memory |
| offsets. */ |
| for (i = 0; i < nops; i++) |
| { |
| rtx reg; |
| rtx offset; |
| |
| /* Convert a subreg of a mem into the mem itself. */ |
| if (GET_CODE (operands[nops + i]) == SUBREG) |
| operands[nops + i] = alter_subreg (operands + (nops + i), true); |
| |
| gcc_assert (MEM_P (operands[nops + i])); |
| |
| /* Don't reorder volatile memory references; it doesn't seem worth |
| looking for the case where the order is ok anyway. */ |
| if (MEM_VOLATILE_P (operands[nops + i])) |
| return 0; |
| |
| offset = const0_rtx; |
| |
| if ((REG_P (reg = XEXP (operands[nops + i], 0)) |
| || (GET_CODE (reg) == SUBREG |
| && REG_P (reg = SUBREG_REG (reg)))) |
| || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS |
| && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0))) |
| || (GET_CODE (reg) == SUBREG |
| && REG_P (reg = SUBREG_REG (reg)))) |
| && (CONST_INT_P (offset |
| = XEXP (XEXP (operands[nops + i], 0), 1))))) |
| { |
| unsorted_reg_rtxs[i] = (REG_P (operands[i]) |
| ? operands[i] : SUBREG_REG (operands[i])); |
| unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]); |
| |
| if (i == 0) |
| { |
| base_reg = REGNO (reg); |
| base_reg_rtx = reg; |
| if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM) |
| return 0; |
| } |
| else if (base_reg != (int) REGNO (reg)) |
| /* Not addressed from the same base register. */ |
| return 0; |
| |
| /* If it isn't an integer register, then we can't do this. */ |
| if (unsorted_regs[i] < 0 |
| || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) |
| /* The effects are unpredictable if the base register is |
| both updated and stored. */ |
| || (base_writeback && unsorted_regs[i] == base_reg) |
| || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM) |
| || unsorted_regs[i] > 14) |
| return 0; |
| |
| unsorted_offsets[i] = INTVAL (offset); |
| if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) |
| order[0] = i; |
| } |
| else |
| /* Not a suitable memory address. */ |
| return 0; |
| } |
| |
| /* All the useful information has now been extracted from the |
| operands into unsorted_regs and unsorted_offsets; additionally, |
| order[0] has been set to the lowest offset in the list. Sort |
| the offsets into order, verifying that they are adjacent, and |
| check that the register numbers are ascending. */ |
| if (!compute_offset_order (nops, unsorted_offsets, order, |
| check_regs ? unsorted_regs : NULL)) |
| return 0; |
| |
| if (saved_order) |
| memcpy (saved_order, order, sizeof order); |
| |
| if (base) |
| { |
| *base = base_reg; |
| |
| for (i = 0; i < nops; i++) |
| { |
| regs[i] = unsorted_regs[check_regs ? order[i] : i]; |
| if (reg_rtxs) |
| reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i]; |
| } |
| |
| *load_offset = unsorted_offsets[order[0]]; |
| } |
| |
| if (TARGET_THUMB1 |
| && !peep2_reg_dead_p (nops_total, base_reg_rtx)) |
| return 0; |
| |
| if (unsorted_offsets[order[0]] == 0) |
| stm_case = 1; /* stmia */ |
| else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) |
| stm_case = 2; /* stmib */ |
| else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) |
| stm_case = 3; /* stmda */ |
| else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4) |
| stm_case = 4; /* stmdb */ |
| else |
| return 0; |
| |
| if (!multiple_operation_profitable_p (false, nops, 0)) |
| return 0; |
| |
| return stm_case; |
| } |
| |
| /* Routines for use in generating RTL. */ |
| |
| /* Generate a load-multiple instruction. COUNT is the number of loads in |
| the instruction; REGS and MEMS are arrays containing the operands. |
| BASEREG is the base register to be used in addressing the memory operands. |
| WBACK_OFFSET is nonzero if the instruction should update the base |
| register. */ |
| |
| static rtx |
| arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg, |
| HOST_WIDE_INT wback_offset) |
| { |
| int i = 0, j; |
| rtx result; |
| |
| if (!multiple_operation_profitable_p (false, count, 0)) |
| { |
| rtx seq; |
| |
| start_sequence (); |
| |
| for (i = 0; i < count; i++) |
| emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]); |
| |
| if (wback_offset != 0) |
| emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset)); |
| |
| seq = get_insns (); |
| end_sequence (); |
| |
| return seq; |
| } |
| |
| result = gen_rtx_PARALLEL (VOIDmode, |
| rtvec_alloc (count + (wback_offset != 0 ? 1 : 0))); |
| if (wback_offset != 0) |
| { |
| XVECEXP (result, 0, 0) |
| = gen_rtx_SET (VOIDmode, basereg, |
| plus_constant (Pmode, basereg, wback_offset)); |
| i = 1; |
| count++; |
| } |
| |
| for (j = 0; i < count; i++, j++) |
| XVECEXP (result, 0, i) |
| = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]); |
| |
| return result; |
| } |
| |
| /* Generate a store-multiple instruction. COUNT is the number of stores in |
| the instruction; REGS and MEMS are arrays containing the operands. |
| BASEREG is the base register to be used in addressing the memory operands. |
| WBACK_OFFSET is nonzero if the instruction should update the base |
| register. */ |
| |
| static rtx |
| arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg, |
| HOST_WIDE_INT wback_offset) |
| { |
| int i = 0, j; |
| rtx result; |
| |
| if (GET_CODE (basereg) == PLUS) |
| basereg = XEXP (basereg, 0); |
| |
| if (!multiple_operation_profitable_p (false, count, 0)) |
| { |
| rtx seq; |
| |
| start_sequence (); |
| |
| for (i = 0; i < count; i++) |
| emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i])); |
| |
| if (wback_offset != 0) |
| emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset)); |
| |
| seq = get_insns (); |
| end_sequence (); |
| |
| return seq; |
| } |
| |
| result = gen_rtx_PARALLEL (VOIDmode, |
| rtvec_alloc (count + (wback_offset != 0 ? 1 : 0))); |
| if (wback_offset != 0) |
| { |
| XVECEXP (result, 0, 0) |
| = gen_rtx_SET (VOIDmode, basereg, |
| plus_constant (Pmode, basereg, wback_offset)); |
| i = 1; |
| count++; |
| } |
| |
| for (j = 0; i < count; i++, j++) |
| XVECEXP (result, 0, i) |
| = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j])); |
| |
| return result; |
| } |
| |
| /* Generate either a load-multiple or a store-multiple instruction. This |
| function can be used in situations where we can start with a single MEM |
| rtx and adjust its address upwards. |
| COUNT is the number of operations in the instruction, not counting a |
| possible update of the base register. REGS is an array containing the |
| register operands. |
| BASEREG is the base register to be used in addressing the memory operands, |
| which are constructed from BASEMEM. |
| WRITE_BACK specifies whether the generated instruction should include an |
| update of the base register. |
| OFFSETP is used to pass an offset to and from this function; this offset |
| is not used when constructing the address (instead BASEMEM should have an |
appropriate offset in its address); it is used only for setting
MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
| |
| static rtx |
| arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg, |
| bool write_back, rtx basemem, HOST_WIDE_INT *offsetp) |
| { |
| rtx mems[MAX_LDM_STM_OPS]; |
| HOST_WIDE_INT offset = *offsetp; |
| int i; |
| |
| gcc_assert (count <= MAX_LDM_STM_OPS); |
| |
| if (GET_CODE (basereg) == PLUS) |
| basereg = XEXP (basereg, 0); |
| |
| for (i = 0; i < count; i++) |
| { |
| rtx addr = plus_constant (Pmode, basereg, i * 4); |
| mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset); |
| offset += 4; |
| } |
| |
| if (write_back) |
| *offsetp = offset; |
| |
| if (is_load) |
| return arm_gen_load_multiple_1 (count, regs, mems, basereg, |
| write_back ? 4 * count : 0); |
| else |
| return arm_gen_store_multiple_1 (count, regs, mems, basereg, |
| write_back ? 4 * count : 0); |
| } |
| |
| rtx |
| arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back, |
| rtx basemem, HOST_WIDE_INT *offsetp) |
| { |
| return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem, |
| offsetp); |
| } |
| |
| rtx |
| arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back, |
| rtx basemem, HOST_WIDE_INT *offsetp) |
| { |
| return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem, |
| offsetp); |
| } |
| |
| /* Called from a peephole2 expander to turn a sequence of loads into an |
| LDM instruction. OPERANDS are the operands found by the peephole matcher; |
| NOPS indicates how many separate loads we are trying to combine. SORT_REGS |
is true if we can reorder the registers because their subsequent uses
are commutative.
| Returns true iff we could generate a new instruction. */ |
| |
| bool |
| gen_ldm_seq (rtx *operands, int nops, bool sort_regs) |
| { |
| int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; |
| rtx mems[MAX_LDM_STM_OPS]; |
| int i, j, base_reg; |
| rtx base_reg_rtx; |
| HOST_WIDE_INT offset; |
| int write_back = FALSE; |
| int ldm_case; |
| rtx addr; |
| |
| ldm_case = load_multiple_sequence (operands, nops, regs, mem_order, |
| &base_reg, &offset, !sort_regs); |
| |
| if (ldm_case == 0) |
| return false; |
| |
| if (sort_regs) |
| for (i = 0; i < nops - 1; i++) |
| for (j = i + 1; j < nops; j++) |
| if (regs[i] > regs[j]) |
| { |
| int t = regs[i]; |
| regs[i] = regs[j]; |
| regs[j] = t; |
| } |
| base_reg_rtx = gen_rtx_REG (Pmode, base_reg); |
| |
| if (TARGET_THUMB1) |
| { |
| gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx)); |
| gcc_assert (ldm_case == 1 || ldm_case == 5); |
| write_back = TRUE; |
| } |
| |
| if (ldm_case == 5) |
| { |
| rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]); |
| emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset))); |
| offset = 0; |
| if (!TARGET_THUMB1) |
| { |
| base_reg = regs[0]; |
| base_reg_rtx = newbase; |
| } |
| } |
| |
| for (i = 0; i < nops; i++) |
| { |
| addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4); |
| mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], |
| SImode, addr, 0); |
| } |
| emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx, |
| write_back ? offset + i * 4 : 0)); |
| return true; |
| } |
| |
| /* Called from a peephole2 expander to turn a sequence of stores into an |
| STM instruction. OPERANDS are the operands found by the peephole matcher; |
| NOPS indicates how many separate stores we are trying to combine. |
| Returns true iff we could generate a new instruction. */ |
| |
| bool |
| gen_stm_seq (rtx *operands, int nops) |
| { |
| int i; |
| int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; |
| rtx mems[MAX_LDM_STM_OPS]; |
| int base_reg; |
| rtx base_reg_rtx; |
| HOST_WIDE_INT offset; |
| int write_back = FALSE; |
| int stm_case; |
| rtx addr; |
| bool base_reg_dies; |
| |
| stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL, |
| mem_order, &base_reg, &offset, true); |
| |
| if (stm_case == 0) |
| return false; |
| |
| base_reg_rtx = gen_rtx_REG (Pmode, base_reg); |
| |
| base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx); |
| if (TARGET_THUMB1) |
| { |
| gcc_assert (base_reg_dies); |
| write_back = TRUE; |
| } |
| |
| if (stm_case == 5) |
| { |
| gcc_assert (base_reg_dies); |
| emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset))); |
| offset = 0; |
| } |
| |
| addr = plus_constant (Pmode, base_reg_rtx, offset); |
| |
| for (i = 0; i < nops; i++) |
| { |
| addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4); |
| mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], |
| SImode, addr, 0); |
| } |
| emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx, |
| write_back ? offset + i * 4 : 0)); |
| return true; |
| } |
| |
| /* Called from a peephole2 expander to turn a sequence of stores that are |
| preceded by constant loads into an STM instruction. OPERANDS are the |
| operands found by the peephole matcher; NOPS indicates how many |
| separate stores we are trying to combine; there are 2 * NOPS |
| instructions in the peephole. |
| Returns true iff we could generate a new instruction. */ |
| |
| bool |
| gen_const_stm_seq (rtx *operands, int nops) |
| { |
| int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS]; |
| int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; |
| rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS]; |
| rtx mems[MAX_LDM_STM_OPS]; |
| int base_reg; |
| rtx base_reg_rtx; |
| HOST_WIDE_INT offset; |
| int write_back = FALSE; |
| int stm_case; |
| rtx addr; |
| bool base_reg_dies; |
| int i, j; |
| HARD_REG_SET allocated; |
| |
| stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs, |
| mem_order, &base_reg, &offset, false); |
| |
| if (stm_case == 0) |
| return false; |
| |
| memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs); |
| |
| /* If the same register is used more than once, try to find a free |
| register. */ |
| CLEAR_HARD_REG_SET (allocated); |
| for (i = 0; i < nops; i++) |
| { |
| for (j = i + 1; j < nops; j++) |
| if (regs[i] == regs[j]) |
| { |
| rtx t = peep2_find_free_register (0, nops * 2, |
| TARGET_THUMB1 ? "l" : "r", |
| SImode, &allocated); |
| if (t == NULL_RTX) |
| return false; |
| reg_rtxs[i] = t; |
| regs[i] = REGNO (t); |
| } |
| } |
| |
| /* Compute an ordering that maps the register numbers to an ascending |
| sequence. */ |
| reg_order[0] = 0; |
| for (i = 0; i < nops; i++) |
| if (regs[i] < regs[reg_order[0]]) |
| reg_order[0] = i; |
| |
| for (i = 1; i < nops; i++) |
| { |
| int this_order = reg_order[i - 1]; |
| for (j = 0; j < nops; j++) |
| if (regs[j] > regs[reg_order[i - 1]] |
| && (this_order == reg_order[i - 1] |
| || regs[j] < regs[this_order])) |
| this_order = j; |
| reg_order[i] = this_order; |
| } |
| |
| /* Ensure that registers that must be live after the instruction end |
| up with the correct value. */ |
| for (i = 0; i < nops; i++) |
| { |
| int this_order = reg_order[i]; |
| if ((this_order != mem_order[i] |
| || orig_reg_rtxs[this_order] != reg_rtxs[this_order]) |
| && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order])) |
| return false; |
| } |
| |
| /* Load the constants. */ |
| for (i = 0; i < nops; i++) |
| { |
| rtx op = operands[2 * nops + mem_order[i]]; |
| sorted_regs[i] = regs[reg_order[i]]; |
| emit_move_insn (reg_rtxs[reg_order[i]], op); |
| } |
| |
| base_reg_rtx = gen_rtx_REG (Pmode, base_reg); |
| |
| base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx); |
| if (TARGET_THUMB1) |
| { |
| gcc_assert (base_reg_dies); |
| write_back = TRUE; |
| } |
| |
| if (stm_case == 5) |
| { |
| gcc_assert (base_reg_dies); |
| emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset))); |
| offset = 0; |
| } |
| |
| addr = plus_constant (Pmode, base_reg_rtx, offset); |
| |
| for (i = 0; i < nops; i++) |
| { |
| addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4); |
| mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], |
| SImode, addr, 0); |
| } |
| emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx, |
| write_back ? offset + i * 4 : 0)); |
| return true; |
| } |
| |
| /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit |
| unaligned copies on processors which support unaligned semantics for those |
| instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency |
| (using more registers) by doing e.g. load/load/store/store for a factor of 2. |
| An interleave factor of 1 (the minimum) will perform no interleaving. |
| Load/store multiple are used for aligned addresses where possible. */ |
| |
| static void |
| arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase, |
| HOST_WIDE_INT length, |
| unsigned int interleave_factor) |
| { |
| rtx *regs = XALLOCAVEC (rtx, interleave_factor); |
| int *regnos = XALLOCAVEC (int, interleave_factor); |
| HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD; |
| HOST_WIDE_INT i, j; |
| HOST_WIDE_INT remaining = length, words; |
| rtx halfword_tmp = NULL, byte_tmp = NULL; |
| rtx dst, src; |
| bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD; |
| bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD; |
| HOST_WIDE_INT srcoffset, dstoffset; |
| HOST_WIDE_INT src_autoinc, dst_autoinc; |
| rtx mem, addr; |
| |
| gcc_assert (1 <= interleave_factor && interleave_factor <= 4); |
| |
| /* Use hard registers if we have aligned source or destination so we can use |
| load/store multiple with contiguous registers. */ |
| if (dst_aligned || src_aligned) |
| for (i = 0; i < interleave_factor; i++) |
| regs[i] = gen_rtx_REG (SImode, i); |
| else |
| for (i = 0; i < interleave_factor; i++) |
| regs[i] = gen_reg_rtx (SImode); |
| |
| dst = copy_addr_to_reg (XEXP (dstbase, 0)); |
| src = copy_addr_to_reg (XEXP (srcbase, 0)); |
| |
| srcoffset = dstoffset = 0; |
| |
| /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST. |
| For copying the last bytes we want to subtract this offset again. */ |
| src_autoinc = dst_autoinc = 0; |
| |
| for (i = 0; i < interleave_factor; i++) |
| regnos[i] = i; |
| |
| /* Copy BLOCK_SIZE_BYTES chunks. */ |
| |
| for (i = 0; i + block_size_bytes <= length; i += block_size_bytes) |
| { |
| /* Load words. */ |
| if (src_aligned && interleave_factor > 1) |
| { |
| emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src, |
| TRUE, srcbase, &srcoffset)); |
| src_autoinc += UNITS_PER_WORD * interleave_factor; |
| } |
| else |
| { |
| for (j = 0; j < interleave_factor; j++) |
| { |
| addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD |
| - src_autoinc)); |
| mem = adjust_automodify_address (srcbase, SImode, addr, |
| srcoffset + j * UNITS_PER_WORD); |
| emit_insn (gen_unaligned_loadsi (regs[j], mem)); |
| } |
| srcoffset += block_size_bytes; |
| } |
| |
| /* Store words. */ |
| if (dst_aligned && interleave_factor > 1) |
| { |
| emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst, |
| TRUE, dstbase, &dstoffset)); |
| dst_autoinc += UNITS_PER_WORD * interleave_factor; |
| } |
| else |
| { |
| for (j = 0; j < interleave_factor; j++) |
| { |
| addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD |
| - dst_autoinc)); |
| mem = adjust_automodify_address (dstbase, SImode, addr, |
| dstoffset + j * UNITS_PER_WORD); |
| emit_insn (gen_unaligned_storesi (mem, regs[j])); |
| } |
| dstoffset += block_size_bytes; |
| } |
| |
| remaining -= block_size_bytes; |
| } |
| |
| /* Copy any whole words left (note these aren't interleaved with any |
| subsequent halfword/byte load/stores in the interests of simplicity). */ |
| |
| words = remaining / UNITS_PER_WORD; |
| |
| gcc_assert (words < interleave_factor); |
| |
| if (src_aligned && words > 1) |
| { |
| emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase, |
| &srcoffset)); |
| src_autoinc += UNITS_PER_WORD * words; |
| } |
| else |
| { |
| for (j = 0; j < words; j++) |
| { |
| addr = plus_constant (Pmode, src, |
| srcoffset + j * UNITS_PER_WORD - src_autoinc); |
| mem = adjust_automodify_address (srcbase, SImode, addr, |
| srcoffset + j * UNITS_PER_WORD); |
| emit_insn (gen_unaligned_loadsi (regs[j], mem)); |
| } |
| srcoffset += words * UNITS_PER_WORD; |
| } |
| |
| if (dst_aligned && words > 1) |
| { |
| emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase, |
| &dstoffset)); |
| dst_autoinc += words * UNITS_PER_WORD; |
| } |
| else |
| { |
| for (j = 0; j < words; j++) |
| { |
| addr = plus_constant (Pmode, dst, |
| dstoffset + j * UNITS_PER_WORD - dst_autoinc); |
| mem = adjust_automodify_address (dstbase, SImode, addr, |
| dstoffset + j * UNITS_PER_WORD); |
| emit_insn (gen_unaligned_storesi (mem, regs[j])); |
| } |
| dstoffset += words * UNITS_PER_WORD; |
| } |
| |
| remaining -= words * UNITS_PER_WORD; |
| |
| gcc_assert (remaining < 4); |
| |
| /* Copy a halfword if necessary. */ |
| |
| if (remaining >= 2) |
| { |
| halfword_tmp = gen_reg_rtx (SImode); |
| |
| addr = plus_constant (Pmode, src, srcoffset - src_autoinc); |
| mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset); |
| emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem)); |
| |
| /* Either write out immediately, or delay until we've loaded the last |
| byte, depending on interleave factor. */ |
| if (interleave_factor == 1) |
| { |
| addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); |
| mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); |
| emit_insn (gen_unaligned_storehi (mem, |
| gen_lowpart (HImode, halfword_tmp))); |
| halfword_tmp = NULL; |
| dstoffset += 2; |
| } |
| |
| remaining -= 2; |
| srcoffset += 2; |
| } |
| |
| gcc_assert (remaining < 2); |
| |
| /* Copy last byte. */ |
| |
| if ((remaining & 1) != 0) |
| { |
| byte_tmp = gen_reg_rtx (SImode); |
| |
| addr = plus_constant (Pmode, src, srcoffset - src_autoinc); |
| mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset); |
| emit_move_insn (gen_lowpart (QImode, byte_tmp), mem); |
| |
| if (interleave_factor == 1) |
| { |
| addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); |
| mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); |
| emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); |
| byte_tmp = NULL; |
| dstoffset++; |
| } |
| |
| remaining--; |
| srcoffset++; |
| } |
| |
| /* Store last halfword if we haven't done so already. */ |
| |
| if (halfword_tmp) |
| { |
| addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); |
| mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset); |
| emit_insn (gen_unaligned_storehi (mem, |
| gen_lowpart (HImode, halfword_tmp))); |
| dstoffset += 2; |
| } |
| |
| /* Likewise for last byte. */ |
| |
| if (byte_tmp) |
| { |
| addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc); |
| mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset); |
| emit_move_insn (mem, gen_lowpart (QImode, byte_tmp)); |
| dstoffset++; |
| } |
| |
| gcc_assert (remaining == 0 && srcoffset == dstoffset); |
| } |
| |
| /* From mips_adjust_block_mem: |
| |
| Helper function for doing a loop-based block operation on memory |
| reference MEM. Each iteration of the loop will operate on LENGTH |
| bytes of MEM. |
| |
| Create a new base register for use within the loop and point it to |
| the start of MEM. Create a new memory reference that uses this |
| register. Store them in *LOOP_REG and *LOOP_MEM respectively. */ |
| |
| static void |
| arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg, |
| rtx *loop_mem) |
| { |
| *loop_reg = copy_addr_to_reg (XEXP (mem, 0)); |
| |
| /* Although the new mem does not refer to a known location, |
| it does keep up to LENGTH bytes of alignment. */ |
| *loop_mem = change_address (mem, BLKmode, *loop_reg); |
| set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT)); |
| } |
| |
| /* From mips_block_move_loop: |
| |
| Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER |
| bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that |
| the memory regions do not overlap. */ |
| |
| static void |
| arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length, |
| unsigned int interleave_factor, |
| HOST_WIDE_INT bytes_per_iter) |
| { |
| rtx src_reg, dest_reg, final_src, test; |
| HOST_WIDE_INT leftover; |
| |
| leftover = length % bytes_per_iter; |
| length -= leftover; |
| |
| /* Create registers and memory references for use within the loop. */ |
| arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src); |
| arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest); |
| |
| /* Calculate the value that SRC_REG should have after the last iteration of |
| the loop. */ |
| final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length), |
| 0, 0, OPTAB_WIDEN); |
| |
| /* Emit the start of the loop. */ |
| rtx_code_label *label = gen_label_rtx (); |
| emit_label (label); |
| |
| /* Emit the loop body. */ |
| arm_block_move_unaligned_straight (dest, src, bytes_per_iter, |
| interleave_factor); |
| |
| /* Move on to the next block. */ |
| emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter)); |
| emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter)); |
| |
| /* Emit the loop condition. */ |
| test = gen_rtx_NE (VOIDmode, src_reg, final_src); |
| emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label)); |
| |
| /* Mop up any left-over bytes. */ |
| if (leftover) |
| arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor); |
| } |
| |
| /* Emit a block move when either the source or destination is unaligned (not |
| aligned to a four-byte boundary). This may need further tuning depending on |
| core type, optimize_size setting, etc. */ |
| |
| static int |
| arm_movmemqi_unaligned (rtx *operands) |
| { |
| HOST_WIDE_INT length = INTVAL (operands[2]); |
| |
| if (optimize_size) |
| { |
| bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD; |
| bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD; |
| /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit |
| size of code if optimizing for size. We'll use ldm/stm if src_aligned |
| or dst_aligned though: allow more interleaving in those cases since the |
| resulting code can be smaller. */ |
| unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1; |
| HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4; |
| |
| if (length > 12) |
| arm_block_move_unaligned_loop (operands[0], operands[1], length, |
| interleave_factor, bytes_per_iter); |
| else |
| arm_block_move_unaligned_straight (operands[0], operands[1], length, |
| interleave_factor); |
| } |
| else |
| { |
| /* Note that the loop created by arm_block_move_unaligned_loop may be |
| subject to loop unrolling, which makes tuning this condition a little |
| redundant. */ |
| if (length > 32) |
| arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16); |
| else |
| arm_block_move_unaligned_straight (operands[0], operands[1], length, 4); |
| } |
| |
| return 1; |
| } |
| |
| int |
| arm_gen_movmemqi (rtx *operands) |
| { |
| HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes; |
| HOST_WIDE_INT srcoffset, dstoffset; |
| int i; |
| rtx src, dst, srcbase, dstbase; |
| rtx part_bytes_reg = NULL; |
| rtx mem; |
| |
| if (!CONST_INT_P (operands[2]) |
| || !CONST_INT_P (operands[3]) |
| || INTVAL (operands[2]) > 64) |
| return 0; |
| |
| if (unaligned_access && (INTVAL (operands[3]) & 3) != 0) |
| return arm_movmemqi_unaligned (operands); |
| |
| if (INTVAL (operands[3]) & 3) |
| return 0; |
| |
| dstbase = operands[0]; |
| srcbase = operands[1]; |
| |
| dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0)); |
| src = copy_to_mode_reg (SImode, XEXP (srcbase, 0)); |
| |
| in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2])); |
| out_words_to_go = INTVAL (operands[2]) / 4; |
| last_bytes = INTVAL (operands[2]) & 3; |
| dstoffset = srcoffset = 0; |
| |
| if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0) |
| part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3); |
| |
| for (i = 0; in_words_to_go >= 2; i+=4) |
| { |
| if (in_words_to_go > 4) |
| emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src, |
| TRUE, srcbase, &srcoffset)); |
| else |
| emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go, |
| src, FALSE, srcbase, |
| &srcoffset)); |
| |
| if (out_words_to_go) |
| { |
| if (out_words_to_go > 4) |
| emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst, |
| TRUE, dstbase, &dstoffset)); |
| else if (out_words_to_go != 1) |
| emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, |
| out_words_to_go, dst, |
| (last_bytes == 0 |
| ? FALSE : TRUE), |
| dstbase, &dstoffset)); |
| else |
| { |
| mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); |
| emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM)); |
| if (last_bytes != 0) |
| { |
| emit_insn (gen_addsi3 (dst, dst, GEN_INT (4))); |
| dstoffset += 4; |
| } |
| } |
| } |
| |
| in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4; |
| out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4; |
| } |
| |
| /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */ |
| if (out_words_to_go) |
| { |
| rtx sreg; |
| |
| mem = adjust_automodify_address (srcbase, SImode, src, srcoffset); |
| sreg = copy_to_reg (mem); |
| |
| mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset); |
| emit_move_insn (mem, sreg); |
| in_words_to_go--; |
| |
gcc_assert (!in_words_to_go);	/* Sanity check.  */
| } |
| |
| if (in_words_to_go) |
| { |
| gcc_assert (in_words_to_go > 0); |
| |
| mem = adjust_automodify_address (srcbase, SImode, src, srcoffset); |
| part_bytes_reg = copy_to_mode_reg (SImode, mem); |
| } |
| |
| gcc_assert (!last_bytes || part_bytes_reg); |
| |
| if (BYTES_BIG_ENDIAN && last_bytes) |
| { |
| rtx tmp = gen_reg_rtx (SImode); |
| |
| /* The bytes we want are in the top end of the word. */ |
| emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, |
| GEN_INT (8 * (4 - last_bytes)))); |
| part_bytes_reg = tmp; |
| |
| while (last_bytes) |
| { |
| mem = adjust_automodify_address (dstbase, QImode, |
| plus_constant (Pmode, dst, |
| last_bytes - 1), |
| dstoffset + last_bytes - 1); |
| emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg)); |
| |
| if (--last_bytes) |
| { |
| tmp = gen_reg_rtx (SImode); |
| emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8))); |
| part_bytes_reg = tmp; |
| } |
| } |
| |
| } |
| else |
| { |
| if (last_bytes > 1) |
| { |
| mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset); |
| emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg)); |
| last_bytes -= 2; |
| if (last_bytes) |
| { |
| rtx tmp = gen_reg_rtx (SImode); |
| emit_insn (gen_addsi3 (dst, dst, const2_rtx)); |
| emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16))); |
| part_bytes_reg = tmp; |
| dstoffset += 2; |
| } |
| } |
| |
| if (last_bytes) |
| { |
| mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset); |
| emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg)); |
| } |
| } |
| |
| return 1; |
| } |
| |
/* Helper for gen_movmem_ldrd_strd.  Increase the address of the memory
rtx MEM by the size of its mode.  */
| inline static rtx |
| next_consecutive_mem (rtx mem) |
| { |
| machine_mode mode = GET_MODE (mem); |
| HOST_WIDE_INT offset = GET_MODE_SIZE (mode); |
| rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset); |
| |
| return adjust_automodify_address (mem, mode, addr, offset); |
| } |
| |
| /* Copy using LDRD/STRD instructions whenever possible. |
| Returns true upon success. */ |
| bool |
| gen_movmem_ldrd_strd (rtx *operands) |
| { |
| unsigned HOST_WIDE_INT len; |
| HOST_WIDE_INT align; |
| rtx src, dst, base; |
| rtx reg0; |
| bool src_aligned, dst_aligned; |
| bool src_volatile, dst_volatile; |
| |
| gcc_assert (CONST_INT_P (operands[2])); |
| gcc_assert (CONST_INT_P (operands[3])); |
| |
| len = UINTVAL (operands[2]); |
| if (len > 64) |
| return false; |
| |
| /* Maximum alignment we can assume for both src and dst buffers. */ |
| align = INTVAL (operands[3]); |
| |
| if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0)) |
| return false; |
| |
| /* Place src and dst addresses in registers |
| and update the corresponding mem rtx. */ |
| dst = operands[0]; |
| dst_volatile = MEM_VOLATILE_P (dst); |
| dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD; |
| base = copy_to_mode_reg (SImode, XEXP (dst, 0)); |
| dst = adjust_automodify_address (dst, VOIDmode, base, 0); |
| |
| src = operands[1]; |
| src_volatile = MEM_VOLATILE_P (src); |
| src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD; |
| base = copy_to_mode_reg (SImode, XEXP (src, 0)); |
| src = adjust_automodify_address (src, VOIDmode, base, 0); |
| |
| if (!unaligned_access && !(src_aligned && dst_aligned)) |
| return false; |
| |
| if (src_volatile || dst_volatile) |
| return false; |
| |
| /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */ |
| if (!(dst_aligned || src_aligned)) |
| return arm_gen_movmemqi (operands); |
| |
| src = adjust_address (src, DImode, 0); |
| dst = adjust_address (dst, DImode, 0); |
| while (len >= 8) |
| { |
| len -= 8; |
| reg0 = gen_reg_rtx (DImode); |
| if (src_aligned) |
| emit_move_insn (reg0, src); |
| else |
| emit_insn (gen_unaligned_loaddi (reg0, src)); |
| |
| if (dst_aligned) |
| emit_move_insn (dst, reg0); |
| else |
| emit_insn (gen_unaligned_storedi (dst, reg0)); |
| |
| src = next_consecutive_mem (src); |
| dst = next_consecutive_mem (dst); |
| } |
| |
| gcc_assert (len < 8); |
| if (len >= 4) |
| { |
/* At least a word but less than a double-word left to copy.  Copy a word.  */
| reg0 = gen_reg_rtx (SImode); |
| src = adjust_address (src, SImode, 0); |
| dst = adjust_address (dst, SImode, 0); |
| if (src_aligned) |
| emit_move_insn (reg0, src); |
| else |
| emit_insn (gen_unaligned_loadsi (reg0, src)); |
| |
| if (dst_aligned) |
| emit_move_insn (dst, reg0); |
| else |
| emit_insn (gen_unaligned_storesi (dst, reg0)); |
| |
| src = next_consecutive_mem (src); |
| dst = next_consecutive_mem (dst); |
| len -= 4; |
| } |
| |
| if (len == 0) |
| return true; |
| |
| /* Copy the remaining bytes. */ |
| if (len >= 2) |
| { |
| dst = adjust_address (dst, HImode, 0); |
| src = adjust_address (src, HImode, 0); |
| reg0 = gen_reg_rtx (SImode); |
| if (src_aligned) |
| emit_insn (gen_zero_extendhisi2 (reg0, src)); |
| else |
| emit_insn (gen_unaligned_loadhiu (reg0, src)); |
| |
| if (dst_aligned) |
emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
| else |
| emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0))); |
| |
| src = next_consecutive_mem (src); |
| dst = next_consecutive_mem (dst); |
| if (len == 2) |
| return true; |
| } |
| |
| dst = adjust_address (dst, QImode, 0); |
| src = adjust_address (src, QImode, 0); |
| reg0 = gen_reg_rtx (QImode); |
| emit_move_insn (reg0, src); |
| emit_move_insn (dst, reg0); |
| return true; |
| } |
| |
| /* Select a dominance comparison mode if possible for a test of the general |
| form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms. |
| COND_OR == DOM_CC_X_AND_Y => (X && Y) |
| COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y) |
| COND_OR == DOM_CC_X_OR_Y => (X || Y) |
| In all cases OP will be either EQ or NE, but we don't need to know which |
| here. If we are unable to support a dominance comparison we return |
| CC mode. This will then fail to match for the RTL expressions that |
| generate this call. */ |
| machine_mode |
| arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or) |
| { |
| enum rtx_code cond1, cond2; |
| int swapped = 0; |
| |
| /* Currently we will probably get the wrong result if the individual |
| comparisons are not simple. This also ensures that it is safe to |
| reverse a comparison if necessary. */ |
| if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1)) |
| != CCmode) |
| || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1)) |
| != CCmode)) |
| return CCmode; |
| |
| /* The if_then_else variant of this tests the second condition if the |
| first passes, but is true if the first fails. Reverse the first |
| condition to get a true "inclusive-or" expression. */ |
| if (cond_or == DOM_CC_NX_OR_Y) |
| cond1 = reverse_condition (cond1); |
| |
| /* If the comparisons are not equal, and one doesn't dominate the other, |
| then we can't do this. */ |
| if (cond1 != cond2 |
| && !comparison_dominates_p (cond1, cond2) |
| && (swapped = 1, !comparison_dominates_p (cond2, cond1))) |
| return CCmode; |
| |
| if (swapped) |
| std::swap (cond1, cond2); |
| |
| switch (cond1) |
| { |
| case EQ: |
| if (cond_or == DOM_CC_X_AND_Y) |
| return CC_DEQmode; |
| |
| switch (cond2) |
| { |
| case EQ: return CC_DEQmode; |
| case LE: return CC_DLEmode; |
| case LEU: return CC_DLEUmode; |
| case GE: return CC_DGEmode; |
| case GEU: return CC_DGEUmode; |
| default: gcc_unreachable (); |
| } |
| |
| case LT: |
| if (cond_or == DOM_CC_X_AND_Y) |
| return CC_DLTmode; |
| |
| switch (cond2) |
| { |
| case LT: |
| return CC_DLTmode; |
| case LE: |
| return CC_DLEmode; |
| case NE: |
| return CC_DNEmode; |
| default: |
| gcc_unreachable (); |
| } |
| |
| case GT: |
| if (cond_or == DOM_CC_X_AND_Y) |
| return CC_DGTmode; |
| |
| switch (cond2) |
| { |
| case GT: |
| return CC_DGTmode; |
| case GE: |
| return CC_DGEmode; |
| case NE: |
| return CC_DNEmode; |
| default: |
| gcc_unreachable (); |
| } |
| |
| case LTU: |
| if (cond_or == DOM_CC_X_AND_Y) |
| return CC_DLTUmode; |
| |
| switch (cond2) |
| { |
| |