| /* Machine description for AArch64 architecture. |
| Copyright (C) 2009-2015 Free Software Foundation, Inc. |
| Contributed by ARM Ltd. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it |
| under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, but |
| WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "tm.h" |
| #include "insn-codes.h" |
| #include "rtl.h" |
| #include "insn-attr.h" |
| #include "hash-set.h" |
| #include "machmode.h" |
| #include "vec.h" |
| #include "double-int.h" |
| #include "input.h" |
| #include "alias.h" |
| #include "symtab.h" |
| #include "wide-int.h" |
| #include "inchash.h" |
| #include "tree.h" |
| #include "fold-const.h" |
| #include "stringpool.h" |
| #include "stor-layout.h" |
| #include "calls.h" |
| #include "varasm.h" |
| #include "regs.h" |
| #include "dominance.h" |
| #include "cfg.h" |
| #include "cfgrtl.h" |
| #include "cfganal.h" |
| #include "lcm.h" |
| #include "cfgbuild.h" |
| #include "cfgcleanup.h" |
| #include "predict.h" |
| #include "basic-block.h" |
| #include "df.h" |
| #include "hard-reg-set.h" |
| #include "output.h" |
| #include "hashtab.h" |
| #include "function.h" |
| #include "flags.h" |
| #include "statistics.h" |
| #include "real.h" |
| #include "fixed-value.h" |
| #include "insn-config.h" |
| #include "expmed.h" |
| #include "dojump.h" |
| #include "explow.h" |
| #include "emit-rtl.h" |
| #include "stmt.h" |
| #include "expr.h" |
| #include "reload.h" |
| #include "toplev.h" |
| #include "target.h" |
| #include "target-def.h" |
| #include "targhooks.h" |
| #include "ggc.h" |
| #include "tm_p.h" |
| #include "recog.h" |
| #include "langhooks.h" |
| #include "diagnostic-core.h" |
| #include "hash-table.h" |
| #include "tree-ssa-alias.h" |
| #include "internal-fn.h" |
| #include "gimple-fold.h" |
| #include "tree-eh.h" |
| #include "gimple-expr.h" |
| #include "is-a.h" |
| #include "gimple.h" |
| #include "gimplify.h" |
| #include "optabs.h" |
| #include "dwarf2.h" |
| #include "cfgloop.h" |
| #include "tree-vectorizer.h" |
| #include "aarch64-cost-tables.h" |
| #include "dumpfile.h" |
| #include "builtins.h" |
| #include "rtl-iter.h" |
| #include "tm-constrs.h" |
| #include "sched-int.h" |
| |
| /* Defined for convenience. */ |
| #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) |
| |
| /* Classifies an address. |
| |
| ADDRESS_REG_IMM |
| A simple base register plus immediate offset. |
| |
| ADDRESS_REG_WB |
| A base register indexed by immediate offset with writeback. |
| |
| ADDRESS_REG_REG |
| A base register indexed by (optionally scaled) register. |
| |
| ADDRESS_REG_UXTW |
| A base register indexed by (optionally scaled) zero-extended register. |
| |
| ADDRESS_REG_SXTW |
| A base register indexed by (optionally scaled) sign-extended register. |
| |
| ADDRESS_LO_SUM |
| A LO_SUM rtx with a base register and "LO12" symbol relocation. |
| |
| ADDRESS_SYMBOLIC
| A constant symbolic address, in a pc-relative literal pool. */
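|
| /* As AArch64 assembly operands these roughly correspond to, for example,
| [x0, #16] (ADDRESS_REG_IMM), [x0, #16]! or [x0], #16 (ADDRESS_REG_WB),
| [x0, x1, lsl #3] (ADDRESS_REG_REG), [x0, w1, uxtw #2] or [x0, w1, sxtw #2]
| (ADDRESS_REG_UXTW / ADDRESS_REG_SXTW), [x0, #:lo12:sym] (ADDRESS_LO_SUM)
| and a PC-relative literal load (ADDRESS_SYMBOLIC). */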
| |
| enum aarch64_address_type { |
| ADDRESS_REG_IMM, |
| ADDRESS_REG_WB, |
| ADDRESS_REG_REG, |
| ADDRESS_REG_UXTW, |
| ADDRESS_REG_SXTW, |
| ADDRESS_LO_SUM, |
| ADDRESS_SYMBOLIC |
| }; |
| |
| struct aarch64_address_info { |
| enum aarch64_address_type type; |
| rtx base; |
| rtx offset; |
| int shift; |
| enum aarch64_symbol_type symbol_type; |
| }; |
| |
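| /* Describes an AdvSIMD immediate (as used by MOVI/MVNI and friends):
| broadly, VALUE holds the immediate, ELEMENT_WIDTH the width in bits of
| each vector element, SHIFT the left shift applied to the value, MVN
| whether the inverted (MVNI-style) form is used and MSL whether the shift
| is a "shift ones" (MSL) rather than an LSL. */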
| struct simd_immediate_info |
| { |
| rtx value; |
| int shift; |
| int element_width; |
| bool mvn; |
| bool msl; |
| }; |
| |
| /* The current code model. */ |
| enum aarch64_code_model aarch64_cmodel; |
| |
| #ifdef HAVE_AS_TLS |
| #undef TARGET_HAVE_TLS |
| #define TARGET_HAVE_TLS 1 |
| #endif |
| |
| static bool aarch64_composite_type_p (const_tree, machine_mode); |
| static bool aarch64_vfp_is_call_or_return_candidate (machine_mode, |
| const_tree, |
| machine_mode *, int *, |
| bool *); |
| static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; |
| static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; |
| static void aarch64_override_options_after_change (void); |
| static bool aarch64_vector_mode_supported_p (machine_mode); |
| static unsigned bit_count (unsigned HOST_WIDE_INT); |
| static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode, |
| const unsigned char *sel); |
| static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool); |
| |
| /* Major revision number of the ARM Architecture implemented by the target. */ |
| unsigned aarch64_architecture_version; |
| |
| /* The processor for which instructions should be scheduled. */ |
| enum aarch64_processor aarch64_tune = cortexa53; |
| |
| /* The current tuning set. */ |
| const struct tune_params *aarch64_tune_params; |
| |
| /* Mask to specify which instructions we are allowed to generate. */ |
| unsigned long aarch64_isa_flags = 0; |
| |
| /* Mask to specify which instruction scheduling options should be used. */ |
| unsigned long aarch64_tune_flags = 0; |
| |
| /* Tuning parameters. */ |
| |
| static const struct cpu_addrcost_table generic_addrcost_table = |
| { |
| { |
| 0, /* hi */ |
| 0, /* si */ |
| 0, /* di */ |
| 0, /* ti */ |
| }, |
| 0, /* pre_modify */ |
| 0, /* post_modify */ |
| 0, /* register_offset */ |
| 0, /* register_extend */ |
| 0 /* imm_offset */ |
| }; |
| |
| static const struct cpu_addrcost_table cortexa57_addrcost_table = |
| { |
| { |
| 1, /* hi */ |
| 0, /* si */ |
| 0, /* di */ |
| 1, /* ti */ |
| }, |
| 0, /* pre_modify */ |
| 0, /* post_modify */ |
| 0, /* register_offset */ |
| 0, /* register_extend */ |
| 0, /* imm_offset */ |
| }; |
| |
| static const struct cpu_addrcost_table xgene1_addrcost_table = |
| { |
| { |
| 1, /* hi */ |
| 0, /* si */ |
| 0, /* di */ |
| 1, /* ti */ |
| }, |
| 1, /* pre_modify */ |
| 0, /* post_modify */ |
| 0, /* register_offset */ |
| 1, /* register_extend */ |
| 0, /* imm_offset */ |
| }; |
| |
| static const struct cpu_regmove_cost generic_regmove_cost = |
| { |
| 1, /* GP2GP */ |
| /* Avoid the use of slow int<->fp moves for spilling by setting |
| their cost higher than memmov_cost. */ |
| 5, /* GP2FP */ |
| 5, /* FP2GP */ |
| 2 /* FP2FP */ |
| }; |
| |
| static const struct cpu_regmove_cost cortexa57_regmove_cost = |
| { |
| 1, /* GP2GP */ |
| /* Avoid the use of slow int<->fp moves for spilling by setting |
| their cost higher than memmov_cost. */ |
| 5, /* GP2FP */ |
| 5, /* FP2GP */ |
| 2 /* FP2FP */ |
| }; |
| |
| static const struct cpu_regmove_cost cortexa53_regmove_cost = |
| { |
| 1, /* GP2GP */ |
| /* Avoid the use of slow int<->fp moves for spilling by setting |
| their cost higher than memmov_cost. */ |
| 5, /* GP2FP */ |
| 5, /* FP2GP */ |
| 2 /* FP2FP */ |
| }; |
| |
| static const struct cpu_regmove_cost thunderx_regmove_cost = |
| { |
| 2, /* GP2GP */ |
| 2, /* GP2FP */ |
| 6, /* FP2GP */ |
| 4 /* FP2FP */ |
| }; |
| |
| static const struct cpu_regmove_cost xgene1_regmove_cost = |
| { |
| 1, /* GP2GP */ |
| /* Avoid the use of slow int<->fp moves for spilling by setting |
| their cost higher than memmov_cost. */ |
| 8, /* GP2FP */ |
| 8, /* FP2GP */ |
| 2 /* FP2FP */ |
| }; |
| |
| /* Generic costs for vector insn classes. */ |
| static const struct cpu_vector_cost generic_vector_cost = |
| { |
| 1, /* scalar_stmt_cost */ |
| 1, /* scalar_load_cost */ |
| 1, /* scalar_store_cost */ |
| 1, /* vec_stmt_cost */ |
| 1, /* vec_to_scalar_cost */ |
| 1, /* scalar_to_vec_cost */ |
| 1, /* vec_align_load_cost */ |
| 1, /* vec_unalign_load_cost */ |
| 1, /* vec_unalign_store_cost */ |
| 1, /* vec_store_cost */ |
| 3, /* cond_taken_branch_cost */ |
| 1 /* cond_not_taken_branch_cost */ |
| }; |
| |
| /* Costs for vector insn classes for Cortex-A57. */
| static const struct cpu_vector_cost cortexa57_vector_cost = |
| { |
| 1, /* scalar_stmt_cost */ |
| 4, /* scalar_load_cost */ |
| 1, /* scalar_store_cost */ |
| 3, /* vec_stmt_cost */ |
| 8, /* vec_to_scalar_cost */ |
| 8, /* scalar_to_vec_cost */ |
| 5, /* vec_align_load_cost */ |
| 5, /* vec_unalign_load_cost */ |
| 1, /* vec_unalign_store_cost */ |
| 1, /* vec_store_cost */ |
| 1, /* cond_taken_branch_cost */ |
| 1 /* cond_not_taken_branch_cost */ |
| }; |
| |
| /* Costs for vector insn classes for Xgene1. */
| static const struct cpu_vector_cost xgene1_vector_cost = |
| { |
| 1, /* scalar_stmt_cost */ |
| 5, /* scalar_load_cost */ |
| 1, /* scalar_store_cost */ |
| 2, /* vec_stmt_cost */ |
| 4, /* vec_to_scalar_cost */ |
| 4, /* scalar_to_vec_cost */ |
| 10, /* vec_align_load_cost */ |
| 10, /* vec_unalign_load_cost */ |
| 2, /* vec_unalign_store_cost */ |
| 2, /* vec_store_cost */ |
| 2, /* cond_taken_branch_cost */ |
| 1 /* cond_not_taken_branch_cost */ |
| }; |
| |
| #define AARCH64_FUSE_NOTHING (0) |
| #define AARCH64_FUSE_MOV_MOVK (1 << 0) |
| #define AARCH64_FUSE_ADRP_ADD (1 << 1) |
| #define AARCH64_FUSE_MOVK_MOVK (1 << 2) |
| #define AARCH64_FUSE_ADRP_LDR (1 << 3) |
| #define AARCH64_FUSE_CMP_BRANCH (1 << 4) |
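| /* For example, AARCH64_FUSE_ADRP_ADD is used to keep an "adrp x0, sym" /
| "add x0, x0, :lo12:sym" pair adjacent during scheduling, and
| AARCH64_FUSE_MOV_MOVK does the same for a MOV immediate followed by a
| MOVK, so that cores which macro-fuse these pairs can benefit. */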
| |
| static const struct tune_params generic_tunings = |
| { |
| &cortexa57_extra_costs, |
| &generic_addrcost_table, |
| &generic_regmove_cost, |
| &generic_vector_cost, |
| 4, /* memmov_cost */ |
| 2, /* issue_rate */ |
| AARCH64_FUSE_NOTHING, /* fuseable_ops */ |
| 8, /* function_align. */ |
| 8, /* jump_align. */ |
| 4, /* loop_align. */ |
| 2, /* int_reassoc_width. */ |
| 4, /* fp_reassoc_width. */ |
| 1 /* vec_reassoc_width. */ |
| }; |
| |
| static const struct tune_params cortexa53_tunings = |
| { |
| &cortexa53_extra_costs, |
| &generic_addrcost_table, |
| &cortexa53_regmove_cost, |
| &generic_vector_cost, |
| 4, /* memmov_cost */ |
| 2, /* issue_rate */ |
| (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD |
| | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fuseable_ops */ |
| 8, /* function_align. */ |
| 8, /* jump_align. */ |
| 4, /* loop_align. */ |
| 2, /* int_reassoc_width. */ |
| 4, /* fp_reassoc_width. */ |
| 1 /* vec_reassoc_width. */ |
| }; |
| |
| static const struct tune_params cortexa57_tunings = |
| { |
| &cortexa57_extra_costs, |
| &cortexa57_addrcost_table, |
| &cortexa57_regmove_cost, |
| &cortexa57_vector_cost, |
| 4, /* memmov_cost */ |
| 3, /* issue_rate */ |
| (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD |
| | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */ |
| 16, /* function_align. */ |
| 8, /* jump_align. */ |
| 4, /* loop_align. */ |
| 2, /* int_reassoc_width. */ |
| 4, /* fp_reassoc_width. */ |
| 1 /* vec_reassoc_width. */ |
| }; |
| |
| static const struct tune_params thunderx_tunings = |
| { |
| &thunderx_extra_costs, |
| &generic_addrcost_table, |
| &thunderx_regmove_cost, |
| &generic_vector_cost, |
| 6, /* memmov_cost */ |
| 2, /* issue_rate */ |
| AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */ |
| 8, /* function_align. */ |
| 8, /* jump_align. */ |
| 8, /* loop_align. */ |
| 2, /* int_reassoc_width. */ |
| 4, /* fp_reassoc_width. */ |
| 1 /* vec_reassoc_width. */ |
| }; |
| |
| static const struct tune_params xgene1_tunings = |
| { |
| &xgene1_extra_costs, |
| &xgene1_addrcost_table, |
| &xgene1_regmove_cost, |
| &xgene1_vector_cost, |
| 6, /* memmov_cost */ |
| 4, /* issue_rate */ |
| AARCH64_FUSE_NOTHING, /* fuseable_ops */ |
| 16, /* function_align. */ |
| 8, /* jump_align. */ |
| 16, /* loop_align. */ |
| 2, /* int_reassoc_width. */ |
| 4, /* fp_reassoc_width. */ |
| 1 /* vec_reassoc_width. */ |
| }; |
| |
| /* A processor implementing AArch64. */ |
| struct processor |
| { |
| const char *const name; |
| enum aarch64_processor core; |
| const char *arch; |
| unsigned architecture_version; |
| const unsigned long flags; |
| const struct tune_params *const tune; |
| }; |
| |
| /* Processor cores implementing AArch64. */ |
| static const struct processor all_cores[] = |
| { |
| #define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \ |
| {NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings}, |
| #include "aarch64-cores.def" |
| #undef AARCH64_CORE |
| {"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings}, |
| {NULL, aarch64_none, NULL, 0, 0, NULL} |
| }; |
| |
| /* Architectures implementing AArch64. */ |
| static const struct processor all_architectures[] = |
| { |
| #define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \ |
| {NAME, CORE, #ARCH, ARCH, FLAGS, NULL}, |
| #include "aarch64-arches.def" |
| #undef AARCH64_ARCH |
| {NULL, aarch64_none, NULL, 0, 0, NULL} |
| }; |
| |
| /* Target specification. These are populated as command-line arguments
| are processed, or NULL if not specified. */ |
| static const struct processor *selected_arch; |
| static const struct processor *selected_cpu; |
| static const struct processor *selected_tune; |
| |
| #define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0) |
| |
| /* An ISA extension in the co-processor and main instruction set space. */ |
| struct aarch64_option_extension |
| { |
| const char *const name; |
| const unsigned long flags_on; |
| const unsigned long flags_off; |
| }; |
| |
| /* ISA extensions in AArch64. */ |
| static const struct aarch64_option_extension all_extensions[] = |
| { |
| #define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \ |
| {NAME, FLAGS_ON, FLAGS_OFF}, |
| #include "aarch64-option-extensions.def" |
| #undef AARCH64_OPT_EXTENSION |
| {NULL, 0, 0} |
| }; |
| |
| /* Used to track the size of an address when generating a pre/post |
| increment address. */ |
| static machine_mode aarch64_memory_reference_mode; |
| |
| /* A table of valid AArch64 "bitmask immediate" values for |
| logical instructions. */ |
| |
| #define AARCH64_NUM_BITMASKS 5334 |
| static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS]; |
| |
| typedef enum aarch64_cond_code |
| { |
| AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL, |
| AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT, |
| AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV |
| } |
| aarch64_cc; |
| |
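| /* The condition codes above are laid out in complementary pairs (EQ/NE,
| CS/CC, MI/PL, VS/VC, HI/LS, GE/LT, GT/LE, AL/NV), so flipping the least
| significant bit of a code gives its inverse; e.g. this maps AARCH64_GE
| to AARCH64_LT. */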
| #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1)) |
| |
| /* The condition codes of the processor, and the inverse function. */ |
| static const char * const aarch64_condition_codes[] = |
| { |
| "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", |
| "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" |
| }; |
| |
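| /* Implement TARGET_MIN_DIVISIONS_FOR_RECIP_MUL. */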
| static unsigned int |
| aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED) |
| { |
| return 2; |
| } |
| |
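| /* Implement TARGET_SCHED_REASSOCIATION_WIDTH. Return the reassociation
| width to use for operations of mode MODE, taken from the current tuning. */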
| static int |
| aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED, |
| enum machine_mode mode) |
| { |
| if (VECTOR_MODE_P (mode)) |
| return aarch64_tune_params->vec_reassoc_width; |
| if (INTEGRAL_MODE_P (mode)) |
| return aarch64_tune_params->int_reassoc_width; |
| if (FLOAT_MODE_P (mode)) |
| return aarch64_tune_params->fp_reassoc_width; |
| return 1; |
| } |
| |
| /* Provide a mapping from gcc register numbers to dwarf register numbers. */ |
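| /* (Per the AArch64 DWARF register numbering, the general registers map to
| 0-30, SP to 31 and the FP/SIMD registers V0-V31 to 64-95.) */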
| unsigned |
| aarch64_dbx_register_number (unsigned regno) |
| { |
| if (GP_REGNUM_P (regno)) |
| return AARCH64_DWARF_R0 + regno - R0_REGNUM; |
| else if (regno == SP_REGNUM) |
| return AARCH64_DWARF_SP; |
| else if (FP_REGNUM_P (regno)) |
| return AARCH64_DWARF_V0 + regno - V0_REGNUM; |
| |
| /* Return values >= DWARF_FRAME_REGISTERS indicate that there is no |
| equivalent DWARF register. */ |
| return DWARF_FRAME_REGISTERS; |
| } |
| |
| /* Return TRUE if MODE is one of the large opaque integer modes used for
| AdvSIMD structure types (OImode, CImode or XImode). */
| static bool |
| aarch64_vect_struct_mode_p (machine_mode mode) |
| { |
| return mode == OImode || mode == CImode || mode == XImode; |
| } |
| |
| /* Return TRUE if MODE is any of the vector modes. */ |
| static bool |
| aarch64_vector_mode_p (machine_mode mode) |
| { |
| return aarch64_vector_mode_supported_p (mode) |
| || aarch64_vect_struct_mode_p (mode); |
| } |
| |
| /* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */ |
| static bool |
| aarch64_array_mode_supported_p (machine_mode mode, |
| unsigned HOST_WIDE_INT nelems) |
| { |
| if (TARGET_SIMD |
| && AARCH64_VALID_SIMD_QREG_MODE (mode) |
| && (nelems >= 2 && nelems <= 4)) |
| return true; |
| |
| return false; |
| } |
| |
| /* Implement HARD_REGNO_NREGS. */ |
| |
| int |
| aarch64_hard_regno_nregs (unsigned regno, machine_mode mode) |
| { |
| switch (aarch64_regno_regclass (regno)) |
| { |
| case FP_REGS: |
| case FP_LO_REGS: |
| return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG; |
| default: |
| return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| } |
| gcc_unreachable (); |
| } |
| |
| /* Implement HARD_REGNO_MODE_OK. */ |
| |
| int |
| aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode) |
| { |
| if (GET_MODE_CLASS (mode) == MODE_CC) |
| return regno == CC_REGNUM; |
| |
| if (regno == SP_REGNUM) |
| /* The purpose of comparing with ptr_mode is to support the |
| global register variable associated with the stack pointer |
| register via the syntax of asm ("wsp") in ILP32. */ |
| return mode == Pmode || mode == ptr_mode; |
| |
| if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM) |
| return mode == Pmode; |
| |
| if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode)) |
| return 1; |
| |
| if (FP_REGNUM_P (regno)) |
| { |
| if (aarch64_vect_struct_mode_p (mode)) |
| return |
| (regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM; |
| else |
| return 1; |
| } |
| |
| return 0; |
| } |
| |
| /* Implement HARD_REGNO_CALLER_SAVE_MODE. */ |
| machine_mode |
| aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs, |
| machine_mode mode) |
| { |
| /* Handle modes that fit within single registers. */ |
| if (nregs == 1 && GET_MODE_SIZE (mode) <= 16) |
| { |
| if (GET_MODE_SIZE (mode) >= 4) |
| return mode; |
| else |
| return SImode; |
| } |
| /* Fall back to generic for multi-reg and very large modes. */ |
| else |
| return choose_hard_reg_mode (regno, nregs, false); |
| } |
| |
| /* Return true if calls to DECL should be treated as |
| long-calls (i.e. called via a register). */
| static bool |
| aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED) |
| { |
| return false; |
| } |
| |
| /* Return true if calls to symbol-ref SYM should be treated as |
| long-calls (i.e. called via a register). */
| bool |
| aarch64_is_long_call_p (rtx sym) |
| { |
| return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym)); |
| } |
| |
| /* Return true if the offsets to a zero/sign-extract operation |
| represent an expression that matches an extend operation. The |
| operands represent the parameters from
| |
| (extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */ |
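| /* For instance, with MODE == DImode, MULT_IMM == 4 and EXTRACT_IMM == 34
| the extract describes (mult (zero_extend:DI (reg:SI)) (const_int 4)),
| i.e. a "uxtw #2" extended operand: the low three bits of EXTRACT_IMM
| give the shift amount and the remaining bits the width of the value
| being extended. */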
| bool |
| aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm, |
| rtx extract_imm) |
| { |
| HOST_WIDE_INT mult_val, extract_val; |
| |
| if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm)) |
| return false; |
| |
| mult_val = INTVAL (mult_imm); |
| extract_val = INTVAL (extract_imm); |
| |
| if (extract_val > 8 |
| && extract_val < GET_MODE_BITSIZE (mode) |
| && exact_log2 (extract_val & ~7) > 0 |
| && (extract_val & 7) <= 4 |
| && mult_val == (1 << (extract_val & 7))) |
| return true; |
| |
| return false; |
| } |
| |
| /* Emit an insn that's a simple single-set. Both the operands must be |
| known to be valid. */ |
| inline static rtx |
| emit_set_insn (rtx x, rtx y) |
| { |
| return emit_insn (gen_rtx_SET (VOIDmode, x, y)); |
| } |
| |
| /* X and Y are two things to compare using CODE. Emit the compare insn and |
| return the rtx for register 0 in the proper mode. */ |
| rtx |
| aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) |
| { |
| machine_mode mode = SELECT_CC_MODE (code, x, y); |
| rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); |
| |
| emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); |
| return cc_reg; |
| } |
| |
| /* Build the SYMBOL_REF for __tls_get_addr. */ |
| |
| static GTY(()) rtx tls_get_addr_libfunc; |
| |
| rtx |
| aarch64_tls_get_addr (void) |
| { |
| if (!tls_get_addr_libfunc) |
| tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); |
| return tls_get_addr_libfunc; |
| } |
| |
| /* Return the TLS model to use for ADDR. */ |
| |
| static enum tls_model |
| tls_symbolic_operand_type (rtx addr) |
| { |
| enum tls_model tls_kind = TLS_MODEL_NONE; |
| rtx sym, addend; |
| |
| if (GET_CODE (addr) == CONST) |
| { |
| split_const (addr, &sym, &addend); |
| if (GET_CODE (sym) == SYMBOL_REF) |
| tls_kind = SYMBOL_REF_TLS_MODEL (sym); |
| } |
| else if (GET_CODE (addr) == SYMBOL_REF) |
| tls_kind = SYMBOL_REF_TLS_MODEL (addr); |
| |
| return tls_kind; |
| } |
| |
| /* We allow LO_SUMs in our legitimate addresses so that combine can take
| care of combining addresses where necessary; for generation purposes,
| however, we generate the address as:
| RTL Absolute |
| tmp = hi (symbol_ref); adrp x1, foo |
| dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo_12:foo |
| nop |
| |
| PIC TLS |
| adrp x1, :got:foo adrp tmp, :tlsgd:foo |
| ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo |
| bl __tls_get_addr |
| nop |
| |
| Load TLS symbol, depending on TLS mechanism and TLS access model. |
| |
| Global Dynamic - Traditional TLS: |
| adrp tmp, :tlsgd:imm |
| add dest, tmp, #:tlsgd_lo12:imm |
| bl __tls_get_addr |
| |
| Global Dynamic - TLS Descriptors: |
| adrp dest, :tlsdesc:imm |
| ldr tmp, [dest, #:tlsdesc_lo12:imm] |
| add dest, dest, #:tlsdesc_lo12:imm |
| blr tmp |
| mrs tp, tpidr_el0 |
| add dest, dest, tp |
| |
| Initial Exec: |
| mrs tp, tpidr_el0 |
| adrp tmp, :gottprel:imm |
| ldr dest, [tmp, #:gottprel_lo12:imm] |
| add dest, dest, tp |
| |
| Local Exec: |
| mrs tp, tpidr_el0 |
| add t0, tp, #:tprel_hi12:imm, lsl #12 |
| add t0, t0, #:tprel_lo12_nc:imm |
| */ |
| |
| static void |
| aarch64_load_symref_appropriately (rtx dest, rtx imm, |
| enum aarch64_symbol_type type) |
| { |
| switch (type) |
| { |
| case SYMBOL_SMALL_ABSOLUTE: |
| { |
| /* In ILP32, the mode of dest can be either SImode or DImode. */ |
| rtx tmp_reg = dest; |
| machine_mode mode = GET_MODE (dest); |
| |
| gcc_assert (mode == Pmode || mode == ptr_mode); |
| |
| if (can_create_pseudo_p ()) |
| tmp_reg = gen_reg_rtx (mode); |
| |
| emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm)); |
| emit_insn (gen_add_losym (dest, tmp_reg, imm)); |
| return; |
| } |
| |
| case SYMBOL_TINY_ABSOLUTE: |
| emit_insn (gen_rtx_SET (Pmode, dest, imm)); |
| return; |
| |
| case SYMBOL_SMALL_GOT: |
| { |
| /* In ILP32, the mode of dest can be either SImode or DImode, |
| while the got entry is always of SImode size. The mode of |
| dest depends on how dest is used: if dest is assigned to a |
| pointer (e.g. in the memory), it has SImode; it may have |
| DImode if dest is dereferenced to access the memory.
| This is why we have to handle three different ldr_got_small |
| patterns here (two patterns for ILP32). */ |
| rtx tmp_reg = dest; |
| machine_mode mode = GET_MODE (dest); |
| |
| if (can_create_pseudo_p ()) |
| tmp_reg = gen_reg_rtx (mode); |
| |
| emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm)); |
| if (mode == ptr_mode) |
| { |
| if (mode == DImode) |
| emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm)); |
| else |
| emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm)); |
| } |
| else |
| { |
| gcc_assert (mode == Pmode); |
| emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm)); |
| } |
| |
| return; |
| } |
| |
| case SYMBOL_SMALL_TLSGD: |
| { |
| rtx_insn *insns; |
| rtx result = gen_rtx_REG (Pmode, R0_REGNUM); |
| |
| start_sequence (); |
| aarch64_emit_call_insn (gen_tlsgd_small (result, imm)); |
| insns = get_insns (); |
| end_sequence (); |
| |
| RTL_CONST_CALL_P (insns) = 1; |
| emit_libcall_block (insns, dest, result, imm); |
| return; |
| } |
| |
| case SYMBOL_SMALL_TLSDESC: |
| { |
| machine_mode mode = GET_MODE (dest); |
| rtx x0 = gen_rtx_REG (mode, R0_REGNUM); |
| rtx tp; |
| |
| gcc_assert (mode == Pmode || mode == ptr_mode); |
| |
| /* In ILP32, the got entry is always of SImode size. Unlike |
| small GOT, the dest is fixed at reg 0. */ |
| if (TARGET_ILP32) |
| emit_insn (gen_tlsdesc_small_si (imm)); |
| else |
| emit_insn (gen_tlsdesc_small_di (imm)); |
| tp = aarch64_load_tp (NULL); |
| |
| if (mode != Pmode) |
| tp = gen_lowpart (mode, tp); |
| |
| emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0))); |
| set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); |
| return; |
| } |
| |
| case SYMBOL_SMALL_GOTTPREL: |
| { |
| /* In ILP32, the mode of dest can be either SImode or DImode, |
| while the got entry is always of SImode size. The mode of |
| dest depends on how dest is used: if dest is assigned to a |
| pointer (e.g. in the memory), it has SImode; it may have |
| DImode if dest is dereferenced to access the memory.
| This is why we have to handle three different tlsie_small |
| patterns here (two patterns for ILP32). */ |
| machine_mode mode = GET_MODE (dest); |
| rtx tmp_reg = gen_reg_rtx (mode); |
| rtx tp = aarch64_load_tp (NULL); |
| |
| if (mode == ptr_mode) |
| { |
| if (mode == DImode) |
| emit_insn (gen_tlsie_small_di (tmp_reg, imm)); |
| else |
| { |
| emit_insn (gen_tlsie_small_si (tmp_reg, imm)); |
| tp = gen_lowpart (mode, tp); |
| } |
| } |
| else |
| { |
| gcc_assert (mode == Pmode); |
| emit_insn (gen_tlsie_small_sidi (tmp_reg, imm)); |
| } |
| |
| emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg))); |
| set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); |
| return; |
| } |
| |
| case SYMBOL_SMALL_TPREL: |
| { |
| rtx tp = aarch64_load_tp (NULL); |
| |
| if (GET_MODE (dest) != Pmode) |
| tp = gen_lowpart (GET_MODE (dest), tp); |
| |
| emit_insn (gen_tlsle_small (dest, tp, imm)); |
| set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); |
| return; |
| } |
| |
| case SYMBOL_TINY_GOT: |
| emit_insn (gen_ldr_got_tiny (dest, imm)); |
| return; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Emit a move from SRC to DEST. Assume that the move expanders can |
| handle all moves if !can_create_pseudo_p (). The distinction is |
| important because, unlike emit_move_insn, the move expanders know |
| how to force Pmode objects into the constant pool even when the |
| constant pool address is not itself legitimate. */ |
| static rtx |
| aarch64_emit_move (rtx dest, rtx src) |
| { |
| return (can_create_pseudo_p () |
| ? emit_move_insn (dest, src) |
| : emit_move_insn_1 (dest, src)); |
| } |
| |
| /* Split a 128-bit move operation into two 64-bit move operations, |
| taking care to handle partial overlap of register to register |
| copies. Special cases are needed when moving between GP regs and |
| FP regs. SRC can be a register, constant or memory; DST a register |
| or memory. If either operand is memory it must not have any side |
| effects. */ |
| void |
| aarch64_split_128bit_move (rtx dst, rtx src) |
| { |
| rtx dst_lo, dst_hi; |
| rtx src_lo, src_hi; |
| |
| machine_mode mode = GET_MODE (dst); |
| |
| gcc_assert (mode == TImode || mode == TFmode); |
| gcc_assert (!(side_effects_p (src) || side_effects_p (dst))); |
| gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode); |
| |
| if (REG_P (dst) && REG_P (src)) |
| { |
| int src_regno = REGNO (src); |
| int dst_regno = REGNO (dst); |
| |
| /* Handle FP <-> GP regs. */ |
| if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno)) |
| { |
| src_lo = gen_lowpart (word_mode, src); |
| src_hi = gen_highpart (word_mode, src); |
| |
| if (mode == TImode) |
| { |
| emit_insn (gen_aarch64_movtilow_di (dst, src_lo)); |
| emit_insn (gen_aarch64_movtihigh_di (dst, src_hi)); |
| } |
| else |
| { |
| emit_insn (gen_aarch64_movtflow_di (dst, src_lo)); |
| emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi)); |
| } |
| return; |
| } |
| else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno)) |
| { |
| dst_lo = gen_lowpart (word_mode, dst); |
| dst_hi = gen_highpart (word_mode, dst); |
| |
| if (mode == TImode) |
| { |
| emit_insn (gen_aarch64_movdi_tilow (dst_lo, src)); |
| emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src)); |
| } |
| else |
| { |
| emit_insn (gen_aarch64_movdi_tflow (dst_lo, src)); |
| emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src)); |
| } |
| return; |
| } |
| } |
| |
| dst_lo = gen_lowpart (word_mode, dst); |
| dst_hi = gen_highpart (word_mode, dst); |
| src_lo = gen_lowpart (word_mode, src); |
| src_hi = gen_highpart_mode (word_mode, mode, src); |
| |
| /* At most one pairing may overlap. */ |
| if (reg_overlap_mentioned_p (dst_lo, src_hi)) |
| { |
| aarch64_emit_move (dst_hi, src_hi); |
| aarch64_emit_move (dst_lo, src_lo); |
| } |
| else |
| { |
| aarch64_emit_move (dst_lo, src_lo); |
| aarch64_emit_move (dst_hi, src_hi); |
| } |
| } |
| |
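| /* Return TRUE if a 128-bit move from SRC to DST should be split into a
| pair of 64-bit moves; a copy between two FP/SIMD registers is the one
| case that can be left as a single move. */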
| bool |
| aarch64_split_128bit_move_p (rtx dst, rtx src) |
| { |
| return (! REG_P (src) |
| || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); |
| } |
| |
| /* Split a complex SIMD combine. */ |
| |
| void |
| aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2) |
| { |
| machine_mode src_mode = GET_MODE (src1); |
| machine_mode dst_mode = GET_MODE (dst); |
| |
| gcc_assert (VECTOR_MODE_P (dst_mode)); |
| |
| if (REG_P (dst) && REG_P (src1) && REG_P (src2)) |
| { |
| rtx (*gen) (rtx, rtx, rtx); |
| |
| switch (src_mode) |
| { |
| case V8QImode: |
| gen = gen_aarch64_simd_combinev8qi; |
| break; |
| case V4HImode: |
| gen = gen_aarch64_simd_combinev4hi; |
| break; |
| case V2SImode: |
| gen = gen_aarch64_simd_combinev2si; |
| break; |
| case V2SFmode: |
| gen = gen_aarch64_simd_combinev2sf; |
| break; |
| case DImode: |
| gen = gen_aarch64_simd_combinedi; |
| break; |
| case DFmode: |
| gen = gen_aarch64_simd_combinedf; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| emit_insn (gen (dst, src1, src2)); |
| return; |
| } |
| } |
| |
| /* Split a complex SIMD move. */ |
| |
| void |
| aarch64_split_simd_move (rtx dst, rtx src) |
| { |
| machine_mode src_mode = GET_MODE (src); |
| machine_mode dst_mode = GET_MODE (dst); |
| |
| gcc_assert (VECTOR_MODE_P (dst_mode)); |
| |
| if (REG_P (dst) && REG_P (src)) |
| { |
| rtx (*gen) (rtx, rtx); |
| |
| gcc_assert (VECTOR_MODE_P (src_mode)); |
| |
| switch (src_mode) |
| { |
| case V16QImode: |
| gen = gen_aarch64_split_simd_movv16qi; |
| break; |
| case V8HImode: |
| gen = gen_aarch64_split_simd_movv8hi; |
| break; |
| case V4SImode: |
| gen = gen_aarch64_split_simd_movv4si; |
| break; |
| case V2DImode: |
| gen = gen_aarch64_split_simd_movv2di; |
| break; |
| case V4SFmode: |
| gen = gen_aarch64_split_simd_movv4sf; |
| break; |
| case V2DFmode: |
| gen = gen_aarch64_split_simd_movv2df; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| emit_insn (gen (dst, src)); |
| return; |
| } |
| } |
| |
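| /* Return TRUE if the constant X of mode XMODE equals the constant Y of
| mode YMODE zero-extended to XMODE. */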
| bool |
| aarch64_zero_extend_const_eq (machine_mode xmode, rtx x, |
| machine_mode ymode, rtx y) |
| { |
| rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode); |
| gcc_assert (r != NULL); |
| return rtx_equal_p (x, r); |
| } |
| |
| |
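| /* Force VALUE into a register of mode MODE. Use a fresh pseudo when
| possible; otherwise move VALUE into X, which must be a register that is
| safe to clobber. */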
| static rtx |
| aarch64_force_temporary (machine_mode mode, rtx x, rtx value) |
| { |
| if (can_create_pseudo_p ()) |
| return force_reg (mode, value); |
| else |
| { |
| x = aarch64_emit_move (x, value); |
| return x; |
| } |
| } |
| |
| |
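| /* Return an rtx representing REG plus OFFSET in mode MODE. If OFFSET is
| not a valid add-immediate, first load it into a register, using TEMP as
| scratch where new pseudos cannot be created. */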
| static rtx |
| aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset) |
| { |
| if (!aarch64_plus_immediate (GEN_INT (offset), mode)) |
| { |
| rtx high; |
| /* Load the full offset into a register. This |
| might be improvable in the future. */ |
| high = GEN_INT (offset); |
| offset = 0; |
| high = aarch64_force_temporary (mode, temp, high); |
| reg = aarch64_force_temporary (mode, temp, |
| gen_rtx_PLUS (mode, high, reg)); |
| } |
| return plus_constant (mode, reg, offset); |
| } |
| |
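| /* Set DEST to the integer constant IMM of mode MODE (SImode or DImode)
| and return the number of instructions required to do so. If GENERATE is
| false, no instructions are emitted; only the count is computed. */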
| static int |
| aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate, |
| machine_mode mode) |
| { |
| unsigned HOST_WIDE_INT mask; |
| int i; |
| bool first; |
| unsigned HOST_WIDE_INT val; |
| bool subtargets; |
| rtx subtarget; |
| int one_match, zero_match, first_not_ffff_match; |
| int num_insns = 0; |
| |
| if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode)) |
| { |
| if (generate) |
| emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); |
| num_insns++; |
| return num_insns; |
| } |
| |
| if (mode == SImode) |
| { |
| /* We know we can't do this in 1 insn, and we must be able to do it |
| in two; so don't mess around looking for sequences that don't buy |
| us anything. */ |
| if (generate) |
| { |
| emit_insn (gen_rtx_SET (VOIDmode, dest, |
| GEN_INT (INTVAL (imm) & 0xffff))); |
| emit_insn (gen_insv_immsi (dest, GEN_INT (16), |
| GEN_INT ((INTVAL (imm) >> 16) & 0xffff))); |
| } |
| num_insns += 2; |
| return num_insns; |
| } |
| |
| /* Remaining cases are all for DImode. */ |
| |
| val = INTVAL (imm); |
| subtargets = optimize && can_create_pseudo_p (); |
| |
| one_match = 0; |
| zero_match = 0; |
| mask = 0xffff; |
| first_not_ffff_match = -1; |
| |
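| /* Count how many 16-bit quarters of VAL are all-ones and how many are
| all-zeros, and remember the first quarter that is not all-ones; this
| guides the choice of move/insert sequence below. */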
| for (i = 0; i < 64; i += 16, mask <<= 16) |
| { |
| if ((val & mask) == mask) |
| one_match++; |
| else |
| { |
| if (first_not_ffff_match < 0) |
| first_not_ffff_match = i; |
| if ((val & mask) == 0) |
| zero_match++; |
| } |
| } |
| |
| if (one_match == 2) |
| { |
| /* Set one of the quarters and then insert back into result. */ |
| mask = 0xffffll << first_not_ffff_match; |
| if (generate) |
| { |
| emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask))); |
| emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match), |
| GEN_INT ((val >> first_not_ffff_match) |
| & 0xffff))); |
| } |
| num_insns += 2; |
| return num_insns; |
| } |
| |
| if (zero_match == 2) |
| goto simple_sequence; |
| |
| mask = 0x0ffff0000UL; |
| for (i = 16; i < 64; i += 16, mask <<= 16) |
| { |
| HOST_WIDE_INT comp = mask & ~(mask - 1); |
| |
| if (aarch64_uimm12_shift (val - (val & mask))) |
| { |
| if (generate) |
| { |
| subtarget = subtargets ? gen_reg_rtx (DImode) : dest; |
| emit_insn (gen_rtx_SET (VOIDmode, subtarget, |
| GEN_INT (val & mask))); |
| emit_insn (gen_adddi3 (dest, subtarget, |
| GEN_INT (val - (val & mask)))); |
| } |
| num_insns += 2; |
| return num_insns; |
| } |
| else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask)))) |
| { |
| if (generate) |
| { |
| subtarget = subtargets ? gen_reg_rtx (DImode) : dest; |
| emit_insn (gen_rtx_SET (VOIDmode, subtarget, |
| GEN_INT ((val + comp) & mask))); |
| emit_insn (gen_adddi3 (dest, subtarget, |
| GEN_INT (val - ((val + comp) & mask)))); |
| } |
| num_insns += 2; |
| return num_insns; |
| } |
| else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask))) |
| { |
| if (generate) |
| { |
| subtarget = subtargets ? gen_reg_rtx (DImode) : dest; |
| emit_insn (gen_rtx_SET (VOIDmode, subtarget, |
| GEN_INT ((val - comp) | ~mask))); |
| emit_insn (gen_adddi3 (dest, subtarget, |
| GEN_INT (val - ((val - comp) | ~mask)))); |
| } |
| num_insns += 2; |
| return num_insns; |
| } |
| else if (aarch64_uimm12_shift (-(val - (val | ~mask)))) |
| { |
| if (generate) |
| { |
| subtarget = subtargets ? gen_reg_rtx (DImode) : dest; |
| emit_insn (gen_rtx_SET (VOIDmode, subtarget, |
| GEN_INT (val | ~mask))); |
| emit_insn (gen_adddi3 (dest, subtarget, |
| GEN_INT (val - (val | ~mask)))); |
| } |
| num_insns += 2; |
| return num_insns; |
| } |
| } |
| |
| /* See if we can do it by arithmetically combining two |
| immediates. */ |
| for (i = 0; i < AARCH64_NUM_BITMASKS; i++) |
| { |
| int j; |
| mask = 0xffff; |
| |
| if (aarch64_uimm12_shift (val - aarch64_bitmasks[i]) |
| || aarch64_uimm12_shift (-val + aarch64_bitmasks[i])) |
| { |
| if (generate) |
| { |
| subtarget = subtargets ? gen_reg_rtx (DImode) : dest; |
| emit_insn (gen_rtx_SET (VOIDmode, subtarget, |
| GEN_INT (aarch64_bitmasks[i]))); |
| emit_insn (gen_adddi3 (dest, subtarget, |
| GEN_INT (val - aarch64_bitmasks[i]))); |
| } |
| num_insns += 2; |
| return num_insns; |
| } |
| |
| for (j = 0; j < 64; j += 16, mask <<= 16) |
| { |
| if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask)) |
| { |
| if (generate) |
| { |
| emit_insn (gen_rtx_SET (VOIDmode, dest, |
| GEN_INT (aarch64_bitmasks[i]))); |
| emit_insn (gen_insv_immdi (dest, GEN_INT (j), |
| GEN_INT ((val >> j) & 0xffff))); |
| } |
| num_insns += 2; |
| return num_insns; |
| } |
| } |
| } |
| |
| /* See if we can do it by logically combining two immediates. */ |
| for (i = 0; i < AARCH64_NUM_BITMASKS; i++) |
| { |
| if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i]) |
| { |
| int j; |
| |
| for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) |
| if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j])) |
| { |
| if (generate) |
| { |
| subtarget = subtargets ? gen_reg_rtx (mode) : dest; |
| emit_insn (gen_rtx_SET (VOIDmode, subtarget, |
| GEN_INT (aarch64_bitmasks[i]))); |
| emit_insn (gen_iordi3 (dest, subtarget, |
| GEN_INT (aarch64_bitmasks[j]))); |
| } |
| num_insns += 2; |
| return num_insns; |
| } |
| } |
| else if ((val & aarch64_bitmasks[i]) == val) |
| { |
| int j; |
| |
| for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++) |
| if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i])) |
| { |
| if (generate) |
| { |
| subtarget = subtargets ? gen_reg_rtx (mode) : dest; |
| emit_insn (gen_rtx_SET (VOIDmode, subtarget, |
| GEN_INT (aarch64_bitmasks[j]))); |
| emit_insn (gen_anddi3 (dest, subtarget, |
| GEN_INT (aarch64_bitmasks[i]))); |
| } |
| num_insns += 2; |
| return num_insns; |
| } |
| } |
| } |
| |
| if (one_match > zero_match) |
| { |
| /* Set either first three quarters or all but the third. */ |
| mask = 0xffffll << (16 - first_not_ffff_match); |
| if (generate) |
| emit_insn (gen_rtx_SET (VOIDmode, dest, |
| GEN_INT (val | mask | 0xffffffff00000000ull))); |
| num_insns ++; |
| |
| /* Now insert other two quarters. */ |
| for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1); |
| i < 64; i += 16, mask <<= 16) |
| { |
| if ((val & mask) != mask) |
| { |
| if (generate) |
| emit_insn (gen_insv_immdi (dest, GEN_INT (i), |
| GEN_INT ((val >> i) & 0xffff))); |
| num_insns ++; |
| } |
| } |
| return num_insns; |
| } |
| |
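| /* Fallback: build the constant sixteen bits at a time, with a move for
| the first non-zero quarter followed by an insert for each remaining
| non-zero quarter. */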
| simple_sequence: |
| first = true; |
| mask = 0xffff; |
| for (i = 0; i < 64; i += 16, mask <<= 16) |
| { |
| if ((val & mask) != 0) |
| { |
| if (first) |
| { |
| if (generate) |
| emit_insn (gen_rtx_SET (VOIDmode, dest, |
| GEN_INT (val & mask))); |
| num_insns ++; |
| first = false; |
| } |
| else |
| { |
| if (generate) |
| emit_insn (gen_insv_immdi (dest, GEN_INT (i), |
| GEN_INT ((val >> i) & 0xffff))); |
| num_insns ++; |
| } |
| } |
| } |
| |
| return num_insns; |
| } |
| |
| |
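| /* Expand a move of the constant IMM (integer, symbolic or otherwise) into
| DEST, a register of mode SImode or DImode. */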
| void |
| aarch64_expand_mov_immediate (rtx dest, rtx imm) |
| { |
| machine_mode mode = GET_MODE (dest); |
| |
| gcc_assert (mode == SImode || mode == DImode); |
| |
| /* Check on what type of symbol it is. */ |
| if (GET_CODE (imm) == SYMBOL_REF |
| || GET_CODE (imm) == LABEL_REF |
| || GET_CODE (imm) == CONST) |
| { |
| rtx mem, base, offset; |
| enum aarch64_symbol_type sty; |
| |
| /* If we have (const (plus symbol offset)), separate out the offset |
| before we start classifying the symbol. */ |
| split_const (imm, &base, &offset); |
| |
| sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR); |
| switch (sty) |
| { |
| case SYMBOL_FORCE_TO_MEM: |
| if (offset != const0_rtx |
| && targetm.cannot_force_const_mem (mode, imm)) |
| { |
| gcc_assert (can_create_pseudo_p ()); |
| base = aarch64_force_temporary (mode, dest, base); |
| base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); |
| aarch64_emit_move (dest, base); |
| return; |
| } |
| mem = force_const_mem (ptr_mode, imm); |
| gcc_assert (mem); |
| if (mode != ptr_mode) |
| mem = gen_rtx_ZERO_EXTEND (mode, mem); |
| emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); |
| return; |
| |
| case SYMBOL_SMALL_TLSGD: |
| case SYMBOL_SMALL_TLSDESC: |
| case SYMBOL_SMALL_GOTTPREL: |
| case SYMBOL_SMALL_GOT: |
| case SYMBOL_TINY_GOT: |
| if (offset != const0_rtx) |
| { |
| gcc_assert(can_create_pseudo_p ()); |
| base = aarch64_force_temporary (mode, dest, base); |
| base = aarch64_add_offset (mode, NULL, base, INTVAL (offset)); |
| aarch64_emit_move (dest, base); |
| return; |
| } |
| /* FALLTHRU */ |
| |
| case SYMBOL_SMALL_TPREL: |
| case SYMBOL_SMALL_ABSOLUTE: |
| case SYMBOL_TINY_ABSOLUTE: |
| aarch64_load_symref_appropriately (dest, imm, sty); |
| return; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| if (!CONST_INT_P (imm)) |
| { |
| if (GET_CODE (imm) == HIGH) |
| emit_insn (gen_rtx_SET (VOIDmode, dest, imm)); |
| else |
| { |
| rtx mem = force_const_mem (mode, imm); |
| gcc_assert (mem); |
| emit_insn (gen_rtx_SET (VOIDmode, dest, mem)); |
| } |
| |
| return; |
| } |
| |
| aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest)); |
| } |
| |
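| /* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */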
| static bool |
| aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED, |
| tree exp ATTRIBUTE_UNUSED) |
| { |
| /* Currently, always true. */ |
| return true; |
| } |
| |
| /* Implement TARGET_PASS_BY_REFERENCE. */ |
| |
| static bool |
| aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED, |
| machine_mode mode, |
| const_tree type, |
| bool named ATTRIBUTE_UNUSED) |
| { |
| HOST_WIDE_INT size; |
| machine_mode dummymode; |
| int nregs; |
| |
| /* GET_MODE_SIZE (BLKmode) is useless since it is 0. */ |
| size = (mode == BLKmode && type) |
| ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| |
| /* Aggregates are passed by reference based on their size. */ |
| if (type && AGGREGATE_TYPE_P (type)) |
| { |
| size = int_size_in_bytes (type); |
| } |
| |
| /* Variable sized arguments are always passed by reference. */
| if (size < 0) |
| return true; |
| |
| /* Can this be a candidate to be passed in fp/simd register(s)? */ |
| if (aarch64_vfp_is_call_or_return_candidate (mode, type, |
| &dummymode, &nregs, |
| NULL)) |
| return false; |
| |
| /* Arguments which are variable sized or larger than 2 registers are |
| passed by reference unless they are a homogeneous floating-point
| aggregate. */ |
| return size > 2 * UNITS_PER_WORD; |
| } |
| |
| /* Return TRUE if VALTYPE is padded towards its least significant bits,
| i.e. its value is returned in the most significant end of the register(s). */
| static bool |
| aarch64_return_in_msb (const_tree valtype) |
| { |
| machine_mode dummy_mode; |
| int dummy_int; |
| |
| /* Never happens in little-endian mode. */ |
| if (!BYTES_BIG_ENDIAN) |
| return false; |
| |
| /* Only composite types smaller than or equal to 16 bytes can |
| be potentially returned in registers. */ |
| if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype)) |
| || int_size_in_bytes (valtype) <= 0 |
| || int_size_in_bytes (valtype) > 16) |
| return false; |
| |
| /* But not a composite that is an HFA (Homogeneous Floating-point Aggregate) |
| or an HVA (Homogeneous Short-Vector Aggregate); such a special composite |
| is always passed/returned in the least significant bits of fp/simd |
| register(s). */ |
| if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype, |
| &dummy_mode, &dummy_int, NULL)) |
| return false; |
| |
| return true; |
| } |
| |
| /* Implement TARGET_FUNCTION_VALUE. |
| Define how to find the value returned by a function. */ |
| |
| static rtx |
| aarch64_function_value (const_tree type, const_tree func, |
| bool outgoing ATTRIBUTE_UNUSED) |
| { |
| machine_mode mode; |
| int unsignedp; |
| int count; |
| machine_mode ag_mode; |
| |
| mode = TYPE_MODE (type); |
| if (INTEGRAL_TYPE_P (type)) |
| mode = promote_function_mode (type, mode, &unsignedp, func, 1); |
| |
| if (aarch64_return_in_msb (type)) |
| { |
| HOST_WIDE_INT size = int_size_in_bytes (type); |
| |
| if (size % UNITS_PER_WORD != 0) |
| { |
| size += UNITS_PER_WORD - size % UNITS_PER_WORD; |
| mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0); |
| } |
| } |
| |
| if (aarch64_vfp_is_call_or_return_candidate (mode, type, |
| &ag_mode, &count, NULL)) |
| { |
| if (!aarch64_composite_type_p (type, mode)) |
| { |
| gcc_assert (count == 1 && mode == ag_mode); |
| return gen_rtx_REG (mode, V0_REGNUM); |
| } |
| else |
| { |
| int i; |
| rtx par; |
| |
| par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); |
| for (i = 0; i < count; i++) |
| { |
| rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i); |
| tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, |
| GEN_INT (i * GET_MODE_SIZE (ag_mode))); |
| XVECEXP (par, 0, i) = tmp; |
| } |
| return par; |
| } |
| } |
| else |
| return gen_rtx_REG (mode, R0_REGNUM); |
| } |
| |
| /* Implements TARGET_FUNCTION_VALUE_REGNO_P. |
| Return true if REGNO is the number of a hard register in which the values |
| of called function may come back. */ |
| |
| static bool |
| aarch64_function_value_regno_p (const unsigned int regno) |
| { |
| /* Maximum of 16 bytes can be returned in the general registers. Examples |
| of 16-byte return values are: 128-bit integers and 16-byte small |
| structures (excluding homogeneous floating-point aggregates). */ |
| if (regno == R0_REGNUM || regno == R1_REGNUM) |
| return true; |
| |
| /* Up to four fp/simd registers can return a function value, e.g. a |
| homogeneous floating-point aggregate having four members. */ |
| if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS) |
| return TARGET_FLOAT; |
| |
| return false; |
| } |
| |
| /* Implement TARGET_RETURN_IN_MEMORY. |
| |
| If the type T of the result of a function is such that |
| void func (T arg) |
| would require that arg be passed as a value in a register (or set of |
| registers) according to the parameter passing rules, then the result |
| is returned in the same registers as would be used for such an |
| argument. */ |
| |
| static bool |
| aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) |
| { |
| HOST_WIDE_INT size; |
| machine_mode ag_mode; |
| int count; |
| |
| if (!AGGREGATE_TYPE_P (type) |
| && TREE_CODE (type) != COMPLEX_TYPE |
| && TREE_CODE (type) != VECTOR_TYPE) |
| /* Simple scalar types are always returned in registers. */
| return false; |
| |
| if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type), |
| type, |
| &ag_mode, |
| &count, |
| NULL)) |
| return false; |
| |
| /* Types larger than 2 registers are returned in memory. */
| size = int_size_in_bytes (type); |
| return (size < 0 || size > 2 * UNITS_PER_WORD); |
| } |
| |
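| /* Return TRUE if an argument of mode MODE and type TYPE is a candidate
| for being passed in SIMD/FP registers. On success set *NREGS to the
| number of registers needed; the element mode is recorded in
| PCUM->aapcs_vfp_rmode. */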
| static bool |
| aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode, |
| const_tree type, int *nregs) |
| { |
| CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); |
| return aarch64_vfp_is_call_or_return_candidate (mode, |
| type, |
| &pcum->aapcs_vfp_rmode, |
| nregs, |
| NULL); |
| } |
| |
| /* Given MODE and TYPE of a function argument, return the alignment in |
| bits. The idea is to suppress any stronger alignment requested by |
| the user and opt for the natural alignment (specified in AAPCS64 \S 4.1). |
| This is a helper function for local use only. */ |
| |
| static unsigned int |
| aarch64_function_arg_alignment (machine_mode mode, const_tree type) |
| { |
| unsigned int alignment; |
| |
| if (type) |
| { |
| if (!integer_zerop (TYPE_SIZE (type))) |
| { |
| if (TYPE_MODE (type) == mode) |
| alignment = TYPE_ALIGN (type); |
| else |
| alignment = GET_MODE_ALIGNMENT (mode); |
| } |
| else |
| alignment = 0; |
| } |
| else |
| alignment = GET_MODE_ALIGNMENT (mode); |
| |
| return alignment; |
| } |
| |
| /* Layout a function argument according to the AAPCS64 rules. The rule |
| numbers refer to the rule numbers in the AAPCS64. */ |
| |
| static void |
| aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode, |
| const_tree type, |
| bool named ATTRIBUTE_UNUSED) |
| { |
| CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); |
| int ncrn, nvrn, nregs; |
| bool allocate_ncrn, allocate_nvrn; |
| HOST_WIDE_INT size; |
| |
| /* We need to do this once per argument. */ |
| if (pcum->aapcs_arg_processed) |
| return; |
| |
| pcum->aapcs_arg_processed = true; |
| |
| /* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
| size |
| = AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode), |
| UNITS_PER_WORD); |
| |
| allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode); |
| allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v, |
| mode, |
| type, |
| &nregs); |
| |
| /* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
| The following code thus handles passing by SIMD/FP registers first. */ |
| |
| nvrn = pcum->aapcs_nvrn; |
| |
| /* C1 - C5 for floating point, homogeneous floating-point aggregates (HFA)
| and homogeneous short-vector aggregates (HVA). */
| if (allocate_nvrn) |
| { |
| if (nvrn + nregs <= NUM_FP_ARG_REGS) |
| { |
| pcum->aapcs_nextnvrn = nvrn + nregs; |
| if (!aarch64_composite_type_p (type, mode)) |
| { |
| gcc_assert (nregs == 1); |
| pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn); |
| } |
| else |
| { |
| rtx par; |
| int i; |
| par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs)); |
| for (i = 0; i < nregs; i++) |
| { |
| rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode, |
| V0_REGNUM + nvrn + i); |
| tmp = gen_rtx_EXPR_LIST |
| (VOIDmode, tmp, |
| GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode))); |
| XVECEXP (par, 0, i) = tmp; |
| } |
| pcum->aapcs_reg = par; |
| } |
| return; |
| } |
| else |
| { |
| /* C.3 NSRN is set to 8. */ |
| pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS; |
| goto on_stack; |
| } |
| } |
| |
| ncrn = pcum->aapcs_ncrn; |
| nregs = size / UNITS_PER_WORD; |
| |
| /* C6 - C9, though the sign and zero extension semantics are
| handled elsewhere. This is the case where the argument fits
| entirely in general registers. */
| if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS)) |
| { |
| unsigned int alignment = aarch64_function_arg_alignment (mode, type); |
| |
| gcc_assert (nregs == 0 || nregs == 1 || nregs == 2); |
| |
| /* C.8 if the argument has an alignment of 16 then the NGRN is |
| rounded up to the next even number. */ |
| if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2) |
| { |
| ++ncrn; |
| gcc_assert (ncrn + nregs <= NUM_ARG_REGS); |
| } |
| /* NREGS can be 0 when e.g. an empty structure is to be passed. |
| A reg is still generated for it, but the caller should be smart |
| enough not to use it. */ |
| if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT) |
| { |
| pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn); |
| } |
| else |
| { |
| rtx par; |
| int i; |
| |
| par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs)); |
| for (i = 0; i < nregs; i++) |
| { |
| rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i); |
| tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, |
| GEN_INT (i * UNITS_PER_WORD)); |
| XVECEXP (par, 0, i) = tmp; |
| } |
| pcum->aapcs_reg = par; |
| } |
| |
| pcum->aapcs_nextncrn = ncrn + nregs; |
| return; |
| } |
| |
| /* C.11 */ |
| pcum->aapcs_nextncrn = NUM_ARG_REGS; |
| |
| /* The argument is passed on stack; record the needed number of words for |
| this argument and align the total size if necessary. */ |
| on_stack: |
| pcum->aapcs_stack_words = size / UNITS_PER_WORD; |
| if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT) |
| pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size, |
| 16 / UNITS_PER_WORD); |
| return; |
| } |
| |
| /* Implement TARGET_FUNCTION_ARG. */ |
| |
| static rtx |
| aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode, |
| const_tree type, bool named) |
| { |
| CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); |
| gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64); |
| |
| if (mode == VOIDmode) |
| return NULL_RTX; |
| |
| aarch64_layout_arg (pcum_v, mode, type, named); |
| return pcum->aapcs_reg; |
| } |
| |
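| /* Initialize the argument-layout state in PCUM, ready for laying out the
| arguments of a call that follows the AAPCS64 calling convention. */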
| void |
| aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum, |
| const_tree fntype ATTRIBUTE_UNUSED, |
| rtx libname ATTRIBUTE_UNUSED, |
| const_tree fndecl ATTRIBUTE_UNUSED, |
| unsigned n_named ATTRIBUTE_UNUSED) |
| { |
| pcum->aapcs_ncrn = 0; |
| pcum->aapcs_nvrn = 0; |
| pcum->aapcs_nextncrn = 0; |
| pcum->aapcs_nextnvrn = 0; |
| pcum->pcs_variant = ARM_PCS_AAPCS64; |
| pcum->aapcs_reg = NULL_RTX; |
| pcum->aapcs_arg_processed = false; |
| pcum->aapcs_stack_words = 0; |
| pcum->aapcs_stack_size = 0; |
| |
| return; |
| } |
| |
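| /* Implement TARGET_FUNCTION_ARG_ADVANCE. */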
| static void |
| aarch64_function_arg_advance (cumulative_args_t pcum_v, |
| machine_mode mode, |
| const_tree type, |
| bool named) |
| { |
| CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v); |
| if (pcum->pcs_variant == ARM_PCS_AAPCS64) |
| { |
| aarch64_layout_arg (pcum_v, mode, type, named); |
| gcc_assert ((pcum->aapcs_reg != NULL_RTX) |
| != (pcum->aapcs_stack_words != 0)); |
| pcum->aapcs_arg_processed = false; |
| pcum->aapcs_ncrn = pcum->aapcs_nextncrn; |
| pcum->aapcs_nvrn = pcum->aapcs_nextnvrn; |
| pcum->aapcs_stack_size += pcum->aapcs_stack_words; |
| pcum->aapcs_stack_words = 0; |
| pcum->aapcs_reg = NULL_RTX; |
| } |
| } |
| |
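| /* Return TRUE if REGNO is a register in which function arguments may be
| passed: under AAPCS64 these are the first NUM_ARG_REGS general registers
| (x0-x7) and the first NUM_FP_ARG_REGS FP/SIMD registers (v0-v7). */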
| bool |
| aarch64_function_arg_regno_p (unsigned regno) |
| { |
| return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS) |
| || (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS)); |
| } |
| |
| /* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least |
| PARM_BOUNDARY bits of alignment, but will be given anything up |
| to STACK_BOUNDARY bits if the type requires it. This makes sure |
| that both before and after the layout of each argument, the Next |
| Stacked Argument Address (NSAA) will have a minimum alignment of |
| 8 bytes. */ |
| |
| static unsigned int |
| aarch64_function_arg_boundary (machine_mode mode, const_tree type) |
| { |
| unsigned int alignment = aarch64_function_arg_alignment (mode, type); |
| |
| if (alignment < PARM_BOUNDARY) |
| alignment = PARM_BOUNDARY; |
| if (alignment > STACK_BOUNDARY) |
| alignment = STACK_BOUNDARY; |
| return alignment; |
| } |
| |
| /* For use by FUNCTION_ARG_PADDING (MODE, TYPE). |
| |
| Return true if an argument passed on the stack should be padded upwards, |
| i.e. if the least-significant byte of the stack slot has useful data. |
| |
| Small aggregate types are placed in the lowest memory address. |
| |
| The related parameter passing rules are B.4, C.3, C.5 and C.14. */ |
| |
| bool |
| aarch64_pad_arg_upward (machine_mode mode, const_tree type) |
| { |
| /* On little-endian targets, the least significant byte of every stack |
| argument is passed at the lowest byte address of the stack slot. */ |
| if (!BYTES_BIG_ENDIAN) |
| return true; |
| |
| /* Otherwise, integral, floating-point and pointer types are padded downward: |
| the least significant byte of a stack argument is passed at the highest |
| byte address of the stack slot. */ |
| if (type |
| ? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type) |
| || POINTER_TYPE_P (type)) |
| : (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode))) |
| return false; |
| |
| /* Everything else padded upward, i.e. data in first byte of stack slot. */ |
| return true; |
| } |
| |
| /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST). |
| |
| It specifies padding for the last (and possibly the only)
| element of a block move between registers and memory. Assuming
| the block is in memory, padding upward means that the last
| element is padded after its most significant byte, while with
| downward padding the last element is padded at its least
| significant byte side.
| |
| Small aggregates and small complex types are always padded |
| upwards. |
| |
| We don't need to worry about homogeneous floating-point or |
| short-vector aggregates; their move is not affected by the |
| padding direction determined here. Regardless of endianness, |
| each element of such an aggregate is put in the least |
| significant bits of a fp/simd register. |
| |
| Return !BYTES_BIG_ENDIAN if the least significant byte of the |
| register has useful data, and return the opposite if the most |
| significant byte does. */ |
| |
| bool |
| aarch64_pad_reg_upward (machine_mode mode, const_tree type, |
| bool first ATTRIBUTE_UNUSED) |
| { |
| |
| /* Small composite types are always padded upward. */ |
| if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode)) |
| { |
| HOST_WIDE_INT size = (type ? int_size_in_bytes (type) |
| : GET_MODE_SIZE (mode)); |
| if (size < 2 * UNITS_PER_WORD) |
| return true; |
| } |
| |
| /* Otherwise, use the default padding. */ |
| return !BYTES_BIG_ENDIAN; |
| } |
| |
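| /* Implement TARGET_LIBGCC_CMP_RETURN_MODE. */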
| static machine_mode |
| aarch64_libgcc_cmp_return_mode (void) |
| { |
| return SImode; |
| } |
| |
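| /* Implement TARGET_FRAME_POINTER_REQUIRED. */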
| static bool |
| aarch64_frame_pointer_required (void) |
| { |
  /* In aarch64_override_options_after_change
     flag_omit_leaf_frame_pointer turns off the frame pointer by
     default.  Turn it back on now if the function is not a leaf or
     if the link register is ever live.  */
| if (flag_omit_leaf_frame_pointer |
| && (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM))) |
| return true; |
| |
| return false; |
| } |
| |
| /* Mark the registers that need to be saved by the callee and calculate |
| the size of the callee-saved registers area and frame record (both FP |
| and LR may be omitted). */ |
| static void |
| aarch64_layout_frame (void) |
| { |
| HOST_WIDE_INT offset = 0; |
| int regno; |
| |
| if (reload_completed && cfun->machine->frame.laid_out) |
| return; |
| |
| #define SLOT_NOT_REQUIRED (-2) |
| #define SLOT_REQUIRED (-1) |
| |
| cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER; |
| cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER; |
| |
| /* First mark all the registers that really need to be saved... */ |
| for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) |
| cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; |
| |
| for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) |
| cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED; |
| |
| /* ... that includes the eh data registers (if needed)... */ |
| if (crtl->calls_eh_return) |
| for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++) |
| cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)] |
| = SLOT_REQUIRED; |
| |
| /* ... and any callee saved register that dataflow says is live. */ |
| for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) |
| if (df_regs_ever_live_p (regno) |
| && (regno == R30_REGNUM |
| || !call_used_regs[regno])) |
| cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; |
| |
| for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) |
| if (df_regs_ever_live_p (regno) |
| && !call_used_regs[regno]) |
| cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED; |
| |
| if (frame_pointer_needed) |
| { |
| /* FP and LR are placed in the linkage record. */ |
| cfun->machine->frame.reg_offset[R29_REGNUM] = 0; |
| cfun->machine->frame.wb_candidate1 = R29_REGNUM; |
| cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD; |
| cfun->machine->frame.wb_candidate2 = R30_REGNUM; |
| cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD; |
| offset += 2 * UNITS_PER_WORD; |
| } |
| |
| /* Now assign stack slots for them. */ |
| for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++) |
| if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) |
| { |
| cfun->machine->frame.reg_offset[regno] = offset; |
| if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) |
| cfun->machine->frame.wb_candidate1 = regno; |
| else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER) |
| cfun->machine->frame.wb_candidate2 = regno; |
| offset += UNITS_PER_WORD; |
| } |
| |
| for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++) |
| if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED) |
| { |
| cfun->machine->frame.reg_offset[regno] = offset; |
| if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER) |
| cfun->machine->frame.wb_candidate1 = regno; |
| else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER |
| && cfun->machine->frame.wb_candidate1 >= V0_REGNUM) |
| cfun->machine->frame.wb_candidate2 = regno; |
| offset += UNITS_PER_WORD; |
| } |
| |
| cfun->machine->frame.padding0 = |
| (AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset); |
| offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT); |
| |
| cfun->machine->frame.saved_regs_size = offset; |
| |
| cfun->machine->frame.hard_fp_offset |
| = AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size |
| + get_frame_size () |
| + cfun->machine->frame.saved_regs_size, |
| STACK_BOUNDARY / BITS_PER_UNIT); |
| |
| cfun->machine->frame.frame_size |
| = AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset |
| + crtl->outgoing_args_size, |
| STACK_BOUNDARY / BITS_PER_UNIT); |
| |
| cfun->machine->frame.laid_out = true; |
| } |
| |
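/* Return true if REGNO was allocated a save slot by aarch64_layout_frame
   and therefore must be saved on entry to the current function.  */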
| static bool |
| aarch64_register_saved_on_entry (int regno) |
| { |
| return cfun->machine->frame.reg_offset[regno] >= 0; |
| } |
| |
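/* Return the lowest register number >= REGNO and <= LIMIT that must be
   saved on entry, or LIMIT + 1 if there is no such register.  */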
| static unsigned |
| aarch64_next_callee_save (unsigned regno, unsigned limit) |
| { |
| while (regno <= limit && !aarch64_register_saved_on_entry (regno)) |
| regno ++; |
| return regno; |
| } |
| |
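/* Push register REGNO (in MODE) onto the stack, pre-decrementing the
   stack pointer by ADJUSTMENT bytes, and mark the store as
   frame-related.  */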
| static void |
| aarch64_pushwb_single_reg (machine_mode mode, unsigned regno, |
| HOST_WIDE_INT adjustment) |
| { |
| rtx base_rtx = stack_pointer_rtx; |
| rtx insn, reg, mem; |
| |
| reg = gen_rtx_REG (mode, regno); |
| mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx, |
| plus_constant (Pmode, base_rtx, -adjustment)); |
| mem = gen_rtx_MEM (mode, mem); |
| |
| insn = emit_move_insn (mem, reg); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
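/* Generate a store-pair pattern with writeback that stores REG at
   [BASE, -ADJUSTMENT] and REG2 at [BASE, -ADJUSTMENT + UNITS_PER_WORD],
   pre-decrementing BASE by ADJUSTMENT bytes.  */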
| static rtx |
| aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2, |
| HOST_WIDE_INT adjustment) |
| { |
| switch (mode) |
| { |
| case DImode: |
| return gen_storewb_pairdi_di (base, base, reg, reg2, |
| GEN_INT (-adjustment), |
| GEN_INT (UNITS_PER_WORD - adjustment)); |
| case DFmode: |
| return gen_storewb_pairdf_di (base, base, reg, reg2, |
| GEN_INT (-adjustment), |
| GEN_INT (UNITS_PER_WORD - adjustment)); |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
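/* Push the pair of registers REGNO1 and REGNO2 (in MODE) onto the stack,
   pre-decrementing the stack pointer by ADJUSTMENT bytes, and mark the
   stores as frame-related.  */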
| static void |
| aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1, |
| unsigned regno2, HOST_WIDE_INT adjustment) |
| { |
| rtx_insn *insn; |
| rtx reg1 = gen_rtx_REG (mode, regno1); |
| rtx reg2 = gen_rtx_REG (mode, regno2); |
| |
| insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1, |
| reg2, adjustment)); |
| RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1; |
| RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
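/* Generate a load-pair pattern with writeback that loads REG from [BASE]
   and REG2 from [BASE, UNITS_PER_WORD], post-incrementing BASE by
   ADJUSTMENT bytes.  */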
| static rtx |
| aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2, |
| HOST_WIDE_INT adjustment) |
| { |
| switch (mode) |
| { |
| case DImode: |
| return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment), |
| GEN_INT (UNITS_PER_WORD)); |
| case DFmode: |
| return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment), |
| GEN_INT (UNITS_PER_WORD)); |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
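/* Generate a store-pair pattern that stores REG1 to MEM1 and REG2 to
   MEM2.  MODE must be DImode or DFmode.  */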
| static rtx |
| aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2, |
| rtx reg2) |
| { |
| switch (mode) |
| { |
| case DImode: |
| return gen_store_pairdi (mem1, reg1, mem2, reg2); |
| |
| case DFmode: |
| return gen_store_pairdf (mem1, reg1, mem2, reg2); |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
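/* Generate a load-pair pattern that loads REG1 from MEM1 and REG2 from
   MEM2.  MODE must be DImode or DFmode.  */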
| static rtx |
| aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2, |
| rtx mem2) |
| { |
| switch (mode) |
| { |
| case DImode: |
| return gen_load_pairdi (reg1, mem1, reg2, mem2); |
| |
| case DFmode: |
| return gen_load_pairdf (reg1, mem1, reg2, mem2); |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| |
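/* Emit code to save the callee-saved registers in the range START to
   LIMIT (inclusive), storing each at START_OFFSET plus the register's
   frame offset from the stack pointer and using store-pair instructions
   where two slots are adjacent.  If SKIP_WB is true, skip the registers
   that the prologue has already saved with writeback.  */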
| static void |
| aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset, |
| unsigned start, unsigned limit, bool skip_wb) |
| { |
| rtx_insn *insn; |
| rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed |
| ? gen_frame_mem : gen_rtx_MEM); |
| unsigned regno; |
| unsigned regno2; |
| |
| for (regno = aarch64_next_callee_save (start, limit); |
| regno <= limit; |
| regno = aarch64_next_callee_save (regno + 1, limit)) |
| { |
| rtx reg, mem; |
| HOST_WIDE_INT offset; |
| |
| if (skip_wb |
| && (regno == cfun->machine->frame.wb_candidate1 |
| || regno == cfun->machine->frame.wb_candidate2)) |
| continue; |
| |
| reg = gen_rtx_REG (mode, regno); |
| offset = start_offset + cfun->machine->frame.reg_offset[regno]; |
| mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, |
| offset)); |
| |
| regno2 = aarch64_next_callee_save (regno + 1, limit); |
| |
| if (regno2 <= limit |
| && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) |
| == cfun->machine->frame.reg_offset[regno2])) |
| |
| { |
| rtx reg2 = gen_rtx_REG (mode, regno2); |
| rtx mem2; |
| |
| offset = start_offset + cfun->machine->frame.reg_offset[regno2]; |
| mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx, |
| offset)); |
| insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2, |
| reg2)); |
| |
	  /* The first part of a frame-related parallel insn is
	     always assumed to be relevant to the frame
	     calculations; subsequent parts are only
	     frame-related if explicitly marked.  */
| RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1; |
| regno = regno2; |
| } |
| else |
| insn = emit_move_insn (mem, reg); |
| |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| |
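/* Emit code to restore the callee-saved registers in the range START to
   LIMIT (inclusive) from START_OFFSET plus each register's frame offset,
   using load-pair instructions where possible, and accumulate
   REG_CFA_RESTORE notes for the restored registers in *CFI_OPS.  If
   SKIP_WB is true, skip the registers that the epilogue reloads with
   writeback.  */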
| static void |
| aarch64_restore_callee_saves (machine_mode mode, |
| HOST_WIDE_INT start_offset, unsigned start, |
| unsigned limit, bool skip_wb, rtx *cfi_ops) |
| { |
| rtx base_rtx = stack_pointer_rtx; |
| rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed |
| ? gen_frame_mem : gen_rtx_MEM); |
| unsigned regno; |
| unsigned regno2; |
| HOST_WIDE_INT offset; |
| |
| for (regno = aarch64_next_callee_save (start, limit); |
| regno <= limit; |
| regno = aarch64_next_callee_save (regno + 1, limit)) |
| { |
| rtx reg, mem; |
| |
| if (skip_wb |
| && (regno == cfun->machine->frame.wb_candidate1 |
| || regno == cfun->machine->frame.wb_candidate2)) |
| continue; |
| |
| reg = gen_rtx_REG (mode, regno); |
| offset = start_offset + cfun->machine->frame.reg_offset[regno]; |
| mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); |
| |
| regno2 = aarch64_next_callee_save (regno + 1, limit); |
| |
| if (regno2 <= limit |
| && ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD) |
| == cfun->machine->frame.reg_offset[regno2])) |
| { |
| rtx reg2 = gen_rtx_REG (mode, regno2); |
| rtx mem2; |
| |
| offset = start_offset + cfun->machine->frame.reg_offset[regno2]; |
| mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset)); |
| emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2)); |
| |
| *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops); |
| regno = regno2; |
| } |
| else |
| emit_move_insn (reg, mem); |
| *cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops); |
| } |
| } |
| |
| /* AArch64 stack frames generated by this compiler look like: |
| |
| +-------------------------------+ |
| | | |
| | incoming stack arguments | |
| | | |
| +-------------------------------+ |
| | | <-- incoming stack pointer (aligned) |
| | callee-allocated save area | |
| | for register varargs | |
| | | |
| +-------------------------------+ |
| | local variables | <-- frame_pointer_rtx |
| | | |
| +-------------------------------+ |
| | padding0 | \ |
| +-------------------------------+ | |
| | callee-saved registers | | frame.saved_regs_size |
| +-------------------------------+ | |
| | LR' | | |
| +-------------------------------+ | |
| | FP' | / <- hard_frame_pointer_rtx (aligned) |
| +-------------------------------+ |
| | dynamic allocation | |
| +-------------------------------+ |
| | padding | |
| +-------------------------------+ |
| | outgoing stack arguments | <-- arg_pointer |
| | | |
| +-------------------------------+ |
| | | <-- stack_pointer_rtx (aligned) |
| |
| Dynamic stack allocations via alloca() decrease stack_pointer_rtx |
| but leave frame_pointer_rtx and hard_frame_pointer_rtx |
| unchanged. */ |
| |
| /* Generate the prologue instructions for entry into a function. |
| Establish the stack frame by decreasing the stack pointer with a |
| properly calculated size and, if necessary, create a frame record |
   filled with the values of LR and the previous frame pointer.  The
| current FP is also set up if it is in use. */ |
| |
| void |
| aarch64_expand_prologue (void) |
| { |
| /* sub sp, sp, #<frame_size> |
| stp {fp, lr}, [sp, #<frame_size> - 16] |
| add fp, sp, #<frame_size> - hardfp_offset |
| stp {cs_reg}, [fp, #-16] etc. |
| |
| sub sp, sp, <final_adjustment_if_any> |
| */ |
| HOST_WIDE_INT frame_size, offset; |
| HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */ |
| HOST_WIDE_INT hard_fp_offset; |
| rtx_insn *insn; |
| |
| aarch64_layout_frame (); |
| |
| offset = frame_size = cfun->machine->frame.frame_size; |
| hard_fp_offset = cfun->machine->frame.hard_fp_offset; |
| fp_offset = frame_size - hard_fp_offset; |
| |
| if (flag_stack_usage_info) |
| current_function_static_stack_size = frame_size; |
| |
  /* Store-pair and load-pair instructions have an immediate offset
     range of only -512 to 504.  */
| if (offset >= 512) |
| { |
| /* When the frame has a large size, an initial decrease is done on |
| the stack pointer to jump over the callee-allocated save area for |
| register varargs, the local variable area and/or the callee-saved |
| register area. This will allow the pre-index write-back |
| store pair instructions to be used for setting up the stack frame |
| efficiently. */ |
| offset = hard_fp_offset; |
| if (offset >= 512) |
| offset = cfun->machine->frame.saved_regs_size; |
| |
| frame_size -= (offset + crtl->outgoing_args_size); |
| fp_offset = 0; |
| |
| if (frame_size >= 0x1000000) |
| { |
| rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); |
| emit_move_insn (op0, GEN_INT (-frame_size)); |
| insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); |
| |
| add_reg_note (insn, REG_CFA_ADJUST_CFA, |
| gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
| plus_constant (Pmode, stack_pointer_rtx, |
| -frame_size))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| else if (frame_size > 0) |
| { |
| int hi_ofs = frame_size & 0xfff000; |
| int lo_ofs = frame_size & 0x000fff; |
| |
| if (hi_ofs) |
| { |
| insn = emit_insn (gen_add2_insn |
| (stack_pointer_rtx, GEN_INT (-hi_ofs))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| if (lo_ofs) |
| { |
| insn = emit_insn (gen_add2_insn |
| (stack_pointer_rtx, GEN_INT (-lo_ofs))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| } |
| else |
| frame_size = -1; |
| |
| if (offset > 0) |
| { |
| bool skip_wb = false; |
| |
| if (frame_pointer_needed) |
| { |
| skip_wb = true; |
| |
| if (fp_offset) |
| { |
| insn = emit_insn (gen_add2_insn (stack_pointer_rtx, |
| GEN_INT (-offset))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM, |
| R30_REGNUM, false); |
| } |
| else |
| aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset); |
| |
| /* Set up frame pointer to point to the location of the |
| previous frame pointer on the stack. */ |
| insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx, |
| stack_pointer_rtx, |
| GEN_INT (fp_offset))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); |
| } |
| else |
| { |
| unsigned reg1 = cfun->machine->frame.wb_candidate1; |
| unsigned reg2 = cfun->machine->frame.wb_candidate2; |
| |
| if (fp_offset |
| || reg1 == FIRST_PSEUDO_REGISTER |
| || (reg2 == FIRST_PSEUDO_REGISTER |
| && offset >= 256)) |
| { |
| insn = emit_insn (gen_add2_insn (stack_pointer_rtx, |
| GEN_INT (-offset))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| else |
| { |
| machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode; |
| |
| skip_wb = true; |
| |
| if (reg2 == FIRST_PSEUDO_REGISTER) |
| aarch64_pushwb_single_reg (mode1, reg1, offset); |
| else |
| aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset); |
| } |
| } |
| |
| aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, |
| skip_wb); |
| aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, |
| skip_wb); |
| } |
| |
  /* When offset >= 512,
     sub sp, sp, #<outgoing_args_size>  */
| if (frame_size > -1) |
| { |
| if (crtl->outgoing_args_size > 0) |
| { |
| insn = emit_insn (gen_add2_insn |
| (stack_pointer_rtx, |
| GEN_INT (- crtl->outgoing_args_size))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| } |
| |
/* Return TRUE if we can use a simple_return insn.

   This function checks whether the function's stack frame is empty,
   which means that no epilogue actions are needed.  The
   pro_and_epilogue pass uses this to check whether the
   shrink-wrapping optimization is feasible.  */
| |
| bool |
| aarch64_use_return_insn_p (void) |
| { |
| if (!reload_completed) |
| return false; |
| |
| if (crtl->profile) |
| return false; |
| |
| aarch64_layout_frame (); |
| |
| return cfun->machine->frame.frame_size == 0; |
| } |
| |
| /* Generate the epilogue instructions for returning from a function. */ |
| void |
| aarch64_expand_epilogue (bool for_sibcall) |
| { |
| HOST_WIDE_INT frame_size, offset; |
| HOST_WIDE_INT fp_offset; |
| HOST_WIDE_INT hard_fp_offset; |
| rtx_insn *insn; |
  /* We need to add a memory barrier to prevent reads from the
     deallocated stack.  */
| bool need_barrier_p = (get_frame_size () != 0 |
| || cfun->machine->frame.saved_varargs_size); |
| |
| aarch64_layout_frame (); |
| |
| offset = frame_size = cfun->machine->frame.frame_size; |
| hard_fp_offset = cfun->machine->frame.hard_fp_offset; |
| fp_offset = frame_size - hard_fp_offset; |
| |
  /* Store-pair and load-pair instructions have an immediate offset
     range of only -512 to 504.  */
| if (offset >= 512) |
| { |
| offset = hard_fp_offset; |
| if (offset >= 512) |
| offset = cfun->machine->frame.saved_regs_size; |
| |
| frame_size -= (offset + crtl->outgoing_args_size); |
| fp_offset = 0; |
| if (!frame_pointer_needed && crtl->outgoing_args_size > 0) |
| { |
| insn = emit_insn (gen_add2_insn |
| (stack_pointer_rtx, |
| GEN_INT (crtl->outgoing_args_size))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| } |
| else |
| frame_size = -1; |
| |
| /* If there were outgoing arguments or we've done dynamic stack |
| allocation, then restore the stack pointer from the frame |
| pointer. This is at most one insn and more efficient than using |
| GCC's internal mechanism. */ |
| if (frame_pointer_needed |
| && (crtl->outgoing_args_size || cfun->calls_alloca)) |
| { |
| if (cfun->calls_alloca) |
| emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); |
| |
| insn = emit_insn (gen_add3_insn (stack_pointer_rtx, |
| hard_frame_pointer_rtx, |
| GEN_INT (0))); |
| offset = offset - fp_offset; |
| } |
| |
| if (offset > 0) |
| { |
| unsigned reg1 = cfun->machine->frame.wb_candidate1; |
| unsigned reg2 = cfun->machine->frame.wb_candidate2; |
| bool skip_wb = true; |
| rtx cfi_ops = NULL; |
| |
| if (frame_pointer_needed) |
| fp_offset = 0; |
| else if (fp_offset |
| || reg1 == FIRST_PSEUDO_REGISTER |
| || (reg2 == FIRST_PSEUDO_REGISTER |
| && offset >= 256)) |
| skip_wb = false; |
| |
| aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM, |
| skip_wb, &cfi_ops); |
| aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM, |
| skip_wb, &cfi_ops); |
| |
| if (need_barrier_p) |
| emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); |
| |
| if (skip_wb) |
| { |
| machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode; |
| rtx rreg1 = gen_rtx_REG (mode1, reg1); |
| |
| cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops); |
| if (reg2 == FIRST_PSEUDO_REGISTER) |
| { |
| rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset); |
| mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem); |
| mem = gen_rtx_MEM (mode1, mem); |
| insn = emit_move_insn (rreg1, mem); |
| } |
| else |
| { |
| rtx rreg2 = gen_rtx_REG (mode1, reg2); |
| |
| cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops); |
| insn = emit_insn (aarch64_gen_loadwb_pair |
| (mode1, stack_pointer_rtx, rreg1, |
| rreg2, offset)); |
| } |
| } |
| else |
| { |
| insn = emit_insn (gen_add2_insn (stack_pointer_rtx, |
| GEN_INT (offset))); |
| } |
| |
| /* Reset the CFA to be SP + FRAME_SIZE. */ |
| rtx new_cfa = stack_pointer_rtx; |
| if (frame_size > 0) |
| new_cfa = plus_constant (Pmode, new_cfa, frame_size); |
| cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops); |
| REG_NOTES (insn) = cfi_ops; |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
| if (frame_size > 0) |
| { |
| if (need_barrier_p) |
| emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx)); |
| |
| if (frame_size >= 0x1000000) |
| { |
| rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM); |
| emit_move_insn (op0, GEN_INT (frame_size)); |
| insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0)); |
| } |
| else |
| { |
| int hi_ofs = frame_size & 0xfff000; |
| int lo_ofs = frame_size & 0x000fff; |
| |
| if (hi_ofs && lo_ofs) |
| { |
| insn = emit_insn (gen_add2_insn |
| (stack_pointer_rtx, GEN_INT (hi_ofs))); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| frame_size = lo_ofs; |
| } |
| insn = emit_insn (gen_add2_insn |
| (stack_pointer_rtx, GEN_INT (frame_size))); |
| } |
| |
| /* Reset the CFA to be SP + 0. */ |
| add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
| /* Stack adjustment for exception handler. */ |
| if (crtl->calls_eh_return) |
| { |
| /* We need to unwind the stack by the offset computed by |
| EH_RETURN_STACKADJ_RTX. We have already reset the CFA |
| to be SP; letting the CFA move during this adjustment |
| is just as correct as retaining the CFA from the body |
| of the function. Therefore, do nothing special. */ |
| emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX)); |
| } |
| |
| emit_use (gen_rtx_REG (DImode, LR_REGNUM)); |
| if (!for_sibcall) |
| emit_jump_insn (ret_rtx); |
| } |
| |
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
| rtx |
| aarch64_final_eh_return_addr (void) |
| { |
| HOST_WIDE_INT fp_offset; |
| |
| aarch64_layout_frame (); |
| |
| fp_offset = cfun->machine->frame.frame_size |
| - cfun->machine->frame.hard_fp_offset; |
| |
| if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0) |
| return gen_rtx_REG (DImode, LR_REGNUM); |
| |
| /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2. This can |
| result in a store to save LR introduced by builtin_eh_return () being |
| incorrectly deleted because the alias is not detected. |
     So in the calculation of the address to copy the exception unwinding
     return address to, we distinguish two cases.
| If FP is needed and the fp_offset is 0, it means that SP = FP and hence |
| we return a SP-relative location since all the addresses are SP-relative |
| in this case. This prevents the store from being optimized away. |
| If the fp_offset is not 0, then the addresses will be FP-relative and |
| therefore we return a FP-relative location. */ |
| |
| if (frame_pointer_needed) |
| { |
| if (fp_offset) |
| return gen_frame_mem (DImode, |
| plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD)); |
| else |
| return gen_frame_mem (DImode, |
| plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD)); |
| } |
| |
| /* If FP is not needed, we calculate the location of LR, which would be |
| at the top of the saved registers block. */ |
| |
| return gen_frame_mem (DImode, |
| plus_constant (Pmode, |
| stack_pointer_rtx, |
| fp_offset |
| + cfun->machine->frame.saved_regs_size |
| - 2 * UNITS_PER_WORD)); |
| } |
| |
/* Possibly output code to build up the constant VAL in register REGNUM.
   For the benefit of the costs infrastructure, return the number of
   instructions that would be emitted.  GENERATE enables or inhibits
   the actual emission of code.  */
| |
| static int |
| aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate) |
| { |
| int insns = 0; |
| |
| if (aarch64_bitmask_imm (val, DImode)) |
| { |
| if (generate) |
| emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val)); |
| insns = 1; |
| } |
| else |
| { |
| int i; |
| int ncount = 0; |
| int zcount = 0; |
| HOST_WIDE_INT valp = val >> 16; |
| HOST_WIDE_INT valm; |
| HOST_WIDE_INT tval; |
| |
| for (i = 16; i < 64; i += 16) |
| { |
| valm = (valp & 0xffff); |
| |
| if (valm != 0) |
| ++ zcount; |
| |
| if (valm != 0xffff) |
| ++ ncount; |
| |
| valp >>= 16; |
| } |
| |
| /* zcount contains the number of additional MOVK instructions |
| required if the constant is built up with an initial MOVZ instruction, |
| while ncount is the number of MOVK instructions required if starting |
	 with a MOVN instruction.  Choose the sequence that needs the
	 fewest instructions, preferring MOVZ when the two counts are
	 equal.  */
| if (ncount < zcount) |
| { |
| if (generate) |
| emit_move_insn (gen_rtx_REG (Pmode, regnum), |
| GEN_INT (val | ~(HOST_WIDE_INT) 0xffff)); |
| tval = 0xffff; |
| insns++; |
| } |
| else |
| { |
| if (generate) |
| emit_move_insn (gen_rtx_REG (Pmode, regnum), |
| GEN_INT (val & 0xffff)); |
| tval = 0; |
| insns++; |
| } |
| |
| val >>= 16; |
| |
| for (i = 16; i < 64; i += 16) |
| { |
| if ((val & 0xffff) != tval) |
| { |
| if (generate) |
| emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum), |
| GEN_INT (i), |
| GEN_INT (val & 0xffff))); |
| insns++; |
| } |
| val >>= 16; |
| } |
| } |
| return insns; |
| } |
| |
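/* Add DELTA to the contents of register REGNUM, using register
   SCRATCHREG as a temporary if the adjustment cannot be expressed
   directly as add/sub immediates.  */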
| static void |
| aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta) |
| { |
| HOST_WIDE_INT mdelta = delta; |
| rtx this_rtx = gen_rtx_REG (Pmode, regnum); |
| rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg); |
| |
| if (mdelta < 0) |
| mdelta = -mdelta; |
| |
| if (mdelta >= 4096 * 4096) |
| { |
| (void) aarch64_build_constant (scratchreg, delta, true); |
| emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx)); |
| } |
| else if (mdelta > 0) |
| { |
| if (mdelta >= 4096) |
| { |
| emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096))); |
| rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12)); |
| if (delta < 0) |
| emit_insn (gen_rtx_SET (Pmode, this_rtx, |
| gen_rtx_MINUS (Pmode, this_rtx, shift))); |
| else |
| emit_insn (gen_rtx_SET (Pmode, this_rtx, |
| gen_rtx_PLUS (Pmode, this_rtx, shift))); |
| } |
| if (mdelta % 4096 != 0) |
| { |
| scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096)); |
| emit_insn (gen_rtx_SET (Pmode, this_rtx, |
| gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx))); |
| } |
| } |
| } |
| |
| /* Output code to add DELTA to the first argument, and then jump |
| to FUNCTION. Used for C++ multiple inheritance. */ |
| static void |
| aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED, |
| HOST_WIDE_INT delta, |
| HOST_WIDE_INT vcall_offset, |
| tree function) |
| { |
  /* The this pointer is always in x0.  Note that this differs from
     Arm, where the this pointer may be bumped to r1 if r0 is required
     to return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
| int this_regno = R0_REGNUM; |
| rtx this_rtx, temp0, temp1, addr, funexp; |
| rtx_insn *insn; |
| |
| reload_completed = 1; |
| emit_note (NOTE_INSN_PROLOGUE_END); |
| |
| if (vcall_offset == 0) |
| aarch64_add_constant (this_regno, IP1_REGNUM, delta); |
| else |
| { |
| gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0); |
| |
| this_rtx = gen_rtx_REG (Pmode, this_regno); |
| temp0 = gen_rtx_REG (Pmode, IP0_REGNUM); |
| temp1 = gen_rtx_REG (Pmode, IP1_REGNUM); |
| |
| addr = this_rtx; |
| if (delta != 0) |
| { |
| if (delta >= -256 && delta < 256) |
| addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx, |
| plus_constant (Pmode, this_rtx, delta)); |
| else |
| aarch64_add_constant (this_regno, IP1_REGNUM, delta); |
| } |
| |
| if (Pmode == ptr_mode) |
| aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr)); |
| else |
| aarch64_emit_move (temp0, |
| gen_rtx_ZERO_EXTEND (Pmode, |
| gen_rtx_MEM (ptr_mode, addr))); |
| |
| if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES) |
| addr = plus_constant (Pmode, temp0, vcall_offset); |
| else |
| { |
| (void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true); |
| addr = gen_rtx_PLUS (Pmode, temp0, temp1); |
| } |
| |
| if (Pmode == ptr_mode) |
	aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
| else |
| aarch64_emit_move (temp1, |
| gen_rtx_SIGN_EXTEND (Pmode, |
| gen_rtx_MEM (ptr_mode, addr))); |
| |
| emit_insn (gen_add2_insn (this_rtx, temp1)); |
| } |
| |
| /* Generate a tail call to the target function. */ |
| if (!TREE_USED (function)) |
| { |
| assemble_external (function); |
| TREE_USED (function) = 1; |
| } |
| funexp = XEXP (DECL_RTL (function), 0); |
| funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); |
| insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX)); |
| SIBLING_CALL_P (insn) = 1; |
| |
| insn = get_insns (); |
| shorten_branches (insn); |
| final_start_function (insn, file, 1); |
| final (insn, file, 1); |
| final_end_function (); |
| |
| /* Stop pretending to be a post-reload pass. */ |
| reload_completed = 0; |
| } |
| |
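/* Return true if X contains a reference to a thread-local symbol.  */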
| static bool |
| aarch64_tls_referenced_p (rtx x) |
| { |
| if (!TARGET_HAVE_TLS) |
| return false; |
| subrtx_iterator::array_type array; |
| FOR_EACH_SUBRTX (iter, array, x, ALL) |
| { |
| const_rtx x = *iter; |
| if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0) |
| return true; |
| /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are |
| TLS offsets, not real symbol references. */ |
| if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) |
| iter.skip_subrtxes (); |
| } |
| return false; |
| } |
| |
| |
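/* Comparison function for qsort and bsearch over the sorted table of
   bitmask immediates.  */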
| static int |
| aarch64_bitmasks_cmp (const void *i1, const void *i2) |
| { |
| const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1; |
| const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2; |
| |
| if (*imm1 < *imm2) |
| return -1; |
| if (*imm1 > *imm2) |
| return +1; |
| return 0; |
| } |
| |
| |
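/* Build aarch64_bitmasks, the sorted table of every value that can be
   encoded as a bitmask immediate for the logical instructions, so that
   aarch64_bitmask_imm can binary-search it.  */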
| static void |
| aarch64_build_bitmask_table (void) |
| { |
| unsigned HOST_WIDE_INT mask, imm; |
| unsigned int log_e, e, s, r; |
| unsigned int nimms = 0; |
| |
| for (log_e = 1; log_e <= 6; log_e++) |
| { |
| e = 1 << log_e; |
| if (e == 64) |
| mask = ~(HOST_WIDE_INT) 0; |
| else |
| mask = ((HOST_WIDE_INT) 1 << e) - 1; |
| for (s = 1; s < e; s++) |
| { |
| for (r = 0; r < e; r++) |
| { |
	      /* Set S consecutive bits to 1 (S < 64).  */
| imm = ((unsigned HOST_WIDE_INT)1 << s) - 1; |
	      /* Rotate right by R.  */
| if (r != 0) |
| imm = ((imm >> r) | (imm << (e - r))) & mask; |
	      /* Replicate the pattern according to the element size E.
		 The switch below deliberately falls through: each case
		 doubles the replicated width until all 64 bits are
		 filled.  */
| switch (log_e) { |
| case 1: imm |= (imm << 2); |
| case 2: imm |= (imm << 4); |
| case 3: imm |= (imm << 8); |
| case 4: imm |= (imm << 16); |
| case 5: imm |= (imm << 32); |
| case 6: |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| gcc_assert (nimms < AARCH64_NUM_BITMASKS); |
| aarch64_bitmasks[nimms++] = imm; |
| } |
| } |
| } |
| |
| gcc_assert (nimms == AARCH64_NUM_BITMASKS); |
| qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]), |
| aarch64_bitmasks_cmp); |
| } |
| |
| |
| /* Return true if val can be encoded as a 12-bit unsigned immediate with |
| a left shift of 0 or 12 bits. */ |
| bool |
| aarch64_uimm12_shift (HOST_WIDE_INT val) |
| { |
| return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val |
| || (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val |
| ); |
| } |
| |
| |
| /* Return true if val is an immediate that can be loaded into a |
| register by a MOVZ instruction. */ |
| static bool |
| aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode) |
| { |
| if (GET_MODE_SIZE (mode) > 4) |
| { |
| if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val |
| || (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val) |
| return 1; |
| } |
| else |
| { |
| /* Ignore sign extension. */ |
| val &= (HOST_WIDE_INT) 0xffffffff; |
| } |
| return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val |
| || (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val); |
| } |
| |
| |
| /* Return true if val is a valid bitmask immediate. */ |
| bool |
| aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode) |
| { |
| if (GET_MODE_SIZE (mode) < 8) |
| { |
| /* Replicate bit pattern. */ |
| val &= (HOST_WIDE_INT) 0xffffffff; |
| val |= val << 32; |
| } |
| return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS, |
| sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL; |
| } |
| |
| |
| /* Return true if val is an immediate that can be loaded into a |
| register in a single instruction. */ |
| bool |
| aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode) |
| { |
| if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode)) |
| return 1; |
| return aarch64_bitmask_imm (val, mode); |
| } |
| |
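/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  Return true if X should not
   be forced into the constant pool.  */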
| static bool |
| aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) |
| { |
| rtx base, offset; |
| |
| if (GET_CODE (x) == HIGH) |
| return true; |
| |
| split_const (x, &base, &offset); |
| if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF) |
| { |
| if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR) |
| != SYMBOL_FORCE_TO_MEM) |
| return true; |
| else |
| /* Avoid generating a 64-bit relocation in ILP32; leave |
| to aarch64_expand_mov_immediate to handle it properly. */ |
| return mode != ptr_mode; |
| } |
| |
| return aarch64_tls_referenced_p (x); |
| } |
| |
| /* Return true if register REGNO is a valid index register. |
| STRICT_P is true if REG_OK_STRICT is in effect. */ |
| |
| bool |
| aarch64_regno_ok_for_index_p (int regno, bool strict_p) |
| { |
| if (!HARD_REGISTER_NUM_P (regno)) |
| { |
| if (!strict_p) |
| return true; |
| |
| if (!reg_renumber) |
| return false; |
| |
| regno = reg_renumber[regno]; |
| } |
| return GP_REGNUM_P (regno); |
| } |
| |
/* Return true if register REGNO is a valid base register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */
| |
| bool |
| aarch64_regno_ok_for_base_p (int regno, bool strict_p) |
| { |
| if (!HARD_REGISTER_NUM_P (regno)) |
| { |
| if (!strict_p) |
| return true; |
| |
| if (!reg_renumber) |
| return false; |
| |
| regno = reg_renumber[regno]; |
| } |
| |
| /* The fake registers will be eliminated to either the stack or |
| hard frame pointer, both of which are usually valid base registers. |
| Reload deals with the cases where the eliminated form isn't valid. */ |
| return (GP_REGNUM_P (regno) |
| || regno == SP_REGNUM |
| || regno == FRAME_POINTER_REGNUM |
| || regno == ARG_POINTER_REGNUM); |
| } |
| |
/* Return true if X is a valid base register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */
| |
| static bool |
| aarch64_base_register_rtx_p (rtx x, bool strict_p) |
| { |
| if (!strict_p && GET_CODE (x) == SUBREG) |
| x = SUBREG_REG (x); |
| |
| return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p)); |
| } |
| |
| /* Return true if address offset is a valid index. If it is, fill in INFO |
| appropriately. STRICT_P is true if REG_OK_STRICT is in effect. */ |
| |
| static bool |
| aarch64_classify_index (struct aarch64_address_info *info, rtx x, |
| machine_mode mode, bool strict_p) |
| { |
| enum aarch64_address_type type; |
| rtx index; |
| int shift; |
| |
| /* (reg:P) */ |
| if ((REG_P (x) || GET_CODE (x) == SUBREG) |
| && GET_MODE (x) == Pmode) |
| { |
| type = ADDRESS_REG_REG; |
| index = x; |
| shift = 0; |
| } |
| /* (sign_extend:DI (reg:SI)) */ |
| else if ((GET_CODE (x) == SIGN_EXTEND |
| || GET_CODE (x) ==
|