/* Machine description for AArch64 architecture.
Copyright (C) 2009-2015 Free Software Foundation, Inc.
Contributed by ARM Ltd.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "insn-codes.h"
#include "rtl.h"
#include "insn-attr.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "predict.h"
#include "basic-block.h"
#include "df.h"
#include "hard-reg-set.h"
#include "output.h"
#include "hashtab.h"
#include "function.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "reload.h"
#include "toplev.h"
#include "target.h"
#include "target-def.h"
#include "targhooks.h"
#include "ggc.h"
#include "tm_p.h"
#include "recog.h"
#include "langhooks.h"
#include "diagnostic-core.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
#include "dumpfile.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tm-constrs.h"
#include "sched-int.h"
/* Defined for convenience. */
#define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
/* Classifies an address.
ADDRESS_REG_IMM
A simple base register plus immediate offset.
ADDRESS_REG_WB
A base register indexed by immediate offset with writeback.
ADDRESS_REG_REG
A base register indexed by (optionally scaled) register.
ADDRESS_REG_UXTW
A base register indexed by (optionally scaled) zero-extended register.
ADDRESS_REG_SXTW
A base register indexed by (optionally scaled) sign-extended register.
ADDRESS_LO_SUM
A LO_SUM rtx with a base register and "LO12" symbol relocation.
ADDRESS_SYMBOLIC
A constant symbolic address, in pc-relative literal pool. */
enum aarch64_address_type {
ADDRESS_REG_IMM,
ADDRESS_REG_WB,
ADDRESS_REG_REG,
ADDRESS_REG_UXTW,
ADDRESS_REG_SXTW,
ADDRESS_LO_SUM,
ADDRESS_SYMBOLIC
};
struct aarch64_address_info {
enum aarch64_address_type type;
rtx base;
rtx offset;
int shift;
enum aarch64_symbol_type symbol_type;
};
struct simd_immediate_info
{
rtx value;
int shift;
int element_width;
bool mvn;
bool msl;
};
/* The current code model. */
enum aarch64_code_model aarch64_cmodel;
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS 1
#endif
static bool aarch64_composite_type_p (const_tree, machine_mode);
static bool aarch64_vfp_is_call_or_return_candidate (machine_mode,
const_tree,
machine_mode *, int *,
bool *);
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
static bool aarch64_vector_mode_supported_p (machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_vectorize_vec_perm_const_ok (machine_mode vmode,
const unsigned char *sel);
static int aarch64_address_cost (rtx, machine_mode, addr_space_t, bool);
/* Major revision number of the ARM Architecture implemented by the target. */
unsigned aarch64_architecture_version;
/* The processor for which instructions should be scheduled. */
enum aarch64_processor aarch64_tune = cortexa53;
/* The current tuning set. */
const struct tune_params *aarch64_tune_params;
/* Mask to specify which instructions we are allowed to generate. */
unsigned long aarch64_isa_flags = 0;
/* Mask to specify which instruction scheduling options should be used. */
unsigned long aarch64_tune_flags = 0;
/* Tuning parameters. */
static const struct cpu_addrcost_table generic_addrcost_table =
{
{
0, /* hi */
0, /* si */
0, /* di */
0, /* ti */
},
0, /* pre_modify */
0, /* post_modify */
0, /* register_offset */
0, /* register_extend */
0 /* imm_offset */
};
static const struct cpu_addrcost_table cortexa57_addrcost_table =
{
{
1, /* hi */
0, /* si */
0, /* di */
1, /* ti */
},
0, /* pre_modify */
0, /* post_modify */
0, /* register_offset */
0, /* register_extend */
0, /* imm_offset */
};
static const struct cpu_addrcost_table xgene1_addrcost_table =
{
{
1, /* hi */
0, /* si */
0, /* di */
1, /* ti */
},
1, /* pre_modify */
0, /* post_modify */
0, /* register_offset */
1, /* register_extend */
0, /* imm_offset */
};
static const struct cpu_regmove_cost generic_regmove_cost =
{
1, /* GP2GP */
/* Avoid the use of slow int<->fp moves for spilling by setting
their cost higher than memmov_cost. */
5, /* GP2FP */
5, /* FP2GP */
2 /* FP2FP */
};
static const struct cpu_regmove_cost cortexa57_regmove_cost =
{
1, /* GP2GP */
/* Avoid the use of slow int<->fp moves for spilling by setting
their cost higher than memmov_cost. */
5, /* GP2FP */
5, /* FP2GP */
2 /* FP2FP */
};
static const struct cpu_regmove_cost cortexa53_regmove_cost =
{
1, /* GP2GP */
/* Avoid the use of slow int<->fp moves for spilling by setting
their cost higher than memmov_cost. */
5, /* GP2FP */
5, /* FP2GP */
2 /* FP2FP */
};
static const struct cpu_regmove_cost thunderx_regmove_cost =
{
2, /* GP2GP */
2, /* GP2FP */
6, /* FP2GP */
4 /* FP2FP */
};
static const struct cpu_regmove_cost xgene1_regmove_cost =
{
1, /* GP2GP */
/* Avoid the use of slow int<->fp moves for spilling by setting
their cost higher than memmov_cost. */
8, /* GP2FP */
8, /* FP2GP */
2 /* FP2FP */
};
/* Generic costs for vector insn classes. */
static const struct cpu_vector_cost generic_vector_cost =
{
1, /* scalar_stmt_cost */
1, /* scalar_load_cost */
1, /* scalar_store_cost */
1, /* vec_stmt_cost */
1, /* vec_to_scalar_cost */
1, /* scalar_to_vec_cost */
1, /* vec_align_load_cost */
1, /* vec_unalign_load_cost */
1, /* vec_unalign_store_cost */
1, /* vec_store_cost */
3, /* cond_taken_branch_cost */
1 /* cond_not_taken_branch_cost */
};
/* Costs for vector insn classes for Cortex-A57. */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
1, /* scalar_stmt_cost */
4, /* scalar_load_cost */
1, /* scalar_store_cost */
3, /* vec_stmt_cost */
8, /* vec_to_scalar_cost */
8, /* scalar_to_vec_cost */
5, /* vec_align_load_cost */
5, /* vec_unalign_load_cost */
1, /* vec_unalign_store_cost */
1, /* vec_store_cost */
1, /* cond_taken_branch_cost */
1 /* cond_not_taken_branch_cost */
};
/* Costs for vector insn classes for XGene-1. */
static const struct cpu_vector_cost xgene1_vector_cost =
{
1, /* scalar_stmt_cost */
5, /* scalar_load_cost */
1, /* scalar_store_cost */
2, /* vec_stmt_cost */
4, /* vec_to_scalar_cost */
4, /* scalar_to_vec_cost */
10, /* vec_align_load_cost */
10, /* vec_unalign_load_cost */
2, /* vec_unalign_store_cost */
2, /* vec_store_cost */
2, /* cond_taken_branch_cost */
1 /* cond_not_taken_branch_cost */
};
#define AARCH64_FUSE_NOTHING (0)
#define AARCH64_FUSE_MOV_MOVK (1 << 0)
#define AARCH64_FUSE_ADRP_ADD (1 << 1)
#define AARCH64_FUSE_MOVK_MOVK (1 << 2)
#define AARCH64_FUSE_ADRP_LDR (1 << 3)
#define AARCH64_FUSE_CMP_BRANCH (1 << 4)
static const struct tune_params generic_tunings =
{
&cortexa57_extra_costs,
&generic_addrcost_table,
&generic_regmove_cost,
&generic_vector_cost,
4, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fuseable_ops */
8, /* function_align. */
8, /* jump_align. */
4, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1 /* vec_reassoc_width. */
};
static const struct tune_params cortexa53_tunings =
{
&cortexa53_extra_costs,
&generic_addrcost_table,
&cortexa53_regmove_cost,
&generic_vector_cost,
4, /* memmov_cost */
2, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fuseable_ops */
8, /* function_align. */
8, /* jump_align. */
4, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1 /* vec_reassoc_width. */
};
static const struct tune_params cortexa57_tunings =
{
&cortexa57_extra_costs,
&cortexa57_addrcost_table,
&cortexa57_regmove_cost,
&cortexa57_vector_cost,
4, /* memmov_cost */
3, /* issue_rate */
(AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
| AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops */
16, /* function_align. */
8, /* jump_align. */
4, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1 /* vec_reassoc_width. */
};
static const struct tune_params thunderx_tunings =
{
&thunderx_extra_costs,
&generic_addrcost_table,
&thunderx_regmove_cost,
&generic_vector_cost,
6, /* memmov_cost */
2, /* issue_rate */
AARCH64_FUSE_CMP_BRANCH, /* fuseable_ops */
8, /* function_align. */
8, /* jump_align. */
8, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1 /* vec_reassoc_width. */
};
static const struct tune_params xgene1_tunings =
{
&xgene1_extra_costs,
&xgene1_addrcost_table,
&xgene1_regmove_cost,
&xgene1_vector_cost,
6, /* memmov_cost */
4, /* issue_rate */
AARCH64_FUSE_NOTHING, /* fuseable_ops */
16, /* function_align. */
8, /* jump_align. */
16, /* loop_align. */
2, /* int_reassoc_width. */
4, /* fp_reassoc_width. */
1 /* vec_reassoc_width. */
};
/* A processor implementing AArch64. */
struct processor
{
const char *const name;
enum aarch64_processor core;
const char *arch;
unsigned architecture_version;
const unsigned long flags;
const struct tune_params *const tune;
};
/* Processor cores implementing AArch64. */
static const struct processor all_cores[] =
{
#define AARCH64_CORE(NAME, IDENT, SCHED, ARCH, FLAGS, COSTS) \
{NAME, SCHED, #ARCH, ARCH, FLAGS, &COSTS##_tunings},
#include "aarch64-cores.def"
#undef AARCH64_CORE
{"generic", cortexa53, "8", 8, AARCH64_FL_FOR_ARCH8, &generic_tunings},
{NULL, aarch64_none, NULL, 0, 0, NULL}
};
/* Architectures implementing AArch64. */
static const struct processor all_architectures[] =
{
#define AARCH64_ARCH(NAME, CORE, ARCH, FLAGS) \
{NAME, CORE, #ARCH, ARCH, FLAGS, NULL},
#include "aarch64-arches.def"
#undef AARCH64_ARCH
{NULL, aarch64_none, NULL, 0, 0, NULL}
};
/* Target specification. These are populated as command-line arguments
are processed, or NULL if not specified. */
static const struct processor *selected_arch;
static const struct processor *selected_cpu;
static const struct processor *selected_tune;
#define AARCH64_CPU_DEFAULT_FLAGS ((selected_cpu) ? selected_cpu->flags : 0)
/* An ISA extension in the co-processor and main instruction set space. */
struct aarch64_option_extension
{
const char *const name;
const unsigned long flags_on;
const unsigned long flags_off;
};
/* ISA extensions in AArch64. */
static const struct aarch64_option_extension all_extensions[] =
{
#define AARCH64_OPT_EXTENSION(NAME, FLAGS_ON, FLAGS_OFF) \
{NAME, FLAGS_ON, FLAGS_OFF},
#include "aarch64-option-extensions.def"
#undef AARCH64_OPT_EXTENSION
{NULL, 0, 0}
};
/* Used to track the size of an address when generating a pre/post
increment address. */
static machine_mode aarch64_memory_reference_mode;
/* A table of valid AArch64 "bitmask immediate" values for
logical instructions. */
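/* A note on the encoding, kept here as a reminder: a bitmask immediate is a
single contiguous run of ones within a 2, 4, 8, 16, 32 or 64-bit element,
possibly rotated, and replicated across the whole register. For example,
0x0ff00ff00ff00ff0 replicates an 8-bit run of ones within 16-bit elements.
There are AARCH64_NUM_BITMASKS (5334) distinct 64-bit values of this form. */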
#define AARCH64_NUM_BITMASKS 5334
static unsigned HOST_WIDE_INT aarch64_bitmasks[AARCH64_NUM_BITMASKS];
typedef enum aarch64_cond_code
{
AARCH64_EQ = 0, AARCH64_NE, AARCH64_CS, AARCH64_CC, AARCH64_MI, AARCH64_PL,
AARCH64_VS, AARCH64_VC, AARCH64_HI, AARCH64_LS, AARCH64_GE, AARCH64_LT,
AARCH64_GT, AARCH64_LE, AARCH64_AL, AARCH64_NV
}
aarch64_cc;
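/* Each condition is paired with its inverse in adjacent enumeration values,
so flipping the low bit inverts the condition: e.g. AARCH64_EQ (0) pairs
with AARCH64_NE (1), and AARCH64_GE (10) pairs with AARCH64_LT (11). */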
#define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
/* The condition codes of the processor, and the inverse function. */
static const char * const aarch64_condition_codes[] =
{
"eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
"hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};
static unsigned int
aarch64_min_divisions_for_recip_mul (enum machine_mode mode ATTRIBUTE_UNUSED)
{
return 2;
}
static int
aarch64_reassociation_width (unsigned opc ATTRIBUTE_UNUSED,
enum machine_mode mode)
{
if (VECTOR_MODE_P (mode))
return aarch64_tune_params->vec_reassoc_width;
if (INTEGRAL_MODE_P (mode))
return aarch64_tune_params->int_reassoc_width;
if (FLOAT_MODE_P (mode))
return aarch64_tune_params->fp_reassoc_width;
return 1;
}
/* Provide a mapping from gcc register numbers to dwarf register numbers. */
unsigned
aarch64_dbx_register_number (unsigned regno)
{
if (GP_REGNUM_P (regno))
return AARCH64_DWARF_R0 + regno - R0_REGNUM;
else if (regno == SP_REGNUM)
return AARCH64_DWARF_SP;
else if (FP_REGNUM_P (regno))
return AARCH64_DWARF_V0 + regno - V0_REGNUM;
/* Return values >= DWARF_FRAME_REGISTERS indicate that there is no
equivalent DWARF register. */
return DWARF_FRAME_REGISTERS;
}
/* Return TRUE if MODE is any of the large INT modes. */
static bool
aarch64_vect_struct_mode_p (machine_mode mode)
{
return mode == OImode || mode == CImode || mode == XImode;
}
/* Return TRUE if MODE is any of the vector modes. */
static bool
aarch64_vector_mode_p (machine_mode mode)
{
return aarch64_vector_mode_supported_p (mode)
|| aarch64_vect_struct_mode_p (mode);
}
/* Implement target hook TARGET_ARRAY_MODE_SUPPORTED_P. */
static bool
aarch64_array_mode_supported_p (machine_mode mode,
unsigned HOST_WIDE_INT nelems)
{
if (TARGET_SIMD
&& AARCH64_VALID_SIMD_QREG_MODE (mode)
&& (nelems >= 2 && nelems <= 4))
return true;
return false;
}
/* Implement HARD_REGNO_NREGS. */
int
aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
{
switch (aarch64_regno_regclass (regno))
{
case FP_REGS:
case FP_LO_REGS:
return (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG;
default:
return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
}
gcc_unreachable ();
}
/* Implement HARD_REGNO_MODE_OK. */
int
aarch64_hard_regno_mode_ok (unsigned regno, machine_mode mode)
{
if (GET_MODE_CLASS (mode) == MODE_CC)
return regno == CC_REGNUM;
if (regno == SP_REGNUM)
/* The purpose of comparing with ptr_mode is to support the
global register variable associated with the stack pointer
register via the syntax of asm ("wsp") in ILP32. */
return mode == Pmode || mode == ptr_mode;
if (regno == FRAME_POINTER_REGNUM || regno == ARG_POINTER_REGNUM)
return mode == Pmode;
if (GP_REGNUM_P (regno) && ! aarch64_vect_struct_mode_p (mode))
return 1;
if (FP_REGNUM_P (regno))
{
if (aarch64_vect_struct_mode_p (mode))
return
(regno + aarch64_hard_regno_nregs (regno, mode) - 1) <= V31_REGNUM;
else
return 1;
}
return 0;
}
/* Implement HARD_REGNO_CALLER_SAVE_MODE. */
machine_mode
aarch64_hard_regno_caller_save_mode (unsigned regno, unsigned nregs,
machine_mode mode)
{
/* Handle modes that fit within single registers. */
if (nregs == 1 && GET_MODE_SIZE (mode) <= 16)
{
if (GET_MODE_SIZE (mode) >= 4)
return mode;
else
return SImode;
}
/* Fall back to generic for multi-reg and very large modes. */
else
return choose_hard_reg_mode (regno, nregs, false);
}
/* Return true if calls to DECL should be treated as
long-calls (i.e. called via a register). */
static bool
aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
{
return false;
}
/* Return true if calls to symbol-ref SYM should be treated as
long-calls (i.e. called via a register). */
bool
aarch64_is_long_call_p (rtx sym)
{
return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
}
/* Return true if the offsets to a zero/sign-extract operation
represent an expression that matches an extend operation. The
operands represent the parameters from
(extract:MODE (mult (reg) (MULT_IMM)) (EXTRACT_IMM) (const_int 0)). */
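/* For example, with MODE == DImode, MULT_IMM == 4 and EXTRACT_IMM == 34,
(zero_extract:DI (mult (reg) (const_int 4)) (const_int 34) (const_int 0))
is a zero-extended 32-bit value shifted left by 2, and the checks below
accept it: 34 & ~7 == 32 (a power of two), 34 & 7 == 2 <= 4, and 4 == 1 << 2. */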
bool
aarch64_is_extend_from_extract (machine_mode mode, rtx mult_imm,
rtx extract_imm)
{
HOST_WIDE_INT mult_val, extract_val;
if (! CONST_INT_P (mult_imm) || ! CONST_INT_P (extract_imm))
return false;
mult_val = INTVAL (mult_imm);
extract_val = INTVAL (extract_imm);
if (extract_val > 8
&& extract_val < GET_MODE_BITSIZE (mode)
&& exact_log2 (extract_val & ~7) > 0
&& (extract_val & 7) <= 4
&& mult_val == (1 << (extract_val & 7)))
return true;
return false;
}
/* Emit an insn that's a simple single-set. Both the operands must be
known to be valid. */
inline static rtx
emit_set_insn (rtx x, rtx y)
{
return emit_insn (gen_rtx_SET (VOIDmode, x, y));
}
/* X and Y are two things to compare using CODE. Emit the compare insn and
return the rtx for register 0 in the proper mode. */
rtx
aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
{
machine_mode mode = SELECT_CC_MODE (code, x, y);
rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
return cc_reg;
}
/* Build the SYMBOL_REF for __tls_get_addr. */
static GTY(()) rtx tls_get_addr_libfunc;
rtx
aarch64_tls_get_addr (void)
{
if (!tls_get_addr_libfunc)
tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
return tls_get_addr_libfunc;
}
/* Return the TLS model to use for ADDR. */
static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
enum tls_model tls_kind = TLS_MODEL_NONE;
rtx sym, addend;
if (GET_CODE (addr) == CONST)
{
split_const (addr, &sym, &addend);
if (GET_CODE (sym) == SYMBOL_REF)
tls_kind = SYMBOL_REF_TLS_MODEL (sym);
}
else if (GET_CODE (addr) == SYMBOL_REF)
tls_kind = SYMBOL_REF_TLS_MODEL (addr);
return tls_kind;
}
/* We'll allow lo_sum's in our legitimate addresses so that combine
can take care of combining addresses where necessary, but for
generation purposes, we'll generate the address as:
RTL Absolute
tmp = hi (symbol_ref); adrp x1, foo
dest = lo_sum (tmp, symbol_ref); add dest, x1, :lo12:foo
nop
PIC TLS
adrp x1, :got:foo adrp tmp, :tlsgd:foo
ldr x1, [:got_lo12:foo] add dest, tmp, :tlsgd_lo12:foo
bl __tls_get_addr
nop
Load TLS symbol, depending on TLS mechanism and TLS access model.
Global Dynamic - Traditional TLS:
adrp tmp, :tlsgd:imm
add dest, tmp, #:tlsgd_lo12:imm
bl __tls_get_addr
Global Dynamic - TLS Descriptors:
adrp dest, :tlsdesc:imm
ldr tmp, [dest, #:tlsdesc_lo12:imm]
add dest, dest, #:tlsdesc_lo12:imm
blr tmp
mrs tp, tpidr_el0
add dest, dest, tp
Initial Exec:
mrs tp, tpidr_el0
adrp tmp, :gottprel:imm
ldr dest, [tmp, #:gottprel_lo12:imm]
add dest, dest, tp
Local Exec:
mrs tp, tpidr_el0
add t0, tp, #:tprel_hi12:imm, lsl #12
add t0, t0, #:tprel_lo12_nc:imm
*/
static void
aarch64_load_symref_appropriately (rtx dest, rtx imm,
enum aarch64_symbol_type type)
{
switch (type)
{
case SYMBOL_SMALL_ABSOLUTE:
{
/* In ILP32, the mode of dest can be either SImode or DImode. */
rtx tmp_reg = dest;
machine_mode mode = GET_MODE (dest);
gcc_assert (mode == Pmode || mode == ptr_mode);
if (can_create_pseudo_p ())
tmp_reg = gen_reg_rtx (mode);
emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
emit_insn (gen_add_losym (dest, tmp_reg, imm));
return;
}
case SYMBOL_TINY_ABSOLUTE:
emit_insn (gen_rtx_SET (Pmode, dest, imm));
return;
case SYMBOL_SMALL_GOT:
{
/* In ILP32, the mode of dest can be either SImode or DImode,
while the got entry is always of SImode size. The mode of
dest depends on how dest is used: if dest is assigned to a
pointer (e.g. in the memory), it has SImode; it may have
DImode if dest is dereferenced to access the memory.
This is why we have to handle three different ldr_got_small
patterns here (two patterns for ILP32). */
rtx tmp_reg = dest;
machine_mode mode = GET_MODE (dest);
if (can_create_pseudo_p ())
tmp_reg = gen_reg_rtx (mode);
emit_move_insn (tmp_reg, gen_rtx_HIGH (mode, imm));
if (mode == ptr_mode)
{
if (mode == DImode)
emit_insn (gen_ldr_got_small_di (dest, tmp_reg, imm));
else
emit_insn (gen_ldr_got_small_si (dest, tmp_reg, imm));
}
else
{
gcc_assert (mode == Pmode);
emit_insn (gen_ldr_got_small_sidi (dest, tmp_reg, imm));
}
return;
}
case SYMBOL_SMALL_TLSGD:
{
rtx_insn *insns;
rtx result = gen_rtx_REG (Pmode, R0_REGNUM);
start_sequence ();
aarch64_emit_call_insn (gen_tlsgd_small (result, imm));
insns = get_insns ();
end_sequence ();
RTL_CONST_CALL_P (insns) = 1;
emit_libcall_block (insns, dest, result, imm);
return;
}
case SYMBOL_SMALL_TLSDESC:
{
machine_mode mode = GET_MODE (dest);
rtx x0 = gen_rtx_REG (mode, R0_REGNUM);
rtx tp;
gcc_assert (mode == Pmode || mode == ptr_mode);
/* In ILP32, the got entry is always of SImode size. Unlike
small GOT, the dest is fixed at reg 0. */
if (TARGET_ILP32)
emit_insn (gen_tlsdesc_small_si (imm));
else
emit_insn (gen_tlsdesc_small_di (imm));
tp = aarch64_load_tp (NULL);
if (mode != Pmode)
tp = gen_lowpart (mode, tp);
emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, x0)));
set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
}
case SYMBOL_SMALL_GOTTPREL:
{
/* In ILP32, the mode of dest can be either SImode or DImode,
while the got entry is always of SImode size. The mode of
dest depends on how dest is used: if dest is assigned to a
pointer (e.g. in the memory), it has SImode; it may have
DImode if dest is dereferenced to access the memory.
This is why we have to handle three different tlsie_small
patterns here (two patterns for ILP32). */
machine_mode mode = GET_MODE (dest);
rtx tmp_reg = gen_reg_rtx (mode);
rtx tp = aarch64_load_tp (NULL);
if (mode == ptr_mode)
{
if (mode == DImode)
emit_insn (gen_tlsie_small_di (tmp_reg, imm));
else
{
emit_insn (gen_tlsie_small_si (tmp_reg, imm));
tp = gen_lowpart (mode, tp);
}
}
else
{
gcc_assert (mode == Pmode);
emit_insn (gen_tlsie_small_sidi (tmp_reg, imm));
}
emit_insn (gen_rtx_SET (mode, dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
}
case SYMBOL_SMALL_TPREL:
{
rtx tp = aarch64_load_tp (NULL);
if (GET_MODE (dest) != Pmode)
tp = gen_lowpart (GET_MODE (dest), tp);
emit_insn (gen_tlsle_small (dest, tp, imm));
set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
}
case SYMBOL_TINY_GOT:
emit_insn (gen_ldr_got_tiny (dest, imm));
return;
default:
gcc_unreachable ();
}
}
/* Emit a move from SRC to DEST. Assume that the move expanders can
handle all moves if !can_create_pseudo_p (). The distinction is
important because, unlike emit_move_insn, the move expanders know
how to force Pmode objects into the constant pool even when the
constant pool address is not itself legitimate. */
static rtx
aarch64_emit_move (rtx dest, rtx src)
{
return (can_create_pseudo_p ()
? emit_move_insn (dest, src)
: emit_move_insn_1 (dest, src));
}
/* Split a 128-bit move operation into two 64-bit move operations,
taking care to handle partial overlap of register to register
copies. Special cases are needed when moving between GP regs and
FP regs. SRC can be a register, constant or memory; DST a register
or memory. If either operand is memory it must not have any side
effects. */
void
aarch64_split_128bit_move (rtx dst, rtx src)
{
rtx dst_lo, dst_hi;
rtx src_lo, src_hi;
machine_mode mode = GET_MODE (dst);
gcc_assert (mode == TImode || mode == TFmode);
gcc_assert (!(side_effects_p (src) || side_effects_p (dst)));
gcc_assert (mode == GET_MODE (src) || GET_MODE (src) == VOIDmode);
if (REG_P (dst) && REG_P (src))
{
int src_regno = REGNO (src);
int dst_regno = REGNO (dst);
/* Handle FP <-> GP regs. */
if (FP_REGNUM_P (dst_regno) && GP_REGNUM_P (src_regno))
{
src_lo = gen_lowpart (word_mode, src);
src_hi = gen_highpart (word_mode, src);
if (mode == TImode)
{
emit_insn (gen_aarch64_movtilow_di (dst, src_lo));
emit_insn (gen_aarch64_movtihigh_di (dst, src_hi));
}
else
{
emit_insn (gen_aarch64_movtflow_di (dst, src_lo));
emit_insn (gen_aarch64_movtfhigh_di (dst, src_hi));
}
return;
}
else if (GP_REGNUM_P (dst_regno) && FP_REGNUM_P (src_regno))
{
dst_lo = gen_lowpart (word_mode, dst);
dst_hi = gen_highpart (word_mode, dst);
if (mode == TImode)
{
emit_insn (gen_aarch64_movdi_tilow (dst_lo, src));
emit_insn (gen_aarch64_movdi_tihigh (dst_hi, src));
}
else
{
emit_insn (gen_aarch64_movdi_tflow (dst_lo, src));
emit_insn (gen_aarch64_movdi_tfhigh (dst_hi, src));
}
return;
}
}
dst_lo = gen_lowpart (word_mode, dst);
dst_hi = gen_highpart (word_mode, dst);
src_lo = gen_lowpart (word_mode, src);
src_hi = gen_highpart_mode (word_mode, mode, src);
/* At most one pairing may overlap. */
if (reg_overlap_mentioned_p (dst_lo, src_hi))
{
aarch64_emit_move (dst_hi, src_hi);
aarch64_emit_move (dst_lo, src_lo);
}
else
{
aarch64_emit_move (dst_lo, src_lo);
aarch64_emit_move (dst_hi, src_hi);
}
}
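/* Return TRUE if a 128-bit move from SRC to DST needs to be split into
two word-sized moves; only a direct copy between two FP registers can
be handled as a single move. */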
bool
aarch64_split_128bit_move_p (rtx dst, rtx src)
{
return (! REG_P (src)
|| ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
/* Split a complex SIMD combine. */
void
aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
{
machine_mode src_mode = GET_MODE (src1);
machine_mode dst_mode = GET_MODE (dst);
gcc_assert (VECTOR_MODE_P (dst_mode));
if (REG_P (dst) && REG_P (src1) && REG_P (src2))
{
rtx (*gen) (rtx, rtx, rtx);
switch (src_mode)
{
case V8QImode:
gen = gen_aarch64_simd_combinev8qi;
break;
case V4HImode:
gen = gen_aarch64_simd_combinev4hi;
break;
case V2SImode:
gen = gen_aarch64_simd_combinev2si;
break;
case V2SFmode:
gen = gen_aarch64_simd_combinev2sf;
break;
case DImode:
gen = gen_aarch64_simd_combinedi;
break;
case DFmode:
gen = gen_aarch64_simd_combinedf;
break;
default:
gcc_unreachable ();
}
emit_insn (gen (dst, src1, src2));
return;
}
}
/* Split a complex SIMD move. */
void
aarch64_split_simd_move (rtx dst, rtx src)
{
machine_mode src_mode = GET_MODE (src);
machine_mode dst_mode = GET_MODE (dst);
gcc_assert (VECTOR_MODE_P (dst_mode));
if (REG_P (dst) && REG_P (src))
{
rtx (*gen) (rtx, rtx);
gcc_assert (VECTOR_MODE_P (src_mode));
switch (src_mode)
{
case V16QImode:
gen = gen_aarch64_split_simd_movv16qi;
break;
case V8HImode:
gen = gen_aarch64_split_simd_movv8hi;
break;
case V4SImode:
gen = gen_aarch64_split_simd_movv4si;
break;
case V2DImode:
gen = gen_aarch64_split_simd_movv2di;
break;
case V4SFmode:
gen = gen_aarch64_split_simd_movv4sf;
break;
case V2DFmode:
gen = gen_aarch64_split_simd_movv2df;
break;
default:
gcc_unreachable ();
}
emit_insn (gen (dst, src));
return;
}
}
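/* Return TRUE if the constant X (of mode XMODE) equals the constant Y
(of mode YMODE) zero-extended to XMODE. */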
bool
aarch64_zero_extend_const_eq (machine_mode xmode, rtx x,
machine_mode ymode, rtx y)
{
rtx r = simplify_const_unary_operation (ZERO_EXTEND, xmode, y, ymode);
gcc_assert (r != NULL);
return rtx_equal_p (x, r);
}
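/* Put VALUE into a register of MODE: allocate a fresh pseudo when that is
possible, otherwise move VALUE into X and return X. */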
static rtx
aarch64_force_temporary (machine_mode mode, rtx x, rtx value)
{
if (can_create_pseudo_p ())
return force_reg (mode, value);
else
{
x = aarch64_emit_move (x, value);
return x;
}
}
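/* Return an rtx representing REG + OFFSET in MODE. If OFFSET is not a
valid addition immediate, first load it into a temporary obtained via
TEMP and add that to REG. */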
static rtx
aarch64_add_offset (machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
if (!aarch64_plus_immediate (GEN_INT (offset), mode))
{
rtx high;
/* Load the full offset into a register. This
might be improvable in the future. */
high = GEN_INT (offset);
offset = 0;
high = aarch64_force_temporary (mode, temp, high);
reg = aarch64_force_temporary (mode, temp,
gen_rtx_PLUS (mode, high, reg));
}
return plus_constant (mode, reg, offset);
}
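/* Work out the number of instructions needed to move the immediate IMM
of MODE into DEST, emitting them when GENERATE is true. Return the
instruction count. */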
static int
aarch64_internal_mov_immediate (rtx dest, rtx imm, bool generate,
machine_mode mode)
{
unsigned HOST_WIDE_INT mask;
int i;
bool first;
unsigned HOST_WIDE_INT val;
bool subtargets;
rtx subtarget;
int one_match, zero_match, first_not_ffff_match;
int num_insns = 0;
if (CONST_INT_P (imm) && aarch64_move_imm (INTVAL (imm), mode))
{
if (generate)
emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
num_insns++;
return num_insns;
}
if (mode == SImode)
{
/* We know we can't do this in 1 insn, and we must be able to do it
in two; so don't mess around looking for sequences that don't buy
us anything. */
if (generate)
{
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (INTVAL (imm) & 0xffff)));
emit_insn (gen_insv_immsi (dest, GEN_INT (16),
GEN_INT ((INTVAL (imm) >> 16) & 0xffff)));
}
num_insns += 2;
return num_insns;
}
/* Remaining cases are all for DImode. */
val = INTVAL (imm);
subtargets = optimize && can_create_pseudo_p ();
one_match = 0;
zero_match = 0;
mask = 0xffff;
first_not_ffff_match = -1;
for (i = 0; i < 64; i += 16, mask <<= 16)
{
if ((val & mask) == mask)
one_match++;
else
{
if (first_not_ffff_match < 0)
first_not_ffff_match = i;
if ((val & mask) == 0)
zero_match++;
}
}
if (one_match == 2)
{
/* Set one of the quarters and then insert back into result. */
mask = 0xffffll << first_not_ffff_match;
if (generate)
{
emit_insn (gen_rtx_SET (VOIDmode, dest, GEN_INT (val | mask)));
emit_insn (gen_insv_immdi (dest, GEN_INT (first_not_ffff_match),
GEN_INT ((val >> first_not_ffff_match)
& 0xffff)));
}
num_insns += 2;
return num_insns;
}
if (zero_match == 2)
goto simple_sequence;
mask = 0x0ffff0000UL;
for (i = 16; i < 64; i += 16, mask <<= 16)
{
HOST_WIDE_INT comp = mask & ~(mask - 1);
if (aarch64_uimm12_shift (val - (val & mask)))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (val & mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - (val & mask))));
}
num_insns += 2;
return num_insns;
}
else if (aarch64_uimm12_shift (-(val - ((val + comp) & mask))))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT ((val + comp) & mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - ((val + comp) & mask))));
}
num_insns += 2;
return num_insns;
}
else if (aarch64_uimm12_shift (val - ((val - comp) | ~mask)))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT ((val - comp) | ~mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - ((val - comp) | ~mask))));
}
num_insns += 2;
return num_insns;
}
else if (aarch64_uimm12_shift (-(val - (val | ~mask))))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (val | ~mask)));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - (val | ~mask))));
}
num_insns += 2;
return num_insns;
}
}
/* See if we can do it by arithmetically combining two
immediates. */
for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
{
int j;
mask = 0xffff;
if (aarch64_uimm12_shift (val - aarch64_bitmasks[i])
|| aarch64_uimm12_shift (-val + aarch64_bitmasks[i]))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (DImode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (aarch64_bitmasks[i])));
emit_insn (gen_adddi3 (dest, subtarget,
GEN_INT (val - aarch64_bitmasks[i])));
}
num_insns += 2;
return num_insns;
}
for (j = 0; j < 64; j += 16, mask <<= 16)
{
if ((aarch64_bitmasks[i] & ~mask) == (val & ~mask))
{
if (generate)
{
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (aarch64_bitmasks[i])));
emit_insn (gen_insv_immdi (dest, GEN_INT (j),
GEN_INT ((val >> j) & 0xffff)));
}
num_insns += 2;
return num_insns;
}
}
}
/* See if we can do it by logically combining two immediates. */
for (i = 0; i < AARCH64_NUM_BITMASKS; i++)
{
if ((aarch64_bitmasks[i] & val) == aarch64_bitmasks[i])
{
int j;
for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
if (val == (aarch64_bitmasks[i] | aarch64_bitmasks[j]))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (mode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (aarch64_bitmasks[i])));
emit_insn (gen_iordi3 (dest, subtarget,
GEN_INT (aarch64_bitmasks[j])));
}
num_insns += 2;
return num_insns;
}
}
else if ((val & aarch64_bitmasks[i]) == val)
{
int j;
for (j = i + 1; j < AARCH64_NUM_BITMASKS; j++)
if (val == (aarch64_bitmasks[j] & aarch64_bitmasks[i]))
{
if (generate)
{
subtarget = subtargets ? gen_reg_rtx (mode) : dest;
emit_insn (gen_rtx_SET (VOIDmode, subtarget,
GEN_INT (aarch64_bitmasks[j])));
emit_insn (gen_anddi3 (dest, subtarget,
GEN_INT (aarch64_bitmasks[i])));
}
num_insns += 2;
return num_insns;
}
}
}
if (one_match > zero_match)
{
/* Set either first three quarters or all but the third. */
mask = 0xffffll << (16 - first_not_ffff_match);
if (generate)
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (val | mask | 0xffffffff00000000ull)));
num_insns ++;
/* Now insert other two quarters. */
for (i = first_not_ffff_match + 16, mask <<= (first_not_ffff_match << 1);
i < 64; i += 16, mask <<= 16)
{
if ((val & mask) != mask)
{
if (generate)
emit_insn (gen_insv_immdi (dest, GEN_INT (i),
GEN_INT ((val >> i) & 0xffff)));
num_insns ++;
}
}
return num_insns;
}
simple_sequence:
first = true;
mask = 0xffff;
for (i = 0; i < 64; i += 16, mask <<= 16)
{
if ((val & mask) != 0)
{
if (first)
{
if (generate)
emit_insn (gen_rtx_SET (VOIDmode, dest,
GEN_INT (val & mask)));
num_insns ++;
first = false;
}
else
{
if (generate)
emit_insn (gen_insv_immdi (dest, GEN_INT (i),
GEN_INT ((val >> i) & 0xffff)));
num_insns ++;
}
}
}
return num_insns;
}
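/* Expand a move of the immediate, symbolic or constant-pool operand IMM
into the SImode or DImode register DEST. */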
void
aarch64_expand_mov_immediate (rtx dest, rtx imm)
{
machine_mode mode = GET_MODE (dest);
gcc_assert (mode == SImode || mode == DImode);
/* Check on what type of symbol it is. */
if (GET_CODE (imm) == SYMBOL_REF
|| GET_CODE (imm) == LABEL_REF
|| GET_CODE (imm) == CONST)
{
rtx mem, base, offset;
enum aarch64_symbol_type sty;
/* If we have (const (plus symbol offset)), separate out the offset
before we start classifying the symbol. */
split_const (imm, &base, &offset);
sty = aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR);
switch (sty)
{
case SYMBOL_FORCE_TO_MEM:
if (offset != const0_rtx
&& targetm.cannot_force_const_mem (mode, imm))
{
gcc_assert (can_create_pseudo_p ());
base = aarch64_force_temporary (mode, dest, base);
base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
aarch64_emit_move (dest, base);
return;
}
mem = force_const_mem (ptr_mode, imm);
gcc_assert (mem);
if (mode != ptr_mode)
mem = gen_rtx_ZERO_EXTEND (mode, mem);
emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
return;
case SYMBOL_SMALL_TLSGD:
case SYMBOL_SMALL_TLSDESC:
case SYMBOL_SMALL_GOTTPREL:
case SYMBOL_SMALL_GOT:
case SYMBOL_TINY_GOT:
if (offset != const0_rtx)
{
gcc_assert(can_create_pseudo_p ());
base = aarch64_force_temporary (mode, dest, base);
base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
aarch64_emit_move (dest, base);
return;
}
/* FALLTHRU */
case SYMBOL_SMALL_TPREL:
case SYMBOL_SMALL_ABSOLUTE:
case SYMBOL_TINY_ABSOLUTE:
aarch64_load_symref_appropriately (dest, imm, sty);
return;
default:
gcc_unreachable ();
}
}
if (!CONST_INT_P (imm))
{
if (GET_CODE (imm) == HIGH)
emit_insn (gen_rtx_SET (VOIDmode, dest, imm));
else
{
rtx mem = force_const_mem (mode, imm);
gcc_assert (mem);
emit_insn (gen_rtx_SET (VOIDmode, dest, mem));
}
return;
}
aarch64_internal_mov_immediate (dest, imm, true, GET_MODE (dest));
}
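/* Implement TARGET_FUNCTION_OK_FOR_SIBCALL. */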
static bool
aarch64_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
tree exp ATTRIBUTE_UNUSED)
{
/* Currently, always true. */
return true;
}
/* Implement TARGET_PASS_BY_REFERENCE. */
static bool
aarch64_pass_by_reference (cumulative_args_t pcum ATTRIBUTE_UNUSED,
machine_mode mode,
const_tree type,
bool named ATTRIBUTE_UNUSED)
{
HOST_WIDE_INT size;
machine_mode dummymode;
int nregs;
/* GET_MODE_SIZE (BLKmode) is useless since it is 0. */
size = (mode == BLKmode && type)
? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
/* Aggregates are passed by reference based on their size. */
if (type && AGGREGATE_TYPE_P (type))
{
size = int_size_in_bytes (type);
}
/* Variable sized arguments are always returned by reference. */
if (size < 0)
return true;
/* Can this be a candidate to be passed in fp/simd register(s)? */
if (aarch64_vfp_is_call_or_return_candidate (mode, type,
&dummymode, &nregs,
NULL))
return false;
/* Arguments which are variable sized or larger than 2 registers are
passed by reference unless they are a homogeneous floating-point
aggregate. */
return size > 2 * UNITS_PER_WORD;
}
/* Return TRUE if VALTYPE is padded to its least significant bits. */
static bool
aarch64_return_in_msb (const_tree valtype)
{
machine_mode dummy_mode;
int dummy_int;
/* Never happens in little-endian mode. */
if (!BYTES_BIG_ENDIAN)
return false;
/* Only composite types smaller than or equal to 16 bytes can
be potentially returned in registers. */
if (!aarch64_composite_type_p (valtype, TYPE_MODE (valtype))
|| int_size_in_bytes (valtype) <= 0
|| int_size_in_bytes (valtype) > 16)
return false;
/* But not a composite that is an HFA (Homogeneous Floating-point Aggregate)
or an HVA (Homogeneous Short-Vector Aggregate); such a special composite
is always passed/returned in the least significant bits of fp/simd
register(s). */
if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (valtype), valtype,
&dummy_mode, &dummy_int, NULL))
return false;
return true;
}
/* Implement TARGET_FUNCTION_VALUE.
Define how to find the value returned by a function. */
static rtx
aarch64_function_value (const_tree type, const_tree func,
bool outgoing ATTRIBUTE_UNUSED)
{
machine_mode mode;
int unsignedp;
int count;
machine_mode ag_mode;
mode = TYPE_MODE (type);
if (INTEGRAL_TYPE_P (type))
mode = promote_function_mode (type, mode, &unsignedp, func, 1);
if (aarch64_return_in_msb (type))
{
HOST_WIDE_INT size = int_size_in_bytes (type);
if (size % UNITS_PER_WORD != 0)
{
size += UNITS_PER_WORD - size % UNITS_PER_WORD;
mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
}
}
if (aarch64_vfp_is_call_or_return_candidate (mode, type,
&ag_mode, &count, NULL))
{
if (!aarch64_composite_type_p (type, mode))
{
gcc_assert (count == 1 && mode == ag_mode);
return gen_rtx_REG (mode, V0_REGNUM);
}
else
{
int i;
rtx par;
par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
for (i = 0; i < count; i++)
{
rtx tmp = gen_rtx_REG (ag_mode, V0_REGNUM + i);
tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
GEN_INT (i * GET_MODE_SIZE (ag_mode)));
XVECEXP (par, 0, i) = tmp;
}
return par;
}
}
else
return gen_rtx_REG (mode, R0_REGNUM);
}
/* Implements TARGET_FUNCTION_VALUE_REGNO_P.
Return true if REGNO is the number of a hard register in which the values
of called function may come back. */
static bool
aarch64_function_value_regno_p (const unsigned int regno)
{
/* A maximum of 16 bytes can be returned in the general registers. Examples
of 16-byte return values are: 128-bit integers and 16-byte small
structures (excluding homogeneous floating-point aggregates). */
if (regno == R0_REGNUM || regno == R1_REGNUM)
return true;
/* Up to four fp/simd registers can return a function value, e.g. a
homogeneous floating-point aggregate having four members. */
if (regno >= V0_REGNUM && regno < V0_REGNUM + HA_MAX_NUM_FLDS)
return TARGET_FLOAT;
return false;
}
/* Implement TARGET_RETURN_IN_MEMORY.
If the type T of the result of a function is such that
void func (T arg)
would require that arg be passed as a value in a register (or set of
registers) according to the parameter passing rules, then the result
is returned in the same registers as would be used for such an
argument. */
static bool
aarch64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED)
{
HOST_WIDE_INT size;
machine_mode ag_mode;
int count;
if (!AGGREGATE_TYPE_P (type)
&& TREE_CODE (type) != COMPLEX_TYPE
&& TREE_CODE (type) != VECTOR_TYPE)
/* Simple scalar types are always returned in registers. */
return false;
if (aarch64_vfp_is_call_or_return_candidate (TYPE_MODE (type),
type,
&ag_mode,
&count,
NULL))
return false;
/* Types larger than 2 registers are returned in memory. */
size = int_size_in_bytes (type);
return (size < 0 || size > 2 * UNITS_PER_WORD);
}
static bool
aarch64_vfp_is_call_candidate (cumulative_args_t pcum_v, machine_mode mode,
const_tree type, int *nregs)
{
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
return aarch64_vfp_is_call_or_return_candidate (mode,
type,
&pcum->aapcs_vfp_rmode,
nregs,
NULL);
}
/* Given MODE and TYPE of a function argument, return the alignment in
bits. The idea is to suppress any stronger alignment requested by
the user and opt for the natural alignment (specified in AAPCS64 \S 4.1).
This is a helper function for local use only. */
static unsigned int
aarch64_function_arg_alignment (machine_mode mode, const_tree type)
{
unsigned int alignment;
if (type)
{
if (!integer_zerop (TYPE_SIZE (type)))
{
if (TYPE_MODE (type) == mode)
alignment = TYPE_ALIGN (type);
else
alignment = GET_MODE_ALIGNMENT (mode);
}
else
alignment = 0;
}
else
alignment = GET_MODE_ALIGNMENT (mode);
return alignment;
}
/* Layout a function argument according to the AAPCS64 rules. The rule
numbers refer to the rule numbers in the AAPCS64. */
static void
aarch64_layout_arg (cumulative_args_t pcum_v, machine_mode mode,
const_tree type,
bool named ATTRIBUTE_UNUSED)
{
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
int ncrn, nvrn, nregs;
bool allocate_ncrn, allocate_nvrn;
HOST_WIDE_INT size;
/* We need to do this once per argument. */
if (pcum->aapcs_arg_processed)
return;
pcum->aapcs_arg_processed = true;
/* Size in bytes, rounded up to the nearest multiple of 8 bytes. */
size
= AARCH64_ROUND_UP (type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode),
UNITS_PER_WORD);
allocate_ncrn = (type) ? !(FLOAT_TYPE_P (type)) : !FLOAT_MODE_P (mode);
allocate_nvrn = aarch64_vfp_is_call_candidate (pcum_v,
mode,
type,
&nregs);
/* allocate_ncrn may be a false positive, but allocate_nvrn is quite reliable.
The following code thus handles passing by SIMD/FP registers first. */
nvrn = pcum->aapcs_nvrn;
/* C.1 - C.5 for floating point, homogeneous floating-point aggregates (HFA)
and homogeneous short-vector aggregates (HVA). */
if (allocate_nvrn)
{
if (nvrn + nregs <= NUM_FP_ARG_REGS)
{
pcum->aapcs_nextnvrn = nvrn + nregs;
if (!aarch64_composite_type_p (type, mode))
{
gcc_assert (nregs == 1);
pcum->aapcs_reg = gen_rtx_REG (mode, V0_REGNUM + nvrn);
}
else
{
rtx par;
int i;
par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
for (i = 0; i < nregs; i++)
{
rtx tmp = gen_rtx_REG (pcum->aapcs_vfp_rmode,
V0_REGNUM + nvrn + i);
tmp = gen_rtx_EXPR_LIST
(VOIDmode, tmp,
GEN_INT (i * GET_MODE_SIZE (pcum->aapcs_vfp_rmode)));
XVECEXP (par, 0, i) = tmp;
}
pcum->aapcs_reg = par;
}
return;
}
else
{
/* C.3 NSRN is set to 8. */
pcum->aapcs_nextnvrn = NUM_FP_ARG_REGS;
goto on_stack;
}
}
ncrn = pcum->aapcs_ncrn;
nregs = size / UNITS_PER_WORD;
/* C.6 - C.9, though the sign and zero extension semantics are
handled elsewhere. This is the case where the argument fits
entirely in general registers. */
if (allocate_ncrn && (ncrn + nregs <= NUM_ARG_REGS))
{
unsigned int alignment = aarch64_function_arg_alignment (mode, type);
gcc_assert (nregs == 0 || nregs == 1 || nregs == 2);
/* C.8 if the argument has an alignment of 16 then the NGRN is
rounded up to the next even number. */
if (nregs == 2 && alignment == 16 * BITS_PER_UNIT && ncrn % 2)
{
++ncrn;
gcc_assert (ncrn + nregs <= NUM_ARG_REGS);
}
/* NREGS can be 0 when e.g. an empty structure is to be passed.
A reg is still generated for it, but the caller should be smart
enough not to use it. */
if (nregs == 0 || nregs == 1 || GET_MODE_CLASS (mode) == MODE_INT)
{
pcum->aapcs_reg = gen_rtx_REG (mode, R0_REGNUM + ncrn);
}
else
{
rtx par;
int i;
par = gen_rtx_PARALLEL (mode, rtvec_alloc (nregs));
for (i = 0; i < nregs; i++)
{
rtx tmp = gen_rtx_REG (word_mode, R0_REGNUM + ncrn + i);
tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
GEN_INT (i * UNITS_PER_WORD));
XVECEXP (par, 0, i) = tmp;
}
pcum->aapcs_reg = par;
}
pcum->aapcs_nextncrn = ncrn + nregs;
return;
}
/* C.11 */
pcum->aapcs_nextncrn = NUM_ARG_REGS;
/* The argument is passed on stack; record the needed number of words for
this argument and align the total size if necessary. */
on_stack:
pcum->aapcs_stack_words = size / UNITS_PER_WORD;
if (aarch64_function_arg_alignment (mode, type) == 16 * BITS_PER_UNIT)
pcum->aapcs_stack_size = AARCH64_ROUND_UP (pcum->aapcs_stack_size,
16 / UNITS_PER_WORD);
return;
}
/* Implement TARGET_FUNCTION_ARG. */
static rtx
aarch64_function_arg (cumulative_args_t pcum_v, machine_mode mode,
const_tree type, bool named)
{
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
gcc_assert (pcum->pcs_variant == ARM_PCS_AAPCS64);
if (mode == VOIDmode)
return NULL_RTX;
aarch64_layout_arg (pcum_v, mode, type, named);
return pcum->aapcs_reg;
}
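/* Initialise the cumulative argument-layout state in PCUM before the
arguments of a call or function type are processed. */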
void
aarch64_init_cumulative_args (CUMULATIVE_ARGS *pcum,
const_tree fntype ATTRIBUTE_UNUSED,
rtx libname ATTRIBUTE_UNUSED,
const_tree fndecl ATTRIBUTE_UNUSED,
unsigned n_named ATTRIBUTE_UNUSED)
{
pcum->aapcs_ncrn = 0;
pcum->aapcs_nvrn = 0;
pcum->aapcs_nextncrn = 0;
pcum->aapcs_nextnvrn = 0;
pcum->pcs_variant = ARM_PCS_AAPCS64;
pcum->aapcs_reg = NULL_RTX;
pcum->aapcs_arg_processed = false;
pcum->aapcs_stack_words = 0;
pcum->aapcs_stack_size = 0;
return;
}
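/* Implement TARGET_FUNCTION_ARG_ADVANCE. */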
static void
aarch64_function_arg_advance (cumulative_args_t pcum_v,
machine_mode mode,
const_tree type,
bool named)
{
CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
if (pcum->pcs_variant == ARM_PCS_AAPCS64)
{
aarch64_layout_arg (pcum_v, mode, type, named);
gcc_assert ((pcum->aapcs_reg != NULL_RTX)
!= (pcum->aapcs_stack_words != 0));
pcum->aapcs_arg_processed = false;
pcum->aapcs_ncrn = pcum->aapcs_nextncrn;
pcum->aapcs_nvrn = pcum->aapcs_nextnvrn;
pcum->aapcs_stack_size += pcum->aapcs_stack_words;
pcum->aapcs_stack_words = 0;
pcum->aapcs_reg = NULL_RTX;
}
}
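/* Return true if REGNO may be used to pass function arguments, i.e. it is
one of the first NUM_ARG_REGS general registers or the first
NUM_FP_ARG_REGS vector registers. */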
bool
aarch64_function_arg_regno_p (unsigned regno)
{
return ((GP_REGNUM_P (regno) && regno < R0_REGNUM + NUM_ARG_REGS)
|| (FP_REGNUM_P (regno) && regno < V0_REGNUM + NUM_FP_ARG_REGS));
}
/* Implement FUNCTION_ARG_BOUNDARY. Every parameter gets at least
PARM_BOUNDARY bits of alignment, but will be given anything up
to STACK_BOUNDARY bits if the type requires it. This makes sure
that both before and after the layout of each argument, the Next
Stacked Argument Address (NSAA) will have a minimum alignment of
8 bytes. */
static unsigned int
aarch64_function_arg_boundary (machine_mode mode, const_tree type)
{
unsigned int alignment = aarch64_function_arg_alignment (mode, type);
if (alignment < PARM_BOUNDARY)
alignment = PARM_BOUNDARY;
if (alignment > STACK_BOUNDARY)
alignment = STACK_BOUNDARY;
return alignment;
}
/* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
Return true if an argument passed on the stack should be padded upwards,
i.e. if the least-significant byte of the stack slot has useful data.
Small aggregate types are placed in the lowest memory address.
The related parameter passing rules are B.4, C.3, C.5 and C.14. */
bool
aarch64_pad_arg_upward (machine_mode mode, const_tree type)
{
/* On little-endian targets, the least significant byte of every stack
argument is passed at the lowest byte address of the stack slot. */
if (!BYTES_BIG_ENDIAN)
return true;
/* Otherwise, integral, floating-point and pointer types are padded downward:
the least significant byte of a stack argument is passed at the highest
byte address of the stack slot. */
if (type
? (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type)
|| POINTER_TYPE_P (type))
: (SCALAR_INT_MODE_P (mode) || SCALAR_FLOAT_MODE_P (mode)))
return false;
/* Everything else padded upward, i.e. data in first byte of stack slot. */
return true;
}
/* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
It specifies padding for the last (possibly the only)
element of a block move between registers and memory. Assuming
the block is in memory, padding upward means that the last
element is padded after its most significant byte, while with
downward padding the last element is padded on its least
significant byte side.
Small aggregates and small complex types are always padded
upwards.
We don't need to worry about homogeneous floating-point or
short-vector aggregates; their move is not affected by the
padding direction determined here. Regardless of endianness,
each element of such an aggregate is put in the least
significant bits of a fp/simd register.
Return !BYTES_BIG_ENDIAN if the least significant byte of the
register has useful data, and return the opposite if the most
significant byte does. */
bool
aarch64_pad_reg_upward (machine_mode mode, const_tree type,
bool first ATTRIBUTE_UNUSED)
{
/* Small composite types are always padded upward. */
if (BYTES_BIG_ENDIAN && aarch64_composite_type_p (type, mode))
{
HOST_WIDE_INT size = (type ? int_size_in_bytes (type)
: GET_MODE_SIZE (mode));
if (size < 2 * UNITS_PER_WORD)
return true;
}
/* Otherwise, use the default padding. */
return !BYTES_BIG_ENDIAN;
}
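/* Implement TARGET_LIBGCC_CMP_RETURN_MODE. */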
static machine_mode
aarch64_libgcc_cmp_return_mode (void)
{
return SImode;
}
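/* Implement TARGET_FRAME_POINTER_REQUIRED. */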
static bool
aarch64_frame_pointer_required (void)
{
/* In aarch64_override_options_after_change
flag_omit_leaf_frame_pointer turns off the frame pointer by
default. Turn it back on now if we've not got a leaf
function. */
if (flag_omit_leaf_frame_pointer
&& (!crtl->is_leaf || df_regs_ever_live_p (LR_REGNUM)))
return true;
return false;
}
/* Mark the registers that need to be saved by the callee and calculate
the size of the callee-saved registers area and frame record (both FP
and LR may be omitted). */
static void
aarch64_layout_frame (void)
{
HOST_WIDE_INT offset = 0;
int regno;
if (reload_completed && cfun->machine->frame.laid_out)
return;
#define SLOT_NOT_REQUIRED (-2)
#define SLOT_REQUIRED (-1)
cfun->machine->frame.wb_candidate1 = FIRST_PSEUDO_REGISTER;
cfun->machine->frame.wb_candidate2 = FIRST_PSEUDO_REGISTER;
/* First mark all the registers that really need to be saved... */
for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
cfun->machine->frame.reg_offset[regno] = SLOT_NOT_REQUIRED;
/* ... that includes the eh data registers (if needed)... */
if (crtl->calls_eh_return)
for (regno = 0; EH_RETURN_DATA_REGNO (regno) != INVALID_REGNUM; regno++)
cfun->machine->frame.reg_offset[EH_RETURN_DATA_REGNO (regno)]
= SLOT_REQUIRED;
/* ... and any callee saved register that dataflow says is live. */
for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
if (df_regs_ever_live_p (regno)
&& (regno == R30_REGNUM
|| !call_used_regs[regno]))
cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
if (df_regs_ever_live_p (regno)
&& !call_used_regs[regno])
cfun->machine->frame.reg_offset[regno] = SLOT_REQUIRED;
if (frame_pointer_needed)
{
/* FP and LR are placed in the linkage record. */
cfun->machine->frame.reg_offset[R29_REGNUM] = 0;
cfun->machine->frame.wb_candidate1 = R29_REGNUM;
cfun->machine->frame.reg_offset[R30_REGNUM] = UNITS_PER_WORD;
cfun->machine->frame.wb_candidate2 = R30_REGNUM;
cfun->machine->frame.hardfp_offset = 2 * UNITS_PER_WORD;
offset += 2 * UNITS_PER_WORD;
}
/* Now assign stack slots for them. */
for (regno = R0_REGNUM; regno <= R30_REGNUM; regno++)
if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
{
cfun->machine->frame.reg_offset[regno] = offset;
if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
cfun->machine->frame.wb_candidate1 = regno;
else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER)
cfun->machine->frame.wb_candidate2 = regno;
offset += UNITS_PER_WORD;
}
for (regno = V0_REGNUM; regno <= V31_REGNUM; regno++)
if (cfun->machine->frame.reg_offset[regno] == SLOT_REQUIRED)
{
cfun->machine->frame.reg_offset[regno] = offset;
if (cfun->machine->frame.wb_candidate1 == FIRST_PSEUDO_REGISTER)
cfun->machine->frame.wb_candidate1 = regno;
else if (cfun->machine->frame.wb_candidate2 == FIRST_PSEUDO_REGISTER
&& cfun->machine->frame.wb_candidate1 >= V0_REGNUM)
cfun->machine->frame.wb_candidate2 = regno;
offset += UNITS_PER_WORD;
}
cfun->machine->frame.padding0 =
(AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT) - offset);
offset = AARCH64_ROUND_UP (offset, STACK_BOUNDARY / BITS_PER_UNIT);
cfun->machine->frame.saved_regs_size = offset;
cfun->machine->frame.hard_fp_offset
= AARCH64_ROUND_UP (cfun->machine->frame.saved_varargs_size
+ get_frame_size ()
+ cfun->machine->frame.saved_regs_size,
STACK_BOUNDARY / BITS_PER_UNIT);
cfun->machine->frame.frame_size
= AARCH64_ROUND_UP (cfun->machine->frame.hard_fp_offset
+ crtl->outgoing_args_size,
STACK_BOUNDARY / BITS_PER_UNIT);
cfun->machine->frame.laid_out = true;
}
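/* Return true if the frame layout has assigned a save slot to register
REGNO. */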
static bool
aarch64_register_saved_on_entry (int regno)
{
return cfun->machine->frame.reg_offset[regno] >= 0;
}
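/* Return the lowest register number in the range [REGNO, LIMIT] that must
be saved on entry, or LIMIT + 1 if there is none. */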
static unsigned
aarch64_next_callee_save (unsigned regno, unsigned limit)
{
while (regno <= limit && !aarch64_register_saved_on_entry (regno))
regno ++;
return regno;
}
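/* Save register REGNO of MODE on the stack using a single store with
writeback, pre-decrementing the stack pointer by ADJUSTMENT bytes, and
mark the insn as frame-related. */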
static void
aarch64_pushwb_single_reg (machine_mode mode, unsigned regno,
HOST_WIDE_INT adjustment)
{
rtx base_rtx = stack_pointer_rtx;
rtx insn, reg, mem;
reg = gen_rtx_REG (mode, regno);
mem = gen_rtx_PRE_MODIFY (Pmode, base_rtx,
plus_constant (Pmode, base_rtx, -adjustment));
mem = gen_rtx_MEM (mode, mem);
insn = emit_move_insn (mem, reg);
RTX_FRAME_RELATED_P (insn) = 1;
}
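/* Return a store-pair-with-writeback pattern that saves REG and REG2
while updating BASE by -ADJUSTMENT bytes; MODE must be DImode or
DFmode. */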
static rtx
aarch64_gen_storewb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
HOST_WIDE_INT adjustment)
{
switch (mode)
{
case DImode:
return gen_storewb_pairdi_di (base, base, reg, reg2,
GEN_INT (-adjustment),
GEN_INT (UNITS_PER_WORD - adjustment));
case DFmode:
return gen_storewb_pairdf_di (base, base, reg, reg2,
GEN_INT (-adjustment),
GEN_INT (UNITS_PER_WORD - adjustment));
default:
gcc_unreachable ();
}
}
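/* Save the register pair REGNO1/REGNO2 of MODE on the stack,
pre-decrementing the stack pointer by ADJUSTMENT bytes and marking the
resulting insn as frame-related. */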
static void
aarch64_pushwb_pair_reg (machine_mode mode, unsigned regno1,
unsigned regno2, HOST_WIDE_INT adjustment)
{
rtx_insn *insn;
rtx reg1 = gen_rtx_REG (mode, regno1);
rtx reg2 = gen_rtx_REG (mode, regno2);
insn = emit_insn (aarch64_gen_storewb_pair (mode, stack_pointer_rtx, reg1,
reg2, adjustment));
RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 2)) = 1;
RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
RTX_FRAME_RELATED_P (insn) = 1;
}
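/* Return a load-pair-with-writeback pattern that restores REG and REG2
and updates BASE by ADJUSTMENT bytes; MODE must be DImode or DFmode. */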
static rtx
aarch64_gen_loadwb_pair (machine_mode mode, rtx base, rtx reg, rtx reg2,
HOST_WIDE_INT adjustment)
{
switch (mode)
{
case DImode:
return gen_loadwb_pairdi_di (base, base, reg, reg2, GEN_INT (adjustment),
GEN_INT (UNITS_PER_WORD));
case DFmode:
return gen_loadwb_pairdf_di (base, base, reg, reg2, GEN_INT (adjustment),
GEN_INT (UNITS_PER_WORD));
default:
gcc_unreachable ();
}
}
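/* Return a pattern storing REG1 to MEM1 and REG2 to MEM2 as a single
store pair; MODE must be DImode or DFmode. */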
static rtx
aarch64_gen_store_pair (machine_mode mode, rtx mem1, rtx reg1, rtx mem2,
rtx reg2)
{
switch (mode)
{
case DImode:
return gen_store_pairdi (mem1, reg1, mem2, reg2);
case DFmode:
return gen_store_pairdf (mem1, reg1, mem2, reg2);
default:
gcc_unreachable ();
}
}
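/* Return a pattern loading REG1 from MEM1 and REG2 from MEM2 as a single
load pair; MODE must be DImode or DFmode. */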
static rtx
aarch64_gen_load_pair (machine_mode mode, rtx reg1, rtx mem1, rtx reg2,
rtx mem2)
{
switch (mode)
{
case DImode:
return gen_load_pairdi (reg1, mem1, reg2, mem2);
case DFmode:
return gen_load_pairdf (reg1, mem1, reg2, mem2);
default:
gcc_unreachable ();
}
}
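/* Emit code to save the callee-saved registers of MODE in the range
[START, LIMIT] at START_OFFSET bytes from the stack pointer, using
store pairs for adjacent slots where possible. If SKIP_WB, omit the
registers already saved by the writeback store. */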
static void
aarch64_save_callee_saves (machine_mode mode, HOST_WIDE_INT start_offset,
unsigned start, unsigned limit, bool skip_wb)
{
rtx_insn *insn;
rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
? gen_frame_mem : gen_rtx_MEM);
unsigned regno;
unsigned regno2;
for (regno = aarch64_next_callee_save (start, limit);
regno <= limit;
regno = aarch64_next_callee_save (regno + 1, limit))
{
rtx reg, mem;
HOST_WIDE_INT offset;
if (skip_wb
&& (regno == cfun->machine->frame.wb_candidate1
|| regno == cfun->machine->frame.wb_candidate2))
continue;
reg = gen_rtx_REG (mode, regno);
offset = start_offset + cfun->machine->frame.reg_offset[regno];
mem = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
offset));
regno2 = aarch64_next_callee_save (regno + 1, limit);
if (regno2 <= limit
&& ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
== cfun->machine->frame.reg_offset[regno2]))
{
rtx reg2 = gen_rtx_REG (mode, regno2);
rtx mem2;
offset = start_offset + cfun->machine->frame.reg_offset[regno2];
mem2 = gen_mem_ref (mode, plus_constant (Pmode, stack_pointer_rtx,
offset));
insn = emit_insn (aarch64_gen_store_pair (mode, mem, reg, mem2,
reg2));
/* The first part of a frame-related parallel insn is
always assumed to be relevant to the frame
calculations; subsequent parts are only
frame-related if explicitly marked. */
RTX_FRAME_RELATED_P (XVECEXP (PATTERN (insn), 0, 1)) = 1;
regno = regno2;
}
else
insn = emit_move_insn (mem, reg);
RTX_FRAME_RELATED_P (insn) = 1;
}
}
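/* Emit code to restore the callee-saved registers of MODE in the range
[START, LIMIT] from START_OFFSET bytes above the stack pointer, skipping
the writeback registers when SKIP_WB, and accumulate REG_CFA_RESTORE
notes in *CFI_OPS. */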
static void
aarch64_restore_callee_saves (machine_mode mode,
HOST_WIDE_INT start_offset, unsigned start,
unsigned limit, bool skip_wb, rtx *cfi_ops)
{
rtx base_rtx = stack_pointer_rtx;
rtx (*gen_mem_ref) (machine_mode, rtx) = (frame_pointer_needed
? gen_frame_mem : gen_rtx_MEM);
unsigned regno;
unsigned regno2;
HOST_WIDE_INT offset;
for (regno = aarch64_next_callee_save (start, limit);
regno <= limit;
regno = aarch64_next_callee_save (regno + 1, limit))
{
rtx reg, mem;
if (skip_wb
&& (regno == cfun->machine->frame.wb_candidate1
|| regno == cfun->machine->frame.wb_candidate2))
continue;
reg = gen_rtx_REG (mode, regno);
offset = start_offset + cfun->machine->frame.reg_offset[regno];
mem = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
regno2 = aarch64_next_callee_save (regno + 1, limit);
if (regno2 <= limit
&& ((cfun->machine->frame.reg_offset[regno] + UNITS_PER_WORD)
== cfun->machine->frame.reg_offset[regno2]))
{
rtx reg2 = gen_rtx_REG (mode, regno2);
rtx mem2;
offset = start_offset + cfun->machine->frame.reg_offset[regno2];
mem2 = gen_mem_ref (mode, plus_constant (Pmode, base_rtx, offset));
emit_insn (aarch64_gen_load_pair (mode, reg, mem, reg2, mem2));
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg2, *cfi_ops);
regno = regno2;
}
else
emit_move_insn (reg, mem);
*cfi_ops = alloc_reg_note (REG_CFA_RESTORE, reg, *cfi_ops);
}
}
/* AArch64 stack frames generated by this compiler look like:
+-------------------------------+
| |
| incoming stack arguments |
| |
+-------------------------------+
| | <-- incoming stack pointer (aligned)
| callee-allocated save area |
| for register varargs |
| |
+-------------------------------+
| local variables | <-- frame_pointer_rtx
| |
+-------------------------------+
| padding0 | \
+-------------------------------+ |
| callee-saved registers | | frame.saved_regs_size
+-------------------------------+ |
| LR' | |
+-------------------------------+ |
| FP' | / <- hard_frame_pointer_rtx (aligned)
+-------------------------------+
| dynamic allocation |
+-------------------------------+
| padding |
+-------------------------------+
| outgoing stack arguments | <-- arg_pointer
| |
+-------------------------------+
| | <-- stack_pointer_rtx (aligned)
Dynamic stack allocations via alloca() decrease stack_pointer_rtx
but leave frame_pointer_rtx and hard_frame_pointer_rtx
unchanged. */
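/* In outline, in the prologue and epilogue code below frame_size is the
   distance from the incoming (aligned) stack pointer down to
   stack_pointer_rtx as established by the prologue (dynamic allocations
   made later are not included), hard_fp_offset is the distance from the
   incoming stack pointer down to hard_frame_pointer_rtx, and
   fp_offset = frame_size - hard_fp_offset is therefore the distance from
   the final stack pointer up to the frame record.  */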
/* Generate the prologue instructions for entry into a function.
   Establish the stack frame by decreasing the stack pointer by the
   calculated frame size and, if necessary, create a frame record
   filled with the values of LR and the previous frame pointer.  The
   current FP is also set up if it is in use.  */
void
aarch64_expand_prologue (void)
{
/* sub sp, sp, #<frame_size>
stp {fp, lr}, [sp, #<frame_size> - 16]
add fp, sp, #<frame_size> - hardfp_offset
stp {cs_reg}, [fp, #-16] etc.
sub sp, sp, <final_adjustment_if_any>
*/
HOST_WIDE_INT frame_size, offset;
HOST_WIDE_INT fp_offset; /* Offset from hard FP to SP. */
HOST_WIDE_INT hard_fp_offset;
rtx_insn *insn;
aarch64_layout_frame ();
offset = frame_size = cfun->machine->frame.frame_size;
hard_fp_offset = cfun->machine->frame.hard_fp_offset;
fp_offset = frame_size - hard_fp_offset;
if (flag_stack_usage_info)
current_function_static_stack_size = frame_size;
  /* Store-pair and load-pair instructions only have an immediate offset
     range of -512 to 504.  */
if (offset >= 512)
{
      /* When the frame is large, the stack pointer is first decreased
	 to skip over the callee-allocated save area for register
	 varargs, the local variable area and/or the callee-saved
	 register area.  This allows the pre-index write-back store-pair
	 instructions to be used to set up the rest of the stack frame
	 efficiently.  */
offset = hard_fp_offset;
if (offset >= 512)
offset = cfun->machine->frame.saved_regs_size;
frame_size -= (offset + crtl->outgoing_args_size);
fp_offset = 0;
if (frame_size >= 0x1000000)
{
rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
emit_move_insn (op0, GEN_INT (-frame_size));
insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
add_reg_note (insn, REG_CFA_ADJUST_CFA,
gen_rtx_SET (VOIDmode, stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx,
-frame_size)));
RTX_FRAME_RELATED_P (insn) = 1;
}
else if (frame_size > 0)
{
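	  /* An ADD/SUB immediate is a 12-bit value, optionally shifted
	     left by 12 bits, so split the adjustment into those two
	     fields and emit at most two instructions.  */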
int hi_ofs = frame_size & 0xfff000;
int lo_ofs = frame_size & 0x000fff;
if (hi_ofs)
{
insn = emit_insn (gen_add2_insn
(stack_pointer_rtx, GEN_INT (-hi_ofs)));
RTX_FRAME_RELATED_P (insn) = 1;
}
if (lo_ofs)
{
insn = emit_insn (gen_add2_insn
(stack_pointer_rtx, GEN_INT (-lo_ofs)));
RTX_FRAME_RELATED_P (insn) = 1;
}
}
}
else
frame_size = -1;
if (offset > 0)
{
bool skip_wb = false;
if (frame_pointer_needed)
{
skip_wb = true;
if (fp_offset)
{
insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
GEN_INT (-offset)));
RTX_FRAME_RELATED_P (insn) = 1;
aarch64_save_callee_saves (DImode, fp_offset, R29_REGNUM,
R30_REGNUM, false);
}
else
aarch64_pushwb_pair_reg (DImode, R29_REGNUM, R30_REGNUM, offset);
/* Set up frame pointer to point to the location of the
previous frame pointer on the stack. */
insn = emit_insn (gen_add3_insn (hard_frame_pointer_rtx,
stack_pointer_rtx,
GEN_INT (fp_offset)));
RTX_FRAME_RELATED_P (insn) = 1;
emit_insn (gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx));
}
else
{
unsigned reg1 = cfun->machine->frame.wb_candidate1;
unsigned reg2 = cfun->machine->frame.wb_candidate2;
if (fp_offset
|| reg1 == FIRST_PSEUDO_REGISTER
|| (reg2 == FIRST_PSEUDO_REGISTER
&& offset >= 256))
{
insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
GEN_INT (-offset)));
RTX_FRAME_RELATED_P (insn) = 1;
}
else
{
machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
skip_wb = true;
if (reg2 == FIRST_PSEUDO_REGISTER)
aarch64_pushwb_single_reg (mode1, reg1, offset);
else
aarch64_pushwb_pair_reg (mode1, reg1, reg2, offset);
}
}
aarch64_save_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
skip_wb);
aarch64_save_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
skip_wb);
}
  /* When offset >= 512,
     sub sp, sp, #<outgoing_args_size>  */
if (frame_size > -1)
{
if (crtl->outgoing_args_size > 0)
{
insn = emit_insn (gen_add2_insn
(stack_pointer_rtx,
GEN_INT (- crtl->outgoing_args_size)));
RTX_FRAME_RELATED_P (insn) = 1;
}
}
}
/* Return TRUE if we can use a simple_return insn.
   This function checks whether the callee-saved stack is empty, which
   means no restore actions are needed.  The pro_and_epilogue pass uses
   this to check whether the shrink-wrapping optimization is feasible.  */
bool
aarch64_use_return_insn_p (void)
{
if (!reload_completed)
return false;
if (crtl->profile)
return false;
aarch64_layout_frame ();
return cfun->machine->frame.frame_size == 0;
}
/* Generate the epilogue instructions for returning from a function. */
void
aarch64_expand_epilogue (bool for_sibcall)
{
HOST_WIDE_INT frame_size, offset;
HOST_WIDE_INT fp_offset;
HOST_WIDE_INT hard_fp_offset;
rtx_insn *insn;
  /* We need to add a memory barrier to prevent reads from the
     deallocated stack.  */
bool need_barrier_p = (get_frame_size () != 0
|| cfun->machine->frame.saved_varargs_size);
aarch64_layout_frame ();
offset = frame_size = cfun->machine->frame.frame_size;
hard_fp_offset = cfun->machine->frame.hard_fp_offset;
fp_offset = frame_size - hard_fp_offset;
  /* Store-pair and load-pair instructions only have an immediate offset
     range of -512 to 504.  */
if (offset >= 512)
{
offset = hard_fp_offset;
if (offset >= 512)
offset = cfun->machine->frame.saved_regs_size;
frame_size -= (offset + crtl->outgoing_args_size);
fp_offset = 0;
if (!frame_pointer_needed && crtl->outgoing_args_size > 0)
{
insn = emit_insn (gen_add2_insn
(stack_pointer_rtx,
GEN_INT (crtl->outgoing_args_size)));
RTX_FRAME_RELATED_P (insn) = 1;
}
}
else
frame_size = -1;
/* If there were outgoing arguments or we've done dynamic stack
allocation, then restore the stack pointer from the frame
pointer. This is at most one insn and more efficient than using
GCC's internal mechanism. */
if (frame_pointer_needed
&& (crtl->outgoing_args_size || cfun->calls_alloca))
{
if (cfun->calls_alloca)
emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
insn = emit_insn (gen_add3_insn (stack_pointer_rtx,
hard_frame_pointer_rtx,
GEN_INT (0)));
offset = offset - fp_offset;
}
if (offset > 0)
{
unsigned reg1 = cfun->machine->frame.wb_candidate1;
unsigned reg2 = cfun->machine->frame.wb_candidate2;
bool skip_wb = true;
rtx cfi_ops = NULL;
if (frame_pointer_needed)
fp_offset = 0;
else if (fp_offset
|| reg1 == FIRST_PSEUDO_REGISTER
|| (reg2 == FIRST_PSEUDO_REGISTER
&& offset >= 256))
skip_wb = false;
aarch64_restore_callee_saves (DImode, fp_offset, R0_REGNUM, R30_REGNUM,
skip_wb, &cfi_ops);
aarch64_restore_callee_saves (DFmode, fp_offset, V0_REGNUM, V31_REGNUM,
skip_wb, &cfi_ops);
if (need_barrier_p)
emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
if (skip_wb)
{
machine_mode mode1 = (reg1 <= R30_REGNUM) ? DImode : DFmode;
rtx rreg1 = gen_rtx_REG (mode1, reg1);
cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg1, cfi_ops);
if (reg2 == FIRST_PSEUDO_REGISTER)
{
rtx mem = plus_constant (Pmode, stack_pointer_rtx, offset);
mem = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, mem);
mem = gen_rtx_MEM (mode1, mem);
insn = emit_move_insn (rreg1, mem);
}
else
{
rtx rreg2 = gen_rtx_REG (mode1, reg2);
cfi_ops = alloc_reg_note (REG_CFA_RESTORE, rreg2, cfi_ops);
insn = emit_insn (aarch64_gen_loadwb_pair
(mode1, stack_pointer_rtx, rreg1,
rreg2, offset));
}
}
else
{
insn = emit_insn (gen_add2_insn (stack_pointer_rtx,
GEN_INT (offset)));
}
/* Reset the CFA to be SP + FRAME_SIZE. */
rtx new_cfa = stack_pointer_rtx;
if (frame_size > 0)
new_cfa = plus_constant (Pmode, new_cfa, frame_size);
cfi_ops = alloc_reg_note (REG_CFA_DEF_CFA, new_cfa, cfi_ops);
REG_NOTES (insn) = cfi_ops;
RTX_FRAME_RELATED_P (insn) = 1;
}
if (frame_size > 0)
{
if (need_barrier_p)
emit_insn (gen_stack_tie (stack_pointer_rtx, stack_pointer_rtx));
if (frame_size >= 0x1000000)
{
rtx op0 = gen_rtx_REG (Pmode, IP0_REGNUM);
emit_move_insn (op0, GEN_INT (frame_size));
insn = emit_insn (gen_add2_insn (stack_pointer_rtx, op0));
}
else
{
int hi_ofs = frame_size & 0xfff000;
int lo_ofs = frame_size & 0x000fff;
if (hi_ofs && lo_ofs)
{
insn = emit_insn (gen_add2_insn
(stack_pointer_rtx, GEN_INT (hi_ofs)));
RTX_FRAME_RELATED_P (insn) = 1;
frame_size = lo_ofs;
}
insn = emit_insn (gen_add2_insn
(stack_pointer_rtx, GEN_INT (frame_size)));
}
/* Reset the CFA to be SP + 0. */
add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx);
RTX_FRAME_RELATED_P (insn) = 1;
}
/* Stack adjustment for exception handler. */
if (crtl->calls_eh_return)
{
/* We need to unwind the stack by the offset computed by
EH_RETURN_STACKADJ_RTX. We have already reset the CFA
to be SP; letting the CFA move during this adjustment
is just as correct as retaining the CFA from the body
of the function. Therefore, do nothing special. */
emit_insn (gen_add2_insn (stack_pointer_rtx, EH_RETURN_STACKADJ_RTX));
}
emit_use (gen_rtx_REG (DImode, LR_REGNUM));
if (!for_sibcall)
emit_jump_insn (ret_rtx);
}
/* Return the place to copy the exception unwinding return address to.
   This will probably be a stack slot, but could (in theory) be the
   return register.  */
rtx
aarch64_final_eh_return_addr (void)
{
HOST_WIDE_INT fp_offset;
aarch64_layout_frame ();
fp_offset = cfun->machine->frame.frame_size
- cfun->machine->frame.hard_fp_offset;
if (cfun->machine->frame.reg_offset[LR_REGNUM] < 0)
return gen_rtx_REG (DImode, LR_REGNUM);
  /* DSE and CSELIB do not detect an alias between sp+k1 and fp+k2.  This
     can result in the store that saves LR, introduced by
     builtin_eh_return (), being incorrectly deleted because the alias is
     not detected.
     So, in calculating the address to copy the exception unwinding
     return address to, we distinguish two cases.
     If FP is needed and fp_offset is 0, then SP == FP and we return an
     SP-relative location, since all the addresses are SP-relative in
     this case.  This prevents the store from being optimized away.
     If fp_offset is not 0, the addresses will be FP-relative and
     therefore we return an FP-relative location.  */
if (frame_pointer_needed)
{
if (fp_offset)
return gen_frame_mem (DImode,
plus_constant (Pmode, hard_frame_pointer_rtx, UNITS_PER_WORD));
else
return gen_frame_mem (DImode,
plus_constant (Pmode, stack_pointer_rtx, UNITS_PER_WORD));
}
/* If FP is not needed, we calculate the location of LR, which would be
at the top of the saved registers block. */
return gen_frame_mem (DImode,
plus_constant (Pmode,
stack_pointer_rtx,
fp_offset
+ cfun->machine->frame.saved_regs_size
- 2 * UNITS_PER_WORD));
}
/* Possibly output code to build up a constant in a register. For
the benefit of the costs infrastructure, returns the number of
instructions which would be emitted. GENERATE inhibits or
enables code generation. */
static int
aarch64_build_constant (int regnum, HOST_WIDE_INT val, bool generate)
{
int insns = 0;
if (aarch64_bitmask_imm (val, DImode))
{
if (generate)
emit_move_insn (gen_rtx_REG (Pmode, regnum), GEN_INT (val));
insns = 1;
}
else
{
int i;
int ncount = 0;
int zcount = 0;
HOST_WIDE_INT valp = val >> 16;
HOST_WIDE_INT valm;
HOST_WIDE_INT tval;
for (i = 16; i < 64; i += 16)
{
valm = (valp & 0xffff);
if (valm != 0)
++ zcount;
if (valm != 0xffff)
++ ncount;
valp >>= 16;
}
      /* zcount contains the number of additional MOVK instructions
	 required if the constant is built up with an initial MOVZ
	 instruction, while ncount is the number of MOVK instructions
	 required if starting with a MOVN instruction.  Choose the
	 sequence that yields the fewest instructions, preferring MOVZ
	 when both counts are equal.  */
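      /* For example, 0xffffffffffff1234 gives zcount == 3 and
	 ncount == 0: a single MOVN materializes it, whereas building it
	 with MOVZ would need three additional MOVK instructions.  */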
if (ncount < zcount)
{
if (generate)
emit_move_insn (gen_rtx_REG (Pmode, regnum),
GEN_INT (val | ~(HOST_WIDE_INT) 0xffff));
tval = 0xffff;
insns++;
}
else
{
if (generate)
emit_move_insn (gen_rtx_REG (Pmode, regnum),
GEN_INT (val & 0xffff));
tval = 0;
insns++;
}
val >>= 16;
for (i = 16; i < 64; i += 16)
{
if ((val & 0xffff) != tval)
{
if (generate)
emit_insn (gen_insv_immdi (gen_rtx_REG (Pmode, regnum),
GEN_INT (i),
GEN_INT (val & 0xffff)));
insns++;
}
val >>= 16;
}
}
return insns;
}
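/* Add DELTA to the register numbered REGNUM.  If the adjustment cannot
   be expressed with ADD/SUB immediates (a 12-bit value, optionally
   shifted left by 12 bits), it is staged through the register numbered
   SCRATCHREG.  */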
static void
aarch64_add_constant (int regnum, int scratchreg, HOST_WIDE_INT delta)
{
HOST_WIDE_INT mdelta = delta;
rtx this_rtx = gen_rtx_REG (Pmode, regnum);
rtx scratch_rtx = gen_rtx_REG (Pmode, scratchreg);
if (mdelta < 0)
mdelta = -mdelta;
if (mdelta >= 4096 * 4096)
{
(void) aarch64_build_constant (scratchreg, delta, true);
emit_insn (gen_add3_insn (this_rtx, this_rtx, scratch_rtx));
}
else if (mdelta > 0)
{
if (mdelta >= 4096)
{
emit_insn (gen_rtx_SET (Pmode, scratch_rtx, GEN_INT (mdelta / 4096)));
rtx shift = gen_rtx_ASHIFT (Pmode, scratch_rtx, GEN_INT (12));
if (delta < 0)
emit_insn (gen_rtx_SET (Pmode, this_rtx,
gen_rtx_MINUS (Pmode, this_rtx, shift)));
else
emit_insn (gen_rtx_SET (Pmode, this_rtx,
gen_rtx_PLUS (Pmode, this_rtx, shift)));
}
if (mdelta % 4096 != 0)
{
scratch_rtx = GEN_INT ((delta < 0 ? -1 : 1) * (mdelta % 4096));
emit_insn (gen_rtx_SET (Pmode, this_rtx,
gen_rtx_PLUS (Pmode, this_rtx, scratch_rtx)));
}
}
}
/* Output code to add DELTA to the first argument, and then jump
to FUNCTION. Used for C++ multiple inheritance. */
static void
aarch64_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
HOST_WIDE_INT delta,
HOST_WIDE_INT vcall_offset,
tree function)
{
  /* The this pointer is always in x0.  Note that this differs from Arm,
     where the this pointer may be bumped to r1 if r0 is required to
     return a pointer to an aggregate.  On AArch64 a result value
     pointer will be in x8.  */
int this_regno = R0_REGNUM;
rtx this_rtx, temp0, temp1, addr, funexp;
rtx_insn *insn;
reload_completed = 1;
emit_note (NOTE_INSN_PROLOGUE_END);
if (vcall_offset == 0)
aarch64_add_constant (this_regno, IP1_REGNUM, delta);
else
{
gcc_assert ((vcall_offset & (POINTER_BYTES - 1)) == 0);
this_rtx = gen_rtx_REG (Pmode, this_regno);
temp0 = gen_rtx_REG (Pmode, IP0_REGNUM);
temp1 = gen_rtx_REG (Pmode, IP1_REGNUM);
addr = this_rtx;
if (delta != 0)
{
if (delta >= -256 && delta < 256)
addr = gen_rtx_PRE_MODIFY (Pmode, this_rtx,
plus_constant (Pmode, this_rtx, delta));
else
aarch64_add_constant (this_regno, IP1_REGNUM, delta);
}
if (Pmode == ptr_mode)
aarch64_emit_move (temp0, gen_rtx_MEM (ptr_mode, addr));
else
aarch64_emit_move (temp0,
gen_rtx_ZERO_EXTEND (Pmode,
gen_rtx_MEM (ptr_mode, addr)));
if (vcall_offset >= -256 && vcall_offset < 4096 * POINTER_BYTES)
addr = plus_constant (Pmode, temp0, vcall_offset);
else
{
(void) aarch64_build_constant (IP1_REGNUM, vcall_offset, true);
addr = gen_rtx_PLUS (Pmode, temp0, temp1);
}
if (Pmode == ptr_mode)
	aarch64_emit_move (temp1, gen_rtx_MEM (ptr_mode, addr));
else
aarch64_emit_move (temp1,
gen_rtx_SIGN_EXTEND (Pmode,
gen_rtx_MEM (ptr_mode, addr)));
emit_insn (gen_add2_insn (this_rtx, temp1));
}
/* Generate a tail call to the target function. */
if (!TREE_USED (function))
{
assemble_external (function);
TREE_USED (function) = 1;
}
funexp = XEXP (DECL_RTL (function), 0);
funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
SIBLING_CALL_P (insn) = 1;
insn = get_insns ();
shorten_branches (insn);
final_start_function (insn, file, 1);
final (insn, file, 1);
final_end_function ();
/* Stop pretending to be a post-reload pass. */
reload_completed = 0;
}
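/* Return true if X contains a reference to a thread-local symbol.  */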
static bool
aarch64_tls_referenced_p (rtx x)
{
if (!TARGET_HAVE_TLS)
return false;
subrtx_iterator::array_type array;
FOR_EACH_SUBRTX (iter, array, x, ALL)
{
const_rtx x = *iter;
if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
return true;
/* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
TLS offsets, not real symbol references. */
if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
iter.skip_subrtxes ();
}
return false;
}
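/* Comparison function for qsort and bsearch over the sorted table of
   bitmask immediates.  */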
static int
aarch64_bitmasks_cmp (const void *i1, const void *i2)
{
const unsigned HOST_WIDE_INT *imm1 = (const unsigned HOST_WIDE_INT *) i1;
const unsigned HOST_WIDE_INT *imm2 = (const unsigned HOST_WIDE_INT *) i2;
if (*imm1 < *imm2)
return -1;
if (*imm1 > *imm2)
return +1;
return 0;
}
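/* Build the sorted table of all 64-bit values that are valid bitmask
   (logical) immediates: for each element size E in {2, 4, ..., 64}, a
   run of S set bits (0 < S < E) rotated right by R and replicated
   across the register.  For example, E == 16, S == 8, R == 0 gives
   0x00ff00ff00ff00ff.  */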
static void
aarch64_build_bitmask_table (void)
{
unsigned HOST_WIDE_INT mask, imm;
unsigned int log_e, e, s, r;
unsigned int nimms = 0;
for (log_e = 1; log_e <= 6; log_e++)
{
e = 1 << log_e;
if (e == 64)
mask = ~(HOST_WIDE_INT) 0;
else
mask = ((HOST_WIDE_INT) 1 << e) - 1;
for (s = 1; s < e; s++)
{
for (r = 0; r < e; r++)
{
	      /* Set S consecutive bits to 1 (S < 64).  */
	      imm = ((unsigned HOST_WIDE_INT) 1 << s) - 1;
	      /* Rotate right by R.  */
	      if (r != 0)
		imm = ((imm >> r) | (imm << (e - r))) & mask;
	      /* Replicate the pattern across 64 bits, depending on the
		 element size E.  Each case intentionally falls through
		 to the next, doubling the pattern until it fills the
		 register (E == 64 needs no replication).  */
	      switch (log_e)
		{
		case 1: imm |= (imm << 2);  /* Fall through.  */
		case 2: imm |= (imm << 4);  /* Fall through.  */
		case 3: imm |= (imm << 8);  /* Fall through.  */
		case 4: imm |= (imm << 16); /* Fall through.  */
		case 5: imm |= (imm << 32); /* Fall through.  */
		case 6:
		  break;
		default:
		  gcc_unreachable ();
		}
gcc_assert (nimms < AARCH64_NUM_BITMASKS);
aarch64_bitmasks[nimms++] = imm;
}
}
}
gcc_assert (nimms == AARCH64_NUM_BITMASKS);
qsort (aarch64_bitmasks, nimms, sizeof (aarch64_bitmasks[0]),
aarch64_bitmasks_cmp);
}
/* Return true if val can be encoded as a 12-bit unsigned immediate with
a left shift of 0 or 12 bits. */
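/* For example, 0xabc and 0xabc000 can be encoded, but 0xabc00 cannot.  */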
bool
aarch64_uimm12_shift (HOST_WIDE_INT val)
{
return ((val & (((HOST_WIDE_INT) 0xfff) << 0)) == val
|| (val & (((HOST_WIDE_INT) 0xfff) << 12)) == val
);
}
/* Return true if val is an immediate that can be loaded into a
register by a MOVZ instruction. */
static bool
aarch64_movw_imm (HOST_WIDE_INT val, machine_mode mode)
{
if (GET_MODE_SIZE (mode) > 4)
{
if ((val & (((HOST_WIDE_INT) 0xffff) << 32)) == val
|| (val & (((HOST_WIDE_INT) 0xffff) << 48)) == val)
return 1;
}
else
{
/* Ignore sign extension. */
val &= (HOST_WIDE_INT) 0xffffffff;
}
return ((val & (((HOST_WIDE_INT) 0xffff) << 0)) == val
|| (val & (((HOST_WIDE_INT) 0xffff) << 16)) == val);
}
/* Return true if val is a valid bitmask immediate. */
bool
aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode mode)
{
if (GET_MODE_SIZE (mode) < 8)
{
/* Replicate bit pattern. */
val &= (HOST_WIDE_INT) 0xffffffff;
val |= val << 32;
}
return bsearch (&val, aarch64_bitmasks, AARCH64_NUM_BITMASKS,
sizeof (aarch64_bitmasks[0]), aarch64_bitmasks_cmp) != NULL;
}
/* Return true if val is an immediate that can be loaded into a
register in a single instruction. */
bool
aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
{
if (aarch64_movw_imm (val, mode) || aarch64_movw_imm (~val, mode))
return 1;
return aarch64_bitmask_imm (val, mode);
}
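/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */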
static bool
aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
rtx base, offset;
if (GET_CODE (x) == HIGH)
return true;
split_const (x, &base, &offset);
if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
{
if (aarch64_classify_symbol (base, offset, SYMBOL_CONTEXT_ADR)
!= SYMBOL_FORCE_TO_MEM)
return true;
else
	/* Avoid generating a 64-bit relocation in ILP32; leave it to
	   aarch64_expand_mov_immediate to handle properly.  */
return mode != ptr_mode;
}
return aarch64_tls_referenced_p (x);
}
/* Return true if register REGNO is a valid index register.
STRICT_P is true if REG_OK_STRICT is in effect. */
bool
aarch64_regno_ok_for_index_p (int regno, bool strict_p)
{
if (!HARD_REGISTER_NUM_P (regno))
{
if (!strict_p)
return true;
if (!reg_renumber)
return false;
regno = reg_renumber[regno];
}
return GP_REGNUM_P (regno);
}
/* Return true if register REGNO is a valid base register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */
bool
aarch64_regno_ok_for_base_p (int regno, bool strict_p)
{
if (!HARD_REGISTER_NUM_P (regno))
{
if (!strict_p)
return true;
if (!reg_renumber)
return false;
regno = reg_renumber[regno];
}
/* The fake registers will be eliminated to either the stack or
hard frame pointer, both of which are usually valid base registers.
Reload deals with the cases where the eliminated form isn't valid. */
return (GP_REGNUM_P (regno)
|| regno == SP_REGNUM
|| regno == FRAME_POINTER_REGNUM
|| regno == ARG_POINTER_REGNUM);
}
/* Return true if X is a valid base register.
   STRICT_P is true if REG_OK_STRICT is in effect.  */
static bool
aarch64_base_register_rtx_p (rtx x, bool strict_p)
{
if (!strict_p && GET_CODE (x) == SUBREG)
x = SUBREG_REG (x);
return (REG_P (x) && aarch64_regno_ok_for_base_p (REGNO (x), strict_p));
}
/* Return true if X is a valid address index operand for mode MODE.
   If it is, fill in INFO appropriately.  STRICT_P is true if
   REG_OK_STRICT is in effect.  */
static bool
aarch64_classify_index (struct aarch64_address_info *info, rtx x,
machine_mode mode, bool strict_p)
{
enum aarch64_address_type type;
rtx index;
int shift;
/* (reg:P) */
if ((REG_P (x) || GET_CODE (x) == SUBREG)
&& GET_MODE (x) == Pmode)
{
type = ADDRESS_REG_REG;
index = x;
shift = 0;
}
/* (sign_extend:DI (reg:SI)) */
else if ((GET_CODE (x) == SIGN_EXTEND
|| GET_CODE (x) ==