| /* Subroutines used for LoongArch code generation. |
| Copyright (C) 2021-2022 Free Software Foundation, Inc. |
| Contributed by Loongson Ltd. |
| Based on MIPS and RISC-V target for GNU compiler. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #define IN_TARGET_CODE 1 |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "memmodel.h" |
| #include "gimple.h" |
| #include "cfghooks.h" |
| #include "df.h" |
| #include "tm_p.h" |
| #include "stringpool.h" |
| #include "attribs.h" |
| #include "optabs.h" |
| #include "regs.h" |
| #include "emit-rtl.h" |
| #include "recog.h" |
| #include "cgraph.h" |
| #include "diagnostic.h" |
| #include "insn-attr.h" |
| #include "output.h" |
| #include "alias.h" |
| #include "fold-const.h" |
| #include "varasm.h" |
| #include "stor-layout.h" |
| #include "calls.h" |
| #include "explow.h" |
| #include "expr.h" |
| #include "libfuncs.h" |
| #include "reload.h" |
| #include "common/common-target.h" |
| #include "langhooks.h" |
| #include "cfgrtl.h" |
| #include "cfganal.h" |
| #include "sched-int.h" |
| #include "gimplify.h" |
| #include "target-globals.h" |
| #include "tree-pass.h" |
| #include "context.h" |
| #include "builtins.h" |
| #include "rtl-iter.h" |
| |
| /* This file should be included last. */ |
| #include "target-def.h" |
| |
| /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */ |
| #define UNSPEC_ADDRESS_P(X) \ |
| (GET_CODE (X) == UNSPEC \ |
| && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \ |
| && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES) |
| |
| /* Extract the symbol or label from UNSPEC wrapper X. */ |
| #define UNSPEC_ADDRESS(X) XVECEXP (X, 0, 0) |
| |
| /* Extract the symbol type from UNSPEC wrapper X. */ |
| #define UNSPEC_ADDRESS_TYPE(X) \ |
| ((enum loongarch_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST)) |
| |
| /* True if INSN is a loongarch.md pattern or asm statement. */ |
| /* ??? This test exists throughout the compiler, perhaps it should be |
| moved to rtl.h. */ |
| #define USEFUL_INSN_P(INSN) \ |
| (NONDEBUG_INSN_P (INSN) \ |
| && GET_CODE (PATTERN (INSN)) != USE \ |
| && GET_CODE (PATTERN (INSN)) != CLOBBER) |
| |
| /* True if bit BIT is set in VALUE. */ |
| #define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0) |
| |
| /* Classifies an address. |
| |
| ADDRESS_REG |
| A natural register + offset address. The register satisfies |
| loongarch_valid_base_register_p and the offset is a const_arith_operand. |
| |
| ADDRESS_REG_REG |
| A base register indexed by (optionally scaled) register. |
| |
| ADDRESS_LO_SUM |
| A LO_SUM rtx. The first operand is a valid base register and the second |
| operand is a symbolic address. |
| |
| ADDRESS_CONST_INT |
| A signed 16-bit constant address. |
| |
| ADDRESS_SYMBOLIC |
| A constant symbolic address. */ |
| enum loongarch_address_type |
| { |
| ADDRESS_REG, |
| ADDRESS_REG_REG, |
| ADDRESS_LO_SUM, |
| ADDRESS_CONST_INT, |
| ADDRESS_SYMBOLIC |
| }; |
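| |
| /* As an illustration (not an exhaustive list of the accepted forms), the |
| RTL shapes these classifications typically correspond to are: |
| |
| ADDRESS_REG (reg) or (plus (reg) (const_int)) |
| ADDRESS_REG_REG (plus (reg) (reg)) |
| ADDRESS_LO_SUM (lo_sum (reg) (symbol_ref)) |
| ADDRESS_CONST_INT (const_int) |
| ADDRESS_SYMBOLIC (symbol_ref), or (const (plus (symbol_ref) (const_int))). */ |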
| |
| |
| /* Information about an address described by loongarch_address_type. */ |
| struct loongarch_address_info |
| { |
| enum loongarch_address_type type; |
| rtx reg; |
| rtx offset; |
| enum loongarch_symbol_type symbol_type; |
| }; |
| |
| /* Methods for loading an immediate value: |
| |
| METHOD_NORMAL: |
| Load bits 0-31 of the immediate. |
| |
| METHOD_LU32I: |
| Load bits 32-51 of the immediate. |
| |
| METHOD_LU52I: |
| Load bits 52-63 of the immediate. |
| |
| METHOD_INSV: |
| An immediate of the form 0xfff00000fffffxxx, handled by inserting a |
| bit field. */ |
| enum loongarch_load_imm_method |
| { |
| METHOD_NORMAL, |
| METHOD_LU32I, |
| METHOD_LU52I, |
| METHOD_INSV |
| }; |
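| |
| /* For example (an illustrative sketch, not traced through |
| loongarch_build_integer below), the 64-bit constant 0x1234567890abcdef |
| could be built with two METHOD_NORMAL operations followed by |
| METHOD_LU32I and METHOD_LU52I: |
| |
| lu12i.w $t0, 0x90abc # bits 12-31, sign-extended |
| ori $t0, $t0, 0xdef # bits 0-11 |
| lu32i.d $t0, 0x45678 # bits 32-51 |
| lu52i.d $t0, $t0, 0x123 # bits 52-63 |
| |
| ($t0 is used here only for illustration.) */ |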
| |
| struct loongarch_integer_op |
| { |
| enum rtx_code code; |
| HOST_WIDE_INT value; |
| enum loongarch_load_imm_method method; |
| }; |
| |
| /* The largest number of operations needed to load an integer constant. |
| The worst accepted case for 64-bit constants is LU12I.W,LU32I.D,LU52I.D,ORI |
| or LU12I.W,LU32I.D,LU52I.D,ADDI.D. */ |
| #define LARCH_MAX_INTEGER_OPS 4 |
| |
| /* Array that maps GCC register numbers to debugger register numbers. */ |
| int loongarch_dwarf_regno[FIRST_PSEUDO_REGISTER]; |
| |
| /* Index [M][R] is true if register R is allowed to hold a value of mode M. */ |
| static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE] |
| [FIRST_PSEUDO_REGISTER]; |
| |
| /* Index C is true if character C is a valid PRINT_OPERAND punctuation |
| character. */ |
| static bool loongarch_print_operand_punct[256]; |
| |
| /* Cached value of can_issue_more. This is cached in loongarch_variable_issue |
| hook and returned from loongarch_sched_reorder2. */ |
| static int cached_can_issue_more; |
| |
| /* Index R is the smallest register class that contains register R. */ |
| const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = { |
| GR_REGS, GR_REGS, GR_REGS, GR_REGS, |
| JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, |
| JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, |
| SIBCALL_REGS, JIRL_REGS, SIBCALL_REGS, SIBCALL_REGS, |
| SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, |
| SIBCALL_REGS, GR_REGS, GR_REGS, JIRL_REGS, |
| JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, |
| JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, |
| |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS, |
| FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS, |
| FRAME_REGS, FRAME_REGS |
| }; |
| |
| /* Which cost information to use. */ |
| static const struct loongarch_rtx_cost_data *loongarch_cost; |
| |
| /* Information about a single argument. */ |
| struct loongarch_arg_info |
| { |
| /* True if the argument is at least partially passed on the stack. */ |
| bool stack_p; |
| |
| /* The number of integer registers allocated to this argument. */ |
| unsigned int num_gprs; |
| |
| /* The offset of the first register used, provided num_gprs is nonzero. |
| If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS. */ |
| unsigned int gpr_offset; |
| |
| /* The number of floating-point registers allocated to this argument. */ |
| unsigned int num_fprs; |
| |
| /* The offset of the first register used, provided num_fprs is nonzero. */ |
| unsigned int fpr_offset; |
| }; |
| |
| /* Invoke MACRO (COND) for each fcmp.cond.{s/d} condition. */ |
| #define LARCH_FP_CONDITIONS(MACRO) \ |
| MACRO (f), \ |
| MACRO (un), \ |
| MACRO (eq), \ |
| MACRO (ueq), \ |
| MACRO (olt), \ |
| MACRO (ult), \ |
| MACRO (ole), \ |
| MACRO (ule), \ |
| MACRO (sf), \ |
| MACRO (ngle), \ |
| MACRO (seq), \ |
| MACRO (ngl), \ |
| MACRO (lt), \ |
| MACRO (nge), \ |
| MACRO (le), \ |
| MACRO (ngt) |
| |
| /* Enumerates the codes above as LARCH_FP_COND_<X>. */ |
| #define DECLARE_LARCH_COND(X) LARCH_FP_COND_##X |
| enum loongarch_fp_condition |
| { |
| LARCH_FP_CONDITIONS (DECLARE_LARCH_COND) |
| }; |
| #undef DECLARE_LARCH_COND |
| |
| /* Index X provides the string representation of LARCH_FP_COND_<X>. */ |
| #define STRINGIFY(X) #X |
| const char *const |
| loongarch_fp_conditions[16] = {LARCH_FP_CONDITIONS (STRINGIFY)}; |
| #undef STRINGIFY |
| |
| /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at |
| least PARM_BOUNDARY bits of alignment, but will be given anything up |
| to PREFERRED_STACK_BOUNDARY bits if the type requires it. */ |
| |
| static unsigned int |
| loongarch_function_arg_boundary (machine_mode mode, const_tree type) |
| { |
| unsigned int alignment; |
| |
| /* Use the natural alignment if the type is not an aggregate. */ |
| if (type && !AGGREGATE_TYPE_P (type)) |
| alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type)); |
| else |
| alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode); |
| |
| return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment)); |
| } |
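| |
| /* For instance, assuming PARM_BOUNDARY is 64 bits and |
| PREFERRED_STACK_BOUNDARY is 128 bits, a char argument is still given |
| 64-bit alignment here, while a type aligned to 256 bits is clamped |
| down to 128-bit alignment. */ |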
| |
| /* If MODE represents an argument that can be passed or returned in |
| floating-point registers, return the number of registers, else 0. */ |
| |
| static unsigned |
| loongarch_pass_mode_in_fpr_p (machine_mode mode) |
| { |
| if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG) |
| { |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| return 1; |
| |
| if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) |
| return 2; |
| } |
| |
| return 0; |
| } |
| |
| typedef struct |
| { |
| const_tree type; |
| HOST_WIDE_INT offset; |
| } loongarch_aggregate_field; |
| |
| /* Identify subfields of aggregates that are candidates for passing in |
| floating-point registers. */ |
| |
| static int |
| loongarch_flatten_aggregate_field (const_tree type, |
| loongarch_aggregate_field fields[2], int n, |
| HOST_WIDE_INT offset) |
| { |
| switch (TREE_CODE (type)) |
| { |
| case RECORD_TYPE: |
| /* Can't handle incomplete types nor sizes that are not fixed. */ |
| if (!COMPLETE_TYPE_P (type) |
| || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST |
| || !tree_fits_uhwi_p (TYPE_SIZE (type))) |
| return -1; |
| |
| for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) |
| if (TREE_CODE (f) == FIELD_DECL) |
| { |
| if (!TYPE_P (TREE_TYPE (f))) |
| return -1; |
| |
| if (DECL_SIZE (f) && integer_zerop (DECL_SIZE (f))) |
| continue; |
| |
| HOST_WIDE_INT pos = offset + int_byte_position (f); |
| n = loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n, |
| pos); |
| if (n < 0) |
| return -1; |
| } |
| return n; |
| |
| case ARRAY_TYPE: |
| { |
| HOST_WIDE_INT n_elts; |
| loongarch_aggregate_field subfields[2]; |
| tree index = TYPE_DOMAIN (type); |
| tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type)); |
| int n_subfields = loongarch_flatten_aggregate_field (TREE_TYPE (type), |
| subfields, 0, |
| offset); |
| |
| /* Can't handle incomplete types nor sizes that are not fixed. */ |
| if (n_subfields <= 0 |
| || !COMPLETE_TYPE_P (type) |
| || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST |
| || !index |
| || !TYPE_MAX_VALUE (index) |
| || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) |
| || !TYPE_MIN_VALUE (index) |
| || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) |
| || !tree_fits_uhwi_p (elt_size)) |
| return -1; |
| |
| n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) |
| - tree_to_uhwi (TYPE_MIN_VALUE (index)); |
| gcc_assert (n_elts >= 0); |
| |
| for (HOST_WIDE_INT i = 0; i < n_elts; i++) |
| for (int j = 0; j < n_subfields; j++) |
| { |
| if (n >= 2) |
| return -1; |
| |
| fields[n] = subfields[j]; |
| fields[n++].offset += i * tree_to_uhwi (elt_size); |
| } |
| |
| return n; |
| } |
| |
| case COMPLEX_TYPE: |
| { |
| /* A complex type consumes two fields, so N must be 0. */ |
| if (n != 0) |
| return -1; |
| |
| HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))); |
| |
| if (elt_size <= UNITS_PER_FP_ARG) |
| { |
| fields[0].type = TREE_TYPE (type); |
| fields[0].offset = offset; |
| fields[1].type = TREE_TYPE (type); |
| fields[1].offset = offset + elt_size; |
| |
| return 2; |
| } |
| |
| return -1; |
| } |
| |
| default: |
| if (n < 2 |
| && ((SCALAR_FLOAT_TYPE_P (type) |
| && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG) |
| || (INTEGRAL_TYPE_P (type) |
| && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD))) |
| { |
| fields[n].type = type; |
| fields[n].offset = offset; |
| return n + 1; |
| } |
| else |
| return -1; |
| } |
| } |
| |
| /* Identify candidate aggregates for passing in floating-point registers. |
| Candidates have at most two fields after flattening. */ |
| |
| static int |
| loongarch_flatten_aggregate_argument (const_tree type, |
| loongarch_aggregate_field fields[2]) |
| { |
| if (!type || TREE_CODE (type) != RECORD_TYPE) |
| return -1; |
| |
| return loongarch_flatten_aggregate_field (type, fields, 0, 0); |
| } |
| |
| /* See whether TYPE is a record whose fields should be returned in one or |
| two floating-point registers. If so, populate FIELDS accordingly. */ |
| |
| static unsigned |
| loongarch_pass_aggregate_num_fpr (const_tree type, |
| loongarch_aggregate_field fields[2]) |
| { |
| int n = loongarch_flatten_aggregate_argument (type, fields); |
| |
| for (int i = 0; i < n; i++) |
| if (!SCALAR_FLOAT_TYPE_P (fields[i].type)) |
| return 0; |
| |
| return n > 0 ? n : 0; |
| } |
| |
| /* See whether TYPE is a record whose fields should be returned in one |
| floating-point register and one integer register. If so, populate |
| FIELDS accordingly. */ |
| |
| static bool |
| loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type, |
| loongarch_aggregate_field fields[2]) |
| { |
| unsigned num_int = 0, num_float = 0; |
| int n = loongarch_flatten_aggregate_argument (type, fields); |
| |
| for (int i = 0; i < n; i++) |
| { |
| num_float += SCALAR_FLOAT_TYPE_P (fields[i].type); |
| num_int += INTEGRAL_TYPE_P (fields[i].type); |
| } |
| |
| return num_int == 1 && num_float == 1; |
| } |
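| |
| /* As a rough example (assuming argument registers are still free), a |
| structure such as |
| |
| struct s { double d; long l; }; |
| |
| flattens to one floating-point field and one integer field, so it can |
| be passed with D in an FPR and L in a GPR instead of in two GPRs. */ |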
| |
| /* Return the representation of an argument passed or returned in an FPR |
| when the value has mode VALUE_MODE and the type has TYPE_MODE. The |
| two modes may be different for structures like: |
| |
| struct __attribute__((packed)) foo { float f; } |
| |
| where the SFmode value "f" is passed in REGNO but the struct itself |
| has mode BLKmode. */ |
| |
| static rtx |
| loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno, |
| machine_mode value_mode, |
| HOST_WIDE_INT offset) |
| { |
| rtx x = gen_rtx_REG (value_mode, regno); |
| |
| if (type_mode != value_mode) |
| { |
| x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset)); |
| x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x)); |
| } |
| return x; |
| } |
| |
| /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1. |
| MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and |
| byte offset for the first value, likewise MODE2 and OFFSET2 for the |
| second value. */ |
| |
| static rtx |
| loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1, |
| machine_mode mode1, HOST_WIDE_INT offset1, |
| unsigned regno2, machine_mode mode2, |
| HOST_WIDE_INT offset2) |
| { |
| return gen_rtx_PARALLEL ( |
| mode, gen_rtvec (2, |
| gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode1, regno1), |
| GEN_INT (offset1)), |
| gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode2, regno2), |
| GEN_INT (offset2)))); |
| } |
| |
| /* Fill INFO with information about a single argument, and return an |
| RTL pattern to pass or return the argument. CUM is the cumulative |
| state for earlier arguments. MODE is the mode of this argument and |
| TYPE is its type (if known). NAMED is true if this is a named |
| (fixed) argument rather than a variable one. RETURN_P is true if |
| returning the argument, or false if passing the argument. */ |
| |
| static rtx |
| loongarch_get_arg_info (struct loongarch_arg_info *info, |
| const CUMULATIVE_ARGS *cum, machine_mode mode, |
| const_tree type, bool named, bool return_p) |
| { |
| unsigned num_bytes, num_words; |
| unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST; |
| unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST; |
| unsigned alignment = loongarch_function_arg_boundary (mode, type); |
| |
| memset (info, 0, sizeof (*info)); |
| info->gpr_offset = cum->num_gprs; |
| info->fpr_offset = cum->num_fprs; |
| |
| if (named) |
| { |
| loongarch_aggregate_field fields[2]; |
| unsigned fregno = fpr_base + info->fpr_offset; |
| unsigned gregno = gpr_base + info->gpr_offset; |
| |
| /* Pass one- or two-element floating-point aggregates in FPRs. */ |
| if ((info->num_fprs |
| = loongarch_pass_aggregate_num_fpr (type, fields)) |
| && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS) |
| switch (info->num_fprs) |
| { |
| case 1: |
| return loongarch_pass_fpr_single (mode, fregno, |
| TYPE_MODE (fields[0].type), |
| fields[0].offset); |
| |
| case 2: |
| return loongarch_pass_fpr_pair (mode, fregno, |
| TYPE_MODE (fields[0].type), |
| fields[0].offset, |
| fregno + 1, |
| TYPE_MODE (fields[1].type), |
| fields[1].offset); |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* Pass real and complex floating-point numbers in FPRs. */ |
| if ((info->num_fprs = loongarch_pass_mode_in_fpr_p (mode)) |
| && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS) |
| switch (GET_MODE_CLASS (mode)) |
| { |
| case MODE_FLOAT: |
| return gen_rtx_REG (mode, fregno); |
| |
| case MODE_COMPLEX_FLOAT: |
| return loongarch_pass_fpr_pair (mode, fregno, |
| GET_MODE_INNER (mode), 0, |
| fregno + 1, GET_MODE_INNER (mode), |
| GET_MODE_UNIT_SIZE (mode)); |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* Pass structs with one float and one integer in an FPR and a GPR. */ |
| if (loongarch_pass_aggregate_in_fpr_and_gpr_p (type, fields) |
| && info->gpr_offset < MAX_ARGS_IN_REGISTERS |
| && info->fpr_offset < MAX_ARGS_IN_REGISTERS) |
| { |
| info->num_gprs = 1; |
| info->num_fprs = 1; |
| |
| if (!SCALAR_FLOAT_TYPE_P (fields[0].type)) |
| std::swap (fregno, gregno); |
| |
| return loongarch_pass_fpr_pair (mode, fregno, |
| TYPE_MODE (fields[0].type), |
| fields[0].offset, gregno, |
| TYPE_MODE (fields[1].type), |
| fields[1].offset); |
| } |
| } |
| |
| /* Work out the size of the argument. */ |
| num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); |
| num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| |
| /* Doubleword-aligned varargs start on an even register boundary. */ |
| if (!named && num_bytes != 0 && alignment > BITS_PER_WORD) |
| info->gpr_offset += info->gpr_offset & 1; |
| |
| /* Partition the argument between registers and stack. */ |
| info->num_fprs = 0; |
| info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset); |
| info->stack_p = (num_words - info->num_gprs) != 0; |
| |
| if (info->num_gprs || return_p) |
| return gen_rtx_REG (mode, gpr_base + info->gpr_offset); |
| |
| return NULL_RTX; |
| } |
| |
| /* Implement TARGET_FUNCTION_ARG. */ |
| |
| static rtx |
| loongarch_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| struct loongarch_arg_info info; |
| |
| if (arg.end_marker_p ()) |
| return NULL; |
| |
| return loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, |
| false); |
| } |
| |
| /* Implement TARGET_FUNCTION_ARG_ADVANCE. */ |
| |
| static void |
| loongarch_function_arg_advance (cumulative_args_t cum_v, |
| const function_arg_info &arg) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| struct loongarch_arg_info info; |
| |
| loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false); |
| |
| /* Advance the register count. This has the effect of setting |
| num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned |
| argument required us to skip the final GPR and pass the whole |
| argument on the stack. */ |
| cum->num_fprs = info.fpr_offset + info.num_fprs; |
| cum->num_gprs = info.gpr_offset + info.num_gprs; |
| } |
| |
| /* Implement TARGET_ARG_PARTIAL_BYTES. */ |
| |
| static int |
| loongarch_arg_partial_bytes (cumulative_args_t cum, |
| const function_arg_info &generic_arg) |
| { |
| struct loongarch_arg_info arg; |
| |
| loongarch_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode, |
| generic_arg.type, generic_arg.named, false); |
| return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0; |
| } |
| |
| /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls, |
| VALTYPE is the return type and MODE is VOIDmode. For libcalls, |
| VALTYPE is null and MODE is the mode of the return value. */ |
| |
| static rtx |
| loongarch_function_value_1 (const_tree type, const_tree func, |
| machine_mode mode) |
| { |
| struct loongarch_arg_info info; |
| CUMULATIVE_ARGS args; |
| |
| if (type) |
| { |
| int unsigned_p = TYPE_UNSIGNED (type); |
| |
| mode = TYPE_MODE (type); |
| |
| /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes |
| return values, promote the mode here too. */ |
| mode = promote_function_mode (type, mode, &unsigned_p, func, 1); |
| } |
| |
| memset (&args, 0, sizeof (args)); |
| return loongarch_get_arg_info (&info, &args, mode, type, true, true); |
| } |
| |
| |
| /* Implement TARGET_FUNCTION_VALUE. */ |
| |
| static rtx |
| loongarch_function_value (const_tree valtype, const_tree fn_decl_or_type, |
| bool outgoing ATTRIBUTE_UNUSED) |
| { |
| return loongarch_function_value_1 (valtype, fn_decl_or_type, VOIDmode); |
| } |
| |
| /* Implement TARGET_LIBCALL_VALUE. */ |
| |
| static rtx |
| loongarch_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) |
| { |
| return loongarch_function_value_1 (NULL_TREE, NULL_TREE, mode); |
| } |
| |
| |
| /* Implement TARGET_PASS_BY_REFERENCE. */ |
| |
| static bool |
| loongarch_pass_by_reference (cumulative_args_t cum_v, |
| const function_arg_info &arg) |
| { |
| HOST_WIDE_INT size = arg.type_size_in_bytes (); |
| struct loongarch_arg_info info; |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| |
| /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we |
| never pass variadic arguments in floating-point registers, so we can |
| avoid the call to loongarch_get_arg_info in this case. */ |
| if (cum != NULL) |
| { |
| /* Don't pass by reference if we can use a floating-point register. */ |
| loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, |
| false); |
| if (info.num_fprs) |
| return false; |
| } |
| |
| /* Pass by reference if the data do not fit in two integer registers. */ |
| return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD); |
| } |
| |
| /* Implement TARGET_RETURN_IN_MEMORY. */ |
| |
| static bool |
| loongarch_return_in_memory (const_tree type, |
| const_tree fndecl ATTRIBUTE_UNUSED) |
| { |
| CUMULATIVE_ARGS args; |
| cumulative_args_t cum = pack_cumulative_args (&args); |
| |
| /* The rules for returning in memory are the same as for passing the |
| first named argument by reference. */ |
| memset (&args, 0, sizeof (args)); |
| function_arg_info arg (const_cast<tree> (type), /*named=*/true); |
| return loongarch_pass_by_reference (cum, arg); |
| } |
| |
| /* Implement TARGET_SETUP_INCOMING_VARARGS. */ |
| |
| static void |
| loongarch_setup_incoming_varargs (cumulative_args_t cum, |
| const function_arg_info &arg, |
| int *pretend_size ATTRIBUTE_UNUSED, |
| int no_rtl) |
| { |
| CUMULATIVE_ARGS local_cum; |
| int gp_saved; |
| |
| /* The caller has advanced CUM up to, but not beyond, the last named |
| argument. Advance a local copy of CUM past the last "real" named |
| argument, to find out how many registers are left over. */ |
| local_cum = *get_cumulative_args (cum); |
| if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl))) |
| loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); |
| |
| /* Find out how many registers we need to save. */ |
| gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; |
| |
| if (!no_rtl && gp_saved > 0) |
| { |
| rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx, |
| REG_PARM_STACK_SPACE (cfun->decl) |
| - gp_saved * UNITS_PER_WORD); |
| rtx mem = gen_frame_mem (BLKmode, ptr); |
| set_mem_alias_set (mem, get_varargs_alias_set ()); |
| |
| move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST, mem, gp_saved); |
| } |
| if (REG_PARM_STACK_SPACE (cfun->decl) == 0) |
| cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD; |
| } |
| |
| /* Make the last instruction frame-related and note that it performs |
| the operation described by FRAME_PATTERN. */ |
| |
| static void |
| loongarch_set_frame_expr (rtx frame_pattern) |
| { |
| rtx insn; |
| |
| insn = get_last_insn (); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, frame_pattern, |
| REG_NOTES (insn)); |
| } |
| |
| /* Return a frame-related rtx that stores REG at MEM. |
| REG must be a single register. */ |
| |
| static rtx |
| loongarch_frame_set (rtx mem, rtx reg) |
| { |
| rtx set = gen_rtx_SET (mem, reg); |
| RTX_FRAME_RELATED_P (set) = 1; |
| return set; |
| } |
| |
| /* Return true if the current function must save register REGNO. */ |
| |
| static bool |
| loongarch_save_reg_p (unsigned int regno) |
| { |
| bool call_saved = !global_regs[regno] && !call_used_regs[regno]; |
| bool might_clobber |
| = crtl->saves_all_registers || df_regs_ever_live_p (regno); |
| |
| if (call_saved && might_clobber) |
| return true; |
| |
| if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) |
| return true; |
| |
| if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return) |
| return true; |
| |
| return false; |
| } |
| |
| /* Determine which GPR save/restore routine to call. */ |
| |
| static unsigned |
| loongarch_save_libcall_count (unsigned mask) |
| { |
| for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--) |
| if (BITSET_P (mask, n)) |
| return CALLEE_SAVED_REG_NUMBER (n) + 1; |
| abort (); |
| } |
| |
| /* Populate the current function's loongarch_frame_info structure. |
| |
| LoongArch stack frames grow downward. High addresses are at the top. |
| |
| +-------------------------------+ |
| | | |
| | incoming stack arguments | |
| | | |
| +-------------------------------+ <-- incoming stack pointer |
| | | |
| | callee-allocated save area | |
| | for arguments that are | |
| | split between registers and | |
| | the stack | |
| | | |
| +-------------------------------+ <-- arg_pointer_rtx (virtual) |
| | | |
| | callee-allocated save area | |
| | for register varargs | |
| | | |
| +-------------------------------+ <-- hard_frame_pointer_rtx; |
| | | stack_pointer_rtx + gp_sp_offset |
| | GPR save area | + UNITS_PER_WORD |
| | | |
| +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset |
| | | + UNITS_PER_HWVALUE |
| | FPR save area | |
| | | |
| +-------------------------------+ <-- frame_pointer_rtx (virtual) |
| | | |
| | local variables | |
| | | |
| P +-------------------------------+ |
| | | |
| | outgoing stack arguments | |
| | | |
| +-------------------------------+ <-- stack_pointer_rtx |
| |
| Dynamic stack allocations such as alloca insert data at point P. |
| They decrease stack_pointer_rtx but leave frame_pointer_rtx and |
| hard_frame_pointer_rtx unchanged. */ |
| |
| static void |
| loongarch_compute_frame_info (void) |
| { |
| struct loongarch_frame_info *frame; |
| HOST_WIDE_INT offset; |
| unsigned int regno, i, num_x_saved = 0, num_f_saved = 0; |
| |
| frame = &cfun->machine->frame; |
| memset (frame, 0, sizeof (*frame)); |
| |
| /* Find out which GPRs we need to save. */ |
| for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) |
| if (loongarch_save_reg_p (regno)) |
| frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; |
| |
| /* If this function calls eh_return, we must also save and restore the |
| EH data registers. */ |
| if (crtl->calls_eh_return) |
| for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++) |
| frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; |
| |
| /* Find out which FPRs we need to save. This loop must iterate over |
| the same space as its companion in loongarch_for_each_saved_reg. */ |
| if (TARGET_HARD_FLOAT) |
| for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) |
| if (loongarch_save_reg_p (regno)) |
| frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++; |
| |
| /* At the bottom of the frame are any outgoing stack arguments. */ |
| offset = LARCH_STACK_ALIGN (crtl->outgoing_args_size); |
| /* Next are local stack variables. */ |
| offset += LARCH_STACK_ALIGN (get_frame_size ()); |
| /* The virtual frame pointer points above the local variables. */ |
| frame->frame_pointer_offset = offset; |
| /* Next are the callee-saved FPRs. */ |
| if (frame->fmask) |
| { |
| offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG); |
| frame->fp_sp_offset = offset - UNITS_PER_FP_REG; |
| } |
| else |
| frame->fp_sp_offset = offset; |
| /* Next are the callee-saved GPRs. */ |
| if (frame->mask) |
| { |
| unsigned x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD); |
| unsigned num_save_restore |
| = 1 + loongarch_save_libcall_count (frame->mask); |
| |
| /* Only use save/restore routines if they don't alter the stack size. */ |
| if (LARCH_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size) |
| frame->save_libcall_adjustment = x_save_size; |
| |
| offset += x_save_size; |
| frame->gp_sp_offset = offset - UNITS_PER_WORD; |
| } |
| else |
| frame->gp_sp_offset = offset; |
| /* The hard frame pointer points above the callee-saved GPRs. */ |
| frame->hard_frame_pointer_offset = offset; |
| /* Above the hard frame pointer is the callee-allocated varargs save area. */ |
| offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size); |
| /* Next is the callee-allocated area for pretend stack arguments. */ |
| offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size); |
| /* Arg pointer must be below pretend args, but must be above alignment |
| padding. */ |
| frame->arg_pointer_offset = offset - crtl->args.pretend_args_size; |
| frame->total_size = offset; |
| /* Next come the incoming stack pointer and any incoming arguments. */ |
| |
| /* Only use save/restore routines when the GPRs are atop the frame. */ |
| if (frame->hard_frame_pointer_offset != frame->total_size) |
| frame->save_libcall_adjustment = 0; |
| } |
| |
| /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer |
| or argument pointer. TO is either the stack pointer or hard frame |
| pointer. */ |
| |
| HOST_WIDE_INT |
| loongarch_initial_elimination_offset (int from, int to) |
| { |
| HOST_WIDE_INT src, dest; |
| |
| loongarch_compute_frame_info (); |
| |
| if (to == HARD_FRAME_POINTER_REGNUM) |
| dest = cfun->machine->frame.hard_frame_pointer_offset; |
| else if (to == STACK_POINTER_REGNUM) |
| dest = 0; /* The stack pointer is the base of all offsets, hence 0. */ |
| else |
| gcc_unreachable (); |
| |
| if (from == FRAME_POINTER_REGNUM) |
| src = cfun->machine->frame.frame_pointer_offset; |
| else if (from == ARG_POINTER_REGNUM) |
| src = cfun->machine->frame.arg_pointer_offset; |
| else |
| gcc_unreachable (); |
| |
| return src - dest; |
| } |
| |
| /* A function to save or store a register. The first argument is the |
| register and the second is the stack slot. */ |
| typedef void (*loongarch_save_restore_fn) (rtx, rtx); |
| |
| /* Use FN to save or restore register REGNO. MODE is the register's |
| mode and OFFSET is the offset of its save slot from the current |
| stack pointer. */ |
| |
| static void |
| loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset, |
| loongarch_save_restore_fn fn) |
| { |
| rtx mem; |
| |
| mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset)); |
| fn (gen_rtx_REG (mode, regno), mem); |
| } |
| |
| /* Call FN for each register that is saved by the current function. |
| SP_OFFSET is the offset of the current stack pointer from the start |
| of the frame. */ |
| |
| static void |
| loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, |
| loongarch_save_restore_fn fn) |
| { |
| HOST_WIDE_INT offset; |
| |
| /* Save the link register and s-registers. */ |
| offset = cfun->machine->frame.gp_sp_offset - sp_offset; |
| for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) |
| if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) |
| { |
| loongarch_save_restore_reg (word_mode, regno, offset, fn); |
| offset -= UNITS_PER_WORD; |
| } |
| |
| /* This loop must iterate over the same space as its companion in |
| loongarch_compute_frame_info. */ |
| offset = cfun->machine->frame.fp_sp_offset - sp_offset; |
| for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) |
| if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST)) |
| { |
| machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode; |
| |
| loongarch_save_restore_reg (mode, regno, offset, fn); |
| offset -= GET_MODE_SIZE (mode); |
| } |
| } |
| |
| /* Emit a move from SRC to DEST. Assume that the move expanders can |
| handle all moves if !can_create_pseudo_p (). The distinction is |
| important because, unlike emit_move_insn, the move expanders know |
| how to force Pmode objects into the constant pool even when the |
| constant pool address is not itself legitimate. */ |
| |
| rtx |
| loongarch_emit_move (rtx dest, rtx src) |
| { |
| return (can_create_pseudo_p () ? emit_move_insn (dest, src) |
| : emit_move_insn_1 (dest, src)); |
| } |
| |
| /* Save register REG to MEM. Make the instruction frame-related. */ |
| |
| static void |
| loongarch_save_reg (rtx reg, rtx mem) |
| { |
| loongarch_emit_move (mem, reg); |
| loongarch_set_frame_expr (loongarch_frame_set (mem, reg)); |
| } |
| |
| /* Restore register REG from MEM. */ |
| |
| static void |
| loongarch_restore_reg (rtx reg, rtx mem) |
| { |
| rtx insn = loongarch_emit_move (reg, mem); |
| rtx dwarf = NULL_RTX; |
| dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); |
| REG_NOTES (insn) = dwarf; |
| |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
| /* For stack frames that can't be allocated with a single ADDI instruction, |
| compute the best value to initially allocate. It must at a minimum |
| allocate enough space to spill the callee-saved registers. */ |
| |
| static HOST_WIDE_INT |
| loongarch_first_stack_step (struct loongarch_frame_info *frame) |
| { |
| if (IMM12_OPERAND (frame->total_size)) |
| return frame->total_size; |
| |
| HOST_WIDE_INT min_first_step |
| = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset); |
| HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; |
| HOST_WIDE_INT min_second_step = frame->total_size - max_first_step; |
| gcc_assert (min_first_step <= max_first_step); |
| |
| /* As an optimization, use the least-significant bits of the total frame |
| size, so that the second adjustment step is just LU12I + ADD. */ |
| if (!IMM12_OPERAND (min_second_step) |
| && frame->total_size % IMM_REACH < IMM_REACH / 2 |
| && frame->total_size % IMM_REACH >= min_first_step) |
| return frame->total_size % IMM_REACH; |
| |
| return max_first_step; |
| } |
| |
| static void |
| loongarch_emit_stack_tie (void) |
| { |
| emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx, hard_frame_pointer_rtx)); |
| } |
| |
| #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) |
| |
| #if PROBE_INTERVAL > 16384 |
| #error Cannot use indexed addressing mode for stack probing |
| #endif |
| |
| /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, |
| inclusive. These are offsets from the current stack pointer. */ |
| |
| static void |
| loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) |
| { |
| /* See if we have a constant small number of probes to generate. If so, |
| that's the easy case. */ |
| if ((TARGET_64BIT && (first + size <= 32768)) |
| || (!TARGET_64BIT && (first + size <= 2048))) |
| { |
| HOST_WIDE_INT i; |
| |
| /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until |
| it exceeds SIZE. If only one probe is needed, this will not |
| generate any code. Then probe at FIRST + SIZE. */ |
| for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) |
| emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, |
| -(first + i))); |
| |
| emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, |
| -(first + size))); |
| } |
| |
| /* Otherwise, do the same as above, but in a loop. Note that we must be |
| extra careful with variables wrapping around because we might be at |
| the very top (or the very bottom) of the address space and we have |
| to be able to handle this case properly; in particular, we use an |
| equality test for the loop condition. */ |
| else |
| { |
| HOST_WIDE_INT rounded_size; |
| rtx r13 = LARCH_PROLOGUE_TEMP (Pmode); |
| rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode); |
| rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode); |
| |
| /* Sanity check for the addressing mode we're going to use. */ |
| gcc_assert (first <= 16384); |
| |
| |
| /* Step 1: round SIZE to the previous multiple of the interval. */ |
| |
| rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); |
| |
| /* TEST_ADDR = SP + FIRST */ |
| if (first != 0) |
| { |
| emit_move_insn (r14, GEN_INT (first)); |
| emit_insn (gen_rtx_SET (r13, gen_rtx_MINUS (Pmode, |
| stack_pointer_rtx, |
| r14))); |
| } |
| else |
| emit_move_insn (r13, stack_pointer_rtx); |
| |
| /* Step 2: compute initial and final value of the loop counter. */ |
| |
| emit_move_insn (r14, GEN_INT (PROBE_INTERVAL)); |
| /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ |
| if (rounded_size == 0) |
| emit_move_insn (r12, r13); |
| else |
| { |
| emit_move_insn (r12, GEN_INT (rounded_size)); |
| emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, r13, r12))); |
| /* Step 3: the loop |
| |
| do |
| { |
| TEST_ADDR = TEST_ADDR + PROBE_INTERVAL |
| probe at TEST_ADDR |
| } |
| while (TEST_ADDR != LAST_ADDR) |
| |
| probes at FIRST + N * PROBE_INTERVAL for values of N from 1 |
| until it is equal to ROUNDED_SIZE. */ |
| |
| emit_insn (gen_probe_stack_range (Pmode, r13, r13, r12, r14)); |
| } |
| |
| /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time |
| that SIZE is equal to ROUNDED_SIZE. */ |
| |
| if (size != rounded_size) |
| { |
| if (TARGET_64BIT) |
| emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size)); |
| else |
| { |
| HOST_WIDE_INT i; |
| for (i = 2048; i < (size - rounded_size); i += 2048) |
| { |
| emit_stack_probe (plus_constant (Pmode, r12, -i)); |
| emit_insn (gen_rtx_SET (r12, |
| plus_constant (Pmode, r12, -2048))); |
| } |
| rtx r1 = plus_constant (Pmode, r12, |
| -(size - rounded_size - i + 2048)); |
| emit_stack_probe (r1); |
| } |
| } |
| } |
| |
| /* Make sure nothing is scheduled before we are done. */ |
| emit_insn (gen_blockage ()); |
| } |
| |
| /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are |
| absolute addresses; REG3 holds the probe interval. */ |
| const char * |
| loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3) |
| { |
| static int labelno = 0; |
| char loop_lab[32], tmp[64]; |
| rtx xops[3]; |
| |
| ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); |
| |
| /* Loop. */ |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); |
| |
| /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ |
| xops[0] = reg1; |
| xops[1] = GEN_INT (-PROBE_INTERVAL); |
| xops[2] = reg3; |
| if (TARGET_64BIT) |
| output_asm_insn ("sub.d\t%0,%0,%2", xops); |
| else |
| output_asm_insn ("sub.w\t%0,%0,%2", xops); |
| |
| /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch. */ |
| xops[1] = reg2; |
| strcpy (tmp, "bne\t%0,%1,"); |
| if (TARGET_64BIT) |
| output_asm_insn ("st.d\t$r0,%0,0", xops); |
| else |
| output_asm_insn ("st.w\t$r0,%0,0", xops); |
| output_asm_insn (strcat (tmp, &loop_lab[1]), xops); |
| |
| return ""; |
| } |
| |
| /* Expand the "prologue" pattern. */ |
| |
| void |
| loongarch_expand_prologue (void) |
| { |
| struct loongarch_frame_info *frame = &cfun->machine->frame; |
| HOST_WIDE_INT size = frame->total_size; |
| HOST_WIDE_INT tmp; |
| rtx insn; |
| |
| if (flag_stack_usage_info) |
| current_function_static_stack_size = size; |
| |
| if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK |
| || flag_stack_clash_protection) |
| { |
| if (crtl->is_leaf && !cfun->calls_alloca) |
| { |
| if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) |
| { |
| tmp = size - get_stack_check_protect (); |
| loongarch_emit_probe_stack_range (get_stack_check_protect (), |
| tmp); |
| } |
| } |
| else if (size > 0) |
| loongarch_emit_probe_stack_range (get_stack_check_protect (), size); |
| } |
| |
| /* Save the registers. */ |
| if ((frame->mask | frame->fmask) != 0) |
| { |
| HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame)); |
| |
| insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (-step1)); |
| RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; |
| size -= step1; |
| loongarch_for_each_saved_reg (size, loongarch_save_reg); |
| } |
| |
| |
| /* Set up the frame pointer, if we're using one. */ |
| if (frame_pointer_needed) |
| { |
| insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (frame->hard_frame_pointer_offset - size)); |
| RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; |
| |
| loongarch_emit_stack_tie (); |
| } |
| |
| /* Allocate the rest of the frame. */ |
| if (size > 0) |
| { |
| if (IMM12_OPERAND (-size)) |
| { |
| insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (-size)); |
| RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; |
| } |
| else |
| { |
| loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size)); |
| emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, |
| LARCH_PROLOGUE_TEMP (Pmode))); |
| |
| /* Describe the effect of the previous instructions. */ |
| insn = plus_constant (Pmode, stack_pointer_rtx, -size); |
| insn = gen_rtx_SET (stack_pointer_rtx, insn); |
| loongarch_set_frame_expr (insn); |
| } |
| } |
| } |
| |
| /* Return nonzero if this function is known to have a null epilogue. |
| This allows the optimizer to omit jumps to jumps if no stack |
| was created. */ |
| |
| bool |
| loongarch_can_use_return_insn (void) |
| { |
| return reload_completed && cfun->machine->frame.total_size == 0; |
| } |
| |
| /* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P |
| says which. */ |
| |
| void |
| loongarch_expand_epilogue (bool sibcall_p) |
| { |
| /* Split the frame into two. STEP1 is the amount of stack we should |
| deallocate before restoring the registers. STEP2 is the amount we |
| should deallocate afterwards. |
| |
| Start off by assuming that no registers need to be restored. */ |
| struct loongarch_frame_info *frame = &cfun->machine->frame; |
| HOST_WIDE_INT step1 = frame->total_size; |
| HOST_WIDE_INT step2 = 0; |
| rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); |
| rtx insn; |
| |
| /* We need a memory barrier to prevent reads from the deallocated stack. */ |
| bool need_barrier_p |
| = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0; |
| |
| if (!sibcall_p && loongarch_can_use_return_insn ()) |
| { |
| emit_jump_insn (gen_return ()); |
| return; |
| } |
| |
| /* Move past any dynamic stack allocations. */ |
| if (cfun->calls_alloca) |
| { |
| /* Emit a barrier to prevent loads from a deallocated stack. */ |
| loongarch_emit_stack_tie (); |
| need_barrier_p = false; |
| |
| rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset); |
| if (!IMM12_OPERAND (INTVAL (adjust))) |
| { |
| loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust); |
| adjust = LARCH_PROLOGUE_TEMP (Pmode); |
| } |
| |
| insn = emit_insn (gen_add3_insn (stack_pointer_rtx, |
| hard_frame_pointer_rtx, |
| adjust)); |
| |
| rtx dwarf = NULL_RTX; |
| rtx minus_offset = GEN_INT (-frame->hard_frame_pointer_offset); |
| rtx cfa_adjust_value = gen_rtx_PLUS (Pmode, |
| hard_frame_pointer_rtx, |
| minus_offset); |
| |
| rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value); |
| dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| REG_NOTES (insn) = dwarf; |
| } |
| |
| /* If we need to restore registers, deallocate as much stack as |
| possible in the second step without going out of range. */ |
| if ((frame->mask | frame->fmask) != 0) |
| { |
| step2 = loongarch_first_stack_step (frame); |
| step1 -= step2; |
| } |
| |
| /* Deallocate the first STEP1 bytes of the frame. */ |
| if (step1 > 0) |
| { |
| /* Emit a barrier to prevent loads from a deallocated stack. */ |
| loongarch_emit_stack_tie (); |
| need_barrier_p = false; |
| |
| /* Get an rtx for STEP1 that we can add to the stack pointer. */ |
| rtx adjust = GEN_INT (step1); |
| if (!IMM12_OPERAND (step1)) |
| { |
| loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust); |
| adjust = LARCH_PROLOGUE_TEMP (Pmode); |
| } |
| |
| insn = emit_insn (gen_add3_insn (stack_pointer_rtx, |
| stack_pointer_rtx, |
| adjust)); |
| |
| rtx dwarf = NULL_RTX; |
| rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
| GEN_INT (step2)); |
| |
| dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| REG_NOTES (insn) = dwarf; |
| } |
| |
| /* Restore the registers. */ |
| loongarch_for_each_saved_reg (frame->total_size - step2, |
| loongarch_restore_reg); |
| |
| if (need_barrier_p) |
| loongarch_emit_stack_tie (); |
| |
| /* Deallocate the final bit of the frame. */ |
| if (step2 > 0) |
| { |
| insn = emit_insn (gen_add3_insn (stack_pointer_rtx, |
| stack_pointer_rtx, |
| GEN_INT (step2))); |
| |
| rtx dwarf = NULL_RTX; |
| rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, const0_rtx); |
| dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| REG_NOTES (insn) = dwarf; |
| } |
| |
| /* Add in the __builtin_eh_return stack adjustment. */ |
| if (crtl->calls_eh_return) |
| emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, |
| EH_RETURN_STACKADJ_RTX)); |
| |
| if (!sibcall_p) |
| emit_jump_insn (gen_simple_return_internal (ra)); |
| } |
| |
| #define LU32I_B (0xfffffULL << 32) |
| #define LU52I_B (0xfffULL << 52) |
| |
| /* Fill CODES with a sequence of rtl operations to load VALUE. |
| Return the number of operations needed. */ |
| |
| static unsigned int |
| loongarch_build_integer (struct loongarch_integer_op *codes, |
| HOST_WIDE_INT value) |
| |
| { |
| unsigned int cost = 0; |
| |
| /* Get the lower 32 bits of the value. */ |
| HOST_WIDE_INT low_part = (int32_t)value; |
| |
| if (IMM12_OPERAND (low_part) || IMM12_OPERAND_UNSIGNED (low_part)) |
| { |
| /* The low 32 bits of the value can be loaded with a single |
| instruction. */ |
| codes[0].code = UNKNOWN; |
| codes[0].method = METHOD_NORMAL; |
| codes[0].value = low_part; |
| cost++; |
| } |
| else |
| { |
| /* lu12i.w + ior. */ |
| codes[0].code = UNKNOWN; |
| codes[0].method = METHOD_NORMAL; |
| codes[0].value = low_part & ~(IMM_REACH - 1); |
| cost++; |
| HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1); |
| if (iorv != 0) |
| { |
| codes[1].code = IOR; |
| codes[1].method = METHOD_NORMAL; |
| codes[1].value = iorv; |
| cost++; |
| } |
| } |
| |
| if (TARGET_64BIT) |
| { |
| bool lu32i[2] = {(value & LU32I_B) == 0, (value & LU32I_B) == LU32I_B}; |
| bool lu52i[2] = {(value & LU52I_B) == 0, (value & LU52I_B) == LU52I_B}; |
| |
| int sign31 = (value & (HOST_WIDE_INT_1U << 31)) >> 31; |
| int sign51 = (value & (HOST_WIDE_INT_1U << 51)) >> 51; |
| /* Determine whether the upper 32 bits are sign-extended from the lower |
| 32 bits. If so, the instructions that load the high part can be |
| omitted. */ |
| if (lu32i[sign31] && lu52i[sign31]) |
| return cost; |
| /* Determine whether bits 32-51 are sign-extended from the lower 32 |
| bits. If so, only bits 52-63 need to be loaded. */ |
| else if (lu32i[sign31]) |
| { |
| codes[cost].method = METHOD_LU52I; |
| codes[cost].value = value & LU52I_B; |
| return cost + 1; |
| } |
| |
| codes[cost].method = METHOD_LU32I; |
| codes[cost].value = (value & LU32I_B) | (sign51 ? LU52I_B : 0); |
| cost++; |
| |
| /* Determine whether bits 52-63 are sign-extended from bit 51; if not, |
| load bits 52-63. */ |
| if (!lu52i[(value & (HOST_WIDE_INT_1U << 51)) >> 51]) |
| { |
| codes[cost].method = METHOD_LU52I; |
| codes[cost].value = value & LU52I_B; |
| cost++; |
| } |
| } |
| |
| gcc_assert (cost <= LARCH_MAX_INTEGER_OPS); |
| |
| return cost; |
| } |
| |
| /* Return the number of operations needed to load VALUE into a register. |
| The constant itself is split in loongarch_output_move. */ |
| |
| static unsigned int |
| loongarch_integer_cost (HOST_WIDE_INT value) |
| { |
| struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; |
| return loongarch_build_integer (codes, value); |
| } |
| |
| /* Implement TARGET_LEGITIMATE_CONSTANT_P. */ |
| |
| static bool |
| loongarch_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) |
| { |
| return loongarch_const_insns (x) > 0; |
| } |
| |
| /* Return true if X is a thread-local symbol. */ |
| |
| static bool |
| loongarch_tls_symbol_p (rtx x) |
| { |
| return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0; |
| } |
| |
| /* Return true if SYMBOL_REF X is associated with a global symbol |
| (in the STB_GLOBAL sense). */ |
| |
| bool |
| loongarch_global_symbol_p (const_rtx x) |
| { |
| if (LABEL_REF_P (x)) |
| return false; |
| |
| const_tree decl = SYMBOL_REF_DECL (x); |
| |
| if (!decl) |
| return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x); |
| |
| /* Weakref symbols are not TREE_PUBLIC, but their targets are global |
| or weak symbols. Relocations in the object file will be against |
| the target symbol, so it's that symbol's binding that matters here. */ |
| return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl)); |
| } |
| |
| bool |
| loongarch_global_symbol_noweak_p (const_rtx x) |
| { |
| if (LABEL_REF_P (x)) |
| return false; |
| |
| const_tree decl = SYMBOL_REF_DECL (x); |
| |
| if (!decl) |
| return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x); |
| |
| return DECL_P (decl) && TREE_PUBLIC (decl); |
| } |
| |
| bool |
| loongarch_weak_symbol_p (const_rtx x) |
| { |
| const_tree decl; |
| if (LABEL_REF_P (x) || !(decl = SYMBOL_REF_DECL (x))) |
| return false; |
| return DECL_P (decl) && DECL_WEAK (decl); |
| } |
| |
| /* Return true if SYMBOL_REF X binds locally. */ |
| |
| bool |
| loongarch_symbol_binds_local_p (const_rtx x) |
| { |
| if (TARGET_DIRECT_EXTERN_ACCESS) |
| return true; |
| |
| if (SYMBOL_REF_P (x)) |
| return (SYMBOL_REF_DECL (x) |
| ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) |
| : SYMBOL_REF_LOCAL_P (x)); |
| else |
| return false; |
| } |
| |
| /* Return true if rtx constants of mode MODE should be put into a small |
| data section. */ |
| |
| static bool |
| loongarch_rtx_constant_in_small_data_p (machine_mode mode) |
| { |
| return (GET_MODE_SIZE (mode) <= g_switch_value); |
| } |
| |
| /* Return the method that should be used to access SYMBOL_REF or |
| LABEL_REF X. */ |
| |
| static enum loongarch_symbol_type |
| loongarch_classify_symbol (const_rtx x) |
| { |
| enum loongarch_symbol_type pcrel = |
| TARGET_CMODEL_EXTREME ? SYMBOL_PCREL64 : SYMBOL_PCREL; |
| |
| if (!SYMBOL_REF_P (x)) |
| return pcrel; |
| |
| if (SYMBOL_REF_TLS_MODEL (x)) |
| return SYMBOL_TLS; |
| |
| if (!loongarch_symbol_binds_local_p (x)) |
| return SYMBOL_GOT_DISP; |
| |
| tree t = SYMBOL_REF_DECL (x); |
| if (!t) |
| return pcrel; |
| |
| t = lookup_attribute ("model", DECL_ATTRIBUTES (t)); |
| if (!t) |
| return pcrel; |
| |
| t = TREE_VALUE (TREE_VALUE (t)); |
| |
| /* loongarch_handle_model_attribute should reject other values. */ |
| gcc_assert (TREE_CODE (t) == STRING_CST); |
| |
| const char *model = TREE_STRING_POINTER (t); |
| if (strcmp (model, "normal") == 0) |
| return SYMBOL_PCREL; |
| if (strcmp (model, "extreme") == 0) |
| return SYMBOL_PCREL64; |
| |
| /* loongarch_handle_model_attribute should reject unknown model |
| names. */ |
| gcc_unreachable (); |
| } |
| |
| /* Classify the base of symbolic expression X. */ |
| |
| static enum loongarch_symbol_type |
| loongarch_classify_symbolic_expression (rtx x) |
| { |
| rtx offset; |
| |
| split_const (x, &x, &offset); |
| if (UNSPEC_ADDRESS_P (x)) |
| return UNSPEC_ADDRESS_TYPE (x); |
| |
| return loongarch_classify_symbol (x); |
| } |
| |
| /* Return true if X is a symbolic constant. If it is, |
| store the type of the symbol in *SYMBOL_TYPE. */ |
| |
| bool |
| loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) |
| { |
| rtx offset; |
| |
| split_const (x, &x, &offset); |
| if (UNSPEC_ADDRESS_P (x)) |
| { |
| *symbol_type = UNSPEC_ADDRESS_TYPE (x); |
| x = UNSPEC_ADDRESS (x); |
| } |
| else if (SYMBOL_REF_P (x) || LABEL_REF_P (x)) |
| { |
| *symbol_type = loongarch_classify_symbol (x); |
| if (*symbol_type == SYMBOL_TLS) |
| return true; |
| } |
| else |
| return false; |
| |
| if (offset == const0_rtx) |
| return true; |
| |
| /* Check whether a nonzero offset is valid for the underlying |
| relocations. */ |
| switch (*symbol_type) |
| { |
| case SYMBOL_TLS_IE: |
| case SYMBOL_TLS_LE: |
| case SYMBOL_TLSGD: |
| case SYMBOL_TLSLDM: |
| case SYMBOL_PCREL: |
| case SYMBOL_PCREL64: |
| /* GAS rejects offsets outside the range [-2^31, 2^31-1]. */ |
| return sext_hwi (INTVAL (offset), 32) == INTVAL (offset); |
| |
| case SYMBOL_GOT_DISP: |
| case SYMBOL_TLS: |
| return false; |
| } |
| gcc_unreachable (); |
| } |
| |
| /* Return the number of instructions needed to reference a symbol of type |
| TYPE in mode MODE; return 0 if such a reference is not valid. */ |
| |
| static int |
| loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) |
| { |
| switch (type) |
| { |
| case SYMBOL_GOT_DISP: |
| /* The constant will have to be loaded from the GOT before it |
| is used in an address. */ |
| if (!TARGET_EXPLICIT_RELOCS && mode != MAX_MACHINE_MODE) |
| return 0; |
| |
| return 3; |
| |
| case SYMBOL_PCREL: |
| case SYMBOL_TLS_IE: |
| case SYMBOL_TLS_LE: |
| return 2; |
| |
| case SYMBOL_TLSGD: |
| case SYMBOL_TLSLDM: |
| return 3; |
| |
| case SYMBOL_PCREL64: |
| return 5; |
| |
| case SYMBOL_TLS: |
| /* We don't treat a bare TLS symbol as a constant. */ |
| return 0; |
| } |
| gcc_unreachable (); |
| } |
| |
| /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ |
| |
| static bool |
| loongarch_cannot_force_const_mem (machine_mode mode, rtx x) |
| { |
| enum loongarch_symbol_type type; |
| rtx base, offset; |
| |
| /* As an optimization, reject constants that loongarch_legitimize_move |
| can expand inline. |
| |
| Suppose we have a multi-instruction sequence that loads constant C |
| into register R. If R does not get allocated a hard register, and |
| R is used in an operand that allows both registers and memory |
| references, reload will consider forcing C into memory and using |
| one of the instruction's memory alternatives. Returning false |
| here will force it to use an input reload instead. */ |
| if (CONST_INT_P (x) && loongarch_legitimate_constant_p (mode, x)) |
| return true; |
| |
| split_const (x, &base, &offset); |
| if (loongarch_symbolic_constant_p (base, &type)) |
| { |
| /* The same optimization as for CONST_INT. */ |
| if (IMM12_INT (offset) |
| && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0) |
| return true; |
| } |
| |
| /* TLS symbols must be computed by loongarch_legitimize_move. */ |
| if (tls_referenced_p (x)) |
| return true; |
| |
| return false; |
| } |
| |
| /* Return true if register REGNO is a valid base register for mode MODE. |
| STRICT_P is true if REG_OK_STRICT is in effect. */ |
| |
| int |
| loongarch_regno_mode_ok_for_base_p (int regno, |
| machine_mode mode ATTRIBUTE_UNUSED, |
| bool strict_p) |
| { |
| if (!HARD_REGISTER_NUM_P (regno)) |
| { |
| if (!strict_p) |
| return true; |
| regno = reg_renumber[regno]; |
| } |
| |
| /* These fake registers will be eliminated to either the stack or |
| hard frame pointer, both of which are usually valid base registers. |
| Reload deals with the cases where the eliminated form isn't valid. */ |
| if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM) |
| return true; |
| |
| return GP_REG_P (regno); |
| } |
| |
| /* Return true if X is a valid base register for mode MODE. |
| STRICT_P is true if REG_OK_STRICT is in effect. */ |
| |
| static bool |
| loongarch_valid_base_register_p (rtx x, machine_mode mode, bool strict_p) |
| { |
| if (!strict_p && SUBREG_P (x)) |
| x = SUBREG_REG (x); |
| |
| return (REG_P (x) |
| && loongarch_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p)); |
| } |
| |
| /* Return true if, for every base register BASE_REG, (plus BASE_REG X) |
| can address a value of mode MODE. */ |
| |
| static bool |
| loongarch_valid_offset_p (rtx x, machine_mode mode) |
| { |
| /* Check that X is a signed 12-bit number, or a signed 16-bit number |
| that is a multiple of 4 (the latter form is only accepted for SImode |
| and DImode accesses). */ |
| if (!(const_arith_operand (x, Pmode) |
| || ((mode == E_SImode || mode == E_DImode) |
| && const_imm16_operand (x, Pmode) |
| && (loongarch_signed_immediate_p (INTVAL (x), 14, 2))))) |
| return false; |
| |
| /* We may need to split multiword moves, so make sure that every word |
| is accessible. */ |
| if (GET_MODE_SIZE (mode) > UNITS_PER_WORD |
| && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) |
| return false; |
| |
| return true; |
| } |
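| |
| /* Worked example (editor's illustration, assuming a 64-bit target where |
| UNITS_PER_WORD is 8): for a 16-byte TImode access, an offset of 2032 is |
| accepted because both 2032 and 2032 + 8 fit in a signed 12-bit field, |
| whereas 2044 is rejected because 2044 + 8 = 2052 does not. */ |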
| |
| /* Should a symbol of type SYMBOL_TYPE be split into a high and low part? */ |
| |
| bool |
| loongarch_split_symbol_type (enum loongarch_symbol_type symbol_type) |
| { |
| switch (symbol_type) |
| { |
| case SYMBOL_PCREL: |
| case SYMBOL_PCREL64: |
| case SYMBOL_GOT_DISP: |
| case SYMBOL_TLS_IE: |
| case SYMBOL_TLS_LE: |
| case SYMBOL_TLSGD: |
| case SYMBOL_TLSLDM: |
| return true; |
| |
| case SYMBOL_TLS: |
| return false; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Return true if a LO_SUM can address a value of mode MODE when the |
| LO_SUM symbol has type SYMBOL_TYPE. */ |
| |
| static bool |
| loongarch_valid_lo_sum_p (enum loongarch_symbol_type symbol_type, |
| machine_mode mode, rtx x) |
| { |
| int align, size; |
| |
| /* Check that symbols of type SYMBOL_TYPE can be used to access values |
| of mode MODE. */ |
| if (loongarch_symbol_insns (symbol_type, mode) == 0) |
| return false; |
| |
| /* Check that there is a known low-part relocation. */ |
| if (!loongarch_split_symbol_type (symbol_type)) |
| return false; |
| |
| /* We can't tell size or alignment when we have BLKmode, so try extracting a |
| decl from the symbol if possible. */ |
| if (mode == BLKmode) |
| { |
| rtx offset; |
| |
| /* Extract the symbol from the LO_SUM operand, if any. */ |
| split_const (x, &x, &offset); |
| |
| /* Might be a CODE_LABEL. We can compute align but not size for that, |
| so don't bother trying to handle it. */ |
| if (!SYMBOL_REF_P (x)) |
| return false; |
| |
| /* Use worst case assumptions if we don't have a SYMBOL_REF_DECL. */ |
| align = (SYMBOL_REF_DECL (x) |
| ? DECL_ALIGN (SYMBOL_REF_DECL (x)) |
| : 1); |
| size = (SYMBOL_REF_DECL (x) && DECL_SIZE (SYMBOL_REF_DECL (x)) |
| ? tree_to_uhwi (DECL_SIZE (SYMBOL_REF_DECL (x))) |
| : 2*BITS_PER_WORD); |
| } |
| else |
| { |
| align = GET_MODE_ALIGNMENT (mode); |
| size = GET_MODE_BITSIZE (mode); |
| } |
| |
| /* We may need to split multiword moves, so make sure that each word |
| can be accessed without inducing a carry. */ |
| if (size > BITS_PER_WORD |
| && (!TARGET_STRICT_ALIGN || size > align)) |
| return false; |
| |
| return true; |
| } |
| |
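| /* Return true if X can serve as the index register of an indexed address |
| for mode MODE.  If so, record it as INFO->offset and set INFO->type to |
| ADDRESS_REG_REG.  STRICT_P is true if REG_OK_STRICT is in effect. */ |
| |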
| static bool |
| loongarch_valid_index_p (struct loongarch_address_info *info, rtx x, |
| machine_mode mode, bool strict_p) |
| { |
| rtx index; |
| |
| if ((REG_P (x) || SUBREG_P (x)) |
| && GET_MODE (x) == Pmode) |
| { |
| index = x; |
| } |
| else |
| return false; |
| |
| if (!strict_p |
| && SUBREG_P (index) |
| && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))]) |
| index = SUBREG_REG (index); |
| |
| if (loongarch_valid_base_register_p (index, mode, strict_p)) |
| { |
| info->type = ADDRESS_REG_REG; |
| info->offset = index; |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Return true if X is a valid address for machine mode MODE. If it is, |
| fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in |
| effect. */ |
| |
| static bool |
| loongarch_classify_address (struct loongarch_address_info *info, rtx x, |
| machine_mode mode, bool strict_p) |
| { |
| switch (GET_CODE (x)) |
| { |
| case REG: |
| case SUBREG: |
| info->type = ADDRESS_REG; |
| info->reg = x; |
| info->offset = const0_rtx; |
| return loongarch_valid_base_register_p (info->reg, mode, strict_p); |
| |
| case PLUS: |
| if (loongarch_valid_base_register_p (XEXP (x, 0), mode, strict_p) |
| && loongarch_valid_index_p (info, XEXP (x, 1), mode, strict_p)) |
| { |
| info->reg = XEXP (x, 0); |
| return true; |
| } |
| |
| if (loongarch_valid_base_register_p (XEXP (x, 1), mode, strict_p) |
| && loongarch_valid_index_p (info, XEXP (x, 0), mode, strict_p)) |
| { |
| info->reg = XEXP (x, 1); |
| return true; |
| } |
| |
| info->type = ADDRESS_REG; |
| info->reg = XEXP (x, 0); |
| info->offset = XEXP (x, 1); |
| return (loongarch_valid_base_register_p (info->reg, mode, strict_p) |
| && loongarch_valid_offset_p (info->offset, mode)); |
| |
| case LO_SUM: |
| info->type = ADDRESS_LO_SUM; |
| info->reg = XEXP (x, 0); |
| info->offset = XEXP (x, 1); |
| /* We have to trust the creator of the LO_SUM to do something vaguely |
| sane. Target-independent code that creates a LO_SUM should also |
| create and verify the matching HIGH. Target-independent code that |
| adds an offset to a LO_SUM must prove that the offset will not |
| induce a carry. Failure to do either of these things would be |
| a bug, and we are not required to check for it here. The LoongArch |
| backend itself should only create LO_SUMs for valid symbolic |
| constants, with the high part being a HIGH. */ |
| info->symbol_type |
| = loongarch_classify_symbolic_expression (info->offset); |
| return (loongarch_valid_base_register_p (info->reg, mode, strict_p) |
| && loongarch_valid_lo_sum_p (info->symbol_type, mode, |
| info->offset)); |
| |
| default: |
| return false; |
| } |
| } |
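| |
| /* For illustration (editor's sketch, not exhaustive): (reg $r4) and |
| (plus (reg $r4) (const_int 2040)) classify as ADDRESS_REG, |
| (plus (reg $r4) (reg $r5)) as ADDRESS_REG_REG, and |
| (lo_sum (reg $r4) (symbol_ref "x")) as ADDRESS_LO_SUM, subject to the |
| offset, base-register and LO_SUM checks performed by the helpers above. */ |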
| |
| /* Implement TARGET_LEGITIMATE_ADDRESS_P. */ |
| |
| static bool |
| loongarch_legitimate_address_p (machine_mode mode, rtx x, bool strict_p) |
| { |
| struct loongarch_address_info addr; |
| |
| return loongarch_classify_address (&addr, x, mode, strict_p); |
| } |
| |
| /* Return true if ADDR matches the pattern for the indexed address |
| instruction. */ |
| |
| static bool |
| loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED) |
| { |
| if (GET_CODE (addr) != PLUS |
| || !REG_P (XEXP (addr, 0)) |
| || !REG_P (XEXP (addr, 1))) |
| return false; |
| return true; |
| } |
| |
| /* Return the number of instructions needed to load or store a value |
| of mode MODE at address X. Return 0 if X isn't valid for MODE. |
| Assume that multiword moves may need to be split into word moves |
| if MIGHT_SPLIT_P, otherwise assume that a single load or store is |
| enough. */ |
| |
| int |
| loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) |
| { |
| struct loongarch_address_info addr; |
| int factor; |
| |
| if (!loongarch_classify_address (&addr, x, mode, false)) |
| return 0; |
| |
| /* BLKmode is used for single unaligned loads and stores and should |
| not count as a multiword mode. (GET_MODE_SIZE (BLKmode) is pretty |
| meaningless, so we have to single it out as a special case one way |
| or the other.) */ |
| if (mode != BLKmode && might_split_p) |
| factor = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| else |
| factor = 1; |
| |
| if (loongarch_classify_address (&addr, x, mode, false)) |
| switch (addr.type) |
| { |
| case ADDRESS_REG: |
| case ADDRESS_REG_REG: |
| case ADDRESS_CONST_INT: |
| return factor; |
| |
| case ADDRESS_LO_SUM: |
| return factor + 1; |
| |
| case ADDRESS_SYMBOLIC: |
| return factor * loongarch_symbol_insns (addr.symbol_type, mode); |
| } |
| return 0; |
| } |
| |
| /* Return true if X fits within an unsigned field of BITS bits that is |
| shifted left SHIFT bits before being used. */ |
| |
| bool |
| loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits, |
| int shift = 0) |
| { |
| return (x & ((1 << shift) - 1)) == 0 && x < ((unsigned) 1 << (shift + bits)); |
| } |
| |
| /* Return true if X fits within a signed field of BITS bits that is |
| shifted left SHIFT bits before being used. */ |
| |
| bool |
| loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits, |
| int shift = 0) |
| { |
| x += 1 << (bits + shift - 1); |
| return loongarch_unsigned_immediate_p (x, bits, shift); |
| } |
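| |
| /* Worked example (editor's illustration): loongarch_signed_immediate_p |
| (X, 14, 2) accepts exactly the multiples of 4 in [-32768, 32764]; the |
| bias of 1 << 15 maps that range onto [0, 65536) and the low two bits must |
| already be zero.  This is the same condition that |
| loongarch_14bit_shifted_offset_address_p tests below. */ |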
| |
| /* Return true if X is a legitimate address whose offset fits in a signed |
| 12-bit field, or a LO_SUM address. |
| MODE is the mode of the value being accessed. */ |
| |
| bool |
| loongarch_12bit_offset_address_p (rtx x, machine_mode mode) |
| { |
| struct loongarch_address_info addr; |
| |
| return (loongarch_classify_address (&addr, x, mode, false) |
| && ((addr.type == ADDRESS_REG |
| && CONST_INT_P (addr.offset) |
| && LARCH_12BIT_OFFSET_P (INTVAL (addr.offset))) |
| || addr.type == ADDRESS_LO_SUM)); |
| } |
| |
| /* Return true if X is a legitimate address with a 14-bit offset shifted 2. |
| MODE is the mode of the value being accessed. */ |
| |
| bool |
| loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode) |
| { |
| struct loongarch_address_info addr; |
| |
| return (loongarch_classify_address (&addr, x, mode, false) |
| && addr.type == ADDRESS_REG |
| && CONST_INT_P (addr.offset) |
| && LARCH_16BIT_OFFSET_P (INTVAL (addr.offset)) |
| && LARCH_SHIFT_2_OFFSET_P (INTVAL (addr.offset))); |
| } |
| |
| /* Return true if X is a legitimate address with base and index. |
| MODE is the mode of the value being accessed. */ |
| |
| bool |
| loongarch_base_index_address_p (rtx x, machine_mode mode) |
| { |
| struct loongarch_address_info addr; |
| |
| return (loongarch_classify_address (&addr, x, mode, false) |
| && addr.type == ADDRESS_REG_REG |
| && REG_P (addr.offset)); |
| } |
| |
| /* Return the number of instructions needed to load constant X; |
| return 0 if X isn't a valid constant. */ |
| |
| int |
| loongarch_const_insns (rtx x) |
| { |
| enum loongarch_symbol_type symbol_type; |
| rtx offset; |
| |
| switch (GET_CODE (x)) |
| { |
| case HIGH: |
| if (!loongarch_symbolic_constant_p (XEXP (x, 0), &symbol_type) |
| || !loongarch_split_symbol_type (symbol_type)) |
| return 0; |
| |
| /* This is simply a PCALAU12I. */ |
| return 1; |
| |
| case CONST_INT: |
| return loongarch_integer_cost (INTVAL (x)); |
| |
| case CONST_VECTOR: |
| /* Fall through. */ |
| case CONST_DOUBLE: |
| return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0; |
| |
| case CONST: |
| /* See if we can refer to X directly. */ |
| if (loongarch_symbolic_constant_p (x, &symbol_type)) |
| return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE); |
| |
| /* Otherwise try splitting the constant into a base and offset. |
| If the offset is a 12-bit value, we can load the base address |
| into a register and then use ADDI.{W/D} to add in the offset. |
| If the offset is larger, we can load the base and offset |
| into separate registers and add them together with ADD.{W/D}. |
| However, the latter is only possible before reload; during |
| and after reload, we must have the option of forcing the |
| constant into the pool instead. */ |
| split_const (x, &x, &offset); |
| if (offset != 0) |
| { |
| int n = loongarch_const_insns (x); |
| if (n != 0) |
| { |
| if (IMM12_INT (offset)) |
| return n + 1; |
| else if (!targetm.cannot_force_const_mem (GET_MODE (x), x)) |
| return n + 1 + loongarch_integer_cost (INTVAL (offset)); |
| } |
| } |
| return 0; |
| |
| case SYMBOL_REF: |
| case LABEL_REF: |
| return loongarch_symbol_insns ( |
| loongarch_classify_symbol (x), MAX_MACHINE_MODE); |
| |
| default: |
| return 0; |
| } |
| } |
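| |
| /* For example (editor's illustration): a SYMBOL_REF classified as |
| SYMBOL_PCREL costs 2 instructions, and a (const (plus sym offset)) that |
| cannot be referenced directly falls back to loading the symbol and adding |
| the offset, costing one extra instruction when the offset fits in 12 |
| bits, as in the IMM12_INT case above. */ |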
| |
| /* X is a doubleword constant that can be handled by splitting it into |
| two words and loading each word separately. Return the number of |
| instructions required to do this. */ |
| |
| int |
| loongarch_split_const_insns (rtx x) |
| { |
| unsigned int low, high; |
| |
| low = loongarch_const_insns (loongarch_subword (x, false)); |
| high = loongarch_const_insns (loongarch_subword (x, true)); |
| gcc_assert (low > 0 && high > 0); |
| return low + high; |
| } |
| |
| static bool loongarch_split_move_insn_p (rtx dest, rtx src); |
| |
| /* Return the number of instructions needed to implement INSN, |
| given that it loads from or stores to MEM. */ |
| |
| int |
| loongarch_load_store_insns (rtx mem, rtx_insn *insn) |
| { |
| machine_mode mode; |
| bool might_split_p; |
| rtx set; |
| |
| gcc_assert (MEM_P (mem)); |
| mode = GET_MODE (mem); |
| |
| /* Try to prove that INSN does not need to be split. */ |
| might_split_p = GET_MODE_SIZE (mode) > UNITS_PER_WORD; |
| if (might_split_p) |
| { |
| set = single_set (insn); |
| if (set |
| && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set))) |
| might_split_p = false; |
| } |
| |
| return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p); |
| } |
| |
| /* Return true if we need to trap on division by zero. */ |
| |
| bool |
| loongarch_check_zero_div_p (void) |
| { |
| /* If -m[no-]check-zero-division is given explicitly, honor it. */ |
| if (target_flags_explicit & MASK_CHECK_ZERO_DIV) |
| return TARGET_CHECK_ZERO_DIV; |
| |
| /* Otherwise, don't trap for optimized code, except at -Og. */ |
| return !optimize || optimize_debug; |
| } |
| |
| /* Return the number of instructions needed for an integer division. */ |
| |
| int |
| loongarch_idiv_insns (machine_mode mode ATTRIBUTE_UNUSED) |
| { |
| int count; |
| |
| count = 1; |
| if (loongarch_check_zero_div_p ()) |
| count += 2; |
| |
| return count; |
| } |
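| |
| /* So a division is costed as 1 instruction by default, or 3 when a |
| zero-divisor check is emitted (the divide itself plus, typically, a |
| conditional branch and a trap; the exact check sequence is defined by the |
| .md patterns). */ |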
| |
| /* Emit an instruction of the form (set TARGET (CODE OP0 OP1)). */ |
| |
| void |
| loongarch_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1) |
| { |
| emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (code, GET_MODE (target), |
| op0, op1))); |
| } |
| |
| /* Compute (CODE OP0 OP1) and store the result in a new register |
| of mode MODE. Return that new register. */ |
| |
| static rtx |
| loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0, |
| rtx op1) |
| { |
| rtx reg; |
| |
| reg = gen_reg_rtx (mode); |
| loongarch_emit_binary (code, reg, op0, op1); |
| return reg; |
| } |
| |
| /* Copy VALUE to a register and return that register. If new pseudos |
| are allowed, copy it into a new register, otherwise use DEST. */ |
| |
| static rtx |
| loongarch_force_temporary (rtx dest, rtx value) |
| { |
| if (can_create_pseudo_p ()) |
| return force_reg (Pmode, value); |
| else |
| { |
| loongarch_emit_move (dest, value); |
| return dest; |
| } |
| } |
| |
| /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE, |
| then add CONST_INT OFFSET to the result. */ |
| |
| static rtx |
| loongarch_unspec_address_offset (rtx base, rtx offset, |
| enum loongarch_symbol_type symbol_type) |
| { |
| base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), |
| UNSPEC_ADDRESS_FIRST + symbol_type); |
| if (offset != const0_rtx) |
| base = gen_rtx_PLUS (Pmode, base, offset); |
| return gen_rtx_CONST (Pmode, base); |
| } |
| |
| /* Return an UNSPEC address with underlying address ADDRESS and symbol |
| type SYMBOL_TYPE. */ |
| |
| rtx |
| loongarch_unspec_address (rtx address, enum loongarch_symbol_type symbol_type) |
| { |
| rtx base, offset; |
| |
| split_const (address, &base, &offset); |
| return loongarch_unspec_address_offset (base, offset, symbol_type); |
| } |
| |
| /* Emit an instruction of the form (set TARGET SRC). */ |
| |
| static rtx |
| loongarch_emit_set (rtx target, rtx src) |
| { |
| emit_insn (gen_rtx_SET (target, src)); |
| return target; |
| } |
| |
| /* If OP is an UNSPEC address, return the address to which it refers, |
| otherwise return OP itself. */ |
| |
| rtx |
| loongarch_strip_unspec_address (rtx op) |
| { |
| rtx base, offset; |
| |
| split_const (op, &base, &offset); |
| if (UNSPEC_ADDRESS_P (base)) |
| op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset)); |
| return op; |
| } |
| |
| /* Return a legitimate address for REG + OFFSET. TEMP is as for |
| loongarch_force_temporary; it is only needed when OFFSET is not a |
| IMM12_OPERAND. */ |
| |
| static rtx |
| loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) |
| { |
| if (!IMM12_OPERAND (offset)) |
| { |
| rtx high; |
| |
| /* Leave OFFSET as a 12-bit offset and put the excess in HIGH. |
| The addition inside the macro CONST_HIGH_PART may cause an |
| overflow, so we need to force a sign-extension check. */ |
| high = gen_int_mode (CONST_HIGH_PART (offset), Pmode); |
| offset = CONST_LOW_PART (offset); |
| high = loongarch_force_temporary (temp, high); |
| reg = loongarch_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg)); |
| } |
| return plus_constant (Pmode, reg, offset); |
| } |
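| |
| /* Worked example (editor's illustration, assuming CONST_HIGH_PART rounds |
| to the nearest multiple of 0x1000 as on MIPS): for OFFSET 0x1800 the high |
| part is 0x2000 and the low part is -0x800, so TEMP receives REG + 0x2000 |
| and the returned address is (plus TEMP -0x800), keeping the residual |
| offset within the signed 12-bit range. */ |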
| |
| /* The __tls_get_addr symbol. */ |
| static GTY (()) rtx loongarch_tls_symbol; |
| |
| /* Load an entry from the GOT for a TLS GD access. */ |
| |
| static rtx |
| loongarch_got_load_tls_gd (rtx dest, rtx sym) |
| { |
| return gen_got_load_tls_gd (Pmode, dest, sym); |
| } |
| |
| /* Load an entry from the GOT for a TLS LD access. */ |
| |
| static rtx |
| loongarch_got_load_tls_ld (rtx dest, rtx sym) |
| { |
| return gen_got_load_tls_ld (Pmode, dest, sym); |
| } |
| |
| /* Load an entry from the GOT for a TLS IE access. */ |
| |
| static rtx |
| loongarch_got_load_tls_ie (rtx dest, rtx sym) |
| { |
| return gen_got_load_tls_ie (Pmode, dest, sym); |
| } |
| |
| /* Add in the thread pointer for a TLS LE access. */ |
| |
| static rtx |
| loongarch_got_load_tls_le (rtx dest, rtx sym) |
| { |
| return gen_got_load_tls_le (Pmode, dest, sym); |
| } |
| |
| /* Return an instruction sequence that calls __tls_get_addr. SYM is |
| the TLS symbol we are referencing and TYPE is the symbol type to use |
| (either global dynamic or local dynamic). V0 is an RTX for the |
| return value location. */ |
| |
| static rtx_insn * |
| loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) |
| { |
| rtx loc, a0; |
| rtx_insn *insn; |
| rtx tmp = gen_reg_rtx (Pmode); |
| |
| a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST); |
| |
| if (!loongarch_tls_symbol) |
| loongarch_tls_symbol = init_one_libfunc ("__tls_get_addr"); |
| |
| loc = loongarch_unspec_address (sym, type); |
| |
| start_sequence (); |
| |
| if (TARGET_EXPLICIT_RELOCS) |
| { |
| /* Split the TLS symbol into high and low parts. */ |
| rtx high = gen_rtx_HIGH (Pmode, copy_rtx (loc)); |
| high = loongarch_force_temporary (tmp, high); |
| |
| if (TARGET_CMODEL_EXTREME) |
| { |
| gcc_assert (TARGET_EXPLICIT_RELOCS); |
| |
| rtx tmp1 = gen_reg_rtx (Pmode); |
| emit_insn (gen_tls_low (Pmode, tmp1, gen_rtx_REG (Pmode, 0), loc)); |
| emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loc)); |
| emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loc)); |
| emit_move_insn (a0, gen_rtx_PLUS (Pmode, high, tmp1)); |
| } |
| else |
| emit_insn (gen_tls_low (Pmode, a0, high, loc)); |
| } |
| else |
| { |
| if (type == SYMBOL_TLSLDM) |
| emit_insn (loongarch_got_load_tls_ld (a0, loc)); |
| else if (type == SYMBOL_TLSGD) |
| emit_insn (loongarch_got_load_tls_gd (a0, loc)); |
| else |
| gcc_unreachable (); |
| } |
| |
| if (flag_plt) |
| { |
| switch (la_opt_cmodel) |
| { |
| case CMODEL_NORMAL: |
| insn = emit_call_insn (gen_call_value_internal (v0, |
| loongarch_tls_symbol, |
| const0_rtx)); |
| break; |
| |
| case CMODEL_MEDIUM: |
| { |
| rtx reg = gen_reg_rtx (Pmode); |
| if (TARGET_EXPLICIT_RELOCS) |
| { |
| emit_insn (gen_pcalau12i (Pmode, reg, loongarch_tls_symbol)); |
| rtx call = gen_call_value_internal_1 (Pmode, v0, reg, |
| loongarch_tls_symbol, |
| const0_rtx); |
| insn = emit_call_insn (call); |
| } |
| else |
| { |
| emit_move_insn (reg, loongarch_tls_symbol); |
| insn = emit_call_insn (gen_call_value_internal (v0, |
| reg, |
| const0_rtx)); |
| } |
| break; |
| } |
| |
| /* The extreme code model does not support PLT. */ |
| case CMODEL_EXTREME: |
| case CMODEL_LARGE: |
| case CMODEL_TINY: |
| case CMODEL_TINY_STATIC: |
| default: |
| gcc_unreachable (); |
| } |
| } |
| else |
| { |
| rtx dest = gen_reg_rtx (Pmode); |
| |
| switch (la_opt_cmodel) |
| { |
| case CMODEL_NORMAL: |
| case CMODEL_MEDIUM: |
| { |
| if (TARGET_EXPLICIT_RELOCS) |
| { |
| rtx high = gen_reg_rtx (Pmode); |
| loongarch_emit_move (high, |
| gen_rtx_HIGH (Pmode, |
| loongarch_tls_symbol)); |
| emit_insn (gen_ld_from_got (Pmode, dest, high, |
| loongarch_tls_symbol)); |
| } |
| else |
| loongarch_emit_move (dest, loongarch_tls_symbol); |
| break; |
| } |
| |
| case CMODEL_EXTREME: |
| { |
| gcc_assert (TARGET_EXPLICIT_RELOCS); |
| |
| rtx tmp1 = gen_reg_rtx (Pmode); |
| rtx high = gen_reg_rtx (Pmode); |
| |
| loongarch_emit_move (high, |
| gen_rtx_HIGH (Pmode, loongarch_tls_symbol)); |
| loongarch_emit_move (tmp1, gen_rtx_LO_SUM (Pmode, |
| gen_rtx_REG (Pmode, 0), |
| loongarch_tls_symbol)); |
| emit_insn (gen_lui_h_lo20 (tmp1, tmp1, loongarch_tls_symbol)); |
| emit_insn (gen_lui_h_hi12 (tmp1, tmp1, loongarch_tls_symbol)); |
| loongarch_emit_move (dest, |
| gen_rtx_MEM (Pmode, |
| gen_rtx_PLUS (Pmode, |
| high, tmp1))); |
| } |
| break; |
| |
| case CMODEL_LARGE: |
| case CMODEL_TINY: |
| case CMODEL_TINY_STATIC: |
| default: |
| gcc_unreachable (); |
| } |
| |
| insn = emit_call_insn (gen_call_value_internal (v0, dest, const0_rtx)); |
| } |
| |
| RTL_CONST_CALL_P (insn) = 1; |
| use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0); |
| insn = get_insns (); |
| |
| end_sequence (); |
| |
| return insn; |
| } |
| |
| /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return |
| its address. The return value will be both a valid address and a valid |
| SET_SRC (either a REG or a LO_SUM). */ |
| |
| static rtx |
| loongarch_legitimize_tls_address (rtx loc) |
| { |
| rtx dest, tp, tmp, tmp1, tmp2, tmp3; |
| enum tls_model model = SYMBOL_REF_TLS_MODEL (loc); |
| rtx_insn *insn; |
| |
| switch (model) |
| { |
| case TLS_MODEL_LOCAL_DYNAMIC: |
| tmp = gen_rtx_REG (Pmode, GP_RETURN); |
| dest = gen_reg_rtx (Pmode); |
| insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp); |
| emit_libcall_block (insn, dest, tmp, loc); |
| break; |
| |
| case TLS_MODEL_GLOBAL_DYNAMIC: |
| tmp = gen_rtx_REG (Pmode, GP_RETURN); |
| dest = gen_reg_rtx (Pmode); |
| insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp); |
| emit_libcall_block (insn, dest, tmp, loc); |
| break; |
| |
| case TLS_MODEL_INITIAL_EXEC: |
| { |
| /* la.tls.ie; tp-relative add. */ |
| tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); |
| tmp1 = gen_reg_rtx (Pmode); |
| dest = gen_reg_rtx (Pmode); |
| if (TARGET_EXPLICIT_RELOCS) |
| { |
| tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_IE); |
| tmp3 = gen_reg_rtx (Pmode); |
| rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); |
| high = loongarch_force_temporary (tmp3, high); |
| |
| if (TARGET_CMODEL_EXTREME) |
| { |
| gcc_assert (TARGET_EXPLICIT_RELOCS); |
| |
| rtx tmp3 = gen_reg_rtx (Pmode); |
| emit_insn (gen_tls_low (Pmode, tmp3, |
| gen_rtx_REG (Pmode, 0), tmp2)); |
| emit_insn (gen_lui_h_lo20 (tmp3, tmp3, tmp2)); |
| emit_insn (gen_lui_h_hi12 (tmp3, tmp3, tmp2)); |
| emit_move_insn (tmp1, |
| gen_rtx_MEM (Pmode, |
| gen_rtx_PLUS (Pmode, |
| high, tmp3))); |
| } |
| else |
| emit_insn (gen_ld_from_got (Pmode, tmp1, high, tmp2)); |
| } |
| else |
| emit_insn (loongarch_got_load_tls_ie (tmp1, loc)); |
| emit_insn (gen_add3_insn (dest, tmp1, tp)); |
| } |
| break; |
| |
| case TLS_MODEL_LOCAL_EXEC: |
| { |
| /* la.tls.le; tp-relative add. */ |
| tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); |
| tmp1 = gen_reg_rtx (Pmode); |
| dest = gen_reg_rtx (Pmode); |
| |
| if (TARGET_EXPLICIT_RELOCS) |
| { |
| tmp2 = loongarch_unspec_address (loc, SYMBOL_TLS_LE); |
| tmp3 = gen_reg_rtx (Pmode); |
| rtx high = gen_rtx_HIGH (Pmode, copy_rtx (tmp2)); |
| high = loongarch_force_temporary (tmp3, high); |
| emit_insn (gen_ori_l_lo12 (Pmode, tmp1, high, tmp2)); |
| |
| if (TARGET_CMODEL_EXTREME) |
| { |
| gcc_assert (TARGET_EXPLICIT_RELOCS); |
| |
| emit_insn (gen_lui_h_lo20 (tmp1, tmp1, tmp2)); |
| emit_insn (gen_lui_h_hi12 (tmp1, tmp1, tmp2)); |
| } |
| } |
| else |
| emit_insn (loongarch_got_load_tls_le (tmp1, loc)); |
| emit_insn (gen_add3_insn (dest, tmp1, tp)); |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| return dest; |
| } |
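| |
| /* As a rough illustration (editor's sketch; the real sequences come from |
| the .md patterns used above): with explicit relocations a local-exec |
| access to x typically becomes |
| lu12i.w $t0, %le_hi20(x) |
| ori $t0, $t0, %le_lo12(x) |
| add.d $rd, $t0, $tp |
| while the dynamic models instead end in a call to __tls_get_addr. */ |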
| |
| rtx |
| loongarch_legitimize_call_address (rtx addr) |
| { |
| if (!call_insn_operand (addr, VOIDmode)) |
| { |
| rtx reg = gen_reg_rtx (Pmode); |
| loongarch_emit_move (reg, addr); |
| return reg; |
| } |
| |
| enum loongarch_symbol_type symbol_type = loongarch_classify_symbol (addr); |
| |
| /* Split the function call insn 'bl sym' or 'bl %plt(sym)' into: |
| pcalau12i $rd, %pc_hi20(sym) |
| jr $rd, %pc_lo12(sym). */ |
| |
| if (TARGET_CMODEL_MEDIUM |
| && TARGET_EXPLICIT_RELOCS |
| && (SYMBOL_REF_P (addr) || LABEL_REF_P (addr)) |
| && (symbol_type == SYMBOL_PCREL |
| || (symbol_type == SYMBOL_GOT_DISP && flag_plt))) |
| { |
| rtx reg = gen_reg_rtx (Pmode); |
| emit_insn (gen_pcalau12i (Pmode, reg, addr)); |
| return gen_rtx_LO_SUM (Pmode, reg, addr); |
| } |
| |
| return addr; |
| } |
| |
| /* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR |
| and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */ |
| |
| static void |
| loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr) |
| { |
| if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) |
| { |
| *base_ptr = XEXP (x, 0); |
| *offset_ptr = INTVAL (XEXP (x, 1)); |
| } |
| else |
| { |
| *base_ptr = x; |
| *offset_ptr = 0; |
| } |
| } |
| |
| /* If X is not a valid address for mode MODE, force it into a register. */ |
| |
| static rtx |
| loongarch_force_address (rtx x, machine_mode mode) |
| { |
| if (!loongarch_legitimate_address_p (mode, x, false)) |
| x = force_reg (Pmode, x); |
| return x; |
| } |
| |
| static bool |
| loongarch_symbol_extreme_p (enum loongarch_symbol_type type) |
| { |
| switch (type) |
| { |
| case SYMBOL_PCREL: |
| return false; |
| case SYMBOL_PCREL64: |
| return true; |
| default: |
| return TARGET_CMODEL_EXTREME; |
| } |
| } |
| |
| /* If MODE is MAX_MACHINE_MODE, ADDR appears as a move operand, otherwise |
| it appears in a MEM of that mode. Return true if ADDR is a legitimate |
| constant in that context and can be split into high and low parts. |
| If so, and if LOW_OUT is nonnull, emit the high part and store the |
| low part in *LOW_OUT. Leave *LOW_OUT unchanged otherwise. |
| |
| Return false when compiling with '-mno-explicit-relocs'. |
| |
| TEMP is as for loongarch_force_temporary and is used to load the high |
| part into a register. |
| |
| When MODE is MAX_MACHINE_MODE, the low part is guaranteed to be |
| a legitimate SET_SRC for an .md pattern, otherwise the low part |
| is guaranteed to be a legitimate address for mode MODE. */ |
| |
| bool |
| loongarch_split_symbol (rtx temp, rtx addr, machine_mode mode, rtx *low_out) |
| { |
| enum loongarch_symbol_type symbol_type; |
| |
| /* When compiling with '-mno-explicit-relocs', don't split the symbol. */ |
| if (!TARGET_EXPLICIT_RELOCS) |
| return false; |
| |
| if ((GET_CODE (addr) == HIGH && mode == MAX_MACHINE_MODE) |
| || !loongarch_symbolic_constant_p (addr, &symbol_type) |
| || loongarch_symbol_insns (symbol_type, mode) == 0 |
| || !loongarch_split_symbol_type (symbol_type)) |
| return false; |
| |
| rtx high, temp1 = NULL; |
| |
| if (temp == NULL) |
| temp = gen_reg_rtx (Pmode); |
| |
| /* Get bits 12-31 of the address. */ |
| high = gen_rtx_HIGH (Pmode, copy_rtx (addr)); |
| high = loongarch_force_temporary (temp, high); |
| |
| if (loongarch_symbol_extreme_p (symbol_type) && can_create_pseudo_p ()) |
| { |
| gcc_assert (TARGET_EXPLICIT_RELOCS); |
| |
| temp1 = gen_reg_rtx (Pmode); |
| emit_move_insn (temp1, gen_rtx_LO_SUM (Pmode, gen_rtx_REG (Pmode, 0), |
| addr)); |
| emit_insn (gen_lui_h_lo20 (temp1, temp1, addr)); |
| emit_insn (gen_lui_h_hi12 (temp1, temp1, addr)); |
| } |
| |
| if (low_out) |
| switch (symbol_type) |
| { |
| case SYMBOL_PCREL64: |
| if (can_create_pseudo_p ()) |
| { |
| *low_out = gen_rtx_PLUS (Pmode, high, temp1); |
| break; |
| } |
| /* fall through */ |
| case SYMBOL_PCREL: |
| *low_out = gen_rtx_LO_SUM (Pmode, high, addr); |
| break; |
| |
| case SYMBOL_GOT_DISP: |
| /* SYMBOL_GOT_DISP symbols are loaded from the GOT. */ |
| { |
| if (TARGET_CMODEL_EXTREME && can_create_pseudo_p ()) |
| *low_out = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, high, temp1)); |
| else |
| { |
| rtx low = gen_rtx_LO_SUM (Pmode, high, addr); |
| rtx mem = gen_rtx_MEM (Pmode, low); |
| *low_out = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, mem), |
| UNSPEC_LOAD_FROM_GOT); |
| } |
| |
| break; |
| } |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| return true; |
| } |
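| |
| /* For example (editor's illustration): with explicit relocations and a |
| SYMBOL_PCREL symbol x, the caller is left with the high part |
| pcalau12i $t0, %pc_hi20(x) |
| and *LOW_OUT set to (lo_sum $t0 x), which is printed as a %pc_lo12 |
| operand in the final load, store or addi. */ |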
| |
| /* This function is used to implement LEGITIMIZE_ADDRESS. If X can |
| be legitimized in a way that the generic machinery might not expect, |
| return a new address, otherwise return NULL. MODE is the mode of |
| the memory being accessed. */ |
| |
| static rtx |
| loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, |
| machine_mode mode) |
| { |
| rtx base, addr; |
| HOST_WIDE_INT offset; |
| |
| if (loongarch_tls_symbol_p (x)) |
| return loongarch_legitimize_tls_address (x); |
| |
| /* See if the address can be split into a high part and a LO_SUM. */ |
| if (loongarch_split_symbol (NULL, x, mode, &addr)) |
| return loongarch_force_address (addr, mode); |
| |
| /* Handle BASE + OFFSET using loongarch_add_offset. */ |
| loongarch_split_plus (x, &base, &offset); |
| if (offset != 0) |
| { |
| if (!loongarch_valid_base_register_p (base, mode, false)) |
| base = copy_to_mode_reg (Pmode, base); |
| addr = loongarch_add_offset (NULL, base, offset); |
| return loongarch_force_address (addr, mode); |
| } |
| |
| return x; |
| } |
| |
| /* Load VALUE into DEST. TEMP is as for loongarch_force_temporary. */ |
| |
| void |
| loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) |
| { |
| struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; |
| machine_mode mode; |
| unsigned int i, num_ops; |
| rtx x; |
| |
| mode = GET_MODE (dest); |
| num_ops = loongarch_build_integer (codes, value); |
| |
| /* Apply each binary operation to X. Invariant: X is a legitimate |
| source operand for a SET pattern. */ |
| x = GEN_INT (codes[0].value); |
| for (i = 1; i < num_ops; i++) |
| { |
| if (!can_create_pseudo_p ()) |
| { |
| emit_insn (gen_rtx_SET (temp, x)); |
| x = temp; |
| } |
| else |
| x = force_reg (mode, x); |
| |
| switch (codes[i].method) |
| { |
| case METHOD_NORMAL: |
| x = gen_rtx_fmt_ee (codes[i].code, mode, x, |
| GEN_INT (codes[i].value)); |
| break; |
| case METHOD_LU32I: |
| emit_insn ( |
| gen_rtx_SET (x, |
| gen_rtx_IOR (DImode, |
| gen_rtx_ZERO_EXTEND ( |
| DImode, gen_rtx_SUBREG (SImode, x, 0)), |
| GEN_INT (codes[i].value)))); |
| break; |
| case METHOD_LU52I: |
| emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff), |
| GEN_INT (codes[i].value))); |
| break; |
| case METHOD_INSV: |
| emit_insn ( |
| gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT (20), |
| GEN_INT (32)), |
| gen_rtx_REG (DImode, 0))); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| emit_insn (gen_rtx_SET (dest, x)); |
| } |
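| |
| /* Illustrative example (editor's sketch): for a 64-bit constant such as |
| 0x1234567887654321, loongarch_build_integer typically yields four steps: |
| lu12i.w for bits 12-31, ori for bits 0-11, and then the METHOD_LU32I and |
| METHOD_LU52I fixups above for bits 32-51 and 52-63. */ |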
| |
| /* Subroutine of loongarch_legitimize_move. Move constant SRC into register |
| DEST given that SRC satisfies immediate_operand but doesn't satisfy |
| move_operand. */ |
| |
| static void |
| loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src) |
| { |
| rtx base, offset; |
| |
| /* Split moves of big integers into smaller pieces. */ |
| if (splittable_const_int_operand (src, mode)) |
| { |
| loongarch_move_integer (dest, dest, INTVAL (src)); |
| return; |
| } |
| |
| /* Split moves of symbolic constants into high and low. */ |
| if (loongarch_split_symbol (dest, src, MAX_MACHINE_MODE, &src)) |
| { |
| loongarch_emit_set (dest, src); |
| return; |
| } |
| |
| /* Generate the appropriate access sequences for TLS symbols. */ |
| if (loongarch_tls_symbol_p (src)) |
| { |
| loongarch_emit_move (dest, loongarch_legitimize_tls_address (src)); |
| return; |
| } |
| |
| /* If we have (const (plus symbol offset)), and that expression cannot |
| be forced into memory, load the symbol first and add in the offset. |
| We prefer to do this even if the constant _can_ be forced into memory, |
| as it usually produces better code. */ |
| split_const (src, &base, &offset); |
| if (offset != const0_rtx |
| && (targetm.cannot_force_const_mem (mode, src) |
| || (can_create_pseudo_p ()))) |
| { |
| base = loongarch_force_temporary (dest, base); |
| loongarch_emit_move (dest, |
| loongarch_add_offset (NULL, base, INTVAL (offset))); |
| return; |
| } |
| |
| src = force_const_mem (mode, src); |
| |
| loongarch_emit_move (dest, src); |
| } |
| |
| /* If (set DEST SRC) is not a valid move instruction, emit an equivalent |
| sequence that is valid. */ |
| |
| bool |
| loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) |
| { |
| if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) |
| { |
| loongarch_emit_move (dest, force_reg (mode, src)); |
| return true; |
| } |
| |
| /* Both src and dest are non-registers; one special case is supported where |
| the source is (const_int 0) and the store can source the zero register. */ |
| if (!register_operand (dest, mode) && !register_operand (src, mode) |
| && !const_0_operand (src, mode)) |
| { |
| loongarch_emit_move (dest, force_reg (mode, src)); |
| return true; |
| } |
| |
| /* We need to deal with constants that would be legitimate |
| immediate_operands but aren't legitimate move_operands. */ |
| if (CONSTANT_P (src) && !move_operand (src, mode)) |
| { |
| loongarch_legitimize_const_move (mode, dest, src); |
| set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src)); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Return true if X refers to small data symbols directly. */ |
| |
| static int |
| loongarch_small_data_pattern_1 (rtx x) |
| { |
| subrtx_var_iterator::array_type array; |
| FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) |
| { |
| rtx x = *iter; |
| |
| /* We make no particular guarantee about which symbolic constants are |
| acceptable as asm operands versus which must be forced into a GPR. */ |
| if (GET_CODE (x) == ASM_OPERANDS) |
| iter.skip_subrtxes (); |
| else if (MEM_P (x)) |
| { |
| if (loongarch_small_data_pattern_1 (XEXP (x, 0))) |
| return true; |
| iter.skip_subrtxes (); |
| } |
| } |
| return false; |
| } |
| |
| /* Return true if OP refers to small data symbols directly. */ |
| |
| bool |
| loongarch_small_data_pattern_p (rtx op) |
| { |
| return loongarch_small_data_pattern_1 (op); |
| } |
| |
| /* Rewrite *LOC so that it refers to small data using explicit |
| relocations. */ |
| |
| static void |
| loongarch_rewrite_small_data_1 (rtx *loc) |
| { |
| subrtx_ptr_iterator::array_type array; |
| FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) |
| { |
| rtx *loc = *iter; |
| if (MEM_P (*loc)) |
| { |
| loongarch_rewrite_small_data_1 (&XEXP (*loc, 0)); |
| iter.skip_subrtxes (); |
| } |
| } |
| } |
| |
| /* Rewrite instruction pattern PATTERN so that it refers to small data |
| using explicit relocations. */ |
| |
| rtx |
| loongarch_rewrite_small_data (rtx pattern) |
| { |
| pattern = copy_insn (pattern); |
| loongarch_rewrite_small_data_1 (&pattern); |
| return pattern; |
| } |
| |
| /* The cost of loading values from the constant pool. It should be |
| larger than the cost of any constant we want to synthesize inline. */ |
| #define CONSTANT_POOL_COST COSTS_N_INSNS (8) |
| |
| /* Return true if there is an instruction that implements CODE |
| and if that instruction accepts X as an immediate operand. */ |
| |
| static int |
| loongarch_immediate_operand_p (int code, HOST_WIDE_INT x) |
| { |
| switch (code) |
| { |
| case ASHIFT: |
| case ASHIFTRT: |
| case LSHIFTRT: |
| /* All shift counts are truncated to a valid constant. */ |
| return true; |
| |
| case ROTATE: |
| case ROTATERT: |
| return true; |
| |
| case AND: |
| case IOR: |
| case XOR: |
| /* These instructions take 12-bit unsigned immediates. */ |
| return IMM12_OPERAND_UNSIGNED (x); |
| |
| case PLUS: |
| case LT: |
| case LTU: |
| /* These instructions take 12-bit signed immediates. */ |
| return IMM12_OPERAND (x); |
| |
| case EQ: |
| case NE: |
| case GT: |
| case GTU: |
| /* The "immediate" forms of these instructions are really |
| implemented as comparisons with register 0. */ |
| return x == 0; |
| |
| case GE: |
| case GEU: |
| /* Likewise, meaning that the only valid immediate operand is 1. */ |
| return x == 1; |
| |
| case LE: |
| /* We add 1 to the immediate and use SLT. */ |
| return IMM12_OPERAND (x + 1); |
| |
| case LEU: |
| /* Likewise SLTU, but reject the always-true case. */ |
| return IMM12_OPERAND (x + 1) && x + 1 != 0; |
| |
| case SIGN_EXTRACT: |
| case ZERO_EXTRACT: |
| /* The bit position and size are immediate operands. */ |
| return 1; |
| |
| default: |
| /* By default assume that $0 can be used for 0. */ |
| return x == 0; |
| } |
| } |
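| |
| /* For example (editor's illustration): (le X 100) can be implemented as a |
| "set on less than" against 101, so the LE case above accepts 100, while |
| (leu X -1) is rejected because the comparison is always true and X + 1 |
| wraps to 0. */ |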
| |
| /* Return the cost of binary operation X, given that the instruction |
| sequence for a word-sized or smaller operation has cost SINGLE_COST |
| and that the sequence of a double-word operation has cost DOUBLE_COST. |
| If SPEED is true, optimize for speed otherwise optimize for size. */ |
| |
| static int |
| loongarch_binary_cost (rtx x, int single_cost, int double_cost, bool speed) |
| { |
| int cost; |
| |
| if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2) |
| cost = double_cost; |
| else |
| cost = single_cost; |
| return (cost |
| + set_src_cost (XEXP (x, 0), GET_MODE (x), speed) |
| + rtx_cost (XEXP (x, 1), GET_MODE (x), GET_CODE (x), 1, speed)); |
| } |
| |
| /* Return the cost of floating-point multiplications of mode MODE. */ |
| |
| static int |
| loongarch_fp_mult_cost (machine_mode mode) |
| { |
| return mode == DFmode ? loongarch_cost->fp_mult_df |
| : loongarch_cost->fp_mult_sf; |
| } |
| |
| /* Return the cost of floating-point divisions of mode MODE. */ |
| |
| static int |
| loongarch_fp_div_cost (machine_mode mode) |
| { |
| return mode == DFmode ? loongarch_cost->fp_div_df |
| : loongarch_cost->fp_div_sf; |
| } |
| |
| /* Return the cost of sign-extending OP to mode MODE, not including the |
| cost of OP itself. */ |
| |
| static int |
| loongarch_sign_extend_cost (rtx op) |
| { |
| if (MEM_P (op)) |
| /* Extended loads are as cheap as unextended ones. */ |
| return 0; |
| |
| return COSTS_N_INSNS (1); |
| } |
| |
| /* Return the cost of zero-extending OP to mode MODE, not including the |
| cost of OP itself. */ |
| |
| static int |
| loongarch_zero_extend_cost (rtx op) |
| { |
| if (MEM_P (op)) |
| /* Extended loads are as cheap as unextended ones. */ |
| return 0; |
| |
| /* We can use ANDI. */ |
| return COSTS_N_INSNS (1); |
| } |
| |
| /* Return the cost of moving between two registers of mode MODE, |
| assuming that the move will be in pieces of at most UNITS bytes. */ |
| |
| static int |
| loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units) |
| { |
| return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units); |
| } |
| |
| /* Return the cost of moving between two registers of mode MODE. */ |
| |
| static int |
| loongarch_set_reg_reg_cost (machine_mode mode) |
| { |
| switch (GET_MODE_CLASS (mode)) |
| { |
| case MODE_CC: |
| return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (CCmode)); |
| |
| case MODE_FLOAT: |
| case MODE_COMPLEX_FLOAT: |
| case MODE_VECTOR_FLOAT: |
| if (TARGET_HARD_FLOAT) |
| return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_HWFPVALUE); |
| /* Fall through. */ |
| |
| default: |
| return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_WORD); |
| } |
| } |
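| |
| /* For example (editor's illustration): on a 64-bit target a 16-byte |
| integer register-to-register move is costed as COSTS_N_INSNS (2), since |
| it is assumed to be done in two word-sized pieces. */ |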
| |
| /* Implement TARGET_RTX_COSTS. */ |
| |
| static bool |
| loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, |
| int opno ATTRIBUTE_UNUSED, int *total, bool speed) |
| { |
| int code = GET_CODE (x); |
| bool float_mode_p = FLOAT_MODE_P (mode); |
| int cost; |
| rtx addr; |
| |
| if (outer_code == COMPARE) |
| { |
| gcc_assert (CONSTANT_P (x)); |
| *total = 0; |
| return true; |
| } |
| |
| switch (code) |
| { |
| case CONST_INT: |
| if (TARGET_64BIT && outer_code == AND && UINTVAL (x) == 0xffffffff) |
| { |
| *total = 0; |
| return true; |
| } |
| |
| /* When not optimizing for size, we care more about the cost |
| of hot code, and hot code is often in a loop. If a constant |
| operand needs to be forced into a register, we will often be |
| able to hoist the constant load out of the loop, so the load |
| should not contribute to the cost. */ |
| if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x))) |
| { |
| *total = 0; |
| return true; |
| } |
| /* Fall through. */ |
| |
| case CONST: |
| case SYMBOL_REF: |
| case LABEL_REF: |
| case CONST_DOUBLE: |
| cost = loongarch_const_insns (x); |
| if (cost > 0) |
| { |
| if (cost == 1 && outer_code == SET |
| && !(float_mode_p && TARGET_HARD_FLOAT)) |
| cost = 0; |
| else if ((outer_code == SET || GET_MODE (x) == VOIDmode)) |
| cost = 1; |
| *total = COSTS_N_INSNS (cost); |
| return true; |
| } |
| /* The value will need to be fetched from the constant pool. */ |
| *total = CONSTANT_POOL_COST; |
| return true; |
| |
| case MEM: |
| /* If the address is legitimate, return the number of |
| instructions it needs. */ |
| addr = XEXP (x, 0); |
| /* Check for a scaled indexed address. */ |
| if (loongarch_index_address_p (addr, mode)) |
| { |
| *total = COSTS_N_INSNS (2); |
| return true; |
|