| /* Subroutines used for LoongArch code generation. |
| Copyright (C) 2021-2022 Free Software Foundation, Inc. |
| Contributed by Loongson Ltd. |
| Based on MIPS and RISC-V target for GNU compiler. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #define IN_TARGET_CODE 1 |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "memmodel.h" |
| #include "gimple.h" |
| #include "cfghooks.h" |
| #include "df.h" |
| #include "tm_p.h" |
| #include "stringpool.h" |
| #include "attribs.h" |
| #include "optabs.h" |
| #include "regs.h" |
| #include "emit-rtl.h" |
| #include "recog.h" |
| #include "cgraph.h" |
| #include "diagnostic.h" |
| #include "insn-attr.h" |
| #include "output.h" |
| #include "alias.h" |
| #include "fold-const.h" |
| #include "varasm.h" |
| #include "stor-layout.h" |
| #include "calls.h" |
| #include "explow.h" |
| #include "expr.h" |
| #include "libfuncs.h" |
| #include "reload.h" |
| #include "common/common-target.h" |
| #include "langhooks.h" |
| #include "cfgrtl.h" |
| #include "cfganal.h" |
| #include "sched-int.h" |
| #include "gimplify.h" |
| #include "target-globals.h" |
| #include "tree-pass.h" |
| #include "context.h" |
| #include "builtins.h" |
| #include "rtl-iter.h" |
| |
| /* This file should be included last. */ |
| #include "target-def.h" |
| |
| /* True if X is an UNSPEC wrapper around a SYMBOL_REF or LABEL_REF. */ |
| #define UNSPEC_ADDRESS_P(X) \ |
| (GET_CODE (X) == UNSPEC \ |
| && XINT (X, 1) >= UNSPEC_ADDRESS_FIRST \ |
| && XINT (X, 1) < UNSPEC_ADDRESS_FIRST + NUM_SYMBOL_TYPES) |
| |
| /* Extract the symbol or label from UNSPEC wrapper X. */ |
| #define UNSPEC_ADDRESS(X) XVECEXP (X, 0, 0) |
| |
| /* Extract the symbol type from UNSPEC wrapper X. */ |
| #define UNSPEC_ADDRESS_TYPE(X) \ |
| ((enum loongarch_symbol_type) (XINT (X, 1) - UNSPEC_ADDRESS_FIRST)) |
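| |
| /* As an illustration, a wrapped address has the form |
| (unspec [(symbol_ref "foo")] UNSPEC_ADDRESS_FIRST + TYPE), so |
| UNSPEC_ADDRESS recovers the inner symbol_ref and UNSPEC_ADDRESS_TYPE |
| recovers TYPE as a loongarch_symbol_type. */ |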
| |
| /* True if INSN is a loongarch.md pattern or asm statement. */ |
| /* ??? This test exists throughout the compiler, perhaps it should be |
| moved to rtl.h. */ |
| #define USEFUL_INSN_P(INSN) \ |
| (NONDEBUG_INSN_P (INSN) \ |
| && GET_CODE (PATTERN (INSN)) != USE \ |
| && GET_CODE (PATTERN (INSN)) != CLOBBER) |
| |
| /* True if bit BIT is set in VALUE. */ |
| #define BITSET_P(VALUE, BIT) (((VALUE) & (1 << (BIT))) != 0) |
| |
| /* Classifies an address. |
| |
| ADDRESS_REG |
| A natural register + offset address. The register satisfies |
| loongarch_valid_base_register_p and the offset is a const_arith_operand. |
| |
| ADDRESS_REG_REG |
| A base register indexed by (optionally scaled) register. |
| |
| ADDRESS_CONST_INT |
| A signed 16-bit constant address. |
| |
| ADDRESS_SYMBOLIC |
| A constant symbolic address. */ |
| enum loongarch_address_type |
| { |
| ADDRESS_REG, |
| ADDRESS_REG_REG, |
| ADDRESS_CONST_INT, |
| ADDRESS_SYMBOLIC |
| }; |
| |
| |
| /* Information about an address described by loongarch_address_type. |
| |
| ADDRESS_CONST_INT |
| No fields are used. |
| |
| ADDRESS_REG |
| REG is the base register and OFFSET is the constant offset. |
| |
| ADDRESS_REG_REG |
| A base register indexed by (optionally scaled) register. |
| |
| ADDRESS_SYMBOLIC |
| SYMBOL_TYPE is the type of symbol that the address references. */ |
| struct loongarch_address_info |
| { |
| enum loongarch_address_type type; |
| rtx reg; |
| rtx offset; |
| enum loongarch_symbol_type symbol_type; |
| }; |
| |
| /* Methods of loading an immediate value: |
| |
| METHOD_NORMAL: |
| Load bits 0-31 of the immediate. |
| |
| METHOD_LU32I: |
| Load bits 32-51 of the immediate. |
| |
| METHOD_LU52I: |
| Load bits 52-63 of the immediate. |
| |
| METHOD_INSV: |
| Load an immediate of the form 0xfff00000fffffxxx with a bit insertion. */ |
| enum loongarch_load_imm_method |
| { |
| METHOD_NORMAL, |
| METHOD_LU32I, |
| METHOD_LU52I, |
| METHOD_INSV |
| }; |
| |
| struct loongarch_integer_op |
| { |
| enum rtx_code code; |
| unsigned HOST_WIDE_INT value; |
| enum loongarch_load_imm_method method; |
| }; |
| |
| /* The largest number of operations needed to load an integer constant. |
| The worst accepted case for 64-bit constants is LU12I.W,LU32I.D,LU52I.D,ORI |
| or LU12I.W,LU32I.D,LU52I.D,ADDI.D. */ |
| #define LARCH_MAX_INTEGER_OPS 4 |
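| |
| /* For example, a 64-bit constant such as 0x1234567890abcdef needs the full |
| worst-case sequence (illustrative encoding; the exact operands are chosen |
| by loongarch_build_integer): |
| lu12i.w $r, 0x90abc (bits 12-31, sign-extended) |
| ori $r, $r, 0xdef (bits 0-11) |
| lu32i.d $r, 0x45678 (bits 32-51) |
| lu52i.d $r, $r, 0x123 (bits 52-63). */ |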
| |
| /* Arrays that map GCC register numbers to debugger register numbers. */ |
| int loongarch_dwarf_regno[FIRST_PSEUDO_REGISTER]; |
| |
| /* Index [M][R] is true if register R is allowed to hold a value of mode M. */ |
| static bool loongarch_hard_regno_mode_ok_p[MAX_MACHINE_MODE] |
| [FIRST_PSEUDO_REGISTER]; |
| |
| /* Index C is true if character C is a valid PRINT_OPERAND punctuation |
| character. */ |
| static bool loongarch_print_operand_punct[256]; |
| |
| /* Cached value of can_issue_more. This is cached by the loongarch_variable_issue |
| hook and returned from loongarch_sched_reorder2. */ |
| static int cached_can_issue_more; |
| |
| /* Index R is the smallest register class that contains register R. */ |
| const enum reg_class loongarch_regno_to_class[FIRST_PSEUDO_REGISTER] = { |
| GR_REGS, GR_REGS, GR_REGS, GR_REGS, |
| JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, |
| JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, |
| SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, |
| SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, SIBCALL_REGS, |
| SIBCALL_REGS, GR_REGS, GR_REGS, JIRL_REGS, |
| JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, |
| JIRL_REGS, JIRL_REGS, JIRL_REGS, JIRL_REGS, |
| |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FP_REGS, FP_REGS, FP_REGS, FP_REGS, |
| FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS, |
| FCC_REGS, FCC_REGS, FCC_REGS, FCC_REGS, |
| FRAME_REGS, FRAME_REGS |
| }; |
| |
| /* Which cost information to use. */ |
| static const struct loongarch_rtx_cost_data *loongarch_cost; |
| |
| /* Information about a single argument. */ |
| struct loongarch_arg_info |
| { |
| /* True if the argument is at least partially passed on the stack. */ |
| bool stack_p; |
| |
| /* The number of integer registers allocated to this argument. */ |
| unsigned int num_gprs; |
| |
| /* The offset of the first register used, provided num_gprs is nonzero. |
| If passed entirely on the stack, the value is MAX_ARGS_IN_REGISTERS. */ |
| unsigned int gpr_offset; |
| |
| /* The number of floating-point registers allocated to this argument. */ |
| unsigned int num_fprs; |
| |
| /* The offset of the first register used, provided num_fprs is nonzero. */ |
| unsigned int fpr_offset; |
| }; |
| |
| /* Invoke MACRO (COND) for each fcmp.cond.{s/d} condition. */ |
| #define LARCH_FP_CONDITIONS(MACRO) \ |
| MACRO (f), \ |
| MACRO (un), \ |
| MACRO (eq), \ |
| MACRO (ueq), \ |
| MACRO (olt), \ |
| MACRO (ult), \ |
| MACRO (ole), \ |
| MACRO (ule), \ |
| MACRO (sf), \ |
| MACRO (ngle), \ |
| MACRO (seq), \ |
| MACRO (ngl), \ |
| MACRO (lt), \ |
| MACRO (nge), \ |
| MACRO (le), \ |
| MACRO (ngt) |
| |
| /* Enumerates the codes above as LARCH_FP_COND_<X>. */ |
| #define DECLARE_LARCH_COND(X) LARCH_FP_COND_##X |
| enum loongarch_fp_condition |
| { |
| LARCH_FP_CONDITIONS (DECLARE_LARCH_COND) |
| }; |
| #undef DECLARE_LARCH_COND |
| |
| /* Index X provides the string representation of LARCH_FP_COND_<X>. */ |
| #define STRINGIFY(X) #X |
| const char *const |
| loongarch_fp_conditions[16] = {LARCH_FP_CONDITIONS (STRINGIFY)}; |
| #undef STRINGIFY |
| |
| /* Implement TARGET_FUNCTION_ARG_BOUNDARY. Every parameter gets at |
| least PARM_BOUNDARY bits of alignment, but will be given anything up |
| to PREFERRED_STACK_BOUNDARY bits if the type requires it. */ |
| |
| static unsigned int |
| loongarch_function_arg_boundary (machine_mode mode, const_tree type) |
| { |
| unsigned int alignment; |
| |
| /* Use natural alignment if the type is not an aggregate. */ |
| if (type && !AGGREGATE_TYPE_P (type)) |
| alignment = TYPE_ALIGN (TYPE_MAIN_VARIANT (type)); |
| else |
| alignment = type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode); |
| |
| return MIN (PREFERRED_STACK_BOUNDARY, MAX (PARM_BOUNDARY, alignment)); |
| } |
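| |
| /* For instance, under the LP64 ABI (an illustrative assumption), a "char" |
| argument still gets the 64-bit PARM_BOUNDARY, while a 16-byte-aligned |
| "long double" gets the full 128-bit PREFERRED_STACK_BOUNDARY. */ |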
| |
| /* If MODE represents an argument that can be passed or returned in |
| floating-point registers, return the number of registers, else 0. */ |
| |
| static unsigned |
| loongarch_pass_mode_in_fpr_p (machine_mode mode) |
| { |
| if (GET_MODE_UNIT_SIZE (mode) <= UNITS_PER_FP_ARG) |
| { |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| return 1; |
| |
| if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) |
| return 2; |
| } |
| |
| return 0; |
| } |
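| |
| /* For example, SFmode and DFmode values need one FPR and SCmode and DCmode |
| values need two, provided each part fits in UNITS_PER_FP_ARG; anything |
| else is not passed in FPRs by this rule. */ |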
| |
| typedef struct |
| { |
| const_tree type; |
| HOST_WIDE_INT offset; |
| } loongarch_aggregate_field; |
| |
| /* Identify subfields of aggregates that are candidates for passing in |
| floating-point registers. */ |
| |
| static int |
| loongarch_flatten_aggregate_field (const_tree type, |
| loongarch_aggregate_field fields[2], int n, |
| HOST_WIDE_INT offset) |
| { |
| switch (TREE_CODE (type)) |
| { |
| case RECORD_TYPE: |
| /* Can't handle incomplete types nor sizes that are not fixed. */ |
| if (!COMPLETE_TYPE_P (type) |
| || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST |
| || !tree_fits_uhwi_p (TYPE_SIZE (type))) |
| return -1; |
| |
| for (tree f = TYPE_FIELDS (type); f; f = DECL_CHAIN (f)) |
| if (TREE_CODE (f) == FIELD_DECL) |
| { |
| if (!TYPE_P (TREE_TYPE (f))) |
| return -1; |
| |
| if (DECL_SIZE (f) && integer_zerop (DECL_SIZE (f))) |
| continue; |
| |
| HOST_WIDE_INT pos = offset + int_byte_position (f); |
| n = loongarch_flatten_aggregate_field (TREE_TYPE (f), fields, n, |
| pos); |
| if (n < 0) |
| return -1; |
| } |
| return n; |
| |
| case ARRAY_TYPE: |
| { |
| HOST_WIDE_INT n_elts; |
| loongarch_aggregate_field subfields[2]; |
| tree index = TYPE_DOMAIN (type); |
| tree elt_size = TYPE_SIZE_UNIT (TREE_TYPE (type)); |
| int n_subfields = loongarch_flatten_aggregate_field (TREE_TYPE (type), |
| subfields, 0, |
| offset); |
| |
| /* Can't handle incomplete types nor sizes that are not fixed. */ |
| if (n_subfields <= 0 |
| || !COMPLETE_TYPE_P (type) |
| || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST |
| || !index |
| || !TYPE_MAX_VALUE (index) |
| || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index)) |
| || !TYPE_MIN_VALUE (index) |
| || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index)) |
| || !tree_fits_uhwi_p (elt_size)) |
| return -1; |
| |
| n_elts = 1 + tree_to_uhwi (TYPE_MAX_VALUE (index)) |
| - tree_to_uhwi (TYPE_MIN_VALUE (index)); |
| gcc_assert (n_elts >= 0); |
| |
| for (HOST_WIDE_INT i = 0; i < n_elts; i++) |
| for (int j = 0; j < n_subfields; j++) |
| { |
| if (n >= 2) |
| return -1; |
| |
| fields[n] = subfields[j]; |
| fields[n++].offset += i * tree_to_uhwi (elt_size); |
| } |
| |
| return n; |
| } |
| |
| case COMPLEX_TYPE: |
| { |
| /* A complex type needs to consume two fields, so N must be 0. */ |
| if (n != 0) |
| return -1; |
| |
| HOST_WIDE_INT elt_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (type))); |
| |
| if (elt_size <= UNITS_PER_FP_ARG) |
| { |
| fields[0].type = TREE_TYPE (type); |
| fields[0].offset = offset; |
| fields[1].type = TREE_TYPE (type); |
| fields[1].offset = offset + elt_size; |
| |
| return 2; |
| } |
| |
| return -1; |
| } |
| |
| default: |
| if (n < 2 |
| && ((SCALAR_FLOAT_TYPE_P (type) |
| && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_FP_ARG) |
| || (INTEGRAL_TYPE_P (type) |
| && GET_MODE_SIZE (TYPE_MODE (type)) <= UNITS_PER_WORD))) |
| { |
| fields[n].type = type; |
| fields[n].offset = offset; |
| return n + 1; |
| } |
| else |
| return -1; |
| } |
| } |
| |
| /* Identify candidate aggregates for passing in floating-point registers. |
| Candidates have at most two fields after flattening. */ |
| |
| static int |
| loongarch_flatten_aggregate_argument (const_tree type, |
| loongarch_aggregate_field fields[2]) |
| { |
| if (!type || TREE_CODE (type) != RECORD_TYPE) |
| return -1; |
| |
| return loongarch_flatten_aggregate_field (type, fields, 0, 0); |
| } |
| |
| /* See whether TYPE is a record whose fields should be returned in one or |
| two floating-point registers. If so, populate FIELDS accordingly. */ |
| |
| static unsigned |
| loongarch_pass_aggregate_num_fpr (const_tree type, |
| loongarch_aggregate_field fields[2]) |
| { |
| int n = loongarch_flatten_aggregate_argument (type, fields); |
| |
| for (int i = 0; i < n; i++) |
| if (!SCALAR_FLOAT_TYPE_P (fields[i].type)) |
| return 0; |
| |
| return n > 0 ? n : 0; |
| } |
| |
| /* See whether TYPE is a record whose fields should be returned in one |
| floating-point register and one integer register. If so, populate |
| FIELDS accordingly. */ |
| |
| static bool |
| loongarch_pass_aggregate_in_fpr_and_gpr_p (const_tree type, |
| loongarch_aggregate_field fields[2]) |
| { |
| unsigned num_int = 0, num_float = 0; |
| int n = loongarch_flatten_aggregate_argument (type, fields); |
| |
| for (int i = 0; i < n; i++) |
| { |
| num_float += SCALAR_FLOAT_TYPE_P (fields[i].type); |
| num_int += INTEGRAL_TYPE_P (fields[i].type); |
| } |
| |
| return num_int == 1 && num_float == 1; |
| } |
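| |
| /* To illustrate the two helpers above: "struct { float x; float y; }" |
| flattens to two scalar float fields and can be passed in two FPRs, while |
| "struct { float x; int y; }" has one float and one integer field and is |
| passed in one FPR plus one GPR (assuming enough argument registers |
| remain). */ |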
| |
| /* Return the representation of an argument passed or returned in an FPR |
| when the value has mode VALUE_MODE and the type has TYPE_MODE. The |
| two modes may be different for structures like: |
| |
| struct __attribute__((packed)) foo { float f; } |
| |
| where the SFmode value "f" is passed in REGNO but the struct itself |
| has mode BLKmode. */ |
| |
| static rtx |
| loongarch_pass_fpr_single (machine_mode type_mode, unsigned regno, |
| machine_mode value_mode, |
| HOST_WIDE_INT offset) |
| { |
| rtx x = gen_rtx_REG (value_mode, regno); |
| |
| if (type_mode != value_mode) |
| { |
| x = gen_rtx_EXPR_LIST (VOIDmode, x, GEN_INT (offset)); |
| x = gen_rtx_PARALLEL (type_mode, gen_rtvec (1, x)); |
| } |
| return x; |
| } |
| |
| /* Pass or return a composite value in the FPR pair REGNO and REGNO + 1. |
| MODE is the mode of the composite. MODE1 and OFFSET1 are the mode and |
| byte offset for the first value, likewise MODE2 and OFFSET2 for the |
| second value. */ |
| |
| static rtx |
| loongarch_pass_fpr_pair (machine_mode mode, unsigned regno1, |
| machine_mode mode1, HOST_WIDE_INT offset1, |
| unsigned regno2, machine_mode mode2, |
| HOST_WIDE_INT offset2) |
| { |
| return gen_rtx_PARALLEL ( |
| mode, gen_rtvec (2, |
| gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode1, regno1), |
| GEN_INT (offset1)), |
| gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode2, regno2), |
| GEN_INT (offset2)))); |
| } |
| |
| /* Fill INFO with information about a single argument, and return an |
| RTL pattern to pass or return the argument. CUM is the cumulative |
| state for earlier arguments. MODE is the mode of this argument and |
| TYPE is its type (if known). NAMED is true if this is a named |
| (fixed) argument rather than a variable one. RETURN_P is true if |
| returning the argument, or false if passing the argument. */ |
| |
| static rtx |
| loongarch_get_arg_info (struct loongarch_arg_info *info, |
| const CUMULATIVE_ARGS *cum, machine_mode mode, |
| const_tree type, bool named, bool return_p) |
| { |
| unsigned num_bytes, num_words; |
| unsigned fpr_base = return_p ? FP_RETURN : FP_ARG_FIRST; |
| unsigned gpr_base = return_p ? GP_RETURN : GP_ARG_FIRST; |
| unsigned alignment = loongarch_function_arg_boundary (mode, type); |
| |
| memset (info, 0, sizeof (*info)); |
| info->gpr_offset = cum->num_gprs; |
| info->fpr_offset = cum->num_fprs; |
| |
| if (named) |
| { |
| loongarch_aggregate_field fields[2]; |
| unsigned fregno = fpr_base + info->fpr_offset; |
| unsigned gregno = gpr_base + info->gpr_offset; |
| |
| /* Pass one- or two-element floating-point aggregates in FPRs. */ |
| if ((info->num_fprs |
| = loongarch_pass_aggregate_num_fpr (type, fields)) |
| && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS) |
| switch (info->num_fprs) |
| { |
| case 1: |
| return loongarch_pass_fpr_single (mode, fregno, |
| TYPE_MODE (fields[0].type), |
| fields[0].offset); |
| |
| case 2: |
| return loongarch_pass_fpr_pair (mode, fregno, |
| TYPE_MODE (fields[0].type), |
| fields[0].offset, |
| fregno + 1, |
| TYPE_MODE (fields[1].type), |
| fields[1].offset); |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* Pass real and complex floating-point numbers in FPRs. */ |
| if ((info->num_fprs = loongarch_pass_mode_in_fpr_p (mode)) |
| && info->fpr_offset + info->num_fprs <= MAX_ARGS_IN_REGISTERS) |
| switch (GET_MODE_CLASS (mode)) |
| { |
| case MODE_FLOAT: |
| return gen_rtx_REG (mode, fregno); |
| |
| case MODE_COMPLEX_FLOAT: |
| return loongarch_pass_fpr_pair (mode, fregno, |
| GET_MODE_INNER (mode), 0, |
| fregno + 1, GET_MODE_INNER (mode), |
| GET_MODE_UNIT_SIZE (mode)); |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| /* Pass structs with one float and one integer in an FPR and a GPR. */ |
| if (loongarch_pass_aggregate_in_fpr_and_gpr_p (type, fields) |
| && info->gpr_offset < MAX_ARGS_IN_REGISTERS |
| && info->fpr_offset < MAX_ARGS_IN_REGISTERS) |
| { |
| info->num_gprs = 1; |
| info->num_fprs = 1; |
| |
| if (!SCALAR_FLOAT_TYPE_P (fields[0].type)) |
| std::swap (fregno, gregno); |
| |
| return loongarch_pass_fpr_pair (mode, fregno, |
| TYPE_MODE (fields[0].type), |
| fields[0].offset, gregno, |
| TYPE_MODE (fields[1].type), |
| fields[1].offset); |
| } |
| } |
| |
| /* Work out the size of the argument. */ |
| num_bytes = type ? int_size_in_bytes (type) : GET_MODE_SIZE (mode); |
| num_words = (num_bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| |
| /* Doubleword-aligned varargs start on an even register boundary. */ |
| if (!named && num_bytes != 0 && alignment > BITS_PER_WORD) |
| info->gpr_offset += info->gpr_offset & 1; |
| |
| /* Partition the argument between registers and stack. */ |
| info->num_fprs = 0; |
| info->num_gprs = MIN (num_words, MAX_ARGS_IN_REGISTERS - info->gpr_offset); |
| info->stack_p = (num_words - info->num_gprs) != 0; |
| |
| if (info->num_gprs || return_p) |
| return gen_rtx_REG (mode, gpr_base + info->gpr_offset); |
| |
| return NULL_RTX; |
| } |
| |
| /* Implement TARGET_FUNCTION_ARG. */ |
| |
| static rtx |
| loongarch_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| struct loongarch_arg_info info; |
| |
| if (arg.end_marker_p ()) |
| return NULL; |
| |
| return loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, |
| false); |
| } |
| |
| /* Implement TARGET_FUNCTION_ARG_ADVANCE. */ |
| |
| static void |
| loongarch_function_arg_advance (cumulative_args_t cum_v, |
| const function_arg_info &arg) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| struct loongarch_arg_info info; |
| |
| loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, false); |
| |
| /* Advance the register count. This has the effect of setting |
| num_gprs to MAX_ARGS_IN_REGISTERS if a doubleword-aligned |
| argument required us to skip the final GPR and pass the whole |
| argument on the stack. */ |
| cum->num_fprs = info.fpr_offset + info.num_fprs; |
| cum->num_gprs = info.gpr_offset + info.num_gprs; |
| } |
| |
| /* Implement TARGET_ARG_PARTIAL_BYTES. */ |
| |
| static int |
| loongarch_arg_partial_bytes (cumulative_args_t cum, |
| const function_arg_info &generic_arg) |
| { |
| struct loongarch_arg_info arg; |
| |
| loongarch_get_arg_info (&arg, get_cumulative_args (cum), generic_arg.mode, |
| generic_arg.type, generic_arg.named, false); |
| return arg.stack_p ? arg.num_gprs * UNITS_PER_WORD : 0; |
| } |
| |
| /* Implement FUNCTION_VALUE and LIBCALL_VALUE. For normal calls, |
| VALTYPE is the return type and MODE is VOIDmode. For libcalls, |
| VALTYPE is null and MODE is the mode of the return value. */ |
| |
| static rtx |
| loongarch_function_value_1 (const_tree type, const_tree func, |
| machine_mode mode) |
| { |
| struct loongarch_arg_info info; |
| CUMULATIVE_ARGS args; |
| |
| if (type) |
| { |
| int unsigned_p = TYPE_UNSIGNED (type); |
| |
| mode = TYPE_MODE (type); |
| |
| /* Since TARGET_PROMOTE_FUNCTION_MODE unconditionally promotes |
| return values, promote the mode here too. */ |
| mode = promote_function_mode (type, mode, &unsigned_p, func, 1); |
| } |
| |
| memset (&args, 0, sizeof (args)); |
| return loongarch_get_arg_info (&info, &args, mode, type, true, true); |
| } |
| |
| |
| /* Implement TARGET_FUNCTION_VALUE. */ |
| |
| static rtx |
| loongarch_function_value (const_tree valtype, const_tree fn_decl_or_type, |
| bool outgoing ATTRIBUTE_UNUSED) |
| { |
| return loongarch_function_value_1 (valtype, fn_decl_or_type, VOIDmode); |
| } |
| |
| /* Implement TARGET_LIBCALL_VALUE. */ |
| |
| static rtx |
| loongarch_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED) |
| { |
| return loongarch_function_value_1 (NULL_TREE, NULL_TREE, mode); |
| } |
| |
| |
| /* Implement TARGET_PASS_BY_REFERENCE. */ |
| |
| static bool |
| loongarch_pass_by_reference (cumulative_args_t cum_v, |
| const function_arg_info &arg) |
| { |
| HOST_WIDE_INT size = arg.type_size_in_bytes (); |
| struct loongarch_arg_info info; |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| |
| /* ??? std_gimplify_va_arg_expr passes NULL for cum. Fortunately, we |
| never pass variadic arguments in floating-point registers, so we can |
| avoid the call to loongarch_get_arg_info in this case. */ |
| if (cum != NULL) |
| { |
| /* Don't pass by reference if we can use a floating-point register. */ |
| loongarch_get_arg_info (&info, cum, arg.mode, arg.type, arg.named, |
| false); |
| if (info.num_fprs) |
| return false; |
| } |
| |
| /* Pass by reference if the data do not fit in two integer registers. */ |
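| /* For the 64-bit target this means aggregates larger than 16 bytes go by |
| reference, while a 16-byte struct is still passed by value in two GPRs |
| (illustrative; the threshold follows UNITS_PER_WORD). */ |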
| return !IN_RANGE (size, 0, 2 * UNITS_PER_WORD); |
| } |
| |
| /* Implement TARGET_RETURN_IN_MEMORY. */ |
| |
| static bool |
| loongarch_return_in_memory (const_tree type, |
| const_tree fndecl ATTRIBUTE_UNUSED) |
| { |
| CUMULATIVE_ARGS args; |
| cumulative_args_t cum = pack_cumulative_args (&args); |
| |
| /* The rules for returning in memory are the same as for passing the |
| first named argument by reference. */ |
| memset (&args, 0, sizeof (args)); |
| function_arg_info arg (const_cast<tree> (type), /*named=*/true); |
| return loongarch_pass_by_reference (cum, arg); |
| } |
| |
| /* Implement TARGET_SETUP_INCOMING_VARARGS. */ |
| |
| static void |
| loongarch_setup_incoming_varargs (cumulative_args_t cum, |
| const function_arg_info &arg, |
| int *pretend_size ATTRIBUTE_UNUSED, |
| int no_rtl) |
| { |
| CUMULATIVE_ARGS local_cum; |
| int gp_saved; |
| |
| /* The caller has advanced CUM up to, but not beyond, the last named |
| argument. Advance a local copy of CUM past the last "real" named |
| argument, to find out how many registers are left over. */ |
| local_cum = *get_cumulative_args (cum); |
| loongarch_function_arg_advance (pack_cumulative_args (&local_cum), arg); |
| |
| /* Find out how many registers we need to save. */ |
| gp_saved = MAX_ARGS_IN_REGISTERS - local_cum.num_gprs; |
| |
| if (!no_rtl && gp_saved > 0) |
| { |
| rtx ptr = plus_constant (Pmode, virtual_incoming_args_rtx, |
| REG_PARM_STACK_SPACE (cfun->decl) |
| - gp_saved * UNITS_PER_WORD); |
| rtx mem = gen_frame_mem (BLKmode, ptr); |
| set_mem_alias_set (mem, get_varargs_alias_set ()); |
| |
| move_block_from_reg (local_cum.num_gprs + GP_ARG_FIRST, mem, gp_saved); |
| } |
| if (REG_PARM_STACK_SPACE (cfun->decl) == 0) |
| cfun->machine->varargs_size = gp_saved * UNITS_PER_WORD; |
| } |
| |
| /* Make the last instruction frame-related and note that it performs |
| the operation described by FRAME_PATTERN. */ |
| |
| static void |
| loongarch_set_frame_expr (rtx frame_pattern) |
| { |
| rtx insn; |
| |
| insn = get_last_insn (); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| REG_NOTES (insn) = alloc_EXPR_LIST (REG_FRAME_RELATED_EXPR, frame_pattern, |
| REG_NOTES (insn)); |
| } |
| |
| /* Return a frame-related rtx that stores REG at MEM. |
| REG must be a single register. */ |
| |
| static rtx |
| loongarch_frame_set (rtx mem, rtx reg) |
| { |
| rtx set = gen_rtx_SET (mem, reg); |
| RTX_FRAME_RELATED_P (set) = 1; |
| return set; |
| } |
| |
| /* Return true if the current function must save register REGNO. */ |
| |
| static bool |
| loongarch_save_reg_p (unsigned int regno) |
| { |
| bool call_saved = !global_regs[regno] && !call_used_regs[regno]; |
| bool might_clobber |
| = crtl->saves_all_registers || df_regs_ever_live_p (regno); |
| |
| if (call_saved && might_clobber) |
| return true; |
| |
| if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed) |
| return true; |
| |
| if (regno == RETURN_ADDR_REGNUM && crtl->calls_eh_return) |
| return true; |
| |
| return false; |
| } |
| |
| /* Determine which GPR save/restore routine to call. */ |
| |
| static unsigned |
| loongarch_save_libcall_count (unsigned mask) |
| { |
| for (unsigned n = GP_REG_LAST; n > GP_REG_FIRST; n--) |
| if (BITSET_P (mask, n)) |
| return CALLEE_SAVED_REG_NUMBER (n) + 1; |
| abort (); |
| } |
| |
| /* Populate the current function's loongarch_frame_info structure. |
| |
| LoongArch stack frames grow downward. High addresses are at the top. |
| |
| +-------------------------------+ |
| | | |
| | incoming stack arguments | |
| | | |
| +-------------------------------+ <-- incoming stack pointer |
| | | |
| | callee-allocated save area | |
| | for arguments that are | |
| | split between registers and | |
| | the stack | |
| | | |
| +-------------------------------+ <-- arg_pointer_rtx (virtual) |
| | | |
| | callee-allocated save area | |
| | for register varargs | |
| | | |
| +-------------------------------+ <-- hard_frame_pointer_rtx; |
| | | stack_pointer_rtx + gp_sp_offset |
| | GPR save area | + UNITS_PER_WORD |
| | | |
| +-------------------------------+ <-- stack_pointer_rtx + fp_sp_offset |
| | | + UNITS_PER_HWVALUE |
| | FPR save area | |
| | | |
| +-------------------------------+ <-- frame_pointer_rtx (virtual) |
| | | |
| | local variables | |
| | | |
| P +-------------------------------+ |
| | | |
| | outgoing stack arguments | |
| | | |
| +-------------------------------+ <-- stack_pointer_rtx |
| |
| Dynamic stack allocations such as alloca insert data at point P. |
| They decrease stack_pointer_rtx but leave frame_pointer_rtx and |
| hard_frame_pointer_rtx unchanged. */ |
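| |
| /* A small worked example of the layout above, assuming the default 16-byte |
| stack alignment and no varargs or pretend arguments: a function with |
| 8 bytes of locals that saves only $ra and $fp gets |
| frame_pointer_offset = 16, a 16-byte GPR save area with gp_sp_offset = 24, |
| and hard_frame_pointer_offset = total_size = 32. */ |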
| |
| static void |
| loongarch_compute_frame_info (void) |
| { |
| struct loongarch_frame_info *frame; |
| HOST_WIDE_INT offset; |
| unsigned int regno, i, num_x_saved = 0, num_f_saved = 0; |
| |
| frame = &cfun->machine->frame; |
| memset (frame, 0, sizeof (*frame)); |
| |
| /* Find out which GPRs we need to save. */ |
| for (regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) |
| if (loongarch_save_reg_p (regno)) |
| frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; |
| |
| /* If this function calls eh_return, we must also save and restore the |
| EH data registers. */ |
| if (crtl->calls_eh_return) |
| for (i = 0; (regno = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++) |
| frame->mask |= 1 << (regno - GP_REG_FIRST), num_x_saved++; |
| |
| /* Find out which FPRs we need to save. This loop must iterate over |
| the same space as its companion in loongarch_for_each_saved_reg. */ |
| if (TARGET_HARD_FLOAT) |
| for (regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) |
| if (loongarch_save_reg_p (regno)) |
| frame->fmask |= 1 << (regno - FP_REG_FIRST), num_f_saved++; |
| |
| /* At the bottom of the frame are any outgoing stack arguments. */ |
| offset = LARCH_STACK_ALIGN (crtl->outgoing_args_size); |
| /* Next are local stack variables. */ |
| offset += LARCH_STACK_ALIGN (get_frame_size ()); |
| /* The virtual frame pointer points above the local variables. */ |
| frame->frame_pointer_offset = offset; |
| /* Next are the callee-saved FPRs. */ |
| if (frame->fmask) |
| offset += LARCH_STACK_ALIGN (num_f_saved * UNITS_PER_FP_REG); |
| frame->fp_sp_offset = offset - UNITS_PER_FP_REG; |
| /* Next are the callee-saved GPRs. */ |
| if (frame->mask) |
| { |
| unsigned x_save_size = LARCH_STACK_ALIGN (num_x_saved * UNITS_PER_WORD); |
| unsigned num_save_restore |
| = 1 + loongarch_save_libcall_count (frame->mask); |
| |
| /* Only use save/restore routines if they don't alter the stack size. */ |
| if (LARCH_STACK_ALIGN (num_save_restore * UNITS_PER_WORD) == x_save_size) |
| frame->save_libcall_adjustment = x_save_size; |
| |
| offset += x_save_size; |
| } |
| frame->gp_sp_offset = offset - UNITS_PER_WORD; |
| /* The hard frame pointer points above the callee-saved GPRs. */ |
| frame->hard_frame_pointer_offset = offset; |
| /* Above the hard frame pointer is the callee-allocated varargs save area. */ |
| offset += LARCH_STACK_ALIGN (cfun->machine->varargs_size); |
| /* Next is the callee-allocated area for pretend stack arguments. */ |
| offset += LARCH_STACK_ALIGN (crtl->args.pretend_args_size); |
| /* Arg pointer must be below pretend args, but must be above alignment |
| padding. */ |
| frame->arg_pointer_offset = offset - crtl->args.pretend_args_size; |
| frame->total_size = offset; |
| /* Above this point are the incoming stack pointer and any incoming arguments. */ |
| |
| /* Only use save/restore routines when the GPRs are atop the frame. */ |
| if (frame->hard_frame_pointer_offset != frame->total_size) |
| frame->save_libcall_adjustment = 0; |
| } |
| |
| /* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame pointer |
| or argument pointer. TO is either the stack pointer or hard frame |
| pointer. */ |
| |
| HOST_WIDE_INT |
| loongarch_initial_elimination_offset (int from, int to) |
| { |
| HOST_WIDE_INT src, dest; |
| |
| loongarch_compute_frame_info (); |
| |
| if (to == HARD_FRAME_POINTER_REGNUM) |
| dest = cfun->machine->frame.hard_frame_pointer_offset; |
| else if (to == STACK_POINTER_REGNUM) |
| dest = 0; /* The stack pointer is the base of all offsets, hence 0. */ |
| else |
| gcc_unreachable (); |
| |
| if (from == FRAME_POINTER_REGNUM) |
| src = cfun->machine->frame.frame_pointer_offset; |
| else if (from == ARG_POINTER_REGNUM) |
| src = cfun->machine->frame.arg_pointer_offset; |
| else |
| gcc_unreachable (); |
| |
| return src - dest; |
| } |
| |
| /* A function to save or store a register. The first argument is the |
| register and the second is the stack slot. */ |
| typedef void (*loongarch_save_restore_fn) (rtx, rtx); |
| |
| /* Use FN to save or restore register REGNO. MODE is the register's |
| mode and OFFSET is the offset of its save slot from the current |
| stack pointer. */ |
| |
| static void |
| loongarch_save_restore_reg (machine_mode mode, int regno, HOST_WIDE_INT offset, |
| loongarch_save_restore_fn fn) |
| { |
| rtx mem; |
| |
| mem = gen_frame_mem (mode, plus_constant (Pmode, stack_pointer_rtx, offset)); |
| fn (gen_rtx_REG (mode, regno), mem); |
| } |
| |
| /* Call FN for each register that is saved by the current function. |
| SP_OFFSET is the offset of the current stack pointer from the start |
| of the frame. */ |
| |
| static void |
| loongarch_for_each_saved_reg (HOST_WIDE_INT sp_offset, |
| loongarch_save_restore_fn fn) |
| { |
| HOST_WIDE_INT offset; |
| |
| /* Save the link register and s-registers. */ |
| offset = cfun->machine->frame.gp_sp_offset - sp_offset; |
| for (int regno = GP_REG_FIRST; regno <= GP_REG_LAST; regno++) |
| if (BITSET_P (cfun->machine->frame.mask, regno - GP_REG_FIRST)) |
| { |
| loongarch_save_restore_reg (word_mode, regno, offset, fn); |
| offset -= UNITS_PER_WORD; |
| } |
| |
| /* This loop must iterate over the same space as its companion in |
| loongarch_compute_frame_info. */ |
| offset = cfun->machine->frame.fp_sp_offset - sp_offset; |
| for (int regno = FP_REG_FIRST; regno <= FP_REG_LAST; regno++) |
| if (BITSET_P (cfun->machine->frame.fmask, regno - FP_REG_FIRST)) |
| { |
| machine_mode mode = TARGET_DOUBLE_FLOAT ? DFmode : SFmode; |
| |
| loongarch_save_restore_reg (mode, regno, offset, fn); |
| offset -= GET_MODE_SIZE (mode); |
| } |
| } |
| |
| /* Emit a move from SRC to DEST. Assume that the move expanders can |
| handle all moves if !can_create_pseudo_p (). The distinction is |
| important because, unlike emit_move_insn, the move expanders know |
| how to force Pmode objects into the constant pool even when the |
| constant pool address is not itself legitimate. */ |
| |
| rtx |
| loongarch_emit_move (rtx dest, rtx src) |
| { |
| return (can_create_pseudo_p () ? emit_move_insn (dest, src) |
| : emit_move_insn_1 (dest, src)); |
| } |
| |
| /* Save register REG to MEM. Make the instruction frame-related. */ |
| |
| static void |
| loongarch_save_reg (rtx reg, rtx mem) |
| { |
| loongarch_emit_move (mem, reg); |
| loongarch_set_frame_expr (loongarch_frame_set (mem, reg)); |
| } |
| |
| /* Restore register REG from MEM. */ |
| |
| static void |
| loongarch_restore_reg (rtx reg, rtx mem) |
| { |
| rtx insn = loongarch_emit_move (reg, mem); |
| rtx dwarf = NULL_RTX; |
| dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf); |
| REG_NOTES (insn) = dwarf; |
| |
| RTX_FRAME_RELATED_P (insn) = 1; |
| } |
| |
| /* For stack frames that can't be allocated with a single ADDI instruction, |
| compute the best value to initially allocate. It must at a minimum |
| allocate enough space to spill the callee-saved registers. */ |
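| |
| /* For example (illustrative sizes), a 2032-byte frame fits in a signed |
| 12-bit immediate and is allocated in a single step, whereas a larger frame |
| is split so that the first step covers at least the callee-save area and |
| stays within IMM12 range; the remainder is then added via a constant |
| materialized with LU12I.W (see loongarch_expand_prologue). */ |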
| |
| static HOST_WIDE_INT |
| loongarch_first_stack_step (struct loongarch_frame_info *frame) |
| { |
| if (IMM12_OPERAND (frame->total_size)) |
| return frame->total_size; |
| |
| HOST_WIDE_INT min_first_step |
| = LARCH_STACK_ALIGN (frame->total_size - frame->fp_sp_offset); |
| HOST_WIDE_INT max_first_step = IMM_REACH / 2 - PREFERRED_STACK_BOUNDARY / 8; |
| HOST_WIDE_INT min_second_step = frame->total_size - max_first_step; |
| gcc_assert (min_first_step <= max_first_step); |
| |
| /* As an optimization, use the least-significant bits of the total frame |
| size, so that the second adjustment step is just LU12I + ADD. */ |
| if (!IMM12_OPERAND (min_second_step) |
| && frame->total_size % IMM_REACH < IMM_REACH / 2 |
| && frame->total_size % IMM_REACH >= min_first_step) |
| return frame->total_size % IMM_REACH; |
| |
| return max_first_step; |
| } |
| |
| static void |
| loongarch_emit_stack_tie (void) |
| { |
| emit_insn (gen_stack_tie (Pmode, stack_pointer_rtx, hard_frame_pointer_rtx)); |
| } |
| |
| #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP) |
| |
| #if PROBE_INTERVAL > 16384 |
| #error Cannot use indexed addressing mode for stack probing |
| #endif |
| |
| /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE, |
| inclusive. These are offsets from the current stack pointer. */ |
| |
| static void |
| loongarch_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size) |
| { |
| /* See if we have a constant small number of probes to generate. If so, |
| that's the easy case. */ |
| if ((TARGET_64BIT && (first + size <= 32768)) |
| || (!TARGET_64BIT && (first + size <= 2048))) |
| { |
| HOST_WIDE_INT i; |
| |
| /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until |
| it exceeds SIZE. If only one probe is needed, this will not |
| generate any code. Then probe at FIRST + SIZE. */ |
| for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL) |
| emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, |
| -(first + i))); |
| |
| emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx, |
| -(first + size))); |
| } |
| |
| /* Otherwise, do the same as above, but in a loop. Note that we must be |
| extra careful with variables wrapping around because we might be at |
| the very top (or the very bottom) of the address space and we have |
| to be able to handle this case properly; in particular, we use an |
| equality test for the loop condition. */ |
| else |
| { |
| HOST_WIDE_INT rounded_size; |
| rtx r13 = LARCH_PROLOGUE_TEMP (Pmode); |
| rtx r12 = LARCH_PROLOGUE_TEMP2 (Pmode); |
| rtx r14 = LARCH_PROLOGUE_TEMP3 (Pmode); |
| |
| /* Sanity check for the addressing mode we're going to use. */ |
| gcc_assert (first <= 16384); |
| |
| |
| /* Step 1: round SIZE to the previous multiple of the interval. */ |
| |
| rounded_size = ROUND_DOWN (size, PROBE_INTERVAL); |
| |
| /* TEST_ADDR = SP + FIRST */ |
| if (first != 0) |
| { |
| emit_move_insn (r14, GEN_INT (first)); |
| emit_insn (gen_rtx_SET (r13, gen_rtx_MINUS (Pmode, |
| stack_pointer_rtx, |
| r14))); |
| } |
| else |
| emit_move_insn (r13, stack_pointer_rtx); |
| |
| /* Step 2: compute initial and final value of the loop counter. */ |
| |
| emit_move_insn (r14, GEN_INT (PROBE_INTERVAL)); |
| /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */ |
| if (rounded_size == 0) |
| emit_move_insn (r12, r13); |
| else |
| { |
| emit_move_insn (r12, GEN_INT (rounded_size)); |
| emit_insn (gen_rtx_SET (r12, gen_rtx_MINUS (Pmode, r13, r12))); |
| /* Step 3: the loop |
| |
| do |
| { |
| TEST_ADDR = TEST_ADDR + PROBE_INTERVAL |
| probe at TEST_ADDR |
| } |
| while (TEST_ADDR != LAST_ADDR) |
| |
| probes at FIRST + N * PROBE_INTERVAL for values of N from 1 |
| until it is equal to ROUNDED_SIZE. */ |
| |
| emit_insn (gen_probe_stack_range (Pmode, r13, r13, r12, r14)); |
| } |
| |
| /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time |
| that SIZE is equal to ROUNDED_SIZE. */ |
| |
| if (size != rounded_size) |
| { |
| if (TARGET_64BIT) |
| emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size)); |
| else |
| { |
| HOST_WIDE_INT i; |
| for (i = 2048; i < (size - rounded_size); i += 2048) |
| { |
| emit_stack_probe (plus_constant (Pmode, r12, -i)); |
| emit_insn (gen_rtx_SET (r12, |
| plus_constant (Pmode, r12, -2048))); |
| } |
| rtx r1 = plus_constant (Pmode, r12, |
| -(size - rounded_size - i + 2048)); |
| emit_stack_probe (r1); |
| } |
| } |
| } |
| |
| /* Make sure nothing is scheduled before we are done. */ |
| emit_insn (gen_blockage ()); |
| } |
| |
| /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are |
| absolute addresses. */ |
| const char * |
| loongarch_output_probe_stack_range (rtx reg1, rtx reg2, rtx reg3) |
| { |
| static int labelno = 0; |
| char loop_lab[32], tmp[64]; |
| rtx xops[3]; |
| |
| ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++); |
| |
| /* Loop. */ |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab); |
| |
| /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */ |
| xops[0] = reg1; |
| xops[1] = GEN_INT (-PROBE_INTERVAL); |
| xops[2] = reg3; |
| if (TARGET_64BIT) |
| output_asm_insn ("sub.d\t%0,%0,%2", xops); |
| else |
| output_asm_insn ("sub.w\t%0,%0,%2", xops); |
| |
| /* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch. */ |
| xops[1] = reg2; |
| strcpy (tmp, "bne\t%0,%1,"); |
| if (TARGET_64BIT) |
| output_asm_insn ("st.d\t$r0,%0,0", xops); |
| else |
| output_asm_insn ("st.w\t$r0,%0,0", xops); |
| output_asm_insn (strcat (tmp, &loop_lab[1]), xops); |
| |
| return ""; |
| } |
| |
| /* Expand the "prologue" pattern. */ |
| |
| void |
| loongarch_expand_prologue (void) |
| { |
| struct loongarch_frame_info *frame = &cfun->machine->frame; |
| HOST_WIDE_INT size = frame->total_size; |
| HOST_WIDE_INT tmp; |
| rtx insn; |
| |
| if (flag_stack_usage_info) |
| current_function_static_stack_size = size; |
| |
| if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK |
| || flag_stack_clash_protection) |
| { |
| if (crtl->is_leaf && !cfun->calls_alloca) |
| { |
| if (size > PROBE_INTERVAL && size > get_stack_check_protect ()) |
| { |
| tmp = size - get_stack_check_protect (); |
| loongarch_emit_probe_stack_range (get_stack_check_protect (), |
| tmp); |
| } |
| } |
| else if (size > 0) |
| loongarch_emit_probe_stack_range (get_stack_check_protect (), size); |
| } |
| |
| /* Save the registers. */ |
| if ((frame->mask | frame->fmask) != 0) |
| { |
| HOST_WIDE_INT step1 = MIN (size, loongarch_first_stack_step (frame)); |
| |
| insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (-step1)); |
| RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; |
| size -= step1; |
| loongarch_for_each_saved_reg (size, loongarch_save_reg); |
| } |
| |
| |
| /* Set up the frame pointer, if we're using one. */ |
| if (frame_pointer_needed) |
| { |
| insn = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (frame->hard_frame_pointer_offset - size)); |
| RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; |
| |
| loongarch_emit_stack_tie (); |
| } |
| |
| /* Allocate the rest of the frame. */ |
| if (size > 0) |
| { |
| if (IMM12_OPERAND (-size)) |
| { |
| insn = gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, |
| GEN_INT (-size)); |
| RTX_FRAME_RELATED_P (emit_insn (insn)) = 1; |
| } |
| else |
| { |
| loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), GEN_INT (-size)); |
| emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, |
| LARCH_PROLOGUE_TEMP (Pmode))); |
| |
| /* Describe the effect of the previous instructions. */ |
| insn = plus_constant (Pmode, stack_pointer_rtx, -size); |
| insn = gen_rtx_SET (stack_pointer_rtx, insn); |
| loongarch_set_frame_expr (insn); |
| } |
| } |
| } |
| |
| /* Return nonzero if this function is known to have a null epilogue. |
| This allows the optimizer to omit jumps to jumps if no stack |
| was created. */ |
| |
| bool |
| loongarch_can_use_return_insn (void) |
| { |
| return reload_completed && cfun->machine->frame.total_size == 0; |
| } |
| |
| /* Expand an "epilogue" or "sibcall_epilogue" pattern; SIBCALL_P |
| says which. */ |
| |
| void |
| loongarch_expand_epilogue (bool sibcall_p) |
| { |
| /* Split the frame into two. STEP1 is the amount of stack we should |
| deallocate before restoring the registers. STEP2 is the amount we |
| should deallocate afterwards. |
| |
| Start off by assuming that no registers need to be restored. */ |
| struct loongarch_frame_info *frame = &cfun->machine->frame; |
| HOST_WIDE_INT step1 = frame->total_size; |
| HOST_WIDE_INT step2 = 0; |
| rtx ra = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM); |
| rtx insn; |
| |
| /* We need to add a memory barrier to prevent reads from the deallocated |
| stack. */ |
| bool need_barrier_p |
| = (get_frame_size () + cfun->machine->frame.arg_pointer_offset) != 0; |
| |
| if (!sibcall_p && loongarch_can_use_return_insn ()) |
| { |
| emit_jump_insn (gen_return ()); |
| return; |
| } |
| |
| /* Move past any dynamic stack allocations. */ |
| if (cfun->calls_alloca) |
| { |
| /* Emit a barrier to prevent loads from a deallocated stack. */ |
| loongarch_emit_stack_tie (); |
| need_barrier_p = false; |
| |
| rtx adjust = GEN_INT (-frame->hard_frame_pointer_offset); |
| if (!IMM12_OPERAND (INTVAL (adjust))) |
| { |
| loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust); |
| adjust = LARCH_PROLOGUE_TEMP (Pmode); |
| } |
| |
| insn = emit_insn (gen_add3_insn (stack_pointer_rtx, |
| hard_frame_pointer_rtx, |
| adjust)); |
| |
| rtx dwarf = NULL_RTX; |
| rtx minus_offset = GEN_INT (-frame->hard_frame_pointer_offset); |
| rtx cfa_adjust_value = gen_rtx_PLUS (Pmode, |
| hard_frame_pointer_rtx, |
| minus_offset); |
| |
| rtx cfa_adjust_rtx = gen_rtx_SET (stack_pointer_rtx, cfa_adjust_value); |
| dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, cfa_adjust_rtx, dwarf); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| REG_NOTES (insn) = dwarf; |
| } |
| |
| /* If we need to restore registers, deallocate as much stack as |
| possible in the second step without going out of range. */ |
| if ((frame->mask | frame->fmask) != 0) |
| { |
| step2 = loongarch_first_stack_step (frame); |
| step1 -= step2; |
| } |
| |
| /* Set TARGET to BASE + STEP1. */ |
| if (step1 > 0) |
| { |
| /* Emit a barrier to prevent loads from a deallocated stack. */ |
| loongarch_emit_stack_tie (); |
| need_barrier_p = false; |
| |
| /* Get an rtx for STEP1 that we can add to BASE. */ |
| rtx adjust = GEN_INT (step1); |
| if (!IMM12_OPERAND (step1)) |
| { |
| loongarch_emit_move (LARCH_PROLOGUE_TEMP (Pmode), adjust); |
| adjust = LARCH_PROLOGUE_TEMP (Pmode); |
| } |
| |
| insn = emit_insn (gen_add3_insn (stack_pointer_rtx, |
| stack_pointer_rtx, |
| adjust)); |
| |
| rtx dwarf = NULL_RTX; |
| rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, |
| GEN_INT (step2)); |
| |
| dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| REG_NOTES (insn) = dwarf; |
| } |
| |
| /* Restore the registers. */ |
| loongarch_for_each_saved_reg (frame->total_size - step2, |
| loongarch_restore_reg); |
| |
| if (need_barrier_p) |
| loongarch_emit_stack_tie (); |
| |
| /* Deallocate the final bit of the frame. */ |
| if (step2 > 0) |
| { |
| insn = emit_insn (gen_add3_insn (stack_pointer_rtx, |
| stack_pointer_rtx, |
| GEN_INT (step2))); |
| |
| rtx dwarf = NULL_RTX; |
| rtx cfa_adjust_rtx = gen_rtx_PLUS (Pmode, stack_pointer_rtx, const0_rtx); |
| dwarf = alloc_reg_note (REG_CFA_DEF_CFA, cfa_adjust_rtx, dwarf); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| |
| REG_NOTES (insn) = dwarf; |
| } |
| |
| /* Add in the __builtin_eh_return stack adjustment. */ |
| if (crtl->calls_eh_return) |
| emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, |
| EH_RETURN_STACKADJ_RTX)); |
| |
| if (!sibcall_p) |
| emit_jump_insn (gen_simple_return_internal (ra)); |
| } |
| |
| #define LU32I_B (0xfffffULL << 32) |
| #define LU52I_B (0xfffULL << 52) |
| |
| /* Fill CODES with a sequence of rtl operations to load VALUE. |
| Return the number of operations needed. */ |
| |
| static unsigned int |
| loongarch_build_integer (struct loongarch_integer_op *codes, |
| HOST_WIDE_INT value) |
| { |
| unsigned int cost = 0; |
| |
| /* Get the lower 32 bits of the value. */ |
| HOST_WIDE_INT low_part = TARGET_64BIT ? value << 32 >> 32 : value; |
| |
| if (IMM12_OPERAND (low_part) || IMM12_OPERAND_UNSIGNED (low_part)) |
| { |
| /* The lower 32 bits fit in a 12-bit signed or unsigned immediate and can |
| be loaded with a single instruction. */ |
| codes[0].code = UNKNOWN; |
| codes[0].method = METHOD_NORMAL; |
| codes[0].value = low_part; |
| cost++; |
| } |
| else |
| { |
| /* lu12i.w + ior. */ |
| codes[0].code = UNKNOWN; |
| codes[0].method = METHOD_NORMAL; |
| codes[0].value = low_part & ~(IMM_REACH - 1); |
| cost++; |
| HOST_WIDE_INT iorv = low_part & (IMM_REACH - 1); |
| if (iorv != 0) |
| { |
| codes[1].code = IOR; |
| codes[1].method = METHOD_NORMAL; |
| codes[1].value = iorv; |
| cost++; |
| } |
| } |
| |
| if (TARGET_64BIT) |
| { |
| bool lu32i[2] = {(value & LU32I_B) == 0, (value & LU32I_B) == LU32I_B}; |
| bool lu52i[2] = {(value & LU52I_B) == 0, (value & LU52I_B) == LU52I_B}; |
| |
| int sign31 = (value & (1UL << 31)) >> 31; |
| /* Determine whether the upper 32 bits are sign-extended from the lower |
| 32 bits. If so, the instructions that load the high-order bits can be |
| omitted. */ |
| if (lu32i[sign31] && lu52i[sign31]) |
| return cost; |
| /* Determine whether bits 32-51 are sign-extended from the lower 32 |
| bits. If so, directly load bits 52-63. */ |
| else if (lu32i[sign31]) |
| { |
| codes[cost].method = METHOD_LU52I; |
| codes[cost].value = (value >> 52) << 52; |
| return cost + 1; |
| } |
| |
| codes[cost].method = METHOD_LU32I; |
| codes[cost].value = ((value << 12) >> 44) << 32; |
| cost++; |
| |
| /* Determine whether bits 52-63 are sign-extended from the lower bits; |
| if not, load bits 52-63. */ |
| if (!lu52i[(value & (1ULL << 51)) >> 51]) |
| { |
| codes[cost].method = METHOD_LU52I; |
| codes[cost].value = (value >> 52) << 52; |
| cost++; |
| } |
| } |
| |
| gcc_assert (cost <= LARCH_MAX_INTEGER_OPS); |
| |
| return cost; |
| } |
| |
| /* Return the number of rtl operations needed to load VALUE into a |
| register; the actual splitting of the integer is done in |
| loongarch_output_move. */ |
| |
| static unsigned int |
| loongarch_integer_cost (HOST_WIDE_INT value) |
| { |
| struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; |
| return loongarch_build_integer (codes, value); |
| } |
| |
| /* Implement TARGET_LEGITIMATE_CONSTANT_P. */ |
| |
| static bool |
| loongarch_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x) |
| { |
| return loongarch_const_insns (x) > 0; |
| } |
| |
| /* Return true if X is a thread-local symbol. */ |
| |
| static bool |
| loongarch_tls_symbol_p (rtx x) |
| { |
| return SYMBOL_REF_P (x) && SYMBOL_REF_TLS_MODEL (x) != 0; |
| } |
| |
| /* Return true if SYMBOL_REF X is associated with a global symbol |
| (in the STB_GLOBAL sense). */ |
| |
| bool |
| loongarch_global_symbol_p (const_rtx x) |
| { |
| if (LABEL_REF_P (x)) |
| return false; |
| |
| const_tree decl = SYMBOL_REF_DECL (x); |
| |
| if (!decl) |
| return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x); |
| |
| /* Weakref symbols are not TREE_PUBLIC, but their targets are global |
| or weak symbols. Relocations in the object file will be against |
| the target symbol, so it's that symbol's binding that matters here. */ |
| return DECL_P (decl) && (TREE_PUBLIC (decl) || DECL_WEAK (decl)); |
| } |
| |
| bool |
| loongarch_global_symbol_noweak_p (const_rtx x) |
| { |
| if (LABEL_REF_P (x)) |
| return false; |
| |
| const_tree decl = SYMBOL_REF_DECL (x); |
| |
| if (!decl) |
| return !SYMBOL_REF_LOCAL_P (x) || SYMBOL_REF_EXTERNAL_P (x); |
| |
| return DECL_P (decl) && TREE_PUBLIC (decl); |
| } |
| |
| bool |
| loongarch_weak_symbol_p (const_rtx x) |
| { |
| const_tree decl; |
| if (LABEL_REF_P (x) || !(decl = SYMBOL_REF_DECL (x))) |
| return false; |
| return DECL_P (decl) && DECL_WEAK (decl); |
| } |
| |
| /* Return true if SYMBOL_REF X binds locally. */ |
| |
| bool |
| loongarch_symbol_binds_local_p (const_rtx x) |
| { |
| if (LABEL_REF_P (x)) |
| return false; |
| |
| return (SYMBOL_REF_DECL (x) ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) |
| : SYMBOL_REF_LOCAL_P (x)); |
| } |
| |
| /* Return true if rtx constants of mode MODE should be put into a small |
| data section. */ |
| |
| static bool |
| loongarch_rtx_constant_in_small_data_p (machine_mode mode) |
| { |
| return (GET_MODE_SIZE (mode) <= g_switch_value); |
| } |
| |
| /* Return the method that should be used to access SYMBOL_REF or |
| LABEL_REF X. */ |
| |
| static enum loongarch_symbol_type |
| loongarch_classify_symbol (const_rtx x) |
| { |
| if (LABEL_REF_P (x)) |
| return SYMBOL_GOT_DISP; |
| |
| gcc_assert (SYMBOL_REF_P (x)); |
| |
| if (SYMBOL_REF_TLS_MODEL (x)) |
| return SYMBOL_TLS; |
| |
| if (SYMBOL_REF_P (x)) |
| return SYMBOL_GOT_DISP; |
| |
| return SYMBOL_GOT_DISP; |
| } |
| |
| /* Return true if X is a symbolic constant. If it is, |
| store the type of the symbol in *SYMBOL_TYPE. */ |
| |
| bool |
| loongarch_symbolic_constant_p (rtx x, enum loongarch_symbol_type *symbol_type) |
| { |
| rtx offset; |
| |
| split_const (x, &x, &offset); |
| if (UNSPEC_ADDRESS_P (x)) |
| { |
| *symbol_type = UNSPEC_ADDRESS_TYPE (x); |
| x = UNSPEC_ADDRESS (x); |
| } |
| else if (SYMBOL_REF_P (x) || LABEL_REF_P (x)) |
| { |
| *symbol_type = loongarch_classify_symbol (x); |
| if (*symbol_type == SYMBOL_TLS) |
| return true; |
| } |
| else |
| return false; |
| |
| if (offset == const0_rtx) |
| return true; |
| |
| /* Check whether a nonzero offset is valid for the underlying |
| relocations. */ |
| switch (*symbol_type) |
| { |
| case SYMBOL_GOT_DISP: |
| case SYMBOL_TLSGD: |
| case SYMBOL_TLSLDM: |
| case SYMBOL_TLS: |
| return false; |
| } |
| gcc_unreachable (); |
| } |
| |
| /* Return the number of instructions necessary to reference a symbol. */ |
| |
| static int |
| loongarch_symbol_insns (enum loongarch_symbol_type type, machine_mode mode) |
| { |
| switch (type) |
| { |
| case SYMBOL_GOT_DISP: |
| /* The constant will have to be loaded from the GOT before it |
| is used in an address. */ |
| if (mode != MAX_MACHINE_MODE) |
| return 0; |
| |
| return 3; |
| |
| case SYMBOL_TLSGD: |
| case SYMBOL_TLSLDM: |
| return 1; |
| |
| case SYMBOL_TLS: |
| /* We don't treat a bare TLS symbol as a constant. */ |
| return 0; |
| } |
| gcc_unreachable (); |
| } |
| |
| /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */ |
| |
| static bool |
| loongarch_cannot_force_const_mem (machine_mode mode, rtx x) |
| { |
| enum loongarch_symbol_type type; |
| rtx base, offset; |
| |
| /* As an optimization, reject constants that loongarch_legitimize_move |
| can expand inline. |
| |
| Suppose we have a multi-instruction sequence that loads constant C |
| into register R. If R does not get allocated a hard register, and |
| R is used in an operand that allows both registers and memory |
| references, reload will consider forcing C into memory and using |
| one of the instruction's memory alternatives. Returning false |
| here will force it to use an input reload instead. */ |
| if (CONST_INT_P (x) && loongarch_legitimate_constant_p (mode, x)) |
| return true; |
| |
| split_const (x, &base, &offset); |
| if (loongarch_symbolic_constant_p (base, &type)) |
| { |
| /* The same optimization as for CONST_INT. */ |
| if (IMM12_INT (offset) |
| && loongarch_symbol_insns (type, MAX_MACHINE_MODE) > 0) |
| return true; |
| } |
| |
| /* TLS symbols must be computed by loongarch_legitimize_move. */ |
| if (tls_referenced_p (x)) |
| return true; |
| |
| return false; |
| } |
| |
| /* Return true if register REGNO is a valid base register for mode MODE. |
| STRICT_P is true if REG_OK_STRICT is in effect. */ |
| |
| int |
| loongarch_regno_mode_ok_for_base_p (int regno, |
| machine_mode mode ATTRIBUTE_UNUSED, |
| bool strict_p) |
| { |
| if (!HARD_REGISTER_NUM_P (regno)) |
| { |
| if (!strict_p) |
| return true; |
| regno = reg_renumber[regno]; |
| } |
| |
| /* These fake registers will be eliminated to either the stack or |
| hard frame pointer, both of which are usually valid base registers. |
| Reload deals with the cases where the eliminated form isn't valid. */ |
| if (regno == ARG_POINTER_REGNUM || regno == FRAME_POINTER_REGNUM) |
| return true; |
| |
| return GP_REG_P (regno); |
| } |
| |
| /* Return true if X is a valid base register for mode MODE. |
| STRICT_P is true if REG_OK_STRICT is in effect. */ |
| |
| static bool |
| loongarch_valid_base_register_p (rtx x, machine_mode mode, bool strict_p) |
| { |
| if (!strict_p && SUBREG_P (x)) |
| x = SUBREG_REG (x); |
| |
| return (REG_P (x) |
| && loongarch_regno_mode_ok_for_base_p (REGNO (x), mode, strict_p)); |
| } |
| |
| /* Return true if, for every base register BASE_REG, (plus BASE_REG X) |
| can address a value of mode MODE. */ |
| |
| static bool |
| loongarch_valid_offset_p (rtx x, machine_mode mode) |
| { |
| /* Check that X is a signed 12-bit number, or, for SImode and DImode |
| accesses, a signed 16-bit number that is 4-byte aligned. */ |
| if (!(const_arith_operand (x, Pmode) |
| || ((mode == E_SImode || mode == E_DImode) |
| && const_imm16_operand (x, Pmode) |
| && (loongarch_signed_immediate_p (INTVAL (x), 14, 2))))) |
| return false; |
| |
| /* We may need to split multiword moves, so make sure that every word |
| is accessible. */ |
| if (GET_MODE_SIZE (mode) > UNITS_PER_WORD |
| && !IMM12_OPERAND (INTVAL (x) + GET_MODE_SIZE (mode) - UNITS_PER_WORD)) |
| return false; |
| |
| return true; |
| } |
| |
| static bool |
| loongarch_valid_index_p (struct loongarch_address_info *info, rtx x, |
| machine_mode mode, bool strict_p) |
| { |
| rtx index; |
| |
| if ((REG_P (x) || SUBREG_P (x)) |
| && GET_MODE (x) == Pmode) |
| { |
| index = x; |
| } |
| else |
| return false; |
| |
| if (!strict_p |
| && SUBREG_P (index) |
| && contains_reg_of_mode[GENERAL_REGS][GET_MODE (SUBREG_REG (index))]) |
| index = SUBREG_REG (index); |
| |
| if (loongarch_valid_base_register_p (index, mode, strict_p)) |
| { |
| info->type = ADDRESS_REG_REG; |
| info->offset = index; |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Return true if X is a valid address for machine mode MODE. If it is, |
| fill in INFO appropriately. STRICT_P is true if REG_OK_STRICT is in |
| effect. */ |
| |
| static bool |
| loongarch_classify_address (struct loongarch_address_info *info, rtx x, |
| machine_mode mode, bool strict_p) |
| { |
| switch (GET_CODE (x)) |
| { |
| case REG: |
| case SUBREG: |
| info->type = ADDRESS_REG; |
| info->reg = x; |
| info->offset = const0_rtx; |
| return loongarch_valid_base_register_p (info->reg, mode, strict_p); |
| |
| case PLUS: |
| if (loongarch_valid_base_register_p (XEXP (x, 0), mode, strict_p) |
| && loongarch_valid_index_p (info, XEXP (x, 1), mode, strict_p)) |
| { |
| info->reg = XEXP (x, 0); |
| return true; |
| } |
| |
| if (loongarch_valid_base_register_p (XEXP (x, 1), mode, strict_p) |
| && loongarch_valid_index_p (info, XEXP (x, 0), mode, strict_p)) |
| { |
| info->reg = XEXP (x, 1); |
| return true; |
| } |
| |
| info->type = ADDRESS_REG; |
| info->reg = XEXP (x, 0); |
| info->offset = XEXP (x, 1); |
| return (loongarch_valid_base_register_p (info->reg, mode, strict_p) |
| && loongarch_valid_offset_p (info->offset, mode)); |
| default: |
| return false; |
| } |
| } |
| |
| /* Implement TARGET_LEGITIMATE_ADDRESS_P. */ |
| |
| static bool |
| loongarch_legitimate_address_p (machine_mode mode, rtx x, bool strict_p) |
| { |
| struct loongarch_address_info addr; |
| |
| return loongarch_classify_address (&addr, x, mode, strict_p); |
| } |
| |
| /* Return true if ADDR matches the pattern for the indexed address |
| instruction. */ |
| |
| static bool |
| loongarch_index_address_p (rtx addr, machine_mode mode ATTRIBUTE_UNUSED) |
| { |
| if (GET_CODE (addr) != PLUS |
| || !REG_P (XEXP (addr, 0)) |
| || !REG_P (XEXP (addr, 1))) |
| return false; |
| return true; |
| } |
| |
| /* Return the number of instructions needed to load or store a value |
| of mode MODE at address X. Return 0 if X isn't valid for MODE. |
| Assume that multiword moves may need to be split into word moves |
| if MIGHT_SPLIT_P, otherwise assume that a single load or store is |
| enough. */ |
| |
| int |
| loongarch_address_insns (rtx x, machine_mode mode, bool might_split_p) |
| { |
| struct loongarch_address_info addr; |
| int factor; |
| |
| if (!loongarch_classify_address (&addr, x, mode, false)) |
| return 0; |
| |
| /* BLKmode is used for single unaligned loads and stores and should |
| not count as a multiword mode. (GET_MODE_SIZE (BLKmode) is pretty |
| meaningless, so we have to single it out as a special case one way |
| or the other.) */ |
| if (mode != BLKmode && might_split_p) |
| factor = (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| else |
| factor = 1; |
| |
| if (loongarch_classify_address (&addr, x, mode, false)) |
| switch (addr.type) |
| { |
| case ADDRESS_REG: |
| return factor; |
| |
| case ADDRESS_REG_REG: |
| return factor; |
| |
| case ADDRESS_CONST_INT: |
| return factor; |
| |
| case ADDRESS_SYMBOLIC: |
| return factor * loongarch_symbol_insns (addr.symbol_type, mode); |
| } |
| return 0; |
| } |
| |
| /* Return true if X fits within an unsigned field of BITS bits that is |
| shifted left SHIFT bits before being used. */ |
| |
| bool |
| loongarch_unsigned_immediate_p (unsigned HOST_WIDE_INT x, int bits, |
| int shift = 0) |
| { |
| return (x & ((1 << shift) - 1)) == 0 && x < ((unsigned) 1 << (shift + bits)); |
| } |
| |
| /* Return true if X fits within a signed field of BITS bits that is |
| shifted left SHIFT bits before being used. */ |
| |
| bool |
| loongarch_signed_immediate_p (unsigned HOST_WIDE_INT x, int bits, |
| int shift = 0) |
| { |
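| /* Adding this bias maps the signed range onto the corresponding |
| unsigned range, so the unsigned test below can be reused. */ |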
| x += 1 << (bits + shift - 1); |
| return loongarch_unsigned_immediate_p (x, bits, shift); |
| } |
| |
| /* Return true if X is a legitimate address with a 12-bit offset. |
| MODE is the mode of the value being accessed. */ |
| |
| bool |
| loongarch_12bit_offset_address_p (rtx x, machine_mode mode) |
| { |
| struct loongarch_address_info addr; |
| |
| return (loongarch_classify_address (&addr, x, mode, false) |
| && addr.type == ADDRESS_REG |
| && CONST_INT_P (addr.offset) |
| && LARCH_U12BIT_OFFSET_P (INTVAL (addr.offset))); |
| } |
| |
| /* Return true if X is a legitimate address with a 14-bit offset shifted 2. |
| MODE is the mode of the value being accessed. */ |
| |
| bool |
| loongarch_14bit_shifted_offset_address_p (rtx x, machine_mode mode) |
| { |
| struct loongarch_address_info addr; |
| |
| return (loongarch_classify_address (&addr, x, mode, false) |
| && addr.type == ADDRESS_REG |
| && CONST_INT_P (addr.offset) |
| && LARCH_16BIT_OFFSET_P (INTVAL (addr.offset)) |
| && LARCH_SHIFT_2_OFFSET_P (INTVAL (addr.offset))); |
| } |
| |
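| /* Return true if X is a legitimate base + index (register plus |
| register) address for mode MODE. */ |
| |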
| bool |
| loongarch_base_index_address_p (rtx x, machine_mode mode) |
| { |
| struct loongarch_address_info addr; |
| |
| return (loongarch_classify_address (&addr, x, mode, false) |
| && addr.type == ADDRESS_REG_REG |
| && REG_P (addr.offset)); |
| } |
| |
| /* Return the number of instructions needed to load constant X. |
| Return 0 if X isn't a valid constant. */ |
| |
| int |
| loongarch_const_insns (rtx x) |
| { |
| enum loongarch_symbol_type symbol_type; |
| rtx offset; |
| |
| switch (GET_CODE (x)) |
| { |
| case CONST_INT: |
| return loongarch_integer_cost (INTVAL (x)); |
| |
| case CONST_VECTOR: |
| /* Fall through. */ |
| case CONST_DOUBLE: |
| return x == CONST0_RTX (GET_MODE (x)) ? 1 : 0; |
| |
| case CONST: |
| /* See if we can refer to X directly. */ |
| if (loongarch_symbolic_constant_p (x, &symbol_type)) |
| return loongarch_symbol_insns (symbol_type, MAX_MACHINE_MODE); |
| |
| /* Otherwise try splitting the constant into a base and offset. |
| If the offset is a 12-bit value, we can load the base address |
| into a register and then use ADDI.{W/D} to add in the offset. |
| If the offset is larger, we can load the base and offset |
| into separate registers and add them together with ADD.{W/D}. |
| However, the latter is only possible before reload; during |
| and after reload, we must have the option of forcing the |
| constant into the pool instead. */ |
| split_const (x, &x, &offset); |
| if (offset != 0) |
| { |
| int n = loongarch_const_insns (x); |
| if (n != 0) |
| { |
| if (IMM12_INT (offset)) |
| return n + 1; |
| else if (!targetm.cannot_force_const_mem (GET_MODE (x), x)) |
| return n + 1 + loongarch_integer_cost (INTVAL (offset)); |
| } |
| } |
| return 0; |
| |
| case SYMBOL_REF: |
| case LABEL_REF: |
| return loongarch_symbol_insns ( |
| loongarch_classify_symbol (x), MAX_MACHINE_MODE); |
| |
| default: |
| return 0; |
| } |
| } |
| |
| /* X is a doubleword constant that can be handled by splitting it into |
| two words and loading each word separately. Return the number of |
| instructions required to do this. */ |
| |
| int |
| loongarch_split_const_insns (rtx x) |
| { |
| unsigned int low, high; |
| |
| low = loongarch_const_insns (loongarch_subword (x, false)); |
| high = loongarch_const_insns (loongarch_subword (x, true)); |
| gcc_assert (low > 0 && high > 0); |
| return low + high; |
| } |
| |
| /* Return the number of instructions needed to implement INSN, |
| given that it loads from or stores to MEM. */ |
| |
| int |
| loongarch_load_store_insns (rtx mem, rtx_insn *insn) |
| { |
| machine_mode mode; |
| bool might_split_p; |
| rtx set; |
| |
| gcc_assert (MEM_P (mem)); |
| mode = GET_MODE (mem); |
| |
| /* Try to prove that INSN does not need to be split. */ |
| might_split_p = GET_MODE_SIZE (mode) > UNITS_PER_WORD; |
| if (might_split_p) |
| { |
| set = single_set (insn); |
| if (set |
| && !loongarch_split_move_insn_p (SET_DEST (set), SET_SRC (set))) |
| might_split_p = false; |
| } |
| |
| return loongarch_address_insns (XEXP (mem, 0), mode, might_split_p); |
| } |
| |
| /* Return the number of instructions needed for an integer division. */ |
| |
| int |
| loongarch_idiv_insns (machine_mode mode ATTRIBUTE_UNUSED) |
| { |
| int count; |
| |
| count = 1; |
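| /* Checking for division by zero adds two extra instructions. */ |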
| if (TARGET_CHECK_ZERO_DIV) |
| count += 2; |
| |
| return count; |
| } |
| |
| /* Emit an instruction of the form (set TARGET (CODE OP0 OP1)). */ |
| |
| void |
| loongarch_emit_binary (enum rtx_code code, rtx target, rtx op0, rtx op1) |
| { |
| emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (code, GET_MODE (target), |
| op0, op1))); |
| } |
| |
| /* Compute (CODE OP0 OP1) and store the result in a new register |
| of mode MODE. Return that new register. */ |
| |
| static rtx |
| loongarch_force_binary (machine_mode mode, enum rtx_code code, rtx op0, |
| rtx op1) |
| { |
| rtx reg; |
| |
| reg = gen_reg_rtx (mode); |
| loongarch_emit_binary (code, reg, op0, op1); |
| return reg; |
| } |
| |
| /* Copy VALUE to a register and return that register. If new pseudos |
| are allowed, copy it into a new register, otherwise use DEST. */ |
| |
| static rtx |
| loongarch_force_temporary (rtx dest, rtx value) |
| { |
| if (can_create_pseudo_p ()) |
| return force_reg (Pmode, value); |
| else |
| { |
| loongarch_emit_move (dest, value); |
| return dest; |
| } |
| } |
| |
| /* Wrap symbol or label BASE in an UNSPEC address of type SYMBOL_TYPE, |
| then add CONST_INT OFFSET to the result. */ |
| |
| static rtx |
| loongarch_unspec_address_offset (rtx base, rtx offset, |
| enum loongarch_symbol_type symbol_type) |
| { |
| base = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, base), |
| UNSPEC_ADDRESS_FIRST + symbol_type); |
| if (offset != const0_rtx) |
| base = gen_rtx_PLUS (Pmode, base, offset); |
| return gen_rtx_CONST (Pmode, base); |
| } |
| |
| /* Return an UNSPEC address with underlying address ADDRESS and symbol |
| type SYMBOL_TYPE. */ |
| |
| rtx |
| loongarch_unspec_address (rtx address, enum loongarch_symbol_type symbol_type) |
| { |
| rtx base, offset; |
| |
| split_const (address, &base, &offset); |
| return loongarch_unspec_address_offset (base, offset, symbol_type); |
| } |
| |
| /* If OP is an UNSPEC address, return the address to which it refers, |
| otherwise return OP itself. */ |
| |
| rtx |
| loongarch_strip_unspec_address (rtx op) |
| { |
| rtx base, offset; |
| |
| split_const (op, &base, &offset); |
| if (UNSPEC_ADDRESS_P (base)) |
| op = plus_constant (Pmode, UNSPEC_ADDRESS (base), INTVAL (offset)); |
| return op; |
| } |
| |
| /* Return a legitimate address for REG + OFFSET. TEMP is as for |
| loongarch_force_temporary; it is only needed when OFFSET is not an |
| IMM12_OPERAND. */ |
| |
| static rtx |
| loongarch_add_offset (rtx temp, rtx reg, HOST_WIDE_INT offset) |
| { |
| if (!IMM12_OPERAND (offset)) |
| { |
| rtx high; |
| |
| /* Leave OFFSET as a 12-bit offset and put the excess in HIGH. |
| The addition inside the macro CONST_HIGH_PART may cause an |
| overflow, so we need to force a sign-extension check. */ |
| high = gen_int_mode (CONST_HIGH_PART (offset), Pmode); |
| offset = CONST_LOW_PART (offset); |
| high = loongarch_force_temporary (temp, high); |
| reg = loongarch_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg)); |
| } |
| return plus_constant (Pmode, reg, offset); |
| } |
| |
| /* The __tls_get_addr symbol. */ |
| static GTY (()) rtx loongarch_tls_symbol; |
| |
| /* Load an entry from the GOT for a TLS GD access. */ |
| |
| static rtx |
| loongarch_got_load_tls_gd (rtx dest, rtx sym) |
| { |
| return gen_got_load_tls_gd (Pmode, dest, sym); |
| } |
| |
| /* Load an entry from the GOT for a TLS LD access. */ |
| |
| static rtx |
| loongarch_got_load_tls_ld (rtx dest, rtx sym) |
| { |
| return gen_got_load_tls_ld (Pmode, dest, sym); |
| } |
| |
| /* Load an entry from the GOT for a TLS IE access. */ |
| |
| static rtx |
| loongarch_got_load_tls_ie (rtx dest, rtx sym) |
| { |
| return gen_got_load_tls_ie (Pmode, dest, sym); |
| } |
| |
| /* Add in the thread pointer for a TLS LE access. */ |
| |
| static rtx |
| loongarch_got_load_tls_le (rtx dest, rtx sym) |
| { |
| return gen_got_load_tls_le (Pmode, dest, sym); |
| } |
| |
| /* Return an instruction sequence that calls __tls_get_addr. SYM is |
| the TLS symbol we are referencing and TYPE is the symbol type to use |
| (either global dynamic or local dynamic). V0 is an RTX for the |
| return value location. */ |
| |
| static rtx_insn * |
| loongarch_call_tls_get_addr (rtx sym, enum loongarch_symbol_type type, rtx v0) |
| { |
| rtx loc, a0; |
| rtx_insn *insn; |
| |
| a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST); |
| |
| if (!loongarch_tls_symbol) |
| loongarch_tls_symbol = init_one_libfunc ("__tls_get_addr"); |
| |
| loc = loongarch_unspec_address (sym, type); |
| |
| start_sequence (); |
| |
| if (type == SYMBOL_TLSLDM) |
| emit_insn (loongarch_got_load_tls_ld (a0, loc)); |
| else if (type == SYMBOL_TLSGD) |
| emit_insn (loongarch_got_load_tls_gd (a0, loc)); |
| else |
| gcc_unreachable (); |
| |
| insn = emit_call_insn (gen_call_value_internal (v0, loongarch_tls_symbol, |
| const0_rtx)); |
| RTL_CONST_CALL_P (insn) = 1; |
| use_reg (&CALL_INSN_FUNCTION_USAGE (insn), a0); |
| insn = get_insns (); |
| |
| end_sequence (); |
| |
| return insn; |
| } |
| |
| /* Generate the code to access LOC, a thread-local SYMBOL_REF, and return |
| its address. The return value will be both a valid address and a valid |
| SET_SRC (either a REG or a LO_SUM). */ |
| |
| static rtx |
| loongarch_legitimize_tls_address (rtx loc) |
| { |
| rtx dest, tp, tmp; |
| enum tls_model model = SYMBOL_REF_TLS_MODEL (loc); |
| rtx_insn *insn; |
| |
| switch (model) |
| { |
| case TLS_MODEL_LOCAL_DYNAMIC: |
| tmp = gen_rtx_REG (Pmode, GP_RETURN); |
| dest = gen_reg_rtx (Pmode); |
| insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSLDM, tmp); |
| emit_libcall_block (insn, dest, tmp, loc); |
| break; |
| |
| case TLS_MODEL_GLOBAL_DYNAMIC: |
| tmp = gen_rtx_REG (Pmode, GP_RETURN); |
| dest = gen_reg_rtx (Pmode); |
| insn = loongarch_call_tls_get_addr (loc, SYMBOL_TLSGD, tmp); |
| emit_libcall_block (insn, dest, tmp, loc); |
| break; |
| |
| case TLS_MODEL_INITIAL_EXEC: |
| /* la.tls.ie; tp-relative add */ |
| tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); |
| tmp = gen_reg_rtx (Pmode); |
| emit_insn (loongarch_got_load_tls_ie (tmp, loc)); |
| dest = gen_reg_rtx (Pmode); |
| emit_insn (gen_add3_insn (dest, tmp, tp)); |
| break; |
| |
| case TLS_MODEL_LOCAL_EXEC: |
| /* la.tls.le; tp-relative add */ |
| tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM); |
| tmp = gen_reg_rtx (Pmode); |
| emit_insn (loongarch_got_load_tls_le (tmp, loc)); |
| dest = gen_reg_rtx (Pmode); |
| emit_insn (gen_add3_insn (dest, tmp, tp)); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| return dest; |
| } |
| |
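| /* If ADDR is not a valid target for a call instruction, copy it |
| into a register and return that register; otherwise return ADDR |
| unchanged. */ |
| |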
| rtx |
| loongarch_legitimize_call_address (rtx addr) |
| { |
| if (!call_insn_operand (addr, VOIDmode)) |
| { |
| rtx reg = gen_reg_rtx (Pmode); |
| loongarch_emit_move (reg, addr); |
| return reg; |
| } |
| return addr; |
| } |
| |
| /* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR |
| and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */ |
| |
| static void |
| loongarch_split_plus (rtx x, rtx *base_ptr, HOST_WIDE_INT *offset_ptr) |
| { |
| if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))) |
| { |
| *base_ptr = XEXP (x, 0); |
| *offset_ptr = INTVAL (XEXP (x, 1)); |
| } |
| else |
| { |
| *base_ptr = x; |
| *offset_ptr = 0; |
| } |
| } |
| |
| /* If X is not a valid address for mode MODE, force it into a register. */ |
| |
| static rtx |
| loongarch_force_address (rtx x, machine_mode mode) |
| { |
| if (!loongarch_legitimate_address_p (mode, x, false)) |
| x = force_reg (Pmode, x); |
| return x; |
| } |
| |
| /* This function is used to implement LEGITIMIZE_ADDRESS. If X can |
| be legitimized in a way that the generic machinery might not expect, |
| return a new address, otherwise return NULL. MODE is the mode of |
| the memory being accessed. */ |
| |
| static rtx |
| loongarch_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, |
| machine_mode mode) |
| { |
| rtx base, addr; |
| HOST_WIDE_INT offset; |
| |
| if (loongarch_tls_symbol_p (x)) |
| return loongarch_legitimize_tls_address (x); |
| |
| /* Handle BASE + OFFSET using loongarch_add_offset. */ |
| loongarch_split_plus (x, &base, &offset); |
| if (offset != 0) |
| { |
| if (!loongarch_valid_base_register_p (base, mode, false)) |
| base = copy_to_mode_reg (Pmode, base); |
| addr = loongarch_add_offset (NULL, base, offset); |
| return loongarch_force_address (addr, mode); |
| } |
| |
| return x; |
| } |
| |
| /* Load VALUE into DEST. TEMP is as for loongarch_force_temporary. */ |
| |
| void |
| loongarch_move_integer (rtx temp, rtx dest, unsigned HOST_WIDE_INT value) |
| { |
| struct loongarch_integer_op codes[LARCH_MAX_INTEGER_OPS]; |
| machine_mode mode; |
| unsigned int i, num_ops; |
| rtx x; |
| |
| mode = GET_MODE (dest); |
| num_ops = loongarch_build_integer (codes, value); |
| |
| /* Apply each binary operation to X. Invariant: X is a legitimate |
| source operand for a SET pattern. */ |
| x = GEN_INT (codes[0].value); |
| for (i = 1; i < num_ops; i++) |
| { |
| if (!can_create_pseudo_p ()) |
| { |
| emit_insn (gen_rtx_SET (temp, x)); |
| x = temp; |
| } |
| else |
| x = force_reg (mode, x); |
| |
| switch (codes[i].method) |
| { |
| case METHOD_NORMAL: |
| x = gen_rtx_fmt_ee (codes[i].code, mode, x, |
| GEN_INT (codes[i].value)); |
| break; |
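| /* Keep the low 32 bits of X and OR in the remaining high part of |
| the value (a lu32i.d operation). */ |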
| case METHOD_LU32I: |
| emit_insn ( |
| gen_rtx_SET (x, |
| gen_rtx_IOR (DImode, |
| gen_rtx_ZERO_EXTEND ( |
| DImode, gen_rtx_SUBREG (SImode, x, 0)), |
| GEN_INT (codes[i].value)))); |
| break; |
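| /* Keep the low 52 bits of X (the 0xfffffffffffff mask) and set |
| the top 12 bits from the value (a lu52i.d operation). */ |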
| case METHOD_LU52I: |
| emit_insn (gen_lu52i_d (x, x, GEN_INT (0xfffffffffffff), |
| GEN_INT (codes[i].value))); |
| break; |
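| /* Clear bits 32..51 of X by inserting the low 20 bits of the |
| always-zero register $r0 at bit position 32. */ |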
| case METHOD_INSV: |
| emit_insn ( |
| gen_rtx_SET (gen_rtx_ZERO_EXTRACT (DImode, x, GEN_INT (20), |
| GEN_INT (32)), |
| gen_rtx_REG (DImode, 0))); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| emit_insn (gen_rtx_SET (dest, x)); |
| } |
| |
| /* Subroutine of loongarch_legitimize_move. Move constant SRC into register |
| DEST given that SRC satisfies immediate_operand but doesn't satisfy |
| move_operand. */ |
| |
| static void |
| loongarch_legitimize_const_move (machine_mode mode, rtx dest, rtx src) |
| { |
| rtx base, offset; |
| |
| /* Split moves of big integers into smaller pieces. */ |
| if (splittable_const_int_operand (src, mode)) |
| { |
| loongarch_move_integer (dest, dest, INTVAL (src)); |
| return; |
| } |
| |
| /* Generate the appropriate access sequences for TLS symbols. */ |
| if (loongarch_tls_symbol_p (src)) |
| { |
| loongarch_emit_move (dest, loongarch_legitimize_tls_address (src)); |
| return; |
| } |
| |
| /* If we have (const (plus symbol offset)), and that expression cannot |
| be forced into memory, load the symbol first and add in the offset. |
| Prefer to do this even if the constant _can_ be forced into memory, |
| as it usually produces better code. */ |
| split_const (src, &base, &offset); |
| if (offset != const0_rtx |
| && (targetm.cannot_force_const_mem (mode, src) |
| || (can_create_pseudo_p ()))) |
| { |
| base = loongarch_force_temporary (dest, base); |
| loongarch_emit_move (dest, |
| loongarch_add_offset (NULL, base, INTVAL (offset))); |
| return; |
| } |
| |
| src = force_const_mem (mode, src); |
| |
| loongarch_emit_move (dest, src); |
| } |
| |
| /* If (set DEST SRC) is not a valid move instruction, emit an equivalent |
| sequence that is valid. */ |
| |
| bool |
| loongarch_legitimize_move (machine_mode mode, rtx dest, rtx src) |
| { |
| if (!register_operand (dest, mode) && !reg_or_0_operand (src, mode)) |
| { |
| loongarch_emit_move (dest, force_reg (mode, src)); |
| return true; |
| } |
| |
| /* Both src and dest are non-registers; one special case is supported, |
| where the source is (const_int 0), since the store can then source |
| the zero register directly. */ |
| if (!register_operand (dest, mode) && !register_operand (src, mode) |
| && !const_0_operand (src, mode)) |
| { |
| loongarch_emit_move (dest, force_reg (mode, src)); |
| return true; |
| } |
| |
| /* We need to deal with constants that would be legitimate |
| immediate_operands but aren't legitimate move_operands. */ |
| if (CONSTANT_P (src) && !move_operand (src, mode)) |
| { |
| loongarch_legitimize_const_move (mode, dest, src); |
| set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (src)); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Return true if X refers to small data symbols directly. */ |
| |
| static int |
| loongarch_small_data_pattern_1 (rtx x) |
| { |
| subrtx_var_iterator::array_type array; |
| FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) |
| { |
| rtx x = *iter; |
| |
| /* We make no particular guarantee about which symbolic constants are |
| acceptable as asm operands versus which must be forced into a GPR. */ |
| if (GET_CODE (x) == ASM_OPERANDS) |
| iter.skip_subrtxes (); |
| else if (MEM_P (x)) |
| { |
| if (loongarch_small_data_pattern_1 (XEXP (x, 0))) |
| return true; |
| iter.skip_subrtxes (); |
| } |
| } |
| return false; |
| } |
| |
| /* Return true if OP refers to small data symbols directly. */ |
| |
| bool |
| loongarch_small_data_pattern_p (rtx op) |
| { |
| return loongarch_small_data_pattern_1 (op); |
| } |
| |
| /* Rewrite *LOC so that it refers to small data using explicit |
| relocations. */ |
| |
| static void |
| loongarch_rewrite_small_data_1 (rtx *loc) |
| { |
| subrtx_ptr_iterator::array_type array; |
| FOR_EACH_SUBRTX_PTR (iter, array, loc, ALL) |
| { |
| rtx *loc = *iter; |
| if (MEM_P (*loc)) |
| { |
| loongarch_rewrite_small_data_1 (&XEXP (*loc, 0)); |
| iter.skip_subrtxes (); |
| } |
| } |
| } |
| |
| /* Rewrite instruction pattern PATTERN so that it refers to small data |
| using explicit relocations. */ |
| |
| rtx |
| loongarch_rewrite_small_data (rtx pattern) |
| { |
| pattern = copy_insn (pattern); |
| loongarch_rewrite_small_data_1 (&pattern); |
| return pattern; |
| } |
| |
| /* The cost of loading values from the constant pool. It should be |
| larger than the cost of any constant we want to synthesize inline. */ |
| #define CONSTANT_POOL_COST COSTS_N_INSNS (8) |
| |
| /* Return true if there is an instruction that implements CODE |
| and if that instruction accepts X as an immediate operand. */ |
| |
| static int |
| loongarch_immediate_operand_p (int code, HOST_WIDE_INT x) |
| { |
| switch (code) |
| { |
| case ASHIFT: |
| case ASHIFTRT: |
| case LSHIFTRT: |
| /* All shift counts are truncated to a valid constant. */ |
| return true; |
| |
| case ROTATE: |
| case ROTATERT: |
| return true; |
| |
| case AND: |
| case IOR: |
| case XOR: |
| /* These instructions take 12-bit unsigned immediates. */ |
| return IMM12_OPERAND_UNSIGNED (x); |
| |
| case PLUS: |
| case LT: |
| case LTU: |
| /* These instructions take 12-bit signed immediates. */ |
| return IMM12_OPERAND (x); |
| |
| case EQ: |
| case NE: |
| case GT: |
| case GTU: |
| /* The "immediate" forms of these instructions are really |
| implemented as comparisons with register 0. */ |
| return x == 0; |
| |
| case GE: |
| case GEU: |
| /* Likewise, meaning that the only valid immediate operand is 1. */ |
| return x == 1; |
| |
| case LE: |
| /* We add 1 to the immediate and use SLT. */ |
| return IMM12_OPERAND (x + 1); |
| |
| case LEU: |
| /* Likewise SLTU, but reject the always-true case. */ |
| return IMM12_OPERAND (x + 1) && x + 1 != 0; |
| |
| case SIGN_EXTRACT: |
| case ZERO_EXTRACT: |
| /* The bit position and size are immediate operands. */ |
| return 1; |
| |
| default: |
| /* By default assume that $0 can be used for 0. */ |
| return x == 0; |
| } |
| } |
| |
| /* Return the cost of binary operation X, given that the instruction |
| sequence for a word-sized or smaller operation has cost SINGLE_COST |
| and that the sequence of a double-word operation has cost DOUBLE_COST. |
| If SPEED is true, optimize for speed otherwise optimize for size. */ |
| |
| static int |
| loongarch_binary_cost (rtx x, int single_cost, int double_cost, bool speed) |
| { |
| int cost; |
| |
| if (GET_MODE_SIZE (GET_MODE (x)) == UNITS_PER_WORD * 2) |
| cost = double_cost; |
| else |
| cost = single_cost; |
| return (cost |
| + set_src_cost (XEXP (x, 0), GET_MODE (x), speed) |
| + rtx_cost (XEXP (x, 1), GET_MODE (x), GET_CODE (x), 1, speed)); |
| } |
| |
| /* Return the cost of floating-point multiplications of mode MODE. */ |
| |
| static int |
| loongarch_fp_mult_cost (machine_mode mode) |
| { |
| return mode == DFmode ? loongarch_cost->fp_mult_df |
| : loongarch_cost->fp_mult_sf; |
| } |
| |
| /* Return the cost of floating-point divisions of mode MODE. */ |
| |
| static int |
| loongarch_fp_div_cost (machine_mode mode) |
| { |
| return mode == DFmode ? loongarch_cost->fp_div_df |
| : loongarch_cost->fp_div_sf; |
| } |
| |
| /* Return the cost of sign-extending OP, not including the |
| cost of OP itself. */ |
| |
| static int |
| loongarch_sign_extend_cost (rtx op) |
| { |
| if (MEM_P (op)) |
| /* Extended loads are as cheap as unextended ones. */ |
| return 0; |
| |
| return COSTS_N_INSNS (1); |
| } |
| |
| /* Return the cost of zero-extending OP, not including the |
| cost of OP itself. */ |
| |
| static int |
| loongarch_zero_extend_cost (rtx op) |
| { |
| if (MEM_P (op)) |
| /* Extended loads are as cheap as unextended ones. */ |
| return 0; |
| |
| /* We can use ANDI. */ |
| return COSTS_N_INSNS (1); |
| } |
| |
| /* Return the cost of moving between two registers of mode MODE, |
| assuming that the move will be in pieces of at most UNITS bytes. */ |
| |
| static int |
| loongarch_set_reg_reg_piece_cost (machine_mode mode, unsigned int units) |
| { |
| return COSTS_N_INSNS ((GET_MODE_SIZE (mode) + units - 1) / units); |
| } |
| |
| /* Return the cost of moving between two registers of mode MODE. */ |
| |
| static int |
| loongarch_set_reg_reg_cost (machine_mode mode) |
| { |
| switch (GET_MODE_CLASS (mode)) |
| { |
| case MODE_CC: |
| return loongarch_set_reg_reg_piece_cost (mode, GET_MODE_SIZE (CCmode)); |
| |
| case MODE_FLOAT: |
| case MODE_COMPLEX_FLOAT: |
| case MODE_VECTOR_FLOAT: |
| if (TARGET_HARD_FLOAT) |
| return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_HWFPVALUE); |
| /* Fall through. */ |
| |
| default: |
| return loongarch_set_reg_reg_piece_cost (mode, UNITS_PER_WORD); |
| } |
| } |
| |
| /* Implement TARGET_RTX_COSTS. */ |
| |
| static bool |
| loongarch_rtx_costs (rtx x, machine_mode mode, int outer_code, |
| int opno ATTRIBUTE_UNUSED, int *total, bool speed) |
| { |
| int code = GET_CODE (x); |
| bool float_mode_p = FLOAT_MODE_P (mode); |
| int cost; |
| rtx addr; |
| |
| if (outer_code == COMPARE) |
| { |
| gcc_assert (CONSTANT_P (x)); |
| *total = 0; |
| return true; |
| } |
| |
| switch (code) |
| { |
| case CONST_INT: |
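| /* ANDing a 64-bit value with 0xffffffff is a zero-extension that |
| needs no separate constant load, so the mask is free. */ |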
| if (TARGET_64BIT && outer_code == AND && UINTVAL (x) == 0xffffffff) |
| { |
| *total = 0; |
| return true; |
| } |
| |
| /* When not optimizing for size, we care more about the cost |
| of hot code, and hot code is often in a loop. If a constant |
| operand needs to be forced into a register, we will often be |
| able to hoist the constant load out of the loop, so the load |
| should not contribute to the cost. */ |
| if (speed || loongarch_immediate_operand_p (outer_code, INTVAL (x))) |
| { |
| *total = 0; |
| return true; |
| } |
| /* Fall through. */ |
| |
| case CONST: |
| case SYMBOL_REF: |
| case LABEL_REF: |
| case CONST_DOUBLE: |
| cost = loongarch_const_insns (x); |
| if (cost > 0) |
| { |
| if (cost == 1 && outer_code == SET |
| && !(float_mode_p && TARGET_HARD_FLOAT)) |
| cost = 0; |
| else if ((outer_code == SET || GET_MODE (x) == VOIDmode)) |
| cost = 1; |
| *total = COSTS_N_INSNS (cost); |
| return true; |
| } |
| /* The value will need to be fetched from the constant pool. */ |
| *total = CONSTANT_POOL_COST; |
| return true; |
| |
| case MEM: |
| /* If the address is legitimate, return the number of |
| instructions it needs. */ |
| addr = XEXP (x, 0); |
| /* Check for a scaled indexed address. */ |
| if (loongarch_index_address_p (addr, mode)) |
| { |
| *total = COSTS_N_INSNS (2); |
| return true; |
| } |
| cost = loongarch_address_insns (addr, mode, true); |
| if (cost > 0) |
| { |
| *total = COSTS_N_INSNS (cost + 1); |
| return true; |
| } |
| /* Otherwise use the default handling. */ |
| return false; |
| |
| case FFS: |
| *total = COSTS_N_INSNS (6); |
| return false; |
| |
| case NOT: |
| *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1); |
| return false; |
| |
| case AND: |
| /* Check for a *clear_upper32 pattern and treat it like a zero |
| extension. See the pattern's comment for details. */ |
| if (TARGET_64BIT && mode == DImode && CONST_INT_P (XEXP (x, 1)) |
| && UINTVAL (XEXP (x, 1)) == 0xffffffff) |
| { |
| *total = (loongarch_zero_extend_cost (XEXP (x, 0)) |
| + set_src_cost (XEXP (x, 0), mode, speed)); |
| return true; |
| } |
| /* (AND (NOT op0) (NOT op1)) is a NOR operation that can be done in |
| a single instruction. */ |
| if (GET_CODE (XEXP (x, 0)) == NOT && GET_CODE (XEXP (x, 1)) == NOT) |
| { |
| cost = GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 2 : 1; |
| *total = (COSTS_N_INSNS (cost) |
| + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) |
| + set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed)); |
| return true; |
| } |
| |
| /* Fall through. */ |
| |
| case IOR: |
| case XOR: |
| /* Double-word operations use two single-word operations. */ |
| *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (2), |
| speed); |
| return true; |
| |
| case ASHIFT: |
| case ASHIFTRT: |
| case LSHIFTRT: |
| case ROTATE: |
| case ROTATERT: |
| if (CONSTANT_P (XEXP (x, 1))) |
| *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), |
| COSTS_N_INSNS (4), speed); |
| else |
| *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), |
| COSTS_N_INSNS (12), speed); |
| return true; |
| |
| case ABS: |
| if (float_mode_p) |
| *total = loongarch_cost->fp_add; |
| else |
| *total = COSTS_N_INSNS (4); |
| return false; |
| |
| case LT: |
| case LTU: |
| case LE: |
| case LEU: |
| case GT: |
| case GTU: |
| case GE: |
| case GEU: |
| case EQ: |
| case NE: |
| case UNORDERED: |
| case LTGT: |
| case UNGE: |
| case UNGT: |
| case UNLE: |
| case UNLT: |
| /* Branch comparisons have VOIDmode, so use the first operand's |
| mode instead. */ |
| mode = GET_MODE (XEXP (x, 0)); |
| if (FLOAT_MODE_P (mode)) |
| { |
| *total = loongarch_cost->fp_add; |
| return false; |
| } |
| *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), |
| speed); |
| return true; |
| |
| case MINUS: |
| case PLUS: |
| if (float_mode_p) |
| { |
| *total = loongarch_cost->fp_add; |
| return false; |
| } |
| |
| /* If it's an add + mult (which is equivalent to a shift left) and |
| its immediate operand satisfies the const_immalsl_operand predicate. */ |
| if ((mode == SImode || (TARGET_64BIT && mode == DImode)) |
| && GET_CODE (XEXP (x, 0)) == MULT) |
| { |
| rtx op2 = XEXP (XEXP (x, 0), 1); |
| if (const_immalsl_operand (op2, mode)) |
| { |
| *total = (COSTS_N_INSNS (1) |
| + set_src_cost (XEXP (XEXP (x, 0), 0), mode, speed) |
| + set_src_cost (XEXP (x, 1), mode, speed)); |
| return true; |
| } |
| } |
| |
| /* Double-word operations require three single-word operations and |
| an SLTU. */ |
| *total = loongarch_binary_cost (x, COSTS_N_INSNS (1), COSTS_N_INSNS (4), |
| speed); |
| return true; |
| |
| case NEG: |
| if (float_mode_p) |
| *total = loongarch_cost->fp_add; |
| else |
| *total = COSTS_N_INSNS (GET_MODE_SIZE (mode) > UNITS_PER_WORD ? 4 : 1); |
| return false; |
| |
| case FMA: |
| *total = loongarch_fp_mult_cost (mode); |
| return false; |
| |
| case MULT: |
| if (float_mode_p) |
| *total = loongarch_fp_mult_cost (mode); |
| else if (mode == DImode && !TARGET_64BIT) |
| *total = (speed |
| ? loongarch_cost->int_mult_si * 3 + 6 |
| : COSTS_N_INSNS (7)); |
| else if (!speed) |
| *total = COSTS_N_INSNS (1) + 1; |
| else if (mode == DImode) |
| *total = loongarch_cost->int_mult_di; |
| else |
| *total = loongarch_cost->int_mult_si; |
| return false; |
| |
| case DIV: |
| /* Check for a reciprocal. */ |
| if (float_mode_p |
| && flag_unsafe_math_optimizations |
| && XEXP (x, 0) == CONST1_RTX (mode)) |
| { |
| if (outer_code == SQRT || GET_CODE (XEXP (x, 1)) == SQRT) |
| /* An rsqrt<mode>a or rsqrt<mode>b pattern. Count the |
| division as being free. */ |
| *total = set_src_cost (XEXP (x, 1), mode, speed); |
| else |
| *total = (loongarch_fp_div_cost (mode) |
| + set_src_cost (XEXP (x, 1), mode, speed)); |
| return true; |
| } |
| /* Fall through. */ |
| |
| case SQRT: |
| case MOD: |
| if (float_mode_p) |
| { |
| *total = loongarch_fp_div_cost (mode); |
| return false; |
| } |
| /* Fall through. */ |
| |
| case UDIV: |
| case UMOD: |
| if (!speed) |
| { |
| *total = COSTS_N_INSNS (loongarch_idiv_insns (mode)); |
| } |
| else if (mode == DImode) |
| *total = loongarch_cost->int_div_di; |
| else |
| *total = loongarch_cost->int_div_si; |
| return false; |
| |
| case SIGN_EXTEND: |
| *total = loongarch_sign_extend_cost (XEXP (x, 0)); |
| return false; |
| |
| case ZERO_EXTEND: |
| *total = loongarch_zero_extend_cost (XEXP (x, 0)); |
| return false; |
| case TRUNCATE: |
| /* Costings for highpart multiplies. Matching patterns of the form: |
| |
| (lshiftrt:DI (mult:DI (sign_extend:DI (...)) |
| (sign_extend:DI (...))) |
| (const_int 32)) |
| */ |
| if ((GET_CODE (XEXP (x, 0)) == ASHIFTRT |
| || GET_CODE (XEXP (x, 0)) == LSHIFTRT) |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
| && ((INTVAL (XEXP (XEXP (x, 0), 1)) == 32 |
| && GET_MODE (XEXP (x, 0)) == DImode) |
| || (TARGET_64BIT |
| && INTVAL (XEXP (XEXP (x, 0), 1)) == 64 |
| && GET_MODE (XEXP (x, 0)) == TImode)) |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
| && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND |
| && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND) |
| || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND |
| && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) |
| == ZERO_EXTEND)))) |
| { |
| if (!speed) |
| *total = COSTS_N_INSNS (1) + 1; |
| else if (mode == DImode) |
| *total = loongarch_cost->int_mult_di; |
| else |
| *total = loongarch_cost->int_mult_si; |
| |
| /* Sign extension is free, while zero extension has a cost for DImode |
| operands on a 64-bit core (where DMUL is present). */ |
| for (int i = 0; i < 2; ++i) |
| { |
| rtx op = XEXP (XEXP (XEXP (x, 0), 0), i); |
| if (TARGET_64BIT |
| && GET_CODE (op) == ZERO_EXTEND |
| && GET_MODE (op) == DImode) |
| *total += rtx_cost (op, DImode, MULT, i, speed); |
| else |
| *total += rtx_cost (XEXP (op, 0), VOIDmode, GET_CODE (op), 0, |
| speed); |
| } |
| |
| return true; |
| } |
| return false; |
| |
| case FLOAT: |
| case UNSIGNED_FLOAT: |
| case FIX: |
| case FLOAT_EXTEND: |
| case FLOAT_TRUNCATE: |
| *total = loongarch_cost->fp_add; |
| return false; |
| |
| case SET: |
| if (register_operand (SET_DEST (x), VOIDmode) |
| && reg_or_0_operand (SET_SRC (x), VOIDmode)) |
| { |
| *total = loongarch_set_reg_reg_cost (GET_MODE (SET_DEST (x))); |
| return true; |
| } |
| return false; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* Implement TARGET_ADDRESS_COST. */ |
| |
| static int |
| loongarch_address_cost (rtx addr, machine_mode mode, |
| addr_space_t as ATTRIBUTE_UNUSED, |
| bool speed ATTRIBUTE_UNUSED) |
| { |
| return loongarch_address_insns (addr, mode, false); |
| } |
| |
| /* Return one word of double-word value OP, taking into account the fixed |
| endianness of certain registers. HIGH_P is true to select the high part, |
| false to select the low part. */ |
| |
| rtx |
| loongarch_subword (rtx op, bool high_p) |
| { |
| unsigned int byte; |
| machine_mode mode; |
| |
| byte = high_p ? UNITS_PER_WORD : 0; |
| mode = GET_MODE (op); |
| if (mode == VOIDmode) |
| mode = TARGET_64BIT ? TImode : DImode; |
| |
| if (FP_REG_RTX_P (op)) |
| return gen_rtx_REG (word_mode, REGNO (op) + high_p); |
| |
| if (MEM_P (op)) |
| return loongarch_rewrite_small_data (adjust_address (op, word_mode, byte)); |
| |
| return simplify_gen_subreg (word_mode, op, mode, byte); |
| } |
| |
| /* Return true if a move from SRC to DEST should be split into two. */ |
| |
| bool |
| loongarch_split_move_p (rtx dest, rtx src) |
| { |
| /* FPR-to-FPR moves can be done in a single instruction, if they're |
| allowed at all. */ |
| unsigned int size = GET_MODE_SIZE (GET_MODE (dest)); |
| if (size == 8 && FP_REG_RTX_P (src) && FP_REG_RTX_P (dest)) |
| return false; |
| |
| /* Check for floating-point loads and stores. */ |
| if (size == 8) |
| { |
| if (FP_REG_RTX_P (dest) && MEM_P (src)) |
| return false; |
| if (FP_REG_RTX_P (src) && MEM_P (dest)) |
| return false; |
| } |
| /* Otherwise split all multiword moves. */ |
| return size > UNITS_PER_WORD; |
| } |
| |
| /* Split a move from SRC to DEST, given that loongarch_split_move_p holds. |
| INSN_ is the original move instruction, if known; it is used to try |
| forwarding SRC to a following store. */ |
| |
| void |
| loongarch_split_move (rtx dest, rtx src, rtx insn_) |
| { |
| rtx low_dest; |
| |
| gcc_checking_assert (loongarch_split_move_p (dest, src)); |
| if (FP_REG_RTX_P (dest) || FP_REG_RTX_P (src)) |
| { |
| if (!TARGET_64BIT && GET_MODE (dest) == DImode) |
| emit_insn (gen_move_doubleword_fprdi (dest, src)); |
| else if (!TARGET_64BIT && GET_MODE (dest) == DFmode) |
| emit_insn (gen_move_doubleword_fprdf (dest, src)); |
| else if (TARGET_64BIT && GET_MODE (dest) == TFmode) |
| emit_insn (gen_move_doubleword_fprtf (dest, src)); |
| else |
| gcc_unreachable (); |
| } |
| else |
| { |
| /* The operation can be split into two normal moves. Decide in |
| which order to do them. */ |
| low_dest = loongarch_subword (dest, false); |
| if (REG_P (low_dest) && reg_overlap_mentioned_p (low_dest, src)) |
| { |
| loongarch_emit_move (loongarch_subword (dest, true), |
| loongarch_subword (src, true)); |
| loongarch_emit_move (low_dest, loongarch_subword (src, false)); |
| } |
| else |
| { |
| loongarch_emit_move (low_dest, loongarch_subword (src, false)); |
| loongarch_emit_move (loongarch_subword (dest, true), |
| loongarch_subword (src, true)); |
| } |
| } |
| |
| /* This is a hack. See if the next insn uses DEST and if so, see if we |
| can forward SRC for DEST. This is most useful if the next insn is a |
| simple store. */ |
| rtx_insn *insn = (rtx_insn *) insn_; |
| struct loongarch_address_info addr = {}; |
| if (insn) |
| { |
| rtx_insn *next = next_nonnote_nondebug_insn_bb (insn); |
| if (next) |
| { |
| rtx set = single_set (next); |
| if (set && SET_SRC (set) == dest) |
| { |
| if (MEM_P (src)) |
| { |
| rtx tmp = XEXP (src, 0); |
| loongarch_classify_address (&addr, tmp, GET_MODE (tmp), |
| true); |
| if (addr.reg && !reg_overlap_mentioned_p (dest, addr.reg)) |
| validate_change (next, &SET_SRC (set), src, false); |
| } |
| else |
| validate_change (next, &SET_SRC (set), src, false); |
| } |
| } |
| } |
| } |
| |
| /* Return true if a move from SRC to DEST in an insn should be split. */ |
| |
| bool |
| loongarch_split_move_insn_p (rtx dest, rtx src) |
| { |
| return loongarch_split_move_p (dest, src); |
| } |
| |
| /* Split a move from SRC to DEST in INSN, given that |
| loongarch_split_move_insn_p holds. */ |
| |
| void |
| loongarch_split_move_insn (rtx dest, rtx src, rtx insn) |
| { |
| loongarch_split_move (dest, src, insn); |
| } |
| |
| /* Implement TARGET_CONSTANT_ALIGNMENT. */ |
| |
| static HOST_WIDE_INT |
| loongarch_constant_alignment (const_tree exp, HOST_WIDE_INT align) |
| { |
| if (TREE_CODE (exp) == STRING_CST || TREE_CODE (exp) == CONSTRUCTOR) |
| return MAX (align, BITS_PER_WORD); |
| return align; |
| } |
| |
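| /* Return the assembly template for an indexed (register + register) |
| load of mode MODE from address X if LDR, or for the corresponding |
| indexed store if !LDR. Return NULL if X cannot be accessed with an |
| indexed instruction. */ |
| |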
| const char * |
| loongarch_output_move_index (rtx x, machine_mode mode, bool ldr) |
| { |
| int index = exact_log2 (GET_MODE_SIZE (mode)); |
| if (!IN_RANGE (index, 0, 3)) |
| return NULL; |
| |
| struct loongarch_address_info info; |
| if ((loongarch_classify_address (&info, x, mode |