| /* Subroutines used for code generation on IA-32. |
| Copyright (C) 1988-2021 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #define IN_TARGET_CODE 1 |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "memmodel.h" |
| #include "gimple.h" |
| #include "cfghooks.h" |
| #include "cfgloop.h" |
| #include "df.h" |
| #include "tm_p.h" |
| #include "stringpool.h" |
| #include "expmed.h" |
| #include "optabs.h" |
| #include "regs.h" |
| #include "emit-rtl.h" |
| #include "recog.h" |
| #include "cgraph.h" |
| #include "diagnostic.h" |
| #include "cfgbuild.h" |
| #include "alias.h" |
| #include "fold-const.h" |
| #include "attribs.h" |
| #include "calls.h" |
| #include "stor-layout.h" |
| #include "varasm.h" |
| #include "output.h" |
| #include "insn-attr.h" |
| #include "flags.h" |
| #include "except.h" |
| #include "explow.h" |
| #include "expr.h" |
| #include "cfgrtl.h" |
| #include "common/common-target.h" |
| #include "langhooks.h" |
| #include "reload.h" |
| #include "gimplify.h" |
| #include "dwarf2.h" |
| #include "tm-constrs.h" |
| #include "cselib.h" |
| #include "sched-int.h" |
| #include "opts.h" |
| #include "tree-pass.h" |
| #include "context.h" |
| #include "pass_manager.h" |
| #include "target-globals.h" |
| #include "gimple-iterator.h" |
| #include "gimple-fold.h" |
| #include "tree-vectorizer.h" |
| #include "shrink-wrap.h" |
| #include "builtins.h" |
| #include "rtl-iter.h" |
| #include "tree-iterator.h" |
| #include "dbgcnt.h" |
| #include "case-cfn-macros.h" |
| #include "dojump.h" |
| #include "fold-const-call.h" |
| #include "tree-vrp.h" |
| #include "tree-ssanames.h" |
| #include "selftest.h" |
| #include "selftest-rtl.h" |
| #include "print-rtl.h" |
| #include "intl.h" |
| #include "ifcvt.h" |
| #include "symbol-summary.h" |
| #include "ipa-prop.h" |
| #include "ipa-fnsummary.h" |
| #include "wide-int-bitmask.h" |
| #include "tree-vector-builder.h" |
| #include "debug.h" |
| #include "dwarf2out.h" |
| #include "i386-options.h" |
| #include "i386-builtins.h" |
| #include "i386-expand.h" |
| #include "i386-features.h" |
| #include "function-abi.h" |
| |
| /* This file should be included last. */ |
| #include "target-def.h" |
| |
| static rtx legitimize_dllimport_symbol (rtx, bool); |
| static rtx legitimize_pe_coff_extern_decl (rtx, bool); |
| static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool); |
| static void ix86_emit_restore_reg_using_pop (rtx); |
| |
| |
| #ifndef CHECK_STACK_LIMIT |
| #define CHECK_STACK_LIMIT (-1) |
| #endif |
| |
| /* Return index of given mode in mult and division cost tables. */ |
| #define MODE_INDEX(mode) \ |
| ((mode) == QImode ? 0 \ |
| : (mode) == HImode ? 1 \ |
| : (mode) == SImode ? 2 \ |
| : (mode) == DImode ? 3 \ |
| : 4) |
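| |
| /* For instance, MODE_INDEX (SImode) yields 2, so |
|    ix86_cost->mult_init[MODE_INDEX (SImode)] selects the SImode entry |
|    of the multiply-cost table; any mode other than QI/HI/SI/DImode |
|    maps to the catch-all index 4.  (Illustrative sketch; mult_init is |
|    one of the cost-table fields this index is used with.)  */ |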
| |
| |
| /* Set by -mtune. */ |
| const struct processor_costs *ix86_tune_cost = NULL; |
| |
| /* Set by -mtune or -Os. */ |
| const struct processor_costs *ix86_cost = NULL; |
| |
| /* If the average insn count for a single function invocation is |
|    lower than this constant, emit fast (but longer) prologue and |
|    epilogue code.  */ |
| #define FAST_PROLOGUE_INSN_COUNT 20 |
| |
| /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, |
|    respectively.  */ |
| static const char *const qi_reg_name[] = QI_REGISTER_NAMES; |
| static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; |
| static const char *const hi_reg_name[] = HI_REGISTER_NAMES; |
| |
| /* Array of the smallest class containing reg number REGNO, indexed by |
| REGNO. Used by REGNO_REG_CLASS in i386.h. */ |
| |
| enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = |
| { |
| /* ax, dx, cx, bx */ |
| AREG, DREG, CREG, BREG, |
| /* si, di, bp, sp */ |
| SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, |
| /* FP registers */ |
| FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, |
| FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, |
| /* arg pointer, flags, fpsr, frame */ |
| NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS, |
| /* SSE registers */ |
| SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, |
| SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, |
| /* MMX registers */ |
| MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, |
| MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, |
| /* REX registers */ |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, |
| /* SSE REX registers */ |
| SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, |
| SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, |
| /* AVX-512 SSE registers */ |
| ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, |
| ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, |
| ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, |
| ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, |
| /* Mask registers. */ |
| ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS, |
| MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS |
| }; |
| |
| /* The "default" register map used in 32bit mode. */ |
| |
| int const dbx_register_map[FIRST_PSEUDO_REGISTER] = |
| { |
| /* general regs */ |
| 0, 2, 1, 3, 6, 7, 4, 5, |
| /* fp regs */ |
| 12, 13, 14, 15, 16, 17, 18, 19, |
| /* arg, flags, fpsr, frame */ |
| IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM, |
| IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM, |
| /* SSE */ |
| 21, 22, 23, 24, 25, 26, 27, 28, |
| /* MMX */ |
| 29, 30, 31, 32, 33, 34, 35, 36, |
| /* extended integer registers */ |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| /* extended sse registers */ |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| /* AVX-512 registers 16-23 */ |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| /* AVX-512 registers 24-31 */ |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| /* Mask registers */ |
| 93, 94, 95, 96, 97, 98, 99, 100 |
| }; |
| |
| /* The "default" register map used in 64bit mode. */ |
| |
| int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = |
| { |
| /* general regs */ |
| 0, 1, 2, 3, 4, 5, 6, 7, |
| /* fp regs */ |
| 33, 34, 35, 36, 37, 38, 39, 40, |
| /* arg, flags, fpsr, frame */ |
| IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM, |
| IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM, |
| /* SSE */ |
| 17, 18, 19, 20, 21, 22, 23, 24, |
| /* MMX */ |
| 41, 42, 43, 44, 45, 46, 47, 48, |
| /* extended integer registers */ |
| 8, 9, 10, 11, 12, 13, 14, 15, |
| /* extended SSE registers */ |
| 25, 26, 27, 28, 29, 30, 31, 32, |
| /* AVX-512 registers 16-23 */ |
| 67, 68, 69, 70, 71, 72, 73, 74, |
| /* AVX-512 registers 24-31 */ |
| 75, 76, 77, 78, 79, 80, 81, 82, |
| /* Mask registers */ |
| 118, 119, 120, 121, 122, 123, 124, 125 |
| }; |
| |
| /* Define the register numbers to be used in Dwarf debugging information. |
| The SVR4 reference port C compiler uses the following register numbers |
| in its Dwarf output code: |
| 0 for %eax (gcc regno = 0) |
| 1 for %ecx (gcc regno = 2) |
| 2 for %edx (gcc regno = 1) |
| 3 for %ebx (gcc regno = 3) |
| 4 for %esp (gcc regno = 7) |
| 5 for %ebp (gcc regno = 6) |
| 6 for %esi (gcc regno = 4) |
| 7 for %edi (gcc regno = 5) |
| The following three DWARF register numbers are never generated by |
| the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 |
|    believed these numbers to have these meanings. |
| 8 for %eip (no gcc equivalent) |
| 9 for %eflags (gcc regno = 17) |
| 10 for %trapno (no gcc equivalent) |
| It is not at all clear how we should number the FP stack registers |
| for the x86 architecture. If the version of SDB on x86/svr4 were |
| a bit less brain dead with respect to floating-point then we would |
| have a precedent to follow with respect to DWARF register numbers |
| for x86 FP registers, but the SDB on x86/svr4 was so completely |
| broken with respect to FP registers that it is hardly worth thinking |
| of it as something to strive for compatibility with. |
| The version of x86/svr4 SDB I had does (partially) |
| seem to believe that DWARF register number 11 is associated with |
| the x86 register %st(0), but that's about all. Higher DWARF |
| register numbers don't seem to be associated with anything in |
| particular, and even for DWARF regno 11, SDB only seemed to under- |
| stand that it should say that a variable lives in %st(0) (when |
| asked via an `=' command) if we said it was in DWARF regno 11, |
| but SDB still printed garbage when asked for the value of the |
| variable in question (via a `/' command). |
| (Also note that the labels SDB printed for various FP stack regs |
| when doing an `x' command were all wrong.) |
| Note that these problems generally don't affect the native SVR4 |
| C compiler because it doesn't allow the use of -O with -g and |
| because when it is *not* optimizing, it allocates a memory |
| location for each floating-point variable, and the memory |
| location is what gets described in the DWARF AT_location |
| attribute for the variable in question. |
| Regardless of the severe mental illness of the x86/svr4 SDB, we |
| do something sensible here and we use the following DWARF |
| register numbers. Note that these are all stack-top-relative |
| numbers. |
| 11 for %st(0) (gcc regno = 8) |
| 12 for %st(1) (gcc regno = 9) |
| 13 for %st(2) (gcc regno = 10) |
| 14 for %st(3) (gcc regno = 11) |
| 15 for %st(4) (gcc regno = 12) |
| 16 for %st(5) (gcc regno = 13) |
| 17 for %st(6) (gcc regno = 14) |
| 18 for %st(7) (gcc regno = 15) |
| */ |
| int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = |
| { |
| /* general regs */ |
| 0, 2, 1, 3, 6, 7, 5, 4, |
| /* fp regs */ |
| 11, 12, 13, 14, 15, 16, 17, 18, |
| /* arg, flags, fpsr, frame */ |
| IGNORED_DWARF_REGNUM, 9, |
| IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM, |
| /* SSE registers */ |
| 21, 22, 23, 24, 25, 26, 27, 28, |
| /* MMX registers */ |
| 29, 30, 31, 32, 33, 34, 35, 36, |
| /* extended integer registers */ |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| /* extended sse registers */ |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| /* AVX-512 registers 16-23 */ |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| /* AVX-512 registers 24-31 */ |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, |
| /* Mask registers */ |
| 93, 94, 95, 96, 97, 98, 99, 100 |
| }; |
| |
| /* Define parameter passing and return registers. */ |
| |
| static int const x86_64_int_parameter_registers[6] = |
| { |
| DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG |
| }; |
| |
| static int const x86_64_ms_abi_int_parameter_registers[4] = |
| { |
| CX_REG, DX_REG, R8_REG, R9_REG |
| }; |
| |
| static int const x86_64_int_return_registers[4] = |
| { |
| AX_REG, DX_REG, DI_REG, SI_REG |
| }; |
| |
| /* Define the structure for the machine field in struct function. */ |
| |
| struct GTY(()) stack_local_entry { |
| unsigned short mode; |
| unsigned short n; |
| rtx rtl; |
| struct stack_local_entry *next; |
| }; |
| |
| /* Which cpu are we scheduling for. */ |
| enum attr_cpu ix86_schedule; |
| |
| /* Which cpu are we optimizing for. */ |
| enum processor_type ix86_tune; |
| |
| /* Which instruction set architecture to use. */ |
| enum processor_type ix86_arch; |
| |
| /* True if processor has SSE prefetch instruction. */ |
| unsigned char ix86_prefetch_sse; |
| |
| /* Preferred alignment for stack boundary in bits. */ |
| unsigned int ix86_preferred_stack_boundary; |
| |
| /* Alignment for incoming stack boundary in bits specified at |
| command line. */ |
| unsigned int ix86_user_incoming_stack_boundary; |
| |
| /* Default alignment for incoming stack boundary in bits. */ |
| unsigned int ix86_default_incoming_stack_boundary; |
| |
| /* Alignment for incoming stack boundary in bits. */ |
| unsigned int ix86_incoming_stack_boundary; |
| |
| /* Calling abi specific va_list type nodes. */ |
| tree sysv_va_list_type_node; |
| tree ms_va_list_type_node; |
| |
| /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ |
| char internal_label_prefix[16]; |
| int internal_label_prefix_len; |
| |
| /* Fence to use after loop using movnt. */ |
| tree x86_mfence; |
| |
| /* Register class used for passing a given 64-bit part of an argument. |
|    These represent classes as documented by the psABI, with the exception |
|    of the SSESF and SSEDF classes, which are basically the SSE class: GCC |
|    just uses an SFmode or DFmode move instead of DImode to avoid |
|    reformatting penalties. |
| |
|    Similarly, we play games with the INTEGERSI_CLASS to use cheaper SImode |
|    moves whenever possible (the upper half contains only padding).  */ |
| enum x86_64_reg_class |
| { |
| X86_64_NO_CLASS, |
| X86_64_INTEGER_CLASS, |
| X86_64_INTEGERSI_CLASS, |
| X86_64_SSE_CLASS, |
| X86_64_SSEHF_CLASS, |
| X86_64_SSESF_CLASS, |
| X86_64_SSEDF_CLASS, |
| X86_64_SSEUP_CLASS, |
| X86_64_X87_CLASS, |
| X86_64_X87UP_CLASS, |
| X86_64_COMPLEX_X87_CLASS, |
| X86_64_MEMORY_CLASS |
| }; |
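| |
| /* As an illustrative example of this classification: under the psABI, |
|    "struct { double x; double y; }" is two eightbytes of class |
|    X86_64_SSEDF_CLASS (passed in two SSE registers), while |
|    "struct { long a; long b; }" is two X86_64_INTEGER_CLASS eightbytes |
|    (passed in two general-purpose registers).  */ |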
| |
| #define MAX_CLASSES 8 |
| |
| /* Table of constants used by fldpi, fldln2, etc.  */ |
| static REAL_VALUE_TYPE ext_80387_constants_table [5]; |
| static bool ext_80387_constants_init; |
| |
| |
| static rtx ix86_function_value (const_tree, const_tree, bool); |
| static bool ix86_function_value_regno_p (const unsigned int); |
| static unsigned int ix86_function_arg_boundary (machine_mode, |
| const_tree); |
| static rtx ix86_static_chain (const_tree, bool); |
| static int ix86_function_regparm (const_tree, const_tree); |
| static void ix86_compute_frame_layout (void); |
| static tree ix86_canonical_va_list_type (tree); |
| static unsigned int split_stack_prologue_scratch_regno (void); |
| static bool i386_asm_output_addr_const_extra (FILE *, rtx); |
| |
| static bool ix86_can_inline_p (tree, tree); |
| static unsigned int ix86_minimum_incoming_stack_boundary (bool); |
| |
| |
| /* Whether -mtune= or -march= were specified.  */ |
| int ix86_tune_defaulted; |
| int ix86_arch_specified; |
| |
| /* Return true if a red zone is in use.  We can't use the red zone when |
|    there are local indirect jumps, like "indirect_jump" or "tablejump", |
|    which jump to another place in the function, since the "call" in the |
|    indirect thunk pushes the return address onto the stack, destroying |
|    the red zone. |
| |
|    TODO: If we can reserve the first 2 WORDs of the red zone, one for |
|    PUSH and another for CALL, we can allow local indirect jumps with |
|    an indirect thunk.  */ |
| |
| bool |
| ix86_using_red_zone (void) |
| { |
| return (TARGET_RED_ZONE |
| && !TARGET_64BIT_MS_ABI |
| && (!cfun->machine->has_local_indirect_jump |
| || cfun->machine->indirect_branch_type == indirect_branch_keep)); |
| } |
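| |
| /* For reference, under the SysV x86-64 ABI the red zone is the 128 |
|    bytes below %rsp: a leaf function may, e.g., spill a temporary to |
|    -8(%rsp) without adjusting the stack pointer, which is exactly what |
|    an indirect-thunk "call" would clobber.  */ |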
| |
| /* Return true if profiling code should be emitted before the |
|    prologue; otherwise return false. |
|    Note: for x86 with the "hotfix" attribute this combination is |
|    rejected with sorry ().  */ |
| static bool |
| ix86_profile_before_prologue (void) |
| { |
| return flag_fentry != 0; |
| } |
| |
| /* Update register usage after having seen the compiler flags. */ |
| |
| static void |
| ix86_conditional_register_usage (void) |
| { |
| int i, c_mask; |
| |
| /* If there are no caller-saved registers, preserve all registers, |
|    except fixed_regs and registers used for the function return value, |
|    since aggregate_value_p checks call_used_regs[regno] on the return |
|    value.  */ |
| if (cfun && cfun->machine->no_caller_saved_registers) |
| for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| if (!fixed_regs[i] && !ix86_function_value_regno_p (i)) |
| call_used_regs[i] = 0; |
| |
| /* For 32-bit targets, disable the REX registers. */ |
| if (! TARGET_64BIT) |
| { |
| for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) |
| CLEAR_HARD_REG_BIT (accessible_reg_set, i); |
| for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) |
| CLEAR_HARD_REG_BIT (accessible_reg_set, i); |
| for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
| CLEAR_HARD_REG_BIT (accessible_reg_set, i); |
| } |
| |
| /* See the definition of CALL_USED_REGISTERS in i386.h. */ |
| c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI); |
| |
| CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); |
| |
| for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
| { |
| /* Set/reset conditionally defined registers from |
| CALL_USED_REGISTERS initializer. */ |
| if (call_used_regs[i] > 1) |
| call_used_regs[i] = !!(call_used_regs[i] & c_mask); |
| |
| /* Calculate registers of CLOBBERED_REGS register set |
| as call used registers from GENERAL_REGS register set. */ |
| if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i) |
| && call_used_regs[i]) |
| SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i); |
| } |
| |
| /* If MMX is disabled, disable the registers. */ |
| if (! TARGET_MMX) |
| accessible_reg_set &= ~reg_class_contents[MMX_REGS]; |
| |
| /* If SSE is disabled, disable the registers. */ |
| if (! TARGET_SSE) |
| accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS]; |
| |
| /* If the FPU is disabled, disable the registers. */ |
| if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) |
| accessible_reg_set &= ~reg_class_contents[FLOAT_REGS]; |
| |
| /* If AVX512F is disabled, disable the registers. */ |
| if (! TARGET_AVX512F) |
| { |
| for (i = FIRST_EXT_REX_SSE_REG; i <= LAST_EXT_REX_SSE_REG; i++) |
| CLEAR_HARD_REG_BIT (accessible_reg_set, i); |
| |
| accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS]; |
| } |
| } |
| |
| /* Canonicalize a comparison from one we don't have to one we do have. */ |
| |
| static void |
| ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1, |
| bool op0_preserve_value) |
| { |
| /* The order of operands in the x87 ficom compare is forced by the |
|    combine pass in its simplify_comparison () function.  The FLOAT |
|    operator is treated as RTX_OBJ, with precedence over other operators, |
|    and is always put in the first place.  Swap the condition and |
|    operands to match the ficom instruction.  */ |
| if (!op0_preserve_value |
| && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1)) |
| { |
| enum rtx_code scode = swap_condition ((enum rtx_code) *code); |
| |
| /* We are called only for compares that are split to the SAHF |
|    instruction.  Ensure that we have a setcc/jcc insn for the swapped |
|    condition.  */ |
| if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN) |
| { |
| std::swap (*op0, *op1); |
| *code = (int) scode; |
| } |
| } |
| } |
| |
| |
| /* Hook to determine if one function can safely inline another. */ |
| |
| static bool |
| ix86_can_inline_p (tree caller, tree callee) |
| { |
| tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); |
| tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); |
| |
| /* Changes to these flags can be tolerated for always_inline functions. |
|    Let's hope the user knows what they are doing.  */ |
| unsigned HOST_WIDE_INT always_inline_safe_mask |
| = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS |
| | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD |
| | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD |
| | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS |
| | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE |
| | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER |
| | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER); |
| |
| |
| if (!callee_tree) |
| callee_tree = target_option_default_node; |
| if (!caller_tree) |
| caller_tree = target_option_default_node; |
| if (callee_tree == caller_tree) |
| return true; |
| |
| struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); |
| struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); |
| bool ret = false; |
| bool always_inline |
| = (DECL_DISREGARD_INLINE_LIMITS (callee) |
| && lookup_attribute ("always_inline", |
| DECL_ATTRIBUTES (callee))); |
| |
| /* If callee only uses GPRs, ignore MASK_80387. */ |
| if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags)) |
| always_inline_safe_mask |= MASK_80387; |
| |
| cgraph_node *callee_node = cgraph_node::get (callee); |
| /* The callee's ISA options should be a subset of the caller's, i.e. an |
|    SSE4 function can inline an SSE2 function but an SSE2 function can't |
|    inline an SSE4 function.  */ |
| if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags) |
| != callee_opts->x_ix86_isa_flags) |
| || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2) |
| != callee_opts->x_ix86_isa_flags2)) |
| ret = false; |
| |
| /* See if we have the same non-isa options. */ |
| else if ((!always_inline |
| && caller_opts->x_target_flags != callee_opts->x_target_flags) |
| || (caller_opts->x_target_flags & ~always_inline_safe_mask) |
| != (callee_opts->x_target_flags & ~always_inline_safe_mask)) |
| ret = false; |
| |
| /* See if arch, tune, etc. are the same. */ |
| else if (caller_opts->arch != callee_opts->arch) |
| ret = false; |
| |
| else if (!always_inline && caller_opts->tune != callee_opts->tune) |
| ret = false; |
| |
| else if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath |
| /* If the callee doesn't use FP expressions, differences in |
|    ix86_fpmath can be ignored.  We are called from FEs |
|    for multi-versioning call optimization, so beware of |
|    ipa_fn_summaries not being available.  */ |
| && (! ipa_fn_summaries |
| || ipa_fn_summaries->get (callee_node) == NULL |
| || ipa_fn_summaries->get (callee_node)->fp_expressions)) |
| ret = false; |
| |
| else if (!always_inline |
| && caller_opts->branch_cost != callee_opts->branch_cost) |
| ret = false; |
| |
| else |
| ret = true; |
| |
| return ret; |
| } |
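| |
| /* A hedged example of the ISA-subset rule above: given |
| |
|      static inline void |
|      __attribute__ ((target ("sse2"))) callee (void) { } |
| |
|      void |
|      __attribute__ ((target ("sse4.2"))) caller (void) { callee (); } |
| |
|    the callee's ISA flags are a subset of the caller's, so inlining is |
|    allowed; with the attributes swapped, ix86_can_inline_p returns |
|    false.  */ |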
| |
| /* Return true if this goes in large data/bss. */ |
| |
| static bool |
| ix86_in_large_data_p (tree exp) |
| { |
| if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) |
| return false; |
| |
| if (exp == NULL_TREE) |
| return false; |
| |
| /* Functions are never large data. */ |
| if (TREE_CODE (exp) == FUNCTION_DECL) |
| return false; |
| |
| /* Automatic variables are never large data. */ |
| if (VAR_P (exp) && !is_global_var (exp)) |
| return false; |
| |
| if (VAR_P (exp) && DECL_SECTION_NAME (exp)) |
| { |
| const char *section = DECL_SECTION_NAME (exp); |
| if (strcmp (section, ".ldata") == 0 |
| || strcmp (section, ".lbss") == 0) |
| return true; |
| return false; |
| } |
| else |
| { |
| HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); |
| |
| /* If this is an incomplete type with size 0, then we can't put it |
|    in data because it might be too big when completed.  Also, |
|    int_size_in_bytes returns -1 if the size can vary or is larger than |
|    an integer, in which case it is also safer to assume that it goes |
|    in large data.  */ |
| if (size <= 0 || size > ix86_section_threshold) |
| return true; |
| } |
| |
| return false; |
| } |
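| |
| /* For example, with -mcmodel=medium -mlarge-data-threshold=65536, a |
|    global object larger than 64 KiB (or one explicitly placed in .ldata |
|    or .lbss) is treated as large data, while smaller objects stay in |
|    the ordinary sections.  (Illustrative; ix86_section_threshold is set |
|    from -mlarge-data-threshold.)  */ |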
| |
| /* i386-specific section flag to mark large sections. */ |
| #define SECTION_LARGE SECTION_MACH_DEP |
| |
| /* Switch to the appropriate section for output of DECL. |
| DECL is either a `VAR_DECL' node or a constant of some sort. |
| RELOC indicates whether forming the initial value of DECL requires |
| link-time relocations. */ |
| |
| ATTRIBUTE_UNUSED static section * |
| x86_64_elf_select_section (tree decl, int reloc, |
| unsigned HOST_WIDE_INT align) |
| { |
| if (ix86_in_large_data_p (decl)) |
| { |
| const char *sname = NULL; |
| unsigned int flags = SECTION_WRITE | SECTION_LARGE; |
| switch (categorize_decl_for_section (decl, reloc)) |
| { |
| case SECCAT_DATA: |
| sname = ".ldata"; |
| break; |
| case SECCAT_DATA_REL: |
| sname = ".ldata.rel"; |
| break; |
| case SECCAT_DATA_REL_LOCAL: |
| sname = ".ldata.rel.local"; |
| break; |
| case SECCAT_DATA_REL_RO: |
| sname = ".ldata.rel.ro"; |
| break; |
| case SECCAT_DATA_REL_RO_LOCAL: |
| sname = ".ldata.rel.ro.local"; |
| break; |
| case SECCAT_BSS: |
| sname = ".lbss"; |
| flags |= SECTION_BSS; |
| break; |
| case SECCAT_RODATA: |
| case SECCAT_RODATA_MERGE_STR: |
| case SECCAT_RODATA_MERGE_STR_INIT: |
| case SECCAT_RODATA_MERGE_CONST: |
| sname = ".lrodata"; |
| flags &= ~SECTION_WRITE; |
| break; |
| case SECCAT_SRODATA: |
| case SECCAT_SDATA: |
| case SECCAT_SBSS: |
| gcc_unreachable (); |
| case SECCAT_TEXT: |
| case SECCAT_TDATA: |
| case SECCAT_TBSS: |
| /* We don't split these for the medium model.  Place them into |
|    default sections and hope for the best.  */ |
| break; |
| } |
| if (sname) |
| { |
| /* We might get called with string constants, but get_named_section |
| doesn't like them as they are not DECLs. Also, we need to set |
| flags in that case. */ |
| if (!DECL_P (decl)) |
| return get_section (sname, flags, NULL); |
| return get_named_section (decl, sname, reloc); |
| } |
| } |
| return default_elf_select_section (decl, reloc, align); |
| } |
| |
| /* Select a set of attributes for section NAME based on the properties |
| of DECL and whether or not RELOC indicates that DECL's initializer |
| might contain runtime relocations. */ |
| |
| static unsigned int ATTRIBUTE_UNUSED |
| x86_64_elf_section_type_flags (tree decl, const char *name, int reloc) |
| { |
| unsigned int flags = default_section_type_flags (decl, name, reloc); |
| |
| if (ix86_in_large_data_p (decl)) |
| flags |= SECTION_LARGE; |
| |
| if (decl == NULL_TREE |
| && (strcmp (name, ".ldata.rel.ro") == 0 |
| || strcmp (name, ".ldata.rel.ro.local") == 0)) |
| flags |= SECTION_RELRO; |
| |
| if (strcmp (name, ".lbss") == 0 |
| || startswith (name, ".lbss.") |
| || startswith (name, ".gnu.linkonce.lb.")) |
| flags |= SECTION_BSS; |
| |
| return flags; |
| } |
| |
| /* Build up a unique section name, expressed as a |
| STRING_CST node, and assign it to DECL_SECTION_NAME (decl). |
| RELOC indicates whether the initial value of EXP requires |
| link-time relocations. */ |
| |
| static void ATTRIBUTE_UNUSED |
| x86_64_elf_unique_section (tree decl, int reloc) |
| { |
| if (ix86_in_large_data_p (decl)) |
| { |
| const char *prefix = NULL; |
| /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ |
| bool one_only = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP; |
| |
| switch (categorize_decl_for_section (decl, reloc)) |
| { |
| case SECCAT_DATA: |
| case SECCAT_DATA_REL: |
| case SECCAT_DATA_REL_LOCAL: |
| case SECCAT_DATA_REL_RO: |
| case SECCAT_DATA_REL_RO_LOCAL: |
| prefix = one_only ? ".ld" : ".ldata"; |
| break; |
| case SECCAT_BSS: |
| prefix = one_only ? ".lb" : ".lbss"; |
| break; |
| case SECCAT_RODATA: |
| case SECCAT_RODATA_MERGE_STR: |
| case SECCAT_RODATA_MERGE_STR_INIT: |
| case SECCAT_RODATA_MERGE_CONST: |
| prefix = one_only ? ".lr" : ".lrodata"; |
| break; |
| case SECCAT_SRODATA: |
| case SECCAT_SDATA: |
| case SECCAT_SBSS: |
| gcc_unreachable (); |
| case SECCAT_TEXT: |
| case SECCAT_TDATA: |
| case SECCAT_TBSS: |
| /* We don't split these for the medium model.  Place them into |
|    default sections and hope for the best.  */ |
| break; |
| } |
| if (prefix) |
| { |
| const char *name, *linkonce; |
| char *string; |
| |
| name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); |
| name = targetm.strip_name_encoding (name); |
| |
| /* If we're using one_only, then there needs to be a .gnu.linkonce |
| prefix to the section name. */ |
| linkonce = one_only ? ".gnu.linkonce" : ""; |
| |
| string = ACONCAT ((linkonce, prefix, ".", name, NULL)); |
| |
| set_decl_section_name (decl, string); |
| return; |
| } |
| } |
| default_unique_section (decl, reloc); |
| } |
| |
| #ifdef COMMON_ASM_OP |
| |
| #ifndef LARGECOMM_SECTION_ASM_OP |
| #define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t" |
| #endif |
| |
| /* This says how to output assembler code to declare an |
|    uninitialized external-linkage data object. |
| |
|    For medium-model x86-64 we need to use the LARGECOMM_SECTION_ASM_OP |
|    directive for large objects.  */ |
| void |
| x86_elf_aligned_decl_common (FILE *file, tree decl, |
| const char *name, unsigned HOST_WIDE_INT size, |
| unsigned align) |
| { |
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
| && size > (unsigned int)ix86_section_threshold) |
| { |
| switch_to_section (get_named_section (decl, ".lbss", 0)); |
| fputs (LARGECOMM_SECTION_ASM_OP, file); |
| } |
| else |
| fputs (COMMON_ASM_OP, file); |
| assemble_name (file, name); |
| fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n", |
| size, align / BITS_PER_UNIT); |
| } |
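| |
| /* As a sketch of the output under -mcmodel=medium, a 64 KiB object |
|    "buf" aligned to 32 bytes would be emitted roughly as |
| |
|      .largecomm	buf,65536,32 |
| |
|    while objects below the threshold use the usual .comm directive. |
|    (Illustrative; "buf" is a hypothetical symbol.)  */ |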
| #endif |
| |
| /* Utility function for targets to use in implementing |
| ASM_OUTPUT_ALIGNED_BSS. */ |
| |
| void |
| x86_output_aligned_bss (FILE *file, tree decl, const char *name, |
| unsigned HOST_WIDE_INT size, unsigned align) |
| { |
| if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) |
| && size > (unsigned int)ix86_section_threshold) |
| switch_to_section (get_named_section (decl, ".lbss", 0)); |
| else |
| switch_to_section (bss_section); |
| ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); |
| #ifdef ASM_DECLARE_OBJECT_NAME |
| last_assemble_variable_decl = decl; |
| ASM_DECLARE_OBJECT_NAME (file, name, decl); |
| #else |
| /* The standard thing is to just output a label for the object.  */ |
| ASM_OUTPUT_LABEL (file, name); |
| #endif /* ASM_DECLARE_OBJECT_NAME */ |
| ASM_OUTPUT_SKIP (file, size ? size : 1); |
| } |
| |
| /* Decide whether we must probe the stack before any space allocation |
| on this target. It's essentially TARGET_STACK_PROBE except when |
| -fstack-check causes the stack to be already probed differently. */ |
| |
| bool |
| ix86_target_stack_probe (void) |
| { |
| /* Do not probe the stack twice if static stack checking is enabled. */ |
| if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK) |
| return false; |
| |
| return TARGET_STACK_PROBE; |
| } |
| |
| /* Decide whether we can make a sibling call to a function. DECL is the |
| declaration of the function being targeted by the call and EXP is the |
| CALL_EXPR representing the call. */ |
| |
| static bool |
| ix86_function_ok_for_sibcall (tree decl, tree exp) |
| { |
| tree type, decl_or_type; |
| rtx a, b; |
| bool bind_global = decl && !targetm.binds_local_p (decl); |
| |
| if (ix86_function_naked (current_function_decl)) |
| return false; |
| |
| /* A sibling call isn't OK if there are no caller-saved registers, |
|    since all registers must be preserved before return.  */ |
| if (cfun->machine->no_caller_saved_registers) |
| return false; |
| |
| /* If we are generating position-independent code, we cannot sibcall |
| optimize direct calls to global functions, as the PLT requires |
| %ebx be live. (Darwin does not have a PLT.) */ |
| if (!TARGET_MACHO |
| && !TARGET_64BIT |
| && flag_pic |
| && flag_plt |
| && bind_global) |
| return false; |
| |
| /* If we need to align the outgoing stack, then sibcalling would |
| unalign the stack, which may break the called function. */ |
| if (ix86_minimum_incoming_stack_boundary (true) |
| < PREFERRED_STACK_BOUNDARY) |
| return false; |
| |
| if (decl) |
| { |
| decl_or_type = decl; |
| type = TREE_TYPE (decl); |
| } |
| else |
| { |
| /* We're looking at the CALL_EXPR, we need the type of the function. */ |
| type = CALL_EXPR_FN (exp); /* pointer expression */ |
| type = TREE_TYPE (type); /* pointer type */ |
| type = TREE_TYPE (type); /* function type */ |
| decl_or_type = type; |
| } |
| |
| /* If outgoing reg parm stack space changes, we cannot do sibcall. */ |
| if ((OUTGOING_REG_PARM_STACK_SPACE (type) |
| != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl))) |
| || (REG_PARM_STACK_SPACE (decl_or_type) |
| != REG_PARM_STACK_SPACE (current_function_decl))) |
| { |
| maybe_complain_about_tail_call (exp, |
| "inconsistent size of stack space" |
| " allocated for arguments which are" |
| " passed in registers"); |
| return false; |
| } |
| |
| /* Check that the return value locations are the same.  For example, |
|    if we are returning floats on the 80387 register stack, we cannot |
|    make a sibcall from a function that doesn't return a float to a |
|    function that does or, conversely, from a function that does return |
|    a float to a function that doesn't; the necessary stack adjustment |
|    would not be executed.  This is also the place where we notice |
|    differences in the return value ABI.  Note that it is OK for one |
|    of the functions to have a void return type as long as the return |
|    value of the other is passed in a register.  */ |
| a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); |
| b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), |
| cfun->decl, false); |
| if (STACK_REG_P (a) || STACK_REG_P (b)) |
| { |
| if (!rtx_equal_p (a, b)) |
| return false; |
| } |
| else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) |
| ; |
| else if (!rtx_equal_p (a, b)) |
| return false; |
| |
| if (TARGET_64BIT) |
| { |
| /* The SYSV ABI has more call-clobbered registers; |
| disallow sibcalls from MS to SYSV. */ |
| if (cfun->machine->call_abi == MS_ABI |
| && ix86_function_type_abi (type) == SYSV_ABI) |
| return false; |
| } |
| else |
| { |
| /* If this call is indirect, we'll need to be able to use a |
| call-clobbered register for the address of the target function. |
| Make sure that all such registers are not used for passing |
|    parameters.  Note that DLLIMPORT functions and calls to global |
|    functions via GOT slots are indirect. |
| if (!decl |
| || (bind_global && flag_pic && !flag_plt) |
| || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)) |
| || flag_force_indirect_call) |
| { |
| /* Check if regparm >= 3 since arg_reg_available is set to |
| false if regparm == 0. If regparm is 1 or 2, there is |
| always a call-clobbered register available. |
| |
| ??? The symbol indirect call doesn't need a call-clobbered |
| register. But we don't know if this is a symbol indirect |
| call or not here. */ |
| if (ix86_function_regparm (type, decl) >= 3 |
| && !cfun->machine->arg_reg_available) |
| return false; |
| } |
| } |
| |
| /* Otherwise okay. That also includes certain types of indirect calls. */ |
| return true; |
| } |
| |
| /* Determine the calling convention from TYPE.  */ |
| |
| unsigned int |
| ix86_get_callcvt (const_tree type) |
| { |
| unsigned int ret = 0; |
| bool is_stdarg; |
| tree attrs; |
| |
| if (TARGET_64BIT) |
| return IX86_CALLCVT_CDECL; |
| |
| attrs = TYPE_ATTRIBUTES (type); |
| if (attrs != NULL_TREE) |
| { |
| if (lookup_attribute ("cdecl", attrs)) |
| ret |= IX86_CALLCVT_CDECL; |
| else if (lookup_attribute ("stdcall", attrs)) |
| ret |= IX86_CALLCVT_STDCALL; |
| else if (lookup_attribute ("fastcall", attrs)) |
| ret |= IX86_CALLCVT_FASTCALL; |
| else if (lookup_attribute ("thiscall", attrs)) |
| ret |= IX86_CALLCVT_THISCALL; |
| |
| /* Regparm isn't allowed for thiscall and fastcall.  */ |
| if ((ret & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0) |
| { |
| if (lookup_attribute ("regparm", attrs)) |
| ret |= IX86_CALLCVT_REGPARM; |
| if (lookup_attribute ("sseregparm", attrs)) |
| ret |= IX86_CALLCVT_SSEREGPARM; |
| } |
| |
| if (IX86_BASE_CALLCVT(ret) != 0) |
| return ret; |
| } |
| |
| is_stdarg = stdarg_p (type); |
| if (TARGET_RTD && !is_stdarg) |
| return IX86_CALLCVT_STDCALL | ret; |
| |
| if (ret != 0 |
| || is_stdarg |
| || TREE_CODE (type) != METHOD_TYPE |
| || ix86_function_type_abi (type) != MS_ABI) |
| return IX86_CALLCVT_CDECL | ret; |
| |
| return IX86_CALLCVT_THISCALL; |
| } |
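| |
| /* A hedged example: with -m32, |
| |
|      int __attribute__ ((fastcall)) f (int a, int b); |
| |
|    yields IX86_CALLCVT_FASTCALL, so A and B are passed in %ecx and |
|    %edx, whereas a plain declaration yields IX86_CALLCVT_CDECL (or |
|    IX86_CALLCVT_STDCALL under -mrtd for non-stdarg functions).  */ |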
| |
| /* Return 0 if the attributes for two types are incompatible, 1 if they |
| are compatible, and 2 if they are nearly compatible (which causes a |
| warning to be generated). */ |
| |
| static int |
| ix86_comp_type_attributes (const_tree type1, const_tree type2) |
| { |
| unsigned int ccvt1, ccvt2; |
| |
| if (TREE_CODE (type1) != FUNCTION_TYPE |
| && TREE_CODE (type1) != METHOD_TYPE) |
| return 1; |
| |
| ccvt1 = ix86_get_callcvt (type1); |
| ccvt2 = ix86_get_callcvt (type2); |
| if (ccvt1 != ccvt2) |
| return 0; |
| if (ix86_function_regparm (type1, NULL) |
| != ix86_function_regparm (type2, NULL)) |
| return 0; |
| |
| return 1; |
| } |
| |
| /* Return the regparm value for a function with the indicated TYPE and DECL. |
|    DECL may be NULL when calling a function indirectly |
|    or considering a libcall.  */ |
| |
| static int |
| ix86_function_regparm (const_tree type, const_tree decl) |
| { |
| tree attr; |
| int regparm; |
| unsigned int ccvt; |
| |
| if (TARGET_64BIT) |
| return (ix86_function_type_abi (type) == SYSV_ABI |
| ? X86_64_REGPARM_MAX : X86_64_MS_REGPARM_MAX); |
| ccvt = ix86_get_callcvt (type); |
| regparm = ix86_regparm; |
| |
| if ((ccvt & IX86_CALLCVT_REGPARM) != 0) |
| { |
| attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); |
| if (attr) |
| { |
| regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); |
| return regparm; |
| } |
| } |
| else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
| return 2; |
| else if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
| return 1; |
| |
| /* Use register calling convention for local functions when possible. */ |
| if (decl |
| && TREE_CODE (decl) == FUNCTION_DECL) |
| { |
| cgraph_node *target = cgraph_node::get (decl); |
| if (target) |
| target = target->function_symbol (); |
| |
| /* Caller and callee must agree on the calling convention, so |
|    checking just "optimize" here would mean that with |
|    __attribute__((optimize (...))) the caller could use the regparm |
|    convention and the callee not, or vice versa.  Instead look at |
|    whether the callee is optimized or not.  */ |
| if (target && opt_for_fn (target->decl, optimize) |
| && !(profile_flag && !flag_fentry)) |
| { |
| if (target->local && target->can_change_signature) |
| { |
| int local_regparm, globals = 0, regno; |
| |
| /* Make sure no regparm register is taken by a |
| fixed register variable. */ |
| for (local_regparm = 0; local_regparm < REGPARM_MAX; |
| local_regparm++) |
| if (fixed_regs[local_regparm]) |
| break; |
| |
| /* We don't want to use regparm(3) for nested functions as |
| these use a static chain pointer in the third argument. */ |
| if (local_regparm == 3 && DECL_STATIC_CHAIN (target->decl)) |
| local_regparm = 2; |
| |
| /* Save a register for the split stack. */ |
| if (flag_split_stack) |
| { |
| if (local_regparm == 3) |
| local_regparm = 2; |
| else if (local_regparm == 2 |
| && DECL_STATIC_CHAIN (target->decl)) |
| local_regparm = 1; |
| } |
| |
| /* Each fixed register usage increases register pressure, |
|    so fewer registers should be used for argument passing. |
|    This functionality can be overridden by an explicit |
|    regparm value.  */ |
| for (regno = AX_REG; regno <= DI_REG; regno++) |
| if (fixed_regs[regno]) |
| globals++; |
| |
| local_regparm |
| = globals < local_regparm ? local_regparm - globals : 0; |
| |
| if (local_regparm > regparm) |
| regparm = local_regparm; |
| } |
| } |
| } |
| |
| return regparm; |
| } |
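| |
| /* An illustrative example: with -m32, |
| |
|      int __attribute__ ((regparm (3))) f (int a, int b, int c); |
| |
|    makes this function return 3, so A, B and C are passed in %eax, |
|    %edx and %ecx instead of on the stack.  */ |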
| |
| /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and |
|    DFmode (2) arguments in SSE registers for a function with the |
|    indicated TYPE and DECL.  DECL may be NULL when calling a function |
|    indirectly or considering a libcall.  Return -1 if any FP parameter |
|    should be rejected with an error; this is used in situations where we |
|    imply the SSE calling convention but the function is called from |
|    another function with SSE disabled.  Otherwise return 0.  */ |
| |
| static int |
| ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) |
| { |
| gcc_assert (!TARGET_64BIT); |
| |
| /* Use SSE registers to pass SFmode and DFmode arguments if requested |
| by the sseregparm attribute. */ |
| if (TARGET_SSEREGPARM |
| || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) |
| { |
| if (!TARGET_SSE) |
| { |
| if (warn) |
| { |
| if (decl) |
| error ("calling %qD with attribute sseregparm without " |
| "SSE/SSE2 enabled", decl); |
| else |
| error ("calling %qT with attribute sseregparm without " |
| "SSE/SSE2 enabled", type); |
| } |
| return 0; |
| } |
| |
| return 2; |
| } |
| |
| if (!decl) |
| return 0; |
| |
| cgraph_node *target = cgraph_node::get (decl); |
| if (target) |
| target = target->function_symbol (); |
| |
| /* For local functions, pass up to SSE_REGPARM_MAX SFmode |
| (and DFmode for SSE2) arguments in SSE registers. */ |
| if (target |
| /* TARGET_SSE_MATH */ |
| && (target_opts_for_fn (target->decl)->x_ix86_fpmath & FPMATH_SSE) |
| && opt_for_fn (target->decl, optimize) |
| && !(profile_flag && !flag_fentry)) |
| { |
| if (target->local && target->can_change_signature) |
| { |
| /* Refuse to produce wrong code when a local function with SSE enabled |
|    is called from an SSE-disabled function. |
|    FIXME: We need a way to detect these cases across ltrans partitions |
|    and avoid using SSE calling conventions on local functions called |
|    from functions with SSE disabled.  For now at least delay the |
|    warning until we know we are going to produce wrong code. |
|    See PR66047.  */ |
| if (!TARGET_SSE && warn) |
| return -1; |
| return TARGET_SSE2_P (target_opts_for_fn (target->decl) |
| ->x_ix86_isa_flags) ? 2 : 1; |
| } |
| } |
| |
| return 0; |
| } |
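| |
| /* An illustrative example: with -m32 -msse2, |
| |
|      double __attribute__ ((sseregparm)) f (double x); |
| |
|    makes this function return 2, so X is passed in an SSE register; |
|    the same declaration without SSE enabled triggers the error |
|    above.  */ |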
| |
| /* Return true if EAX is live at the start of the function. Used by |
| ix86_expand_prologue to determine if we need special help before |
| calling allocate_stack_worker. */ |
| |
| static bool |
| ix86_eax_live_at_start_p (void) |
| { |
| /* Cheat. Don't bother working forward from ix86_function_regparm |
| to the function type to whether an actual argument is located in |
| eax. Instead just look at cfg info, which is still close enough |
| to correct at this point. This gives false positives for broken |
| functions that might use uninitialized data that happens to be |
| allocated in eax, but who cares? */ |
| return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0); |
| } |
| |
| static bool |
| ix86_keep_aggregate_return_pointer (tree fntype) |
| { |
| tree attr; |
| |
| if (!TARGET_64BIT) |
| { |
| attr = lookup_attribute ("callee_pop_aggregate_return", |
| TYPE_ATTRIBUTES (fntype)); |
| if (attr) |
| return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))) == 0); |
| |
| /* For 32-bit MS-ABI the default is to keep aggregate |
| return pointer. */ |
| if (ix86_function_type_abi (fntype) == MS_ABI) |
| return true; |
| } |
| return KEEP_AGGREGATE_RETURN_POINTER != 0; |
| } |
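| |
| /* An illustrative example: on 32-bit targets, |
| |
|      struct s __attribute__ ((callee_pop_aggregate_return (1))) f (void); |
| |
|    makes this return false, so ix86_return_pops_args below lets the |
|    callee pop the hidden return-pointer argument; with argument 0 the |
|    pointer is kept for the caller to pop.  (Here "struct s" is a |
|    hypothetical aggregate type.)  */ |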
| |
| /* Value is the number of bytes of arguments automatically |
| popped when returning from a subroutine call. |
| FUNDECL is the declaration node of the function (as a tree), |
| FUNTYPE is the data type of the function (as a tree), |
| or for a library call it is an identifier node for the subroutine name. |
| SIZE is the number of bytes of arguments passed on the stack. |
| |
| On the 80386, the RTD insn may be used to pop them if the number |
| of args is fixed, but if the number is variable then the caller |
| must pop them all. RTD can't be used for library calls now |
| because the library is compiled with the Unix compiler. |
| Use of RTD is a selectable option, since it is incompatible with |
| standard Unix calling sequences. If the option is not selected, |
| the caller must always pop the args. |
| |
| The attribute stdcall is equivalent to RTD on a per module basis. */ |
| |
| static poly_int64 |
| ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size) |
| { |
| unsigned int ccvt; |
| |
| /* None of the 64-bit ABIs pop arguments. */ |
| if (TARGET_64BIT) |
| return 0; |
| |
| ccvt = ix86_get_callcvt (funtype); |
| |
| if ((ccvt & (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL |
| | IX86_CALLCVT_THISCALL)) != 0 |
| && ! stdarg_p (funtype)) |
| return size; |
| |
| /* Lose any fake structure return argument if it is passed on the stack. */ |
| if (aggregate_value_p (TREE_TYPE (funtype), fundecl) |
| && !ix86_keep_aggregate_return_pointer (funtype)) |
| { |
| int nregs = ix86_function_regparm (funtype, fundecl); |
| if (nregs == 0) |
| return GET_MODE_SIZE (Pmode); |
| } |
| |
| return 0; |
| } |
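| |
| /* A hedged example of the above: with -m32, |
| |
|      int __attribute__ ((stdcall)) f (int a, int b); |
| |
|    is not stdarg, so this hook returns 8 and the callee pops its own |
|    arguments, typically with "ret $8".  */ |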
| |
| /* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook. */ |
| |
| static bool |
| ix86_legitimate_combined_insn (rtx_insn *insn) |
| { |
| int i; |
| |
| /* Check operand constraints in case hard registers were propagated |
| into insn pattern. This check prevents combine pass from |
| generating insn patterns with invalid hard register operands. |
| These invalid insns can eventually confuse reload to error out |
| with a spill failure. See also PRs 46829 and 46843. */ |
| |
| gcc_assert (INSN_CODE (insn) >= 0); |
| |
| extract_insn (insn); |
| preprocess_constraints (insn); |
| |
| int n_operands = recog_data.n_operands; |
| int n_alternatives = recog_data.n_alternatives; |
| for (i = 0; i < n_operands; i++) |
| { |
| rtx op = recog_data.operand[i]; |
| machine_mode mode = GET_MODE (op); |
| const operand_alternative *op_alt; |
| int offset = 0; |
| bool win; |
| int j; |
| |
| /* A unary operator may be accepted by the predicate, but it |
| is irrelevant for matching constraints. */ |
| if (UNARY_P (op)) |
| op = XEXP (op, 0); |
| |
| if (SUBREG_P (op)) |
| { |
| if (REG_P (SUBREG_REG (op)) |
| && REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER) |
| offset = subreg_regno_offset (REGNO (SUBREG_REG (op)), |
| GET_MODE (SUBREG_REG (op)), |
| SUBREG_BYTE (op), |
| GET_MODE (op)); |
| op = SUBREG_REG (op); |
| } |
| |
| if (!(REG_P (op) && HARD_REGISTER_P (op))) |
| continue; |
| |
| op_alt = recog_op_alt; |
| |
| /* Operand has no constraints, anything is OK. */ |
| win = !n_alternatives; |
| |
| alternative_mask preferred = get_preferred_alternatives (insn); |
| for (j = 0; j < n_alternatives; j++, op_alt += n_operands) |
| { |
| if (!TEST_BIT (preferred, j)) |
| continue; |
| if (op_alt[i].anything_ok |
| || (op_alt[i].matches != -1 |
| && operands_match_p |
| (recog_data.operand[i], |
| recog_data.operand[op_alt[i].matches])) |
| || reg_fits_class_p (op, op_alt[i].cl, offset, mode)) |
| { |
| win = true; |
| break; |
| } |
| } |
| |
| if (!win) |
| return false; |
| } |
| |
| return true; |
| } |
| |
| /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ |
| |
| static unsigned HOST_WIDE_INT |
| ix86_asan_shadow_offset (void) |
| { |
| return SUBTARGET_SHADOW_OFFSET; |
| } |
| |
| /* Argument support functions. */ |
| |
| /* Return true when register may be used to pass function parameters. */ |
| bool |
| ix86_function_arg_regno_p (int regno) |
| { |
| int i; |
| enum calling_abi call_abi; |
| const int *parm_regs; |
| |
| if (TARGET_SSE && SSE_REGNO_P (regno) |
| && regno < FIRST_SSE_REG + SSE_REGPARM_MAX) |
| return true; |
| |
| if (!TARGET_64BIT) |
| return (regno < REGPARM_MAX |
| || (TARGET_MMX && MMX_REGNO_P (regno) |
| && regno < FIRST_MMX_REG + MMX_REGPARM_MAX)); |
| |
| /* TODO: The function should depend on the current function's ABI, but |
|    builtins.c would need updating then.  Therefore we use the |
|    default ABI.  */ |
| call_abi = ix86_cfun_abi (); |
| |
| /* RAX is used as hidden argument to va_arg functions. */ |
| if (call_abi == SYSV_ABI && regno == AX_REG) |
| return true; |
| |
| if (call_abi == MS_ABI) |
| parm_regs = x86_64_ms_abi_int_parameter_registers; |
| else |
| parm_regs = x86_64_int_parameter_registers; |
| |
| for (i = 0; i < (call_abi == MS_ABI |
| ? X86_64_MS_REGPARM_MAX : X86_64_REGPARM_MAX); i++) |
| if (regno == parm_regs[i]) |
| return true; |
| return false; |
| } |
| |
| /* Return true if we do not know how to pass ARG solely in registers.  */ |
| |
| static bool |
| ix86_must_pass_in_stack (const function_arg_info &arg) |
| { |
| if (must_pass_in_stack_var_size_or_pad (arg)) |
| return true; |
| |
| /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! |
| The layout_type routine is crafty and tries to trick us into passing |
| currently unsupported vector types on the stack by using TImode. */ |
| return (!TARGET_64BIT && arg.mode == TImode |
| && arg.type && TREE_CODE (arg.type) != VECTOR_TYPE); |
| } |
| |
| /* Return the size, in bytes, of the area reserved for arguments passed |
|    in registers by the function represented by FNDECL, depending on the |
|    ABI used.  Under the 64-bit MS ABI this is the 32-byte shadow space; |
|    for other ABIs it is 0.  */ |
| int |
| ix86_reg_parm_stack_space (const_tree fndecl) |
| { |
| enum calling_abi call_abi = SYSV_ABI; |
| if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) |
| call_abi = ix86_function_abi (fndecl); |
| else |
| call_abi = ix86_function_type_abi (fndecl); |
| if (TARGET_64BIT && call_abi == MS_ABI) |
| return 32; |
| return 0; |
| } |
| |
| /* We add this as a workaround in order to use libc_has_function |
| hook in i386.md. */ |
| bool |
| ix86_libc_has_function (enum function_class fn_class) |
| { |
| return targetm.libc_has_function (fn_class, NULL_TREE); |
| } |
| |
| /* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the |
|    calling ABI used.  */ |
| enum calling_abi |
| ix86_function_type_abi (const_tree fntype) |
| { |
| enum calling_abi abi = ix86_abi; |
| |
| if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE) |
| return abi; |
| |
| if (abi == SYSV_ABI |
| && lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype))) |
| { |
| static int warned; |
| if (TARGET_X32 && !warned) |
| { |
| error ("X32 does not support %<ms_abi%> attribute"); |
| warned = 1; |
| } |
| |
| abi = MS_ABI; |
| } |
| else if (abi == MS_ABI |
| && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype))) |
| abi = SYSV_ABI; |
| |
| return abi; |
| } |
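| |
| /* An illustrative example: on x86-64 GNU/Linux, where ix86_abi is |
|    SYSV_ABI, |
| |
|      void __attribute__ ((ms_abi)) f (long x); |
| |
|    yields MS_ABI here, so X is passed in %rcx per the Microsoft x64 |
|    convention instead of %rdi.  */ |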
| |
| enum calling_abi |
| ix86_function_abi (const_tree fndecl) |
| { |
| return fndecl ? ix86_function_type_abi (TREE_TYPE (fndecl)) : ix86_abi; |
| } |
| |
| /* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the |
|    calling ABI used.  */ |
| enum calling_abi |
| ix86_cfun_abi (void) |
| { |
| return cfun ? cfun->machine->call_abi : ix86_abi; |
| } |
| |
| bool |
| ix86_function_ms_hook_prologue (const_tree fn) |
| { |
| if (fn && lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn))) |
| { |
| if (decl_function_context (fn) != NULL_TREE) |
| error_at (DECL_SOURCE_LOCATION (fn), |
| "%<ms_hook_prologue%> attribute is not compatible " |
| "with nested function"); |
| else |
| return true; |
| } |
| return false; |
| } |
| |
| bool |
| ix86_function_naked (const_tree fn) |
| { |
| if (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn))) |
| return true; |
| |
| return false; |
| } |
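| |
| /* An illustrative example: |
| |
|      void __attribute__ ((naked)) f (void) { asm ("ret"); } |
| |
|    is recognized here, and the compiler then emits no prologue or |
|    epilogue for F.  */ |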
| |
| /* Write the extra assembler code needed to declare a function properly. */ |
| |
| void |
| ix86_asm_output_function_label (FILE *asm_out_file, const char *fname, |
| tree decl) |
| { |
| bool is_ms_hook = ix86_function_ms_hook_prologue (decl); |
| |
| if (cfun) |
| cfun->machine->function_label_emitted = true; |
| |
| if (is_ms_hook) |
| { |
| int i, filler_count = (TARGET_64BIT ? 32 : 16); |
| unsigned int filler_cc = 0xcccccccc; |
| |
| for (i = 0; i < filler_count; i += 4) |
| fprintf (asm_out_file, ASM_LONG " %#x\n", filler_cc); |
| } |
| |
| #ifdef SUBTARGET_ASM_UNWIND_INIT |
| SUBTARGET_ASM_UNWIND_INIT (asm_out_file); |
| #endif |
| |
| ASM_OUTPUT_LABEL (asm_out_file, fname); |
| |
| /* Output magic byte marker, if hot-patch attribute is set. */ |
| if (is_ms_hook) |
| { |
| if (TARGET_64BIT) |
| { |
| /* leaq [%rsp + 0], %rsp */ |
| fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n", |
| asm_out_file); |
| } |
| else |
| { |
| /* movl.s %edi, %edi |
| push %ebp |
| movl.s %esp, %ebp */ |
| fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file); |
| } |
| } |
| } |
| |
| /* Implementation of the call ABI switching target hook.  The call |
|    register sets specific to FNDECL are selected here.  See also |
|    ix86_conditional_register_usage for more details.  */ |
| void |
| ix86_call_abi_override (const_tree fndecl) |
| { |
| cfun->machine->call_abi = ix86_function_abi (fndecl); |
| } |
| |
| /* Return true if a pseudo register should be created and used to hold |
|    the GOT address for PIC code.  */ |
| bool |
| ix86_use_pseudo_pic_reg (void) |
| { |
| if ((TARGET_64BIT |
| && (ix86_cmodel == CM_SMALL_PIC |
| || TARGET_PECOFF)) |
| || !flag_pic) |
| return false; |
| return true; |
| } |
| |
| /* Initialize large model PIC register. */ |
| |
| static void |
| ix86_init_large_pic_reg (unsigned int tmp_regno) |
| { |
| rtx_code_label *label; |
| rtx tmp_reg; |
| |
| gcc_assert (Pmode == DImode); |
| label = gen_label_rtx (); |
| emit_label (label); |
| LABEL_PRESERVE_P (label) = 1; |
| tmp_reg = gen_rtx_REG (Pmode, tmp_regno); |
| gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno); |
| emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, |
| label)); |
| emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); |
| emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg)); |
| const char *name = LABEL_NAME (label); |
| PUT_CODE (label, NOTE); |
| NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL; |
| NOTE_DELETED_LABEL_NAME (label) = name; |
| } |
| |
| /* Create and initialize PIC register if required. */ |
| static void |
| ix86_init_pic_reg (void) |
| { |
| edge entry_edge; |
| rtx_insn *seq; |
| |
| if (!ix86_use_pseudo_pic_reg ()) |
| return; |
| |
| start_sequence (); |
| |
| if (TARGET_64BIT) |
| { |
| if (ix86_cmodel == CM_LARGE_PIC) |
| ix86_init_large_pic_reg (R11_REG); |
| else |
| emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); |
| } |
| else |
| { |
| /* If there is a future mcount call in the function, it is more |
|    profitable to emit SET_GOT into the ABI-defined |
|    REAL_PIC_OFFSET_TABLE_REGNUM.  */ |
| rtx reg = crtl->profile |
| ? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM) |
| : pic_offset_table_rtx; |
| rtx_insn *insn = emit_insn (gen_set_got (reg)); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| if (crtl->profile) |
| emit_move_insn (pic_offset_table_rtx, reg); |
| add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX); |
| } |
| |
| seq = get_insns (); |
| end_sequence (); |
| |
| entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); |
| insert_insn_on_edge (seq, entry_edge); |
| commit_one_edge_insertion (entry_edge); |
| } |
| |
| /* Initialize a variable CUM of type CUMULATIVE_ARGS |
| for a call to a function whose data type is FNTYPE. |
| For a library call, FNTYPE is 0. */ |
| |
| void |
| init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ |
| tree fntype, /* tree ptr for function decl */ |
| rtx libname, /* SYMBOL_REF of library name or 0 */ |
| tree fndecl, |
| int caller) |
| { |
| struct cgraph_node *local_info_node = NULL; |
| struct cgraph_node *target = NULL; |
| |
| /* Set silent_p to false to raise an error for invalid calls when |
|    expanding the function body.  */ |
| cfun->machine->silent_p = false; |
| |
| memset (cum, 0, sizeof (*cum)); |
| |
| if (fndecl) |
| { |
| target = cgraph_node::get (fndecl); |
| if (target) |
| { |
| target = target->function_symbol (); |
| local_info_node = cgraph_node::local_info_node (target->decl); |
| cum->call_abi = ix86_function_abi (target->decl); |
| } |
| else |
| cum->call_abi = ix86_function_abi (fndecl); |
| } |
| else |
| cum->call_abi = ix86_function_type_abi (fntype); |
| |
| cum->caller = caller; |
| |
| /* Set up the number of registers to use for passing arguments. */ |
| cum->nregs = ix86_regparm; |
| if (TARGET_64BIT) |
| { |
| cum->nregs = (cum->call_abi == SYSV_ABI |
| ? X86_64_REGPARM_MAX |
| : X86_64_MS_REGPARM_MAX); |
| } |
| if (TARGET_SSE) |
| { |
| cum->sse_nregs = SSE_REGPARM_MAX; |
| if (TARGET_64BIT) |
| { |
| cum->sse_nregs = (cum->call_abi == SYSV_ABI |
| ? X86_64_SSE_REGPARM_MAX |
| : X86_64_MS_SSE_REGPARM_MAX); |
| } |
| } |
| if (TARGET_MMX) |
| cum->mmx_nregs = MMX_REGPARM_MAX; |
| cum->warn_avx512f = true; |
| cum->warn_avx = true; |
| cum->warn_sse = true; |
| cum->warn_mmx = true; |
| |
| /* Because the type might mismatch between caller and callee, we need to |
|    use the actual type of the function for local calls. |
|    FIXME: cgraph_analyze can be told to actually record whether a function |
|    uses va_start, so for local functions maybe_vaarg could be made more |
|    aggressive, helping K&R code. |
|    FIXME: once the type system is fixed, we won't need this code anymore.  */ |
| if (local_info_node && local_info_node->local |
| && local_info_node->can_change_signature) |
| fntype = TREE_TYPE (target->decl); |
| cum->stdarg = stdarg_p (fntype); |
| cum->maybe_vaarg = (fntype |
| ? (!prototype_p (fntype) || stdarg_p (fntype)) |
| : !libname); |
| |
| cum->decl = fndecl; |
| |
| cum->warn_empty = !warn_abi || cum->stdarg; |
| if (!cum->warn_empty && fntype) |
| { |
| function_args_iterator iter; |
| tree argtype; |
| bool seen_empty_type = false; |
| FOREACH_FUNCTION_ARGS (fntype, argtype, iter) |
| { |
| if (argtype == error_mark_node || VOID_TYPE_P (argtype)) |
| break; |
| if (TYPE_EMPTY_P (argtype)) |
| seen_empty_type = true; |
| else if (seen_empty_type) |
| { |
| cum->warn_empty = true; |
| break; |
| } |
| } |
| } |
| |
| if (!TARGET_64BIT) |
| { |
| /* If there are variable arguments, then we won't pass anything |
| in registers in 32-bit mode. */ |
| if (stdarg_p (fntype)) |
| { |
| cum->nregs = 0; |
| /* Since in 32-bit mode variable arguments are always passed on the
| stack, a scratch register is available for an indirect sibcall.  */
| cfun->machine->arg_reg_available = true; |
| cum->sse_nregs = 0; |
| cum->mmx_nregs = 0; |
| cum->warn_avx512f = false; |
| cum->warn_avx = false; |
| cum->warn_sse = false; |
| cum->warn_mmx = false; |
| return; |
| } |
| |
| /* Use the ecx and edx registers if the function has the fastcall
| attribute, else look for regparm information.  */
| if (fntype) |
| { |
| unsigned int ccvt = ix86_get_callcvt (fntype); |
| if ((ccvt & IX86_CALLCVT_THISCALL) != 0) |
| { |
| cum->nregs = 1; |
| cum->fastcall = 1; /* Same first register as in fastcall. */ |
| } |
| else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0) |
| { |
| cum->nregs = 2; |
| cum->fastcall = 1; |
| } |
| else |
| cum->nregs = ix86_function_regparm (fntype, fndecl); |
| } |
| |
| /* Set up the number of SSE registers used for passing SFmode |
| and DFmode arguments. Warn for mismatching ABI. */ |
| cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); |
| } |
| |
| cfun->machine->arg_reg_available = (cum->nregs > 0); |
| } |
| |
| /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. |
| But in the case of vector types, it is some vector mode. |
| |
| When we have only some of our vector ISA extensions enabled, there
| are some modes for which vector_mode_supported_p is false.  For these
| modes, the generic vector support in GCC will choose some non-vector mode
| in order to implement the type. By computing the natural mode, we'll |
| select the proper ABI location for the operand and not depend on whatever |
| the middle-end decides to do with these vector types. |
| |
| The middle-end can't deal with vector types > 16 bytes.  In this
| case, we return the original mode and warn about the ABI change if
| CUM isn't NULL.
| 
| If IN_RETURN is true, warn about the ABI change if the vector mode
| isn't available for the function return value.  */
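| 
| /* An illustrative sketch (not normative psABI text): given
| 
|      typedef float v8sf __attribute__ ((vector_size (32)));
| 
| compiled without -mavx, no 32-byte vector mode is enabled, so the
| middle-end implements the type in some non-vector mode.  The function
| below still finds V8SFmode, notices !TARGET_AVX, emits the -Wpsabi
| warning and returns TYPE_MODE so the ABI choice stays explicit.  */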
| |
| static machine_mode |
| type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum, |
| bool in_return) |
| { |
| machine_mode mode = TYPE_MODE (type); |
| |
| if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) |
| { |
| HOST_WIDE_INT size = int_size_in_bytes (type); |
| if ((size == 8 || size == 16 || size == 32 || size == 64) |
| /* ??? Generic code allows us to create width 1 vectors. Ignore. */ |
| && TYPE_VECTOR_SUBPARTS (type) > 1) |
| { |
| machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); |
| |
| /* There are no XFmode vector modes. */ |
| if (innermode == XFmode) |
| return mode; |
| |
| if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) |
| mode = MIN_MODE_VECTOR_FLOAT; |
| else |
| mode = MIN_MODE_VECTOR_INT; |
| |
| /* Get the mode which has this inner mode and number of units. */ |
| FOR_EACH_MODE_FROM (mode, mode) |
| if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) |
| && GET_MODE_INNER (mode) == innermode) |
| { |
| if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU) |
| { |
| static bool warnedavx512f; |
| static bool warnedavx512f_ret; |
| |
| if (cum && cum->warn_avx512f && !warnedavx512f) |
| { |
| if (warning (OPT_Wpsabi, "AVX512F vector argument " |
| "without AVX512F enabled changes the ABI")) |
| warnedavx512f = true; |
| } |
| else if (in_return && !warnedavx512f_ret) |
| { |
| if (warning (OPT_Wpsabi, "AVX512F vector return " |
| "without AVX512F enabled changes the ABI")) |
| warnedavx512f_ret = true; |
| } |
| |
| return TYPE_MODE (type); |
| } |
| else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU) |
| { |
| static bool warnedavx; |
| static bool warnedavx_ret; |
| |
| if (cum && cum->warn_avx && !warnedavx) |
| { |
| if (warning (OPT_Wpsabi, "AVX vector argument " |
| "without AVX enabled changes the ABI")) |
| warnedavx = true; |
| } |
| else if (in_return && !warnedavx_ret) |
| { |
| if (warning (OPT_Wpsabi, "AVX vector return " |
| "without AVX enabled changes the ABI")) |
| warnedavx_ret = true; |
| } |
| |
| return TYPE_MODE (type); |
| } |
| else if (((size == 8 && TARGET_64BIT) || size == 16) |
| && !TARGET_SSE |
| && !TARGET_IAMCU) |
| { |
| static bool warnedsse; |
| static bool warnedsse_ret; |
| |
| if (cum && cum->warn_sse && !warnedsse) |
| { |
| if (warning (OPT_Wpsabi, "SSE vector argument " |
| "without SSE enabled changes the ABI")) |
| warnedsse = true; |
| } |
| else if (!TARGET_64BIT && in_return && !warnedsse_ret) |
| { |
| if (warning (OPT_Wpsabi, "SSE vector return " |
| "without SSE enabled changes the ABI")) |
| warnedsse_ret = true; |
| } |
| } |
| else if ((size == 8 && !TARGET_64BIT) |
| && (!cfun |
| || cfun->machine->func_type == TYPE_NORMAL) |
| && !TARGET_MMX |
| && !TARGET_IAMCU) |
| { |
| static bool warnedmmx; |
| static bool warnedmmx_ret; |
| |
| if (cum && cum->warn_mmx && !warnedmmx) |
| { |
| if (warning (OPT_Wpsabi, "MMX vector argument " |
| "without MMX enabled changes the ABI")) |
| warnedmmx = true; |
| } |
| else if (in_return && !warnedmmx_ret) |
| { |
| if (warning (OPT_Wpsabi, "MMX vector return " |
| "without MMX enabled changes the ABI")) |
| warnedmmx_ret = true; |
| } |
| } |
| return mode; |
| } |
| |
| gcc_unreachable (); |
| } |
| } |
| |
| return mode; |
| } |
| |
| /* We want to pass a value in REGNO whose "natural" mode is MODE. However, |
| this may not agree with the mode that the type system has chosen for the |
| register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can |
| go ahead and use it. Otherwise we have to build a PARALLEL instead. */ |
| |
| static rtx |
| gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode, |
| unsigned int regno) |
| { |
| rtx tmp; |
| |
| if (orig_mode != BLKmode) |
| tmp = gen_rtx_REG (orig_mode, regno); |
| else |
| { |
| tmp = gen_rtx_REG (mode, regno); |
| tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); |
| tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); |
| } |
| |
| return tmp; |
| } |
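| 
| /* E.g. (a sketch): for a BLKmode ORIG_MODE whose natural MODE is
| V4SFmode in XMM0, the result is
| (parallel [(expr_list (reg:V4SF xmm0) (const_int 0))]),
| i.e. the whole value lives in xmm0 at byte offset 0.  */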
| |
| /* x86-64 register passing implementation.  See the x86-64 ABI for
| details.  The goal of this code is to classify each 8-byte chunk of an
| incoming argument by register class and assign registers accordingly.  */
| |
| /* Return the union class of CLASS1 and CLASS2. |
| See the x86-64 PS ABI for details. */ |
| |
| static enum x86_64_reg_class |
| merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) |
| { |
| /* Rule #1: If both classes are equal, this is the resulting class. */ |
| if (class1 == class2) |
| return class1; |
| |
| /* Rule #2: If one of the classes is NO_CLASS, the resulting class is |
| the other class. */ |
| if (class1 == X86_64_NO_CLASS) |
| return class2; |
| if (class2 == X86_64_NO_CLASS) |
| return class1; |
| |
| /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ |
| if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) |
| return X86_64_MEMORY_CLASS; |
| |
| /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ |
| if ((class1 == X86_64_INTEGERSI_CLASS |
| && (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS)) |
| || (class2 == X86_64_INTEGERSI_CLASS |
| && (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS))) |
| return X86_64_INTEGERSI_CLASS; |
| if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS |
| || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) |
| return X86_64_INTEGER_CLASS; |
| |
| /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, |
| MEMORY is used. */ |
| if (class1 == X86_64_X87_CLASS |
| || class1 == X86_64_X87UP_CLASS |
| || class1 == X86_64_COMPLEX_X87_CLASS |
| || class2 == X86_64_X87_CLASS |
| || class2 == X86_64_X87UP_CLASS |
| || class2 == X86_64_COMPLEX_X87_CLASS) |
| return X86_64_MEMORY_CLASS; |
| |
| /* Rule #6: Otherwise class SSE is used. */ |
| return X86_64_SSE_CLASS; |
| } |
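| 
| /* A worked example (a sketch; the psABI is normative): for
| 
|      struct s { int i; float f; };
| 
| both fields share one eightbyte.  The int classifies as INTEGERSI and
| the float at bit offset 32 as SSE; rule #4 merges them to INTEGER, so
| the struct is passed in a single integer register.  */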
| |
| /* Classify the argument of type TYPE and mode MODE. |
| CLASSES will be filled by the register class used to pass each word |
| of the operand. The number of words is returned. In case the parameter |
| should be passed in memory, 0 is returned.  As a special case for
| zero-sized containers, classes[0] will be NO_CLASS and 1 is returned.
| 
| BIT_OFFSET is used internally for handling records and specifies the
| offset in bits modulo 512 to avoid overflow cases.
| |
| See the x86-64 PS ABI for details. |
| */ |
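| 
| /* For instance (an illustrative sketch): struct { double d; long l; }
| classifies as { SSEDF, INTEGER } and returns 2; a struct containing a
| long double classifies as { X87, X87UP } and returns 2; a variable-sized
| or over-large aggregate returns 0 and is passed in memory.  */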
| |
| static int |
| classify_argument (machine_mode mode, const_tree type, |
| enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) |
| { |
| HOST_WIDE_INT bytes |
| = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD); |
| |
| /* Variable-sized entities are always passed/returned in memory.  */
| if (bytes < 0) |
| return 0; |
| |
| if (mode != VOIDmode) |
| { |
| /* The value of "named" doesn't matter. */ |
| function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true); |
| if (targetm.calls.must_pass_in_stack (arg)) |
| return 0; |
| } |
| |
| if (type && AGGREGATE_TYPE_P (type)) |
| { |
| int i; |
| tree field; |
| enum x86_64_reg_class subclasses[MAX_CLASSES]; |
| |
| /* On x86-64 we pass structures larger than 64 bytes on the stack. */ |
| if (bytes > 64) |
| return 0; |
| |
| for (i = 0; i < words; i++) |
| classes[i] = X86_64_NO_CLASS; |
| |
| /* Zero-sized arrays or structures are NO_CLASS.  We return 0 to
| signal the memory class, so handle them as a special case.  */
| if (!words) |
| { |
| classes[0] = X86_64_NO_CLASS; |
| return 1; |
| } |
| |
| /* Classify each field of record and merge classes. */ |
| switch (TREE_CODE (type)) |
| { |
| case RECORD_TYPE: |
| /* And now merge the fields of structure. */ |
| for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) == FIELD_DECL) |
| { |
| int num; |
| |
| if (TREE_TYPE (field) == error_mark_node) |
| continue; |
| |
| /* Bitfields are always classified as integer. Handle them |
| early, since later code would consider them to be |
| misaligned integers. */ |
| if (DECL_BIT_FIELD (field)) |
| { |
| for (i = (int_bit_position (field) |
| + (bit_offset % 64)) / 8 / 8; |
| i < ((int_bit_position (field) + (bit_offset % 64)) |
| + tree_to_shwi (DECL_SIZE (field)) |
| + 63) / 8 / 8; i++) |
| classes[i] |
| = merge_classes (X86_64_INTEGER_CLASS, classes[i]); |
| } |
| else |
| { |
| int pos; |
| |
| type = TREE_TYPE (field); |
| |
| /* Flexible array member is ignored. */ |
| if (TYPE_MODE (type) == BLKmode |
| && TREE_CODE (type) == ARRAY_TYPE |
| && TYPE_SIZE (type) == NULL_TREE |
| && TYPE_DOMAIN (type) != NULL_TREE |
| && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) |
| == NULL_TREE)) |
| { |
| static bool warned; |
| |
| if (!warned && warn_psabi) |
| { |
| warned = true; |
| inform (input_location, |
| "the ABI of passing struct with" |
| " a flexible array member has" |
| " changed in GCC 4.4"); |
| } |
| continue; |
| } |
| num = classify_argument (TYPE_MODE (type), type, |
| subclasses, |
| (int_bit_position (field) |
| + bit_offset) % 512); |
| if (!num) |
| return 0; |
| pos = (int_bit_position (field) |
| + (bit_offset % 64)) / 8 / 8; |
| for (i = 0; i < num && (i + pos) < words; i++) |
| classes[i + pos] |
| = merge_classes (subclasses[i], classes[i + pos]); |
| } |
| } |
| } |
| break; |
| |
| case ARRAY_TYPE: |
| /* Arrays are handled as small records. */ |
| { |
| int num; |
| num = classify_argument (TYPE_MODE (TREE_TYPE (type)), |
| TREE_TYPE (type), subclasses, bit_offset); |
| if (!num) |
| return 0; |
| |
| /* The partial classes are now full classes. */ |
| if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) |
| subclasses[0] = X86_64_SSE_CLASS; |
| if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2) |
| subclasses[0] = X86_64_SSE_CLASS; |
| if (subclasses[0] == X86_64_INTEGERSI_CLASS |
| && !((bit_offset % 64) == 0 && bytes == 4)) |
| subclasses[0] = X86_64_INTEGER_CLASS; |
| |
| for (i = 0; i < words; i++) |
| classes[i] = subclasses[i % num]; |
| |
| break; |
| } |
| case UNION_TYPE: |
| case QUAL_UNION_TYPE: |
| /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
| for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
| { |
| if (TREE_CODE (field) == FIELD_DECL) |
| { |
| int num; |
| |
| if (TREE_TYPE (field) == error_mark_node) |
| continue; |
| |
| num = classify_argument (TYPE_MODE (TREE_TYPE (field)), |
| TREE_TYPE (field), subclasses, |
| bit_offset); |
| if (!num) |
| return 0; |
| for (i = 0; i < num && i < words; i++) |
| classes[i] = merge_classes (subclasses[i], classes[i]); |
| } |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (words > 2) |
| { |
| /* When the size exceeds 16 bytes, everything should be passed in
| memory unless the first class is X86_64_SSE_CLASS and all the
| remaining classes are X86_64_SSEUP_CLASS.  */
| if (classes[0] != X86_64_SSE_CLASS) |
| return 0; |
| |
| for (i = 1; i < words; i++) |
| if (classes[i] != X86_64_SSEUP_CLASS) |
| return 0; |
| } |
| |
| /* Final merger cleanup. */ |
| for (i = 0; i < words; i++) |
| { |
| /* If one class is MEMORY, everything should be passed in |
| memory. */ |
| if (classes[i] == X86_64_MEMORY_CLASS) |
| return 0; |
| |
| /* X86_64_SSEUP_CLASS should always be preceded by
| X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
| if (classes[i] == X86_64_SSEUP_CLASS |
| && classes[i - 1] != X86_64_SSE_CLASS |
| && classes[i - 1] != X86_64_SSEUP_CLASS) |
| { |
| /* The first one should never be X86_64_SSEUP_CLASS. */ |
| gcc_assert (i != 0); |
| classes[i] = X86_64_SSE_CLASS; |
| } |
| |
| /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, |
| everything should be passed in memory. */ |
| if (classes[i] == X86_64_X87UP_CLASS |
| && (classes[i - 1] != X86_64_X87_CLASS)) |
| { |
| static bool warned; |
| |
| /* The first one should never be X86_64_X87UP_CLASS. */ |
| gcc_assert (i != 0); |
| if (!warned && warn_psabi) |
| { |
| warned = true; |
| inform (input_location, |
| "the ABI of passing union with %<long double%>" |
| " has changed in GCC 4.4"); |
| } |
| return 0; |
| } |
| } |
| return words; |
| } |
| |
| /* Compute the alignment needed.  We align all types to their natural
| boundaries, with the exception of XFmode, which is aligned to 64 bits.  */
| if (mode != VOIDmode && mode != BLKmode) |
| { |
| int mode_alignment = GET_MODE_BITSIZE (mode); |
| |
| if (mode == XFmode) |
| mode_alignment = 128; |
| else if (mode == XCmode) |
| mode_alignment = 256; |
| if (COMPLEX_MODE_P (mode)) |
| mode_alignment /= 2; |
| /* Misaligned fields are always returned in memory. */ |
| if (bit_offset % mode_alignment) |
| return 0; |
| } |
| |
| /* For V1xx modes, just use the base mode.  */
| if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode |
| && GET_MODE_UNIT_SIZE (mode) == bytes) |
| mode = GET_MODE_INNER (mode); |
| |
| /* Classification of atomic types. */ |
| switch (mode) |
| { |
| case E_SDmode: |
| case E_DDmode: |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case E_TDmode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| return 2; |
| case E_DImode: |
| case E_SImode: |
| case E_HImode: |
| case E_QImode: |
| case E_CSImode: |
| case E_CHImode: |
| case E_CQImode: |
| { |
| int size = bit_offset + (int) GET_MODE_BITSIZE (mode); |
| |
| /* Analyze last 128 bits only. */ |
| size = (size - 1) & 0x7f; |
| |
| if (size < 32) |
| { |
| classes[0] = X86_64_INTEGERSI_CLASS; |
| return 1; |
| } |
| else if (size < 64) |
| { |
| classes[0] = X86_64_INTEGER_CLASS; |
| return 1; |
| } |
| else if (size < 64+32) |
| { |
| classes[0] = X86_64_INTEGER_CLASS; |
| classes[1] = X86_64_INTEGERSI_CLASS; |
| return 2; |
| } |
| else if (size < 64+64) |
| { |
| classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
| return 2; |
| } |
| else |
| gcc_unreachable (); |
| } |
| case E_CDImode: |
| case E_TImode: |
| classes[0] = classes[1] = X86_64_INTEGER_CLASS; |
| return 2; |
| case E_COImode: |
| case E_OImode: |
| /* OImode shouldn't be used directly. */ |
| gcc_unreachable (); |
| case E_CTImode: |
| return 0; |
| case E_HFmode: |
| if (!(bit_offset % 64)) |
| classes[0] = X86_64_SSEHF_CLASS; |
| else |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case E_SFmode: |
| if (!(bit_offset % 64)) |
| classes[0] = X86_64_SSESF_CLASS; |
| else |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case E_DFmode: |
| classes[0] = X86_64_SSEDF_CLASS; |
| return 1; |
| case E_XFmode: |
| classes[0] = X86_64_X87_CLASS; |
| classes[1] = X86_64_X87UP_CLASS; |
| return 2; |
| case E_TFmode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| return 2; |
| case E_HCmode: |
| classes[0] = X86_64_SSE_CLASS; |
| if (!(bit_offset % 64)) |
| return 1; |
| else |
| { |
| classes[1] = X86_64_SSEHF_CLASS; |
| return 2; |
| } |
| case E_SCmode: |
| classes[0] = X86_64_SSE_CLASS; |
| if (!(bit_offset % 64)) |
| return 1; |
| else |
| { |
| static bool warned; |
| |
| if (!warned && warn_psabi) |
| { |
| warned = true; |
| inform (input_location, |
| "the ABI of passing structure with %<complex float%>" |
| " member has changed in GCC 4.4"); |
| } |
| classes[1] = X86_64_SSESF_CLASS; |
| return 2; |
| } |
| case E_DCmode: |
| classes[0] = X86_64_SSEDF_CLASS; |
| classes[1] = X86_64_SSEDF_CLASS; |
| return 2; |
| case E_XCmode: |
| classes[0] = X86_64_COMPLEX_X87_CLASS; |
| return 1; |
| case E_TCmode: |
| /* This mode is larger than 16 bytes.  */
| return 0; |
| case E_V8SFmode: |
| case E_V8SImode: |
| case E_V32QImode: |
| case E_V16HFmode: |
| case E_V16HImode: |
| case E_V4DFmode: |
| case E_V4DImode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| classes[2] = X86_64_SSEUP_CLASS; |
| classes[3] = X86_64_SSEUP_CLASS; |
| return 4; |
| case E_V8DFmode: |
| case E_V16SFmode: |
| case E_V32HFmode: |
| case E_V8DImode: |
| case E_V16SImode: |
| case E_V32HImode: |
| case E_V64QImode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| classes[2] = X86_64_SSEUP_CLASS; |
| classes[3] = X86_64_SSEUP_CLASS; |
| classes[4] = X86_64_SSEUP_CLASS; |
| classes[5] = X86_64_SSEUP_CLASS; |
| classes[6] = X86_64_SSEUP_CLASS; |
| classes[7] = X86_64_SSEUP_CLASS; |
| return 8; |
| case E_V4SFmode: |
| case E_V4SImode: |
| case E_V16QImode: |
| case E_V8HImode: |
| case E_V8HFmode: |
| case E_V2DFmode: |
| case E_V2DImode: |
| classes[0] = X86_64_SSE_CLASS; |
| classes[1] = X86_64_SSEUP_CLASS; |
| return 2; |
| case E_V1TImode: |
| case E_V1DImode: |
| case E_V2SFmode: |
| case E_V2SImode: |
| case E_V4HImode: |
| case E_V4HFmode: |
| case E_V2HFmode: |
| case E_V8QImode: |
| classes[0] = X86_64_SSE_CLASS; |
| return 1; |
| case E_BLKmode: |
| case E_VOIDmode: |
| return 0; |
| default: |
| gcc_assert (VECTOR_MODE_P (mode)); |
| |
| if (bytes > 16) |
| return 0; |
| |
| gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); |
| |
| if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) |
| classes[0] = X86_64_INTEGERSI_CLASS; |
| else |
| classes[0] = X86_64_INTEGER_CLASS; |
| classes[1] = X86_64_INTEGER_CLASS; |
| return 1 + (bytes > 8); |
| } |
| } |
| |
| /* Examine the argument and set the number of registers required in each
| class.  Return true iff the parameter should be passed in memory.  */
| |
| static bool |
| examine_argument (machine_mode mode, const_tree type, int in_return, |
| int *int_nregs, int *sse_nregs) |
| { |
| enum x86_64_reg_class regclass[MAX_CLASSES]; |
| int n = classify_argument (mode, type, regclass, 0); |
| |
| *int_nregs = 0; |
| *sse_nregs = 0; |
| |
| if (!n) |
| return true; |
| for (n--; n >= 0; n--) |
| switch (regclass[n]) |
| { |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| (*int_nregs)++; |
| break; |
| case X86_64_SSE_CLASS: |
| case X86_64_SSEHF_CLASS: |
| case X86_64_SSESF_CLASS: |
| case X86_64_SSEDF_CLASS: |
| (*sse_nregs)++; |
| break; |
| case X86_64_NO_CLASS: |
| case X86_64_SSEUP_CLASS: |
| break; |
| case X86_64_X87_CLASS: |
| case X86_64_X87UP_CLASS: |
| case X86_64_COMPLEX_X87_CLASS: |
| if (!in_return) |
| return true; |
| break; |
| case X86_64_MEMORY_CLASS: |
| gcc_unreachable (); |
| } |
| |
| return false; |
| } |
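| 
| /* E.g. (a sketch): for struct { double d; long l; } this reports
| *int_nregs = 1 and *sse_nregs = 1 and returns false; for an argument
| classified X87 it returns true unless IN_RETURN is set, since the x87
| classes are only valid for return values.  */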
| |
| /* Construct a container for the argument used by the GCC interface.  See
| FUNCTION_ARG for the detailed description.  */
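| 
| /* For instance (a sketch): a 16-byte struct { double d; long l; }
| classified { SSEDF, INTEGER } yields
| (parallel [(expr_list (reg:DF xmm0) (const_int 0))
|            (expr_list (reg:DI rdi) (const_int 8))]),
| i.e. the double travels in XMM0 and the long in RDI.  */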
| |
| static rtx |
| construct_container (machine_mode mode, machine_mode orig_mode, |
| const_tree type, int in_return, int nintregs, int nsseregs, |
| const int *intreg, int sse_regno) |
| { |
| /* These static variables record which errors have already been issued.  */
| static bool issued_sse_arg_error; |
| static bool issued_sse_ret_error; |
| static bool issued_x87_ret_error; |
| |
| machine_mode tmpmode; |
| int bytes |
| = mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); |
| enum x86_64_reg_class regclass[MAX_CLASSES]; |
| int n; |
| int i; |
| int nexps = 0; |
| int needed_sseregs, needed_intregs; |
| rtx exp[MAX_CLASSES]; |
| rtx ret; |
| |
| n = classify_argument (mode, type, regclass, 0); |
| if (!n) |
| return NULL; |
| if (examine_argument (mode, type, in_return, &needed_intregs, |
| &needed_sseregs)) |
| return NULL; |
| if (needed_intregs > nintregs || needed_sseregs > nsseregs) |
| return NULL; |
| |
| /* We allowed the user to turn off SSE for kernel mode. Don't crash if |
| some less clueful developer tries to use floating-point anyway. */ |
| if (needed_sseregs && !TARGET_SSE) |
| { |
| /* Return early if we shouldn't raise an error for invalid |
| calls. */ |
| if (cfun != NULL && cfun->machine->silent_p) |
| return NULL; |
| if (in_return) |
| { |
| if (!issued_sse_ret_error) |
| { |
| error ("SSE register return with SSE disabled"); |
| issued_sse_ret_error = true; |
| } |
| } |
| else if (!issued_sse_arg_error) |
| { |
| error ("SSE register argument with SSE disabled"); |
| issued_sse_arg_error = true; |
| } |
| return NULL; |
| } |
| |
| /* Likewise, error if the ABI requires us to return values in the |
| x87 registers and the user specified -mno-80387. */ |
| if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return) |
| for (i = 0; i < n; i++) |
| if (regclass[i] == X86_64_X87_CLASS |
| || regclass[i] == X86_64_X87UP_CLASS |
| || regclass[i] == X86_64_COMPLEX_X87_CLASS) |
| { |
| /* Return early if we shouldn't raise an error for invalid |
| calls. */ |
| if (cfun != NULL && cfun->machine->silent_p) |
| return NULL; |
| if (!issued_x87_ret_error) |
| { |
| error ("x87 register return with x87 disabled"); |
| issued_x87_ret_error = true; |
| } |
| return NULL; |
| } |
| |
| /* First construct the simple cases.  Avoid SCmode, since we want to
| use a single register to pass this type.  */
| if (n == 1 && mode != SCmode && mode != HCmode) |
| switch (regclass[0]) |
| { |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| return gen_rtx_REG (mode, intreg[0]); |
| case X86_64_SSE_CLASS: |
| case X86_64_SSEHF_CLASS: |
| case X86_64_SSESF_CLASS: |
| case X86_64_SSEDF_CLASS: |
| if (mode != BLKmode) |
| return gen_reg_or_parallel (mode, orig_mode, |
| GET_SSE_REGNO (sse_regno)); |
| break; |
| case X86_64_X87_CLASS: |
| case X86_64_COMPLEX_X87_CLASS: |
| return gen_rtx_REG (mode, FIRST_STACK_REG); |
| case X86_64_NO_CLASS: |
| /* Zero-sized array, struct or class.  */
| return NULL; |
| default: |
| gcc_unreachable (); |
| } |
| if (n == 2 |
| && regclass[0] == X86_64_SSE_CLASS |
| && regclass[1] == X86_64_SSEUP_CLASS |
| && mode != BLKmode) |
| return gen_reg_or_parallel (mode, orig_mode, |
| GET_SSE_REGNO (sse_regno)); |
| if (n == 4 |
| && regclass[0] == X86_64_SSE_CLASS |
| && regclass[1] == X86_64_SSEUP_CLASS |
| && regclass[2] == X86_64_SSEUP_CLASS |
| && regclass[3] == X86_64_SSEUP_CLASS |
| && mode != BLKmode) |
| return gen_reg_or_parallel (mode, orig_mode, |
| GET_SSE_REGNO (sse_regno)); |
| if (n == 8 |
| && regclass[0] == X86_64_SSE_CLASS |
| && regclass[1] == X86_64_SSEUP_CLASS |
| && regclass[2] == X86_64_SSEUP_CLASS |
| && regclass[3] == X86_64_SSEUP_CLASS |
| && regclass[4] == X86_64_SSEUP_CLASS |
| && regclass[5] == X86_64_SSEUP_CLASS |
| && regclass[6] == X86_64_SSEUP_CLASS |
| && regclass[7] == X86_64_SSEUP_CLASS |
| && mode != BLKmode) |
| return gen_reg_or_parallel (mode, orig_mode, |
| GET_SSE_REGNO (sse_regno)); |
| if (n == 2 |
| && regclass[0] == X86_64_X87_CLASS |
| && regclass[1] == X86_64_X87UP_CLASS) |
| return gen_rtx_REG (XFmode, FIRST_STACK_REG); |
| |
| if (n == 2 |
| && regclass[0] == X86_64_INTEGER_CLASS |
| && regclass[1] == X86_64_INTEGER_CLASS |
| && (mode == CDImode || mode == TImode || mode == BLKmode) |
| && intreg[0] + 1 == intreg[1]) |
| { |
| if (mode == BLKmode) |
| { |
| /* Use TImode for BLKmode values in 2 integer registers. */ |
| exp[0] = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (TImode, intreg[0]), |
| GEN_INT (0)); |
| ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1)); |
| XVECEXP (ret, 0, 0) = exp[0]; |
| return ret; |
| } |
| else |
| return gen_rtx_REG (mode, intreg[0]); |
| } |
| |
| /* Otherwise figure out the entries of the PARALLEL. */ |
| for (i = 0; i < n; i++) |
| { |
| int pos; |
| |
| switch (regclass[i]) |
| { |
| case X86_64_NO_CLASS: |
| break; |
| case X86_64_INTEGER_CLASS: |
| case X86_64_INTEGERSI_CLASS: |
| /* Merge TImodes on aligned occasions here too. */ |
| if (i * 8 + 8 > bytes) |
| { |
| unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT; |
| if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode)) |
| /* We've requested a size (e.g. 24 bits) for
| which no integer mode exists.  Use DImode.  */
| tmpmode = DImode; |
| } |
| else if (regclass[i] == X86_64_INTEGERSI_CLASS) |
| tmpmode = SImode; |
| else |
| tmpmode = DImode; |
| exp [nexps++] |
| = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (tmpmode, *intreg), |
| GEN_INT (i*8)); |
| intreg++; |
| break; |
| case X86_64_SSEHF_CLASS: |
| exp [nexps++] |
| = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (HFmode, |
| GET_SSE_REGNO (sse_regno)), |
| GEN_INT (i*8)); |
| sse_regno++; |
| break; |
| case X86_64_SSESF_CLASS: |
| exp [nexps++] |
| = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (SFmode, |
| GET_SSE_REGNO (sse_regno)), |
| GEN_INT (i*8)); |
| sse_regno++; |
| break; |
| case X86_64_SSEDF_CLASS: |
| exp [nexps++] |
| = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (DFmode, |
| GET_SSE_REGNO (sse_regno)), |
| GEN_INT (i*8)); |
| sse_regno++; |
| break; |
| case X86_64_SSE_CLASS: |
| pos = i; |
| switch (n) |
| { |
| case 1: |
| tmpmode = DImode; |
| break; |
| case 2: |
| if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) |
| { |
| tmpmode = TImode; |
| i++; |
| } |
| else |
| tmpmode = DImode; |
| break; |
| case 4: |
| gcc_assert (i == 0 |
| && regclass[1] == X86_64_SSEUP_CLASS |
| && regclass[2] == X86_64_SSEUP_CLASS |
| && regclass[3] == X86_64_SSEUP_CLASS); |
| tmpmode = OImode; |
| i += 3; |
| break; |
| case 8: |
| gcc_assert (i == 0 |
| && regclass[1] == X86_64_SSEUP_CLASS |
| && regclass[2] == X86_64_SSEUP_CLASS |
| && regclass[3] == X86_64_SSEUP_CLASS |
| && regclass[4] == X86_64_SSEUP_CLASS |
| && regclass[5] == X86_64_SSEUP_CLASS |
| && regclass[6] == X86_64_SSEUP_CLASS |
| && regclass[7] == X86_64_SSEUP_CLASS); |
| tmpmode = XImode; |
| i += 7; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| exp [nexps++] |
| = gen_rtx_EXPR_LIST (VOIDmode, |
| gen_rtx_REG (tmpmode, |
| GET_SSE_REGNO (sse_regno)), |
| GEN_INT (pos*8)); |
| sse_regno++; |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* Empty aligned struct, union or class. */ |
| if (nexps == 0) |
| return NULL; |
| |
| ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); |
| for (i = 0; i < nexps; i++) |
| XVECEXP (ret, 0, i) = exp [i]; |
| return ret; |
| } |
| |
| /* Update the data in CUM to advance over an argument of mode MODE |
| and data type TYPE. (TYPE is null for libcalls where that information |
| may not be available.) |
| |
| Return the number of integer registers advanced over.  */
| |
| static int |
| function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode, |
| const_tree type, HOST_WIDE_INT bytes, |
| HOST_WIDE_INT words) |
| { |
| int res = 0; |
| bool error_p = false; |
| |
| if (TARGET_IAMCU) |
| { |
| /* Intel MCU psABI passes scalars and aggregates no larger than 8 |
| bytes in registers. */ |
| if (!VECTOR_MODE_P (mode) && bytes <= 8) |
| goto pass_in_reg; |
| return res; |
| } |
| |
| switch (mode) |
| { |
| default: |
| break; |
| |
| case E_BLKmode: |
| if (bytes < 0) |
| break; |
| /* FALLTHRU */ |
| |
| case E_DImode: |
| case E_SImode: |
| case E_HImode: |
| case E_QImode: |
| pass_in_reg: |
| cum->words += words; |
| cum->nregs -= words; |
| cum->regno += words; |
| if (cum->nregs >= 0) |
| res = words; |
| if (cum->nregs <= 0) |
| { |
| cum->nregs = 0; |
| cfun->machine->arg_reg_available = false; |
| cum->regno = 0; |
| } |
| break; |
| |
| case E_OImode: |
| /* OImode shouldn't be used directly. */ |
| gcc_unreachable (); |
| |
| case E_DFmode: |
| if (cum->float_in_sse == -1) |
| error_p = true; |
| if (cum->float_in_sse < 2) |
| break; |
| /* FALLTHRU */ |
| case E_SFmode: |
| if (cum->float_in_sse == -1) |
| error_p = true; |
| if (cum->float_in_sse < 1) |
| break; |
| /* FALLTHRU */ |
| |
| case E_V16HFmode: |
| case E_V8SFmode: |
| case E_V8SImode: |
| case E_V64QImode: |
| case E_V32HImode: |
| case E_V16SImode: |
| case E_V8DImode: |
| case E_V32HFmode: |
| case E_V16SFmode: |
| case E_V8DFmode: |
| case E_V32QImode: |
| case E_V16HImode: |
| case E_V4DFmode: |
| case E_V4DImode: |
| case E_TImode: |
| case E_V16QImode: |
| case E_V8HImode: |
| case E_V4SImode: |
| case E_V2DImode: |
| case E_V8HFmode: |
| case E_V4SFmode: |
| case E_V2DFmode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| cum->sse_words += words; |
| cum->sse_nregs -= 1; |
| cum->sse_regno += 1; |
| if (cum->sse_nregs <= 0) |
| { |
| cum->sse_nregs = 0; |
| cum->sse_regno = 0; |
| } |
| } |
| break; |
| |
| case E_V8QImode: |
| case E_V4HImode: |
| case E_V4HFmode: |
| case E_V2SImode: |
| case E_V2SFmode: |
| case E_V1TImode: |
| case E_V1DImode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| cum->mmx_words += words; |
| cum->mmx_nregs -= 1; |
| cum->mmx_regno += 1; |
| if (cum->mmx_nregs <= 0) |
| { |
| cum->mmx_nregs = 0; |
| cum->mmx_regno = 0; |
| } |
| } |
| break; |
| } |
| if (error_p) |
| { |
| cum->float_in_sse = 0; |
| error ("calling %qD with SSE calling convention without " |
| "SSE/SSE2 enabled", cum->decl); |
| sorry ("this is a GCC bug that can be worked around by adding " |
| "attribute used to function called"); |
| } |
| |
| return res; |
| } |
| |
| static int |
| function_arg_advance_64 (CUMULATIVE_ARGS *cum, machine_mode mode, |
| const_tree type, HOST_WIDE_INT words, bool named) |
| { |
| int int_nregs, sse_nregs; |
| |
| /* Unnamed 512-bit and 256-bit vector mode parameters are passed on
| the stack.  */
| if (!named && (VALID_AVX512F_REG_MODE (mode) |
| || VALID_AVX256_REG_MODE (mode) |
| || mode == V16HFmode |
| || mode == V32HFmode)) |
| return 0; |
| |
| if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs) |
| && sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) |
| { |
| cum->nregs -= int_nregs; |
| cum->sse_nregs -= sse_nregs; |
| cum->regno += int_nregs; |
| cum->sse_regno += sse_nregs; |
| return int_nregs; |
| } |
| else |
| { |
| int align = ix86_function_arg_boundary (mode, type) / BITS_PER_WORD; |
| cum->words = ROUND_UP (cum->words, align); |
| cum->words += words; |
| return 0; |
| } |
| } |
| |
| static int |
| function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, |
| HOST_WIDE_INT words) |
| { |
| /* Arguments of any other size are passed indirectly, so only
| these sizes can reach here.  */
| gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); |
| |
| cum->words += words; |
| if (cum->nregs > 0) |
| { |
| cum->nregs -= 1; |
| cum->regno += 1; |
| return 1; |
| } |
| return 0; |
| } |
| |
| /* Update the data in CUM to advance over argument ARG. */ |
| |
| static void |
| ix86_function_arg_advance (cumulative_args_t cum_v, |
| const function_arg_info &arg) |
| { |
| CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); |
| machine_mode mode = arg.mode; |
| HOST_WIDE_INT bytes, words; |
| int nregs; |
| |
| /* The argument of an interrupt handler is a special case and is
| handled in ix86_function_arg.  */
| if (!cum->caller && cfun->machine->func_type != TYPE_NORMAL) |
| return; |
| |
| bytes = arg.promoted_size_in_bytes (); |
| words = CEIL (bytes, UNITS_PER_WORD); |
| |
| if (arg.type) |
| mode = type_natural_mode (arg.type, NULL, false); |
| |
| if (TARGET_64BIT) |
| { |
| enum calling_abi call_abi = cum ? cum->call_abi : ix86_abi; |
| |
| if (call_abi == MS_ABI) |
| nregs = function_arg_advance_ms_64 (cum, bytes, words); |
| else |
| nregs = function_arg_advance_64 (cum, mode, arg.type, words, |
| arg.named); |
| } |
| else |
| nregs = function_arg_advance_32 (cum, mode, arg.type, bytes, words); |
| |
| if (!nregs) |
| { |
| /* Track whether there are outgoing arguments on the stack.  */
| if (cum->caller) |
| cfun->machine->outgoing_args_on_stack = true; |
| } |
| } |
| |
| /* Define where to put the arguments to a function. |
| Value is zero to push the argument on the stack, |
| or a hard register in which to store the argument. |
| |
| MODE is the argument's machine mode. |
| TYPE is the data type of the argument (as a tree). |
| This is null for libcalls where that information may |
| not be available. |
| CUM is a variable of type CUMULATIVE_ARGS which gives info about |
| the preceding args and about the function being called. |
| NAMED is nonzero if this argument is a named parameter |
| (otherwise it is an extra parameter matching an ellipsis). */ |
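| 
| /* A sketch of the 32-bit conventions handled below: with -mregparm=3,
| the first three integer-sized arguments go in EAX, EDX and ECX; with
| the fastcall convention, the first two DWORD or smaller non-aggregate
| arguments go in ECX and EDX; everything else is pushed on the stack.  */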
| |
| static rtx |
| function_arg_32 (CUMULATIVE_ARGS *cum, machine_mode mode, |
| machine_mode orig_mode, const_tree type, |
| HOST_WIDE_INT bytes, HOST_WIDE_INT words) |
| { |
| bool error_p = false; |
| |
| /* Avoid the AL settings for the Unix64 ABI. */ |
| if (mode == VOIDmode) |
| return constm1_rtx; |
| |
| if (TARGET_IAMCU) |
| { |
| /* Intel MCU psABI passes scalars and aggregates no larger than 8 |
| bytes in registers. */ |
| if (!VECTOR_MODE_P (mode) && bytes <= 8) |
| goto pass_in_reg; |
| return NULL_RTX; |
| } |
| |
| switch (mode) |
| { |
| default: |
| break; |
| |
| case E_BLKmode: |
| if (bytes < 0) |
| break; |
| /* FALLTHRU */ |
| case E_DImode: |
| case E_SImode: |
| case E_HImode: |
| case E_QImode: |
| pass_in_reg: |
| if (words <= cum->nregs) |
| { |
| int regno = cum->regno; |
| |
| /* Fastcall allocates the first two DWORD (SImode) or
| smaller arguments to ECX and EDX, unless the argument
| is an aggregate type.  */
| if (cum->fastcall) |
| { |
| if (mode == BLKmode |
| || mode == DImode |
| || (type && AGGREGATE_TYPE_P (type))) |
| break; |
| |
| /* ECX, not EAX, is the first allocated register.  */
| if (regno == AX_REG) |
| regno = CX_REG; |
| } |
| return gen_rtx_REG (mode, regno); |
| } |
| break; |
| |
| case E_DFmode: |
| if (cum->float_in_sse == -1) |
| error_p = true; |
| if (cum->float_in_sse < 2) |
| break; |
| /* FALLTHRU */ |
| case E_SFmode: |
| if (cum->float_in_sse == -1) |
| error_p = true; |
| if (cum->float_in_sse < 1) |
| break; |
| /* FALLTHRU */ |
| case E_TImode: |
| /* In 32-bit mode, we pass TImode in XMM registers.  */
| case E_V16QImode: |
| case E_V8HImode: |
| case E_V4SImode: |
| case E_V2DImode: |
| case E_V8HFmode: |
| case E_V4SFmode: |
| case E_V2DFmode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| if (cum->sse_nregs) |
| return gen_reg_or_parallel (mode, orig_mode, |
| cum->sse_regno + FIRST_SSE_REG); |
| } |
| break; |
| |
| case E_OImode: |
| case E_XImode: |
| /* OImode and XImode shouldn't be used directly. */ |
| gcc_unreachable (); |
| |
| case E_V64QImode: |
| case E_V32HImode: |
| case E_V16SImode: |
| case E_V8DImode: |
| case E_V32HFmode: |
| case E_V16SFmode: |
| case E_V8DFmode: |
| case E_V16HFmode: |
| case E_V8SFmode: |
| case E_V8SImode: |
| case E_V32QImode: |
| case E_V16HImode: |
| case E_V4DFmode: |
| case E_V4DImode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| if (cum->sse_nregs) |
| return gen_reg_or_parallel (mode, orig_mode, |
| cum->sse_regno + FIRST_SSE_REG); |
| } |
| break; |
| |
| case E_V8QImode: |
| case E_V4HImode: |
| case E_V4HFmode: |
| case E_V2SImode: |
| case E_V2SFmode: |
| case E_V1TImode: |
| case E_V1DImode: |
| if (!type || !AGGREGATE_TYPE_P (type)) |
| { |
| if (cum->mmx_nregs) |
| return gen_reg_or_parallel (mode, orig_mode, |
| cum->mmx_regno + FIRST_MMX_REG); |
| } |
| break; |
| } |
| if (error_p) |
| { |
| cum->float_in_sse = 0; |
| error ("calling %qD with SSE calling convention without " |
| "SSE/SSE2 enabled", cum->decl); |
| sorry ("this is a GCC bug that can be worked around by adding " |
| "attribute used to function called"); |
| } |
| |
| return NULL_RTX; |
| } |
| |
| static rtx |
| function_arg_64 (const CUMULATIVE_ARGS *cum, machine_mode mode, |
| machine_mode orig_mode, const_tree type, bool named) |
| { |
| /* Handle a hidden AL argument containing the number of registers
| for varargs x86-64 functions.  */
| if (mode == VOIDmode) |
| return GEN_INT (cum->maybe_vaarg |
| ? (cum-> |