blob: 9cc903e826b4bf13589fd935b52c6d3a3dc33bbc [file] [log] [blame]
/* Subroutines used for code generation on IA-32.
Copyright (C) 1988-2021 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "cfghooks.h"
#include "cfgloop.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "cfgbuild.h"
#include "alias.h"
#include "fold-const.h"
#include "attribs.h"
#include "calls.h"
#include "stor-layout.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "gimplify.h"
#include "dwarf2.h"
#include "tm-constrs.h"
#include "cselib.h"
#include "sched-int.h"
#include "opts.h"
#include "tree-pass.h"
#include "context.h"
#include "pass_manager.h"
#include "target-globals.h"
#include "gimple-iterator.h"
#include "gimple-fold.h"
#include "tree-vectorizer.h"
#include "shrink-wrap.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "tree-iterator.h"
#include "dbgcnt.h"
#include "case-cfn-macros.h"
#include "dojump.h"
#include "fold-const-call.h"
#include "tree-vrp.h"
#include "tree-ssanames.h"
#include "selftest.h"
#include "selftest-rtl.h"
#include "print-rtl.h"
#include "intl.h"
#include "ifcvt.h"
#include "symbol-summary.h"
#include "ipa-prop.h"
#include "ipa-fnsummary.h"
#include "wide-int-bitmask.h"
#include "tree-vector-builder.h"
#include "debug.h"
#include "dwarf2out.h"
#include "i386-options.h"
#include "i386-builtins.h"
#include "i386-expand.h"
#include "i386-features.h"
#include "function-abi.h"
/* This file should be included last. */
#include "target-def.h"
/* Prototypes for file-local helpers whose definitions are not part of
this section of the file. */
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static void ix86_print_operand_address_as (FILE *, rtx, addr_space_t, bool);
static void ix86_emit_restore_reg_using_pop (rtx);
/* Default CHECK_STACK_LIMIT to -1 unless it has already been defined. */
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables:
QImode -> 0, HImode -> 1, SImode -> 2, DImode -> 3, any other mode -> 4. */
#define MODE_INDEX(mode) \
((mode) == QImode ? 0 \
: (mode) == HImode ? 1 \
: (mode) == SImode ? 2 \
: (mode) == DImode ? 3 \
: 4)
/* Cost table set by -mtune. Starts out as NULL. */
const struct processor_costs *ix86_tune_cost = NULL;
/* Cost table set by -mtune or -Os. Starts out as NULL. */
const struct processor_costs *ix86_cost = NULL;
/* In case the average insn count for single function invocation is
lower than this constant, emit fast (but longer) prologue and
epilogue code. */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for 8 (low), 8 (high), and 16-bit registers, respectively.
The contents come from the *_REGISTER_NAMES initializer macros. */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
REGNO. Used by REGNO_REG_CLASS in i386.h.
The entries are listed in hard register number order; the index range
covered by each group is given in the comment preceding it. */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
/* 0-3: ax, dx, cx, bx */
AREG, DREG, CREG, BREG,
/* 4-7: si, di, bp, sp */
SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* 8-15: FP registers */
FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
/* 16-19: arg pointer, flags, fpsr, frame */
NON_Q_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
/* 20-27: SSE registers */
SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS,
SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* 28-35: MMX registers */
MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
/* 36-43: REX registers */
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
/* 44-51: SSE REX registers */
SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
/* 52-67: AVX-512 SSE registers */
ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS, ALL_SSE_REGS,
/* 68-75: Mask registers. */
ALL_MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS,
MASK_REGS, MASK_REGS, MASK_REGS, MASK_REGS
};
/* The "default" register map used in 32bit mode.
Entries are listed in gcc hard register number order, one group per
register bank. The REX integer registers and SSE registers 8-31 get
INVALID_REGNUM in this map, and the arg pointer, flags, fpsr and frame
entries are IGNORED_DWARF_REGNUM. */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
/* general regs (gcc order: ax, dx, cx, bx, si, di, bp, sp) */
0, 2, 1, 3, 6, 7, 4, 5,
/* fp regs */
12, 13, 14, 15, 16, 17, 18, 19,
/* arg, flags, fpsr, frame */
IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
/* SSE */
21, 22, 23, 24, 25, 26, 27, 28,
/* MMX */
29, 30, 31, 32, 33, 34, 35, 36,
/* extended integer registers */
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
/* extended sse registers */
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
/* AVX-512 registers 16-23 */
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
/* AVX-512 registers 24-31 */
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
/* Mask registers */
93, 94, 95, 96, 97, 98, 99, 100
};
/* The "default" register map used in 64bit mode.
Entries are listed in gcc hard register number order, one group per
register bank. Unlike the 32bit map, every register bank except the
arg pointer, flags, fpsr and frame entries has a number here. */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
/* general regs (gcc order: ax, dx, cx, bx, si, di, bp, sp) */
0, 1, 2, 3, 4, 5, 6, 7,
/* fp regs */
33, 34, 35, 36, 37, 38, 39, 40,
/* arg, flags, fpsr, frame */
IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
/* SSE */
17, 18, 19, 20, 21, 22, 23, 24,
/* MMX */
41, 42, 43, 44, 45, 46, 47, 48,
/* extended integer registers */
8, 9, 10, 11, 12, 13, 14, 15,
/* extended SSE registers */
25, 26, 27, 28, 29, 30, 31, 32,
/* AVX-512 registers 16-23 */
67, 68, 69, 70, 71, 72, 73, 74,
/* AVX-512 registers 24-31 */
75, 76, 77, 78, 79, 80, 81, 82,
/* Mask registers */
118, 119, 120, 121, 122, 123, 124, 125
};
/* Define the register numbers to be used in Dwarf debugging information.
The SVR4 reference port C compiler uses the following register numbers
in its Dwarf output code:
0 for %eax (gcc regno = 0)
1 for %ecx (gcc regno = 2)
2 for %edx (gcc regno = 1)
3 for %ebx (gcc regno = 3)
4 for %esp (gcc regno = 7)
5 for %ebp (gcc regno = 6)
6 for %esi (gcc regno = 4)
7 for %edi (gcc regno = 5)
The following three DWARF register numbers are never generated by
the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
believed these numbers have these meanings.
8 for %eip (no gcc equivalent)
9 for %eflags (gcc regno = 17)
10 for %trapno (no gcc equivalent)
It is not at all clear how we should number the FP stack registers
for the x86 architecture. If the version of SDB on x86/svr4 were
a bit less brain dead with respect to floating-point then we would
have a precedent to follow with respect to DWARF register numbers
for x86 FP registers, but the SDB on x86/svr4 was so completely
broken with respect to FP registers that it is hardly worth thinking
of it as something to strive for compatibility with.
The version of x86/svr4 SDB I had does (partially)
seem to believe that DWARF register number 11 is associated with
the x86 register %st(0), but that's about all. Higher DWARF
register numbers don't seem to be associated with anything in
particular, and even for DWARF regno 11, SDB only seemed to under-
stand that it should say that a variable lives in %st(0) (when
asked via an `=' command) if we said it was in DWARF regno 11,
but SDB still printed garbage when asked for the value of the
variable in question (via a `/' command).
(Also note that the labels SDB printed for various FP stack regs
when doing an `x' command were all wrong.)
Note that these problems generally don't affect the native SVR4
C compiler because it doesn't allow the use of -O with -g and
because when it is *not* optimizing, it allocates a memory
location for each floating-point variable, and the memory
location is what gets described in the DWARF AT_location
attribute for the variable in question.
Regardless of the severe mental illness of the x86/svr4 SDB, we
do something sensible here and we use the following DWARF
register numbers. Note that these are all stack-top-relative
numbers.
11 for %st(0) (gcc regno = 8)
12 for %st(1) (gcc regno = 9)
13 for %st(2) (gcc regno = 10)
14 for %st(3) (gcc regno = 11)
15 for %st(4) (gcc regno = 12)
16 for %st(5) (gcc regno = 13)
17 for %st(6) (gcc regno = 14)
18 for %st(7) (gcc regno = 15)
*/
/* Register map implementing the SVR4 numbering described above.
Entries are listed in gcc hard register number order. */
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
/* general regs (gcc order: eax, edx, ecx, ebx, esi, edi, ebp, esp) */
0, 2, 1, 3, 6, 7, 5, 4,
/* fp regs: %st(0) .. %st(7) */
11, 12, 13, 14, 15, 16, 17, 18,
/* arg, flags, fpsr, frame; only the flags register (9) has a number */
IGNORED_DWARF_REGNUM, 9,
IGNORED_DWARF_REGNUM, IGNORED_DWARF_REGNUM,
/* SSE registers */
21, 22, 23, 24, 25, 26, 27, 28,
/* MMX registers */
29, 30, 31, 32, 33, 34, 35, 36,
/* extended integer registers */
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
/* extended sse registers */
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
/* AVX-512 registers 16-23 */
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
/* AVX-512 registers 24-31 */
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM, INVALID_REGNUM,
/* Mask registers */
93, 94, 95, 96, 97, 98, 99, 100
};
/* Define parameter passing and return registers. */
/* The six integer parameter registers used in 64-bit mode, in the order
in which they appear in this table. */
static int const x86_64_int_parameter_registers[6] =
{
DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
};
/* The four integer parameter registers of the MS ABI variant. */
static int const x86_64_ms_abi_int_parameter_registers[4] =
{
CX_REG, DX_REG, R8_REG, R9_REG
};
/* The four integer return registers used in 64-bit mode. */
static int const x86_64_int_return_registers[4] =
{
AX_REG, DX_REG, DI_REG, SI_REG
};
/* Define the structure for the machine field in struct function.
Each entry carries a mode, a number N, an rtx and a pointer to another
entry of the same type.
NOTE(review): presumably a singly linked list of stack slots keyed by
(mode, n) -- confirm against the users of this type. */
struct GTY(()) stack_local_entry {
unsigned short mode;
unsigned short n;
rtx rtl;
/* Link to another entry of the same type. */
struct stack_local_entry *next;
};
/* Which CPU we are scheduling for. */
enum attr_cpu ix86_schedule;
/* Which CPU we are optimizing for. */
enum processor_type ix86_tune;
/* Which instruction set architecture to use. */
enum processor_type ix86_arch;
/* True if the processor has an SSE prefetch instruction. */
unsigned char ix86_prefetch_sse;
/* Preferred alignment for stack boundary in bits. */
unsigned int ix86_preferred_stack_boundary;
/* Alignment for incoming stack boundary in bits specified at
command line. */
unsigned int ix86_user_incoming_stack_boundary;
/* Default alignment for incoming stack boundary in bits. */
unsigned int ix86_default_incoming_stack_boundary;
/* Alignment for incoming stack boundary in bits. */
unsigned int ix86_incoming_stack_boundary;
/* Calling-ABI-specific va_list type nodes: one for the SysV ABI and one
for the MS ABI. */
tree sysv_va_list_type_node;
tree ms_va_list_type_node;
/* Prefix built by ASM_GENERATE_INTERNAL_LABEL, and (presumably) the
length of that prefix. */
char internal_label_prefix[16];
int internal_label_prefix_len;
/* Fence to use after a loop using movnt. */
tree x86_mfence;
/* Register class used for passing a given 64-bit part of an argument.
These represent the classes documented by the PS ABI, with the
exception of the SSESF and SSEDF classes: those are basically the SSE
class, except that gcc uses an SFmode or DFmode move instead of a
DImode one to avoid reformatting penalties.
Similarly, INTEGERSI_CLASS exists so that cheaper SImode moves can be
used whenever possible (the upper half then contains only padding). */
enum x86_64_reg_class
{
X86_64_NO_CLASS,
X86_64_INTEGER_CLASS,
X86_64_INTEGERSI_CLASS,
X86_64_SSE_CLASS,
X86_64_SSEHF_CLASS,
X86_64_SSESF_CLASS,
X86_64_SSEDF_CLASS,
X86_64_SSEUP_CLASS,
X86_64_X87_CLASS,
X86_64_X87UP_CLASS,
X86_64_COMPLEX_X87_CLASS,
X86_64_MEMORY_CLASS
};
/* NOTE(review): presumably the maximum number of 64-bit parts (and hence
classes) tracked for a single argument -- confirm against the users. */
#define MAX_CLASSES 8
/* Table of constants used by fldpi, fldln2, etc.... It has five slots. */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
/* Flag paired with the table above. NOTE(review): presumably set once
the table has been filled in -- confirm. */
static bool ext_80387_constants_init;
/* Prototypes for file-local functions. Of these, only
ix86_can_inline_p is defined in this part of the file. */
static rtx ix86_function_value (const_tree, const_tree, bool);
static bool ix86_function_value_regno_p (const unsigned int);
static unsigned int ix86_function_arg_boundary (machine_mode,
const_tree);
static rtx ix86_static_chain (const_tree, bool);
static int ix86_function_regparm (const_tree, const_tree);
static void ix86_compute_frame_layout (void);
static tree ix86_canonical_va_list_type (tree);
static unsigned int split_stack_prologue_scratch_regno (void);
static bool i386_asm_output_addr_const_extra (FILE *, rtx);
static bool ix86_can_inline_p (tree, tree);
static unsigned int ix86_minimum_incoming_stack_boundary (bool);
/* Whether -mtune= or -march= were specified */
int ix86_tune_defaulted;
int ix86_arch_specified;
/* Tell whether the red zone may be used by the current function.

   The red zone cannot be used when the function contains local indirect
   jumps (such as "indirect_jump" or "tablejump") that go through an
   indirect thunk: the "call" inside the thunk pushes a return address
   onto the stack and thereby destroys the red zone.

   TODO: If the first 2 WORDs of the red zone could be reserved, one for
   PUSH and one for CALL, local indirect jumps through an indirect thunk
   could be allowed as well.  */

bool
ix86_using_red_zone (void)
{
  /* No red zone at all, or the 64-bit MS ABI is in effect.  */
  if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
    return false;

  /* Without local indirect jumps there is nothing to worry about.  */
  if (!cfun->machine->has_local_indirect_jump)
    return true;

  /* Local indirect jumps are only acceptable when the indirect branch
     type is indirect_branch_keep.  */
  return cfun->machine->indirect_branch_type == indirect_branch_keep;
}
/* Report whether profiling code is to be emitted ahead of the prologue.
   The answer is true exactly when flag_fentry is nonzero.
   Note: For x86 with "hotfix" it is sorried.  */

static bool
ix86_profile_before_prologue (void)
{
  const bool fentry_requested = (flag_fentry != 0);
  return fentry_requested;
}
/* Adjust the register tables once the compiler flags are known.

   This updates call_used_regs, rebuilds the CLOBBERED_REGS class and
   removes from accessible_reg_set every register bank that the selected
   target options do not provide.  */

static void
ix86_conditional_register_usage (void)
{
  /* With no caller-saved registers every register has to be preserved,
     apart from the fixed registers and the registers that carry a
     function return value: aggregate_value_p checks
     call_used_regs[regno] for the return value.  */
  if (cfun && cfun->machine->no_caller_saved_registers)
    {
      for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
	{
	  if (fixed_regs[regno] || ix86_function_value_regno_p (regno))
	    continue;
	  call_used_regs[regno] = 0;
	}
    }

  /* 32-bit targets have none of the REX registers.  */
  if (!TARGET_64BIT)
    {
      const int rex_ranges[3][2] =
	{
	  { FIRST_REX_INT_REG, LAST_REX_INT_REG },
	  { FIRST_REX_SSE_REG, LAST_REX_SSE_REG },
	  { FIRST_EXT_REX_SSE_REG, LAST_EXT_REX_SSE_REG }
	};

      for (int k = 0; k < 3; k++)
	for (int regno = rex_ranges[k][0]; regno <= rex_ranges[k][1]; regno++)
	  CLEAR_HARD_REG_BIT (accessible_reg_set, regno);
    }

  /* See the definition of CALL_USED_REGISTERS in i386.h.  */
  const int abi_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);

  CLEAR_HARD_REG_SET (reg_class_contents[(int) CLOBBERED_REGS]);

  for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    {
      /* Entries greater than 1 in the CALL_USED_REGISTERS initializer
	 are conditional: resolve them to 0 or 1 with the ABI mask.  */
      if (call_used_regs[regno] > 1)
	call_used_regs[regno] = ((call_used_regs[regno] & abi_mask) != 0);

      /* CLOBBERED_REGS is the set of call-used registers that belong
	 to GENERAL_REGS.  */
      if (call_used_regs[regno]
	  && TEST_HARD_REG_BIT (reg_class_contents[(int) GENERAL_REGS],
				regno))
	SET_HARD_REG_BIT (reg_class_contents[(int) CLOBBERED_REGS], regno);
    }

  /* No MMX: no MMX registers.  */
  if (!TARGET_MMX)
    accessible_reg_set &= ~reg_class_contents[MMX_REGS];

  /* No SSE: no SSE registers.  */
  if (!TARGET_SSE)
    accessible_reg_set &= ~reg_class_contents[ALL_SSE_REGS];

  /* No FPU: no x87 registers.  */
  if (!TARGET_80387 && !TARGET_FLOAT_RETURNS_IN_80387)
    accessible_reg_set &= ~reg_class_contents[FLOAT_REGS];

  /* No AVX512F: neither the extended SSE registers nor the mask
     registers.  */
  if (!TARGET_AVX512F)
    {
      for (int regno = FIRST_EXT_REX_SSE_REG;
	   regno <= LAST_EXT_REX_SSE_REG; regno++)
	CLEAR_HARD_REG_BIT (accessible_reg_set, regno);

      accessible_reg_set &= ~reg_class_contents[ALL_MASK_REGS];
    }
}
/* Canonicalize a comparison from one we don't have to one we do have.

   CODE, OP0 and OP1 are updated in place.  Nothing is changed when
   OP0_PRESERVE_VALUE is set.  */

static void
ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			      bool op0_preserve_value)
{
  if (op0_preserve_value)
    return;

  /* The order of operands in the x87 ficom compare is forced by combine
     in the simplify_comparison () function.  The float operator is
     treated as RTX_OBJ with a precedence over other operators and is
     always put in the first place.  Only that shape, a FLOAT of a
     memory operand compared against a register, is handled here.  */
  const bool ficom_shape
    = (GET_CODE (*op0) == FLOAT
       && MEM_P (XEXP (*op0, 0))
       && REG_P (*op1));
  if (!ficom_shape)
    return;

  const enum rtx_code swapped = swap_condition ((enum rtx_code) *code);

  /* We are called only for compares that are split to the SAHF
     instruction.  Swap only when a setcc/jcc insn exists for the
     swapped condition.  */
  if (ix86_fp_compare_code_to_integer (swapped) == UNKNOWN)
    return;

  /* Swap condition and operands to match the ficom instruction.  */
  std::swap (*op0, *op1);
  *code = (int) swapped;
}
/* Hook to determine if one function can safely inline another.

   CALLER and CALLEE are the two function declarations.  Returns true
   when inlining CALLEE into CALLER is permitted by their target
   options.  */

static bool
ix86_can_inline_p (tree caller, tree callee)
{
  tree caller_target = DECL_FUNCTION_SPECIFIC_TARGET (caller);
  tree callee_target = DECL_FUNCTION_SPECIFIC_TARGET (callee);

  /* Differences in these flags are tolerated for always_inline
     functions; the user is trusted to know what he is doing.  */
  unsigned HOST_WIDE_INT tolerated_flags
    = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
       | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
       | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
       | MASK_NO_FANCY_MATH_387 | MASK_IEEE_FP | MASK_INLINE_ALL_STRINGOPS
       | MASK_INLINE_STRINGOPS_DYNAMICALLY | MASK_RECIP | MASK_STACK_PROBE
       | MASK_STV | MASK_TLS_DIRECT_SEG_REFS | MASK_VZEROUPPER
       | MASK_NO_PUSH_ARGS | MASK_OMIT_LEAF_FRAME_POINTER);

  /* Functions without specific options use the default node.  */
  if (!callee_target)
    callee_target = target_option_default_node;
  if (!caller_target)
    caller_target = target_option_default_node;

  /* Identical option nodes are trivially compatible.  */
  if (callee_target == caller_target)
    return true;

  struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_target);
  struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_target);

  const bool always_inline
    = (DECL_DISREGARD_INLINE_LIMITS (callee)
       && lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)));

  /* If the callee only uses GPRs, MASK_80387 does not matter either.  */
  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
    tolerated_flags |= MASK_80387;

  cgraph_node *callee_node = cgraph_node::get (callee);

  /* The callee's ISA options must be a subset of the caller's, i.e. an
     SSE4 function can inline an SSE2 function but an SSE2 function
     can't inline an SSE4 function.  */
  if ((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
      != callee_opts->x_ix86_isa_flags)
    return false;
  if ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
      != callee_opts->x_ix86_isa_flags2)
    return false;

  /* The non-ISA options must match: exactly in the general case, and
     outside of the tolerated flags in every case.  */
  if (!always_inline
      && caller_opts->x_target_flags != callee_opts->x_target_flags)
    return false;
  if ((caller_opts->x_target_flags & ~tolerated_flags)
      != (callee_opts->x_target_flags & ~tolerated_flags))
    return false;

  /* The architecture must match; the tuning only has to match when the
     callee is not always_inline.  */
  if (caller_opts->arch != callee_opts->arch)
    return false;
  if (!always_inline && caller_opts->tune != callee_opts->tune)
    return false;

  if (caller_opts->x_ix86_fpmath != callee_opts->x_ix86_fpmath)
    {
      /* If the callee doesn't use FP expressions, differences in
	 ix86_fpmath can be ignored.  We are called from FEs for
	 multi-versioning call optimization, so beware of
	 ipa_fn_summaries not being available.  */
      if (!ipa_fn_summaries
	  || ipa_fn_summaries->get (callee_node) == NULL
	  || ipa_fn_summaries->get (callee_node)->fp_expressions)
	return false;
    }

  if (!always_inline
      && caller_opts->branch_cost != callee_opts->branch_cost)
    return false;

  return true;
}
/* Return true if EXP goes in large data/bss.

   This can only happen for the medium code models.  Functions and
   automatic variables are never large data.  A variable with an
   explicit section name is large data exactly when that section is
   ".ldata" or ".lbss"; anything else is decided by its size.  */

static bool
ix86_in_large_data_p (tree exp)
{
  const bool medium_model
    = (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC);

  if (!medium_model || exp == NULL_TREE)
    return false;

  /* Functions are never large data.  */
  if (TREE_CODE (exp) == FUNCTION_DECL)
    return false;

  if (VAR_P (exp))
    {
      /* Automatic variables are never large data.  */
      if (!is_global_var (exp))
	return false;

      /* An explicit section name settles the question.  */
      if (DECL_SECTION_NAME (exp))
	{
	  const char *secname = DECL_SECTION_NAME (exp);
	  return (strcmp (secname, ".ldata") == 0
		  || strcmp (secname, ".lbss") == 0);
	}
    }

  /* If this is an incomplete type with size 0, then we can't put it in
     data because it might be too big when completed.  Also,
     int_size_in_bytes returns -1 if the size can vary or is larger than
     an integer, in which case it is also safer to assume that it goes
     in large data.  */
  const HOST_WIDE_INT nbytes = int_size_in_bytes (TREE_TYPE (exp));
  return nbytes <= 0 || nbytes > ix86_section_threshold;
}
/* i386-specific section flag to mark large sections. It is an alias
for the machine-dependent section flag SECTION_MACH_DEP. */
#define SECTION_LARGE SECTION_MACH_DEP
/* Select the section into which DECL is to be output.

   DECL is either a `VAR_DECL' node or a constant of some sort.
   RELOC indicates whether forming the initial value of DECL requires
   link-time relocations.  ALIGN is only passed on to the default
   selection routine.

   Large data objects are directed to the ".l*" counterparts of the
   usual data sections; everything else is left to
   default_elf_select_section.  */

ATTRIBUTE_UNUSED static section *
x86_64_elf_select_section (tree decl, int reloc,
			   unsigned HOST_WIDE_INT align)
{
  if (!ix86_in_large_data_p (decl))
    return default_elf_select_section (decl, reloc, align);

  const char *large_name = NULL;
  unsigned int large_flags = SECTION_WRITE | SECTION_LARGE;

  switch (categorize_decl_for_section (decl, reloc))
    {
    case SECCAT_DATA:
      large_name = ".ldata";
      break;

    case SECCAT_DATA_REL:
      large_name = ".ldata.rel";
      break;

    case SECCAT_DATA_REL_LOCAL:
      large_name = ".ldata.rel.local";
      break;

    case SECCAT_DATA_REL_RO:
      large_name = ".ldata.rel.ro";
      break;

    case SECCAT_DATA_REL_RO_LOCAL:
      large_name = ".ldata.rel.ro.local";
      break;

    case SECCAT_BSS:
      large_name = ".lbss";
      large_flags |= SECTION_BSS;
      break;

    case SECCAT_RODATA:
    case SECCAT_RODATA_MERGE_STR:
    case SECCAT_RODATA_MERGE_STR_INIT:
    case SECCAT_RODATA_MERGE_CONST:
      /* Read-only data: drop the write flag.  */
      large_name = ".lrodata";
      large_flags &= ~SECTION_WRITE;
      break;

    case SECCAT_SRODATA:
    case SECCAT_SDATA:
    case SECCAT_SBSS:
      gcc_unreachable ();

    case SECCAT_TEXT:
    case SECCAT_TDATA:
    case SECCAT_TBSS:
      /* We don't split these for medium model.  Place them into
	 default sections and hope for best.  */
      break;
    }

  /* No large counterpart for this category.  */
  if (large_name == NULL)
    return default_elf_select_section (decl, reloc, align);

  /* We might get called with string constants, but get_named_section
     doesn't like them as they are not DECLs.  Also, we need to set
     flags in that case.  */
  if (DECL_P (decl))
    return get_named_section (decl, large_name, reloc);

  return get_section (large_name, large_flags, NULL);
}
/* Compute the attribute flags of section NAME from the properties of
   DECL and from RELOC, which tells whether DECL's initializer might
   contain runtime relocations.

   The result is what default_section_type_flags returns, extended with
   the flags that are specific to the large data sections.  */

static unsigned int ATTRIBUTE_UNUSED
x86_64_elf_section_type_flags (tree decl, const char *name, int reloc)
{
  unsigned int result = default_section_type_flags (decl, name, reloc);

  if (ix86_in_large_data_p (decl))
    result |= SECTION_LARGE;

  /* Without a DECL, the large relro sections are recognized by name.  */
  if (decl == NULL_TREE)
    {
      if (strcmp (name, ".ldata.rel.ro") == 0
	  || strcmp (name, ".ldata.rel.ro.local") == 0)
	result |= SECTION_RELRO;
    }

  /* The large BSS section, its subsections and the linkonce variant.  */
  const bool large_bss
    = (strcmp (name, ".lbss") == 0
       || startswith (name, ".lbss.")
       || startswith (name, ".gnu.linkonce.lb."));
  if (large_bss)
    result |= SECTION_BSS;

  return result;
}
/* Build a unique section name for DECL and record it as DECL's section
   name.  RELOC indicates whether the initial value of DECL requires
   link-time relocations.

   Large data objects get a name derived from a ".l*" prefix and the
   assembler name of DECL; everything else is left to
   default_unique_section.  */

static void ATTRIBUTE_UNUSED
x86_64_elf_unique_section (tree decl, int reloc)
{
  if (!ix86_in_large_data_p (decl))
    {
      default_unique_section (decl, reloc);
      return;
    }

  const char *stem = NULL;
  /* We only need to use .gnu.linkonce if we don't have COMDAT groups.  */
  const bool use_linkonce = DECL_COMDAT_GROUP (decl) && !HAVE_COMDAT_GROUP;

  switch (categorize_decl_for_section (decl, reloc))
    {
    case SECCAT_DATA:
    case SECCAT_DATA_REL:
    case SECCAT_DATA_REL_LOCAL:
    case SECCAT_DATA_REL_RO:
    case SECCAT_DATA_REL_RO_LOCAL:
      stem = use_linkonce ? ".ld" : ".ldata";
      break;

    case SECCAT_BSS:
      stem = use_linkonce ? ".lb" : ".lbss";
      break;

    case SECCAT_RODATA:
    case SECCAT_RODATA_MERGE_STR:
    case SECCAT_RODATA_MERGE_STR_INIT:
    case SECCAT_RODATA_MERGE_CONST:
      stem = use_linkonce ? ".lr" : ".lrodata";
      break;

    case SECCAT_SRODATA:
    case SECCAT_SDATA:
    case SECCAT_SBSS:
      gcc_unreachable ();

    case SECCAT_TEXT:
    case SECCAT_TDATA:
    case SECCAT_TBSS:
      /* We don't split these for medium model.  Place them into
	 default sections and hope for best.  */
      break;
    }

  /* No large counterpart for this category.  */
  if (stem == NULL)
    {
      default_unique_section (decl, reloc);
      return;
    }

  const char *symbol
    = targetm.strip_name_encoding (IDENTIFIER_POINTER
				   (DECL_ASSEMBLER_NAME (decl)));

  /* With linkonce in use, the section name needs a .gnu.linkonce
     prefix.  */
  const char *linkonce_prefix = use_linkonce ? ".gnu.linkonce" : "";

  char *section_name = ACONCAT ((linkonce_prefix, stem, ".", symbol, NULL));
  set_decl_section_name (decl, section_name);
}
#ifdef COMMON_ASM_OP

#ifndef LARGECOMM_SECTION_ASM_OP
#define LARGECOMM_SECTION_ASM_OP "\t.largecomm\t"
#endif

/* Output to FILE the assembler code that declares the uninitialized
   external linkage data object NAME of SIZE bytes, aligned to ALIGN
   bits.

   For the medium model on x86-64, objects larger than the section
   threshold are declared with LARGECOMM_SECTION_ASM_OP after switching
   to the ".lbss" section; all others use COMMON_ASM_OP.  */

void
x86_elf_aligned_decl_common (FILE *file, tree decl,
			     const char *name, unsigned HOST_WIDE_INT size,
			     unsigned align)
{
  const bool medium_model
    = (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC);
  const char *directive = COMMON_ASM_OP;

  if (medium_model && size > (unsigned int) ix86_section_threshold)
    {
      switch_to_section (get_named_section (decl, ".lbss", 0));
      directive = LARGECOMM_SECTION_ASM_OP;
    }

  fputs (directive, file);
  assemble_name (file, name);
  /* The alignment operand is written in bytes.  */
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u\n",
	   size, align / BITS_PER_UNIT);
}
#endif
/* Utility function for targets to use in implementing
ASM_OUTPUT_ALIGNED_BSS. */
void
x86_output_aligned_bss (FILE *file, tree decl, const char *name,
unsigned HOST_WIDE_INT size, unsigned align)
{
if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
&& size > (unsigned int)ix86_section_threshold)
switch_to_section (get_named_section (decl, ".lbss", 0));
else
switch_to_section (bss_section);
ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
last_assemble_variable_decl = decl;
ASM_DECLARE_OBJECT_NAME (file, name, decl);
#else
/* Standard thing is just output label for the object. */
ASM_OUTPUT_LABEL (file, name);
#endif /* ASM_DECLARE_OBJECT_NAME */
ASM_OUTPUT_SKIP (file, size ? size : 1);
}
/* Decide whether the stack must be probed before any space allocation
   on this target.  This is TARGET_STACK_PROBE, except that no probing
   is requested when static stack checking (-fstack-check) is enabled,
   so that the stack is not probed twice.  */

bool
ix86_target_stack_probe (void)
{
  return (flag_stack_check != STATIC_BUILTIN_STACK_CHECK
	  && TARGET_STACK_PROBE);
}
/* Decide whether a sibling call can be made to a function.  DECL is the
   declaration of the function being targeted by the call, or NULL for
   an indirect call, and EXP is the CALL_EXPR representing the call.  */

static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
  const bool callee_is_global = decl && !targetm.binds_local_p (decl);

  if (ix86_function_naked (current_function_decl))
    return false;

  /* A sibling call isn't OK if there are no caller-saved registers,
     since all registers must be preserved before return.  */
  if (cfun->machine->no_caller_saved_registers)
    return false;

  /* If we are generating position-independent code, we cannot sibcall
     optimize direct calls to global functions, as the PLT requires
     %ebx be live.  (Darwin does not have a PLT.)  */
  if (!TARGET_MACHO && !TARGET_64BIT && flag_pic && flag_plt
      && callee_is_global)
    return false;

  /* If we need to align the outgoing stack, then sibcalling would
     unalign the stack, which may break the called function.  */
  if (ix86_minimum_incoming_stack_boundary (true)
      < PREFERRED_STACK_BOUNDARY)
    return false;

  /* Without a declaration the function type is taken from the
     CALL_EXPR: callee expression -> pointer type -> function type.  */
  tree fntype = (decl
		 ? TREE_TYPE (decl)
		 : TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))));
  tree callee = decl ? decl : fntype;

  /* If outgoing reg parm stack space changes, we cannot do sibcall.  */
  if (OUTGOING_REG_PARM_STACK_SPACE (fntype)
      != OUTGOING_REG_PARM_STACK_SPACE (TREE_TYPE (current_function_decl))
      || (REG_PARM_STACK_SPACE (callee)
	  != REG_PARM_STACK_SPACE (current_function_decl)))
    {
      maybe_complain_about_tail_call (exp,
				      "inconsistent size of stack space"
				      " allocated for arguments which are"
				      " passed in registers");
      return false;
    }

  /* Check that the return value locations are the same.  For example,
     if we are returning floats on the 80387 register stack, we cannot
     make a sibcall from a function that doesn't return a float to a
     function that does or, conversely, from a function that does return
     a float to a function that doesn't; the necessary stack adjustment
     would not be executed.  This is also the place we notice
     differences in the return value ABI.  Note that it is ok for one
     of the functions to have void return type as long as the return
     value of the other is passed in a register.  */
  rtx callee_value = ix86_function_value (TREE_TYPE (exp), callee, false);
  rtx caller_value
    = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			   cfun->decl, false);

  /* The locations must be compared whenever an x87 stack register is
     involved, or when the current function does not return void.  */
  const bool must_match
    = (STACK_REG_P (callee_value)
       || STACK_REG_P (caller_value)
       || !VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))));
  if (must_match && !rtx_equal_p (callee_value, caller_value))
    return false;

  if (TARGET_64BIT)
    {
      /* The SYSV ABI has more call-clobbered registers;
	 disallow sibcalls from MS to SYSV.  */
      if (cfun->machine->call_abi == MS_ABI
	  && ix86_function_type_abi (fntype) == SYSV_ABI)
	return false;

      return true;
    }

  /* If this call is indirect, we'll need to be able to use a
     call-clobbered register for the address of the target function.
     Make sure that all such registers are not used for passing
     parameters.  Note that DLLIMPORT functions and calls to global
     functions via a GOT slot are indirect.  */
  const bool indirect_call
    = (!decl
       || (callee_is_global && flag_pic && !flag_plt)
       || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))
       || flag_force_indirect_call);

  /* Check if regparm >= 3 since arg_reg_available is set to false if
     regparm == 0.  If regparm is 1 or 2, there is always a
     call-clobbered register available.

     ??? The symbol indirect call doesn't need a call-clobbered
     register.  But we don't know if this is a symbol indirect call or
     not here.  */
  if (indirect_call
      && ix86_function_regparm (fntype, decl) >= 3
      && !cfun->machine->arg_reg_available)
    return false;

  /* Otherwise okay.  That also includes certain types of indirect
     calls.  */
  return true;
}
/* Determine the calling convention from TYPE.  The result is a mask of
   IX86_CALLCVT_* bits.  */

unsigned int
ix86_get_callcvt (const_tree type)
{
  /* In 64-bit mode the answer is always cdecl.  */
  if (TARGET_64BIT)
    return IX86_CALLCVT_CDECL;

  unsigned int cvt = 0;
  tree attrs = TYPE_ATTRIBUTES (type);

  if (attrs != NULL_TREE)
    {
      /* Base conventions in order of precedence; only the first one
	 present on TYPE is taken.  */
      static const struct
      {
	const char *attr_name;
	unsigned int cvt_bit;
      } base_conventions[] =
	{
	  { "cdecl", IX86_CALLCVT_CDECL },
	  { "stdcall", IX86_CALLCVT_STDCALL },
	  { "fastcall", IX86_CALLCVT_FASTCALL },
	  { "thiscall", IX86_CALLCVT_THISCALL }
	};
      const unsigned int n_conventions
	= sizeof base_conventions / sizeof base_conventions[0];

      for (unsigned int k = 0; k < n_conventions; k++)
	if (lookup_attribute (base_conventions[k].attr_name, attrs))
	  {
	    cvt |= base_conventions[k].cvt_bit;
	    break;
	  }

      /* regparm and sseregparm aren't allowed for thiscall and
	 fastcall.  */
      if ((cvt & (IX86_CALLCVT_THISCALL | IX86_CALLCVT_FASTCALL)) == 0)
	{
	  if (lookup_attribute ("regparm", attrs))
	    cvt |= IX86_CALLCVT_REGPARM;
	  if (lookup_attribute ("sseregparm", attrs))
	    cvt |= IX86_CALLCVT_SSEREGPARM;
	}

      /* An explicit base convention settles the matter.  */
      if (IX86_BASE_CALLCVT(cvt) != 0)
	return cvt;
    }

  const bool variadic = stdarg_p (type);

  /* With TARGET_RTD, non-variadic functions default to stdcall.  */
  if (TARGET_RTD && !variadic)
    return IX86_CALLCVT_STDCALL | cvt;

  /* Only a non-variadic METHOD_TYPE under the MS ABI, with no modifier
     bits collected above, defaults to thiscall.  */
  const bool defaults_to_thiscall
    = (cvt == 0
       && !variadic
       && TREE_CODE (type) == METHOD_TYPE
       && ix86_function_type_abi (type) == MS_ABI);
  if (defaults_to_thiscall)
    return IX86_CALLCVT_THISCALL;

  return IX86_CALLCVT_CDECL | cvt;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  This implementation only ever returns 0
   or 1.

   Types other than function and method types are always compatible.
   Function and method types are compatible when both their calling
   convention and their regparm value agree.  */

static int
ix86_comp_type_attributes (const_tree type1, const_tree type2)
{
  switch (TREE_CODE (type1))
    {
    case FUNCTION_TYPE:
    case METHOD_TYPE:
      break;

    default:
      return 1;
    }

  if (ix86_get_callcvt (type1) != ix86_get_callcvt (type2))
    return 0;

  const bool same_regparm
    = (ix86_function_regparm (type1, NULL)
       == ix86_function_regparm (type2, NULL));
  return same_regparm ? 1 : 0;
}
/* Compute how many integer registers are used to pass arguments to a
   function of type TYPE whose declaration is DECL.  DECL may be NULL when
   calling a function indirectly or considering a libcall.

   On 64-bit targets the count is determined by the ABI of TYPE.  On
   32-bit targets an explicit regparm attribute wins, fastcall and
   thiscall give 2 and 1 respectively, and otherwise the ix86_regparm
   default applies, possibly raised for an optimized local function whose
   signature may be changed.  */
static int
ix86_function_regparm (const_tree type, const_tree decl)
{
  if (TARGET_64BIT)
    {
      if (ix86_function_type_abi (type) == SYSV_ABI)
        return X86_64_REGPARM_MAX;
      return X86_64_MS_REGPARM_MAX;
    }

  const unsigned int cvt = ix86_get_callcvt (type);
  int nregs = ix86_regparm;

  if ((cvt & IX86_CALLCVT_REGPARM) != 0)
    {
      tree regparm_attr
        = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
      if (regparm_attr)
        {
          nregs = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (regparm_attr)));
          return nregs;
        }
    }
  else if ((cvt & IX86_CALLCVT_FASTCALL) != 0)
    return 2;
  else if ((cvt & IX86_CALLCVT_THISCALL) != 0)
    return 1;

  /* The register convention for local functions needs a FUNCTION_DECL.  */
  if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
    return nregs;

  cgraph_node *callee = cgraph_node::get (decl);
  if (callee)
    callee = callee->function_symbol ();

  /* Caller and callee must agree on the calling convention, so the
     decision is based on whether the callee is optimized, not on the
     current function's optimize setting (which
     __attribute__((optimize (...))) could make differ).  */
  if (!callee
      || !opt_for_fn (callee->decl, optimize)
      || (profile_flag && !flag_fentry))
    return nregs;

  if (!callee->local || !callee->can_change_signature)
    return nregs;

  /* Stop at the first regparm register that is taken by a fixed
     register variable.  */
  int usable = 0;
  while (usable < REGPARM_MAX && !fixed_regs[usable])
    usable++;

  /* Nested functions use a static chain pointer in the third argument
     register, so regparm(3) is not wanted for them.  */
  if (usable == 3 && DECL_STATIC_CHAIN (callee->decl))
    usable = 2;

  /* Save a register for the split stack.  */
  if (flag_split_stack)
    {
      if (usable == 3)
        usable = 2;
      else if (usable == 2 && DECL_STATIC_CHAIN (callee->decl))
        usable = 1;
    }

  /* Each fixed register usage increases register pressure, so fewer
     registers should be used for argument passing.  This can be
     overridden by an explicit regparm value.  */
  int fixed_count = 0;
  for (int regno = AX_REG; regno <= DI_REG; regno++)
    if (fixed_regs[regno])
      fixed_count++;

  usable = fixed_count < usable ? usable - fixed_count : 0;

  /* Only ever raise the count above the default, never lower it.  */
  if (usable > nregs)
    nregs = usable;

  return nregs;
}
/* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) and
   DFmode (2) arguments in SSE registers for a function with the
   indicated TYPE and DECL.  DECL may be NULL when calling a function
   indirectly or considering a libcall.  Return -1 if any FP parameter
   should be rejected by an error; this is used in the situation where
   we imply the SSE calling convention but the function is called from
   another function with SSE disabled.  Otherwise return 0.

   WARN enables the diagnostics and the -1 result.  Only valid for
   32-bit targets.  */
static int
ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
{
  gcc_assert (!TARGET_64BIT);

  /* SFmode and DFmode arguments go in SSE registers when requested by
     TARGET_SSEREGPARM or by the sseregparm attribute.  */
  if (TARGET_SSEREGPARM
      || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
    {
      if (TARGET_SSE)
        return 2;

      if (warn)
        {
          if (decl)
            error ("calling %qD with attribute sseregparm without "
                   "SSE/SSE2 enabled", decl);
          else
            error ("calling %qT with attribute sseregparm without "
                   "SSE/SSE2 enabled", type);
        }
      return 0;
    }

  if (!decl)
    return 0;

  cgraph_node *callee = cgraph_node::get (decl);
  if (callee)
    callee = callee->function_symbol ();

  /* For local functions, pass up to SSE_REGPARM_MAX SFmode (and DFmode
     for SSE2) arguments in SSE registers.  The fpmath test below is the
     callee's TARGET_SSE_MATH.  */
  if (!callee
      || !(target_opts_for_fn (callee->decl)->x_ix86_fpmath & FPMATH_SSE)
      || !opt_for_fn (callee->decl, optimize)
      || (profile_flag && !flag_fentry))
    return 0;

  if (!callee->local || !callee->can_change_signature)
    return 0;

  /* Refuse to produce wrong code when a local function with SSE enabled
     is called from an SSE-disabled function.
     FIXME: We need a way to detect these cases across ltrans partitions
     and avoid using SSE calling conventions on local functions called
     from functions with SSE disabled.  For now at least delay the
     warning until we know we are going to produce wrong code.
     See PR66047.  */
  if (!TARGET_SSE && warn)
    return -1;

  if (TARGET_SSE2_P (target_opts_for_fn (callee->decl)->x_ix86_isa_flags))
    return 2;
  return 1;
}
/* Return true if EAX is live at the start of the function.  Used by
   ix86_expand_prologue to determine if we need special help before
   calling allocate_stack_worker.  */
static bool
ix86_eax_live_at_start_p (void)
{
  /* Cheat: rather than working forward from ix86_function_regparm and
     the function type to find out whether an argument really lives in
     eax, test register 0 in the live-out set of the entry block.  The
     cfg info is still close enough to correct at this point.  This gives
     false positives for broken functions that use uninitialized data
     which happens to be allocated in eax, but that is acceptable.  */
  const bool eax_live
    = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 0);
  return eax_live;
}
/* Return true if the aggregate return pointer is to be kept for
   functions of type FNTYPE.

   On 32-bit targets a callee_pop_aggregate_return attribute decides:
   the result is true exactly when its argument is 0.  Without the
   attribute, the 32-bit MS ABI keeps the pointer.  In every other case
   the answer comes from KEEP_AGGREGATE_RETURN_POINTER.  */
static bool
ix86_keep_aggregate_return_pointer (tree fntype)
{
  if (TARGET_64BIT)
    return KEEP_AGGREGATE_RETURN_POINTER != 0;

  tree pop_attr = lookup_attribute ("callee_pop_aggregate_return",
                                    TYPE_ATTRIBUTES (fntype));
  if (pop_attr)
    return (TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (pop_attr))) == 0);

  /* For 32-bit MS-ABI the default is to keep the aggregate return
     pointer.  */
  if (ix86_function_type_abi (fntype) == MS_ABI)
    return true;

  return KEEP_AGGREGATE_RETURN_POINTER != 0;
}
/* Value is the number of bytes of arguments automatically popped when
   returning from a subroutine call.

   FUNDECL is the declaration node of the function (as a tree),
   FUNTYPE is the data type of the function (as a tree),
   or for a library call it is an identifier node for the subroutine name.
   SIZE is the number of bytes of arguments passed on the stack.

   On the 80386, the RTD insn may be used to pop them if the number of
   args is fixed, but if the number is variable then the caller must pop
   them all.  RTD can't be used for library calls now because the library
   is compiled with the Unix compiler.  Use of RTD is a selectable option,
   since it is incompatible with standard Unix calling sequences.  If the
   option is not selected, the caller must always pop the args.

   The attribute stdcall is equivalent to RTD on a per module basis.  */
static poly_int64
ix86_return_pops_args (tree fundecl, tree funtype, poly_int64 size)
{
  /* None of the 64-bit ABIs pop arguments.  */
  if (TARGET_64BIT)
    return 0;

  /* stdcall, fastcall and thiscall callees pop all their stack
     arguments, unless the function takes variable arguments.  */
  const unsigned int callee_pop_mask
    = (IX86_CALLCVT_STDCALL | IX86_CALLCVT_FASTCALL
       | IX86_CALLCVT_THISCALL);
  const unsigned int cvt = ix86_get_callcvt (funtype);
  if ((cvt & callee_pop_mask) != 0 && ! stdarg_p (funtype))
    return size;

  /* Lose any fake structure return argument if it is passed on the
     stack, i.e. when no argument registers are in use.  */
  if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
      && !ix86_keep_aggregate_return_pointer (funtype)
      && ix86_function_regparm (funtype, fundecl) == 0)
    return GET_MODE_SIZE (Pmode);

  return 0;
}
/* Implement the TARGET_LEGITIMATE_COMBINED_INSN hook.
   Return false if INSN has a hard-register operand that is accepted by
   none of the preferred constraint alternatives; return true otherwise.
   INSN must already be recognized (INSN_CODE >= 0).  */
static bool
ix86_legitimate_combined_insn (rtx_insn *insn)
{
int i;
/* Check operand constraints in case hard registers were propagated
into insn pattern. This check prevents combine pass from
generating insn patterns with invalid hard register operands.
These invalid insns can eventually confuse reload to error out
with a spill failure. See also PRs 46829 and 46843. */
gcc_assert (INSN_CODE (insn) >= 0);
/* The operand data read below (recog_data, recog_op_alt) is taken
after these two calls. */
extract_insn (insn);
preprocess_constraints (insn);
int n_operands = recog_data.n_operands;
int n_alternatives = recog_data.n_alternatives;
for (i = 0; i < n_operands; i++)
{
rtx op = recog_data.operand[i];
/* MODE is the mode of the operand as written, taken before any unary
operator or SUBREG is stripped below. */
machine_mode mode = GET_MODE (op);
const operand_alternative *op_alt;
int offset = 0;
bool win;
int j;
/* A unary operator may be accepted by the predicate, but it
is irrelevant for matching constraints. */
if (UNARY_P (op))
op = XEXP (op, 0);
if (SUBREG_P (op))
{
/* For a SUBREG of a hard register, remember the register-number
offset so that the class check below takes it into account. */
if (REG_P (SUBREG_REG (op))
&& REGNO (SUBREG_REG (op)) < FIRST_PSEUDO_REGISTER)
offset = subreg_regno_offset (REGNO (SUBREG_REG (op)),
GET_MODE (SUBREG_REG (op)),
SUBREG_BYTE (op),
GET_MODE (op));
op = SUBREG_REG (op);
}
/* Only hard-register operands need checking. */
if (!(REG_P (op) && HARD_REGISTER_P (op)))
continue;
op_alt = recog_op_alt;
/* Operand has no constraints, anything is OK. */
win = !n_alternatives;
alternative_mask preferred = get_preferred_alternatives (insn);
/* OP_ALT advances by N_OPERANDS per alternative, so op_alt[i] is
operand I's entry for alternative J. */
for (j = 0; j < n_alternatives; j++, op_alt += n_operands)
{
if (!TEST_BIT (preferred, j))
continue;
/* The operand wins if the alternative accepts anything, if it is a
matching constraint whose matched operand is equal, or if the hard
register fits the alternative's register class. */
if (op_alt[i].anything_ok
|| (op_alt[i].matches != -1
&& operands_match_p
(recog_data.operand[i],
recog_data.operand[op_alt[i].matches]))
|| reg_fits_class_p (op, op_alt[i].cl, offset, mode))
{
win = true;
break;
}
}
/* No preferred alternative accepts this hard register. */
if (!win)
return false;
}
return true;
}
/* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  The value is whatever
   SUBTARGET_SHADOW_OFFSET evaluates to.  */
static unsigned HOST_WIDE_INT
ix86_asan_shadow_offset (void)
{
  const unsigned HOST_WIDE_INT shadow_offset = SUBTARGET_SHADOW_OFFSET;
  return shadow_offset;
}
/* Argument support functions. */
/* Return true when register REGNO may be used to pass function
   parameters.  */
bool
ix86_function_arg_regno_p (int regno)
{
  /* The leading SSE registers are argument registers when SSE is on.  */
  if (TARGET_SSE && SSE_REGNO_P (regno)
      && regno < FIRST_SSE_REG + SSE_REGPARM_MAX)
    return true;

  if (!TARGET_64BIT)
    {
      /* 32-bit: the first REGPARM_MAX registers, plus the leading MMX
         registers when MMX is enabled.  */
      if (regno < REGPARM_MAX)
        return true;
      return (TARGET_MMX && MMX_REGNO_P (regno)
              && regno < FIRST_MMX_REG + MMX_REGPARM_MAX);
    }

  /* TODO: The function should depend on the current function ABI but
     builtins.c would need updating then.  The ABI is taken from
     ix86_cfun_abi.  */
  const enum calling_abi abi = ix86_cfun_abi ();

  /* RAX is used as hidden argument to va_arg functions.  */
  if (abi == SYSV_ABI && regno == AX_REG)
    return true;

  /* Select the integer parameter register table for the ABI.  */
  const int *regs;
  int count;
  if (abi == MS_ABI)
    {
      regs = x86_64_ms_abi_int_parameter_registers;
      count = X86_64_MS_REGPARM_MAX;
    }
  else
    {
      regs = x86_64_int_parameter_registers;
      count = X86_64_REGPARM_MAX;
    }

  for (int idx = 0; idx < count; idx++)
    if (regno == regs[idx])
      return true;

  return false;
}
/* Return true if we do not know how to pass ARG solely in registers.  */
static bool
ix86_must_pass_in_stack (const function_arg_info &arg)
{
  if (must_pass_in_stack_var_size_or_pad (arg))
    return true;

  /* For 32-bit, we want TImode aggregates to go on the stack.  But watch
     out!  The layout_type routine is crafty and tries to trick us into
     passing currently unsupported vector types on the stack by using
     TImode, hence the VECTOR_TYPE exclusion.  */
  if (TARGET_64BIT || arg.mode != TImode)
    return false;

  return (arg.type && TREE_CODE (arg.type) != VECTOR_TYPE);
}
/* Return the size, in bytes, of the stack area reserved for arguments
   passed in registers for the function represented by FNDECL, which
   depends on the ABI in use.  FNDECL is handled as a declaration when it
   is a FUNCTION_DECL; otherwise (including when it is NULL) it is handed
   to ix86_function_type_abi.  The result is 32 for the 64-bit MS ABI and
   0 in every other case.  */
int
ix86_reg_parm_stack_space (const_tree fndecl)
{
  enum calling_abi abi;

  if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
    abi = ix86_function_abi (fndecl);
  else
    abi = ix86_function_type_abi (fndecl);

  return (TARGET_64BIT && abi == MS_ABI) ? 32 : 0;
}
/* Workaround that makes the libc_has_function target hook usable from
   i386.md.  FN_CLASS is forwarded to the hook, with NULL_TREE as the
   hook's second argument.  */
bool
ix86_libc_has_function (enum function_class fn_class)
{
  const bool available = targetm.libc_has_function (fn_class, NULL_TREE);
  return available;
}
/* Return the call ABI (SYSV_ABI or MS_ABI) used by functions of type
   FNTYPE.  The default is ix86_abi; an ms_abi attribute switches a SYSV
   default to MS_ABI and a sysv_abi attribute switches an MS default to
   SYSV_ABI.  A NULL FNTYPE, or one without attributes, gets the
   default.  */
enum calling_abi
ix86_function_type_abi (const_tree fntype)
{
  const enum calling_abi default_abi = ix86_abi;

  if (fntype == NULL_TREE || TYPE_ATTRIBUTES (fntype) == NULL_TREE)
    return default_abi;

  if (default_abi == SYSV_ABI)
    {
      if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)))
        {
          /* Complain only once about ms_abi on X32; MS_ABI is still
             returned.  */
          static int warned;
          if (TARGET_X32 && !warned)
            {
              error ("X32 does not support %<ms_abi%> attribute");
              warned = 1;
            }
          return MS_ABI;
        }
      return default_abi;
    }

  if (default_abi == MS_ABI
      && lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)))
    return SYSV_ABI;

  return default_abi;
}
/* Return the call ABI used by the function declared by FNDECL, derived
   from its type.  A NULL FNDECL yields the default ix86_abi.  */
enum calling_abi
ix86_function_abi (const_tree fndecl)
{
  if (!fndecl)
    return ix86_abi;
  return ix86_function_type_abi (TREE_TYPE (fndecl));
}
/* Return the call ABI (SYSV_ABI or MS_ABI) recorded for the current
   function in cfun->machine->call_abi, or the default ix86_abi when
   there is no current function.  */
enum calling_abi
ix86_cfun_abi (void)
{
  if (!cfun)
    return ix86_abi;
  return cfun->machine->call_abi;
}
/* Return true if FN carries the ms_hook_prologue attribute and is not a
   nested function.  A nested function with the attribute gets an error
   at its source location and false is returned.  FN may be NULL.  */
bool
ix86_function_ms_hook_prologue (const_tree fn)
{
  if (!fn || !lookup_attribute ("ms_hook_prologue", DECL_ATTRIBUTES (fn)))
    return false;

  if (decl_function_context (fn) == NULL_TREE)
    return true;

  error_at (DECL_SOURCE_LOCATION (fn),
            "%<ms_hook_prologue%> attribute is not compatible "
            "with nested function");
  return false;
}
/* Return true if FN is non-NULL and has the naked attribute.  */
bool
ix86_function_naked (const_tree fn)
{
  const bool has_naked
    = (fn && lookup_attribute ("naked", DECL_ATTRIBUTES (fn)));
  return has_naked;
}
/* Write the extra assembler code needed to declare a function properly.
   ASM_OUT_FILE is the output stream, FNAME the label to emit and DECL
   the function's declaration.

   For an ms_hook_prologue function, 0xcccccccc filler words are written
   before the label (32 bytes on 64-bit targets, 16 otherwise) and a
   fixed byte sequence is written right after it.  */
void
ix86_asm_output_function_label (FILE *asm_out_file, const char *fname,
                                tree decl)
{
  const bool hot_patchable = ix86_function_ms_hook_prologue (decl);

  if (cfun)
    cfun->machine->function_label_emitted = true;

  if (hot_patchable)
    {
      /* One ASM_LONG directive per four bytes of filler.  */
      const unsigned int filler_word = 0xcccccccc;
      int bytes_left = (TARGET_64BIT ? 32 : 16);

      while (bytes_left > 0)
        {
          fprintf (asm_out_file, ASM_LONG " %#x\n", filler_word);
          bytes_left -= 4;
        }
    }

#ifdef SUBTARGET_ASM_UNWIND_INIT
  SUBTARGET_ASM_UNWIND_INIT (asm_out_file);
#endif

  ASM_OUTPUT_LABEL (asm_out_file, fname);

  /* Output magic byte marker, if hot-patch attribute is set.  */
  if (!hot_patchable)
    return;

  if (TARGET_64BIT)
    {
      /* leaq [%rsp + 0], %rsp  */
      fputs (ASM_BYTE "0x48, 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00\n",
             asm_out_file);
    }
  else
    {
      /* movl.s %edi, %edi
         push   %ebp
         movl.s %esp, %ebp  */
      fputs (ASM_BYTE "0x8b, 0xff, 0x55, 0x8b, 0xec\n", asm_out_file);
    }
}
/* Implementation of the call ABI switching target hook.  Records the ABI
   of FNDECL in cfun->machine->call_abi.  See also
   ix86_conditional_register_usage for more details.  */
void
ix86_call_abi_override (const_tree fndecl)
{
  const enum calling_abi abi = ix86_function_abi (fndecl);
  cfun->machine->call_abi = abi;
}
/* Return true if a pseudo register should be created and used to hold
   the GOT address for PIC code.  That is the case for PIC code except
   on 64-bit targets using the small PIC code model or PE-COFF.  */
bool
ix86_use_pseudo_pic_reg (void)
{
  if (!flag_pic)
    return false;

  if (TARGET_64BIT
      && (ix86_cmodel == CM_SMALL_PIC
          || TARGET_PECOFF))
    return false;

  return true;
}
/* Initialize large model PIC register.
   TMP_REGNO is the number of a scratch register; it must differ from the
   register of pic_offset_table_rtx.  Pmode must be DImode.
   Emits a label followed by three insns: set_rip_rex64 loading
   pic_offset_table_rtx relative to the label, set_got_offset_rex64 into
   the scratch register, and an add of the scratch register into
   pic_offset_table_rtx.  */
static void
ix86_init_large_pic_reg (unsigned int tmp_regno)
{
rtx_code_label *label;
rtx tmp_reg;
gcc_assert (Pmode == DImode);
label = gen_label_rtx ();
emit_label (label);
LABEL_PRESERVE_P (label) = 1;
tmp_reg = gen_rtx_REG (Pmode, tmp_regno);
gcc_assert (REGNO (pic_offset_table_rtx) != tmp_regno);
emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx,
label));
emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
emit_insn (gen_add2_insn (pic_offset_table_rtx, tmp_reg));
/* Turn the label in place into a NOTE_INSN_DELETED_LABEL note.  The name
is read before the code change and stored back afterwards. */
const char *name = LABEL_NAME (label);
PUT_CODE (label, NOTE);
NOTE_KIND (label) = NOTE_INSN_DELETED_LABEL;
NOTE_DELETED_LABEL_NAME (label) = name;
}
/* Create and initialize PIC register if required. */
static void
ix86_init_pic_reg (void)
{
edge entry_edge;
rtx_insn *seq;
if (!ix86_use_pseudo_pic_reg ())
return;
start_sequence ();
if (TARGET_64BIT)
{
if (ix86_cmodel == CM_LARGE_PIC)
ix86_init_large_pic_reg (R11_REG);
else
emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
}
else
{
/* If there is future mcount call in the function it is more profitable
to emit SET_GOT into ABI defined REAL_PIC_OFFSET_TABLE_REGNUM. */
rtx reg = crtl->profile
? gen_rtx_REG (Pmode, REAL_PIC_OFFSET_TABLE_REGNUM)
: pic_offset_table_rtx;
rtx_insn *insn = emit_insn (gen_set_got (reg));
RTX_FRAME_RELATED_P (insn) = 1;
if (crtl->profile)
emit_move_insn (pic_offset_table_rtx, reg);
add_reg_note (insn, REG_CFA_FLUSH_QUEUE, NULL_RTX);
}
seq = get_insns ();
end_sequence ();
entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
insert_insn_on_edge (seq, entry_edge);
commit_one_edge_insertion (entry_edge);
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
For a library call, FNTYPE is 0.
LIBNAME is the SYMBOL_REF of the library routine name, or 0.
FNDECL, when non-null, is the declaration used to look up the callee's
cgraph node and ABI; it is also stored in cum->decl.
CALLER is stored in cum->caller.
Besides filling in *CUM, this also updates cfun->machine->silent_p and
cfun->machine->arg_reg_available. */
void
init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
tree fntype, /* function type, or 0 for a library call */
rtx libname, /* SYMBOL_REF of library name or 0 */
tree fndecl,
int caller)
{
struct cgraph_node *local_info_node = NULL;
struct cgraph_node *target = NULL;
/* Set silent_p to false to raise an error for invalid calls when
expanding function body. */
cfun->machine->silent_p = false;
memset (cum, 0, sizeof (*cum));
/* Determine the call ABI: from the resolved callee symbol when a cgraph
node exists, else from FNDECL, else from FNTYPE. */
if (fndecl)
{
target = cgraph_node::get (fndecl);
if (target)
{
target = target->function_symbol ();
local_info_node = cgraph_node::local_info_node (target->decl);
cum->call_abi = ix86_function_abi (target->decl);
}
else
cum->call_abi = ix86_function_abi (fndecl);
}
else
cum->call_abi = ix86_function_type_abi (fntype);
cum->caller = caller;
/* Set up the number of registers to use for passing arguments. */
cum->nregs = ix86_regparm;
if (TARGET_64BIT)
{
cum->nregs = (cum->call_abi == SYSV_ABI
? X86_64_REGPARM_MAX
: X86_64_MS_REGPARM_MAX);
}
if (TARGET_SSE)
{
cum->sse_nregs = SSE_REGPARM_MAX;
if (TARGET_64BIT)
{
cum->sse_nregs = (cum->call_abi == SYSV_ABI
? X86_64_SSE_REGPARM_MAX
: X86_64_MS_SSE_REGPARM_MAX);
}
}
if (TARGET_MMX)
cum->mmx_nregs = MMX_REGPARM_MAX;
/* All ABI-change warnings start out enabled; the 32-bit stdarg path
below turns them off again. */
cum->warn_avx512f = true;
cum->warn_avx = true;
cum->warn_sse = true;
cum->warn_mmx = true;
/* Because type might mismatch in between caller and callee, we need to
use actual type of function for local calls.
FIXME: cgraph_analyze can be told to actually record if function uses
va_start so for local functions maybe_vaarg can be made aggressive
helping K&R code.
FIXME: once the type system is fixed, we won't need this code anymore. */
if (local_info_node && local_info_node->local
&& local_info_node->can_change_signature)
fntype = TREE_TYPE (target->decl);
cum->stdarg = stdarg_p (fntype);
/* With a type: unprototyped or stdarg.  Without a type: anything that is
not a libcall. */
cum->maybe_vaarg = (fntype
? (!prototype_p (fntype) || stdarg_p (fntype))
: !libname);
cum->decl = fndecl;
cum->warn_empty = !warn_abi || cum->stdarg;
/* Otherwise set warn_empty only if some argument type that is not
TYPE_EMPTY_P follows one that is. */
if (!cum->warn_empty && fntype)
{
function_args_iterator iter;
tree argtype;
bool seen_empty_type = false;
FOREACH_FUNCTION_ARGS (fntype, argtype, iter)
{
if (argtype == error_mark_node || VOID_TYPE_P (argtype))
break;
if (TYPE_EMPTY_P (argtype))
seen_empty_type = true;
else if (seen_empty_type)
{
cum->warn_empty = true;
break;
}
}
}
if (!TARGET_64BIT)
{
/* If there are variable arguments, then we won't pass anything
in registers in 32-bit mode. */
if (stdarg_p (fntype))
{
cum->nregs = 0;
/* Since in 32-bit, variable arguments are always passed on
stack, there is scratch register available for indirect
sibcall. */
cfun->machine->arg_reg_available = true;
cum->sse_nregs = 0;
cum->mmx_nregs = 0;
cum->warn_avx512f = false;
cum->warn_avx = false;
cum->warn_sse = false;
cum->warn_mmx = false;
/* Early exit: the arg_reg_available assignment at the end of the
function is deliberately skipped on this path. */
return;
}
/* Use ecx and edx registers if function has fastcall attribute,
else look for regparm information. */
if (fntype)
{
unsigned int ccvt = ix86_get_callcvt (fntype);
if ((ccvt & IX86_CALLCVT_THISCALL) != 0)
{
cum->nregs = 1;
cum->fastcall = 1; /* Same first register as in fastcall. */
}
else if ((ccvt & IX86_CALLCVT_FASTCALL) != 0)
{
cum->nregs = 2;
cum->fastcall = 1;
}
else
cum->nregs = ix86_function_regparm (fntype, fndecl);
}
/* Set up the number of SSE registers used for passing SFmode
and DFmode arguments. Warn for mismatching ABI. */
cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
}
/* Record whether any integer argument register is in use. */
cfun->machine->arg_reg_available = (cum->nregs > 0);
}
/* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
But in the case of vector types, it is some vector mode.
When we have only some of our vector isa extensions enabled, then there
are some modes for which vector_mode_supported_p is false. For these
modes, the generic vector support in gcc will choose some non-vector mode
in order to implement the type. By computing the natural mode, we'll
select the proper ABI location for the operand and not depend on whatever
the middle-end decides to do with these vector types.
The middle-end can't deal with the vector types > 16 bytes. In this
case, we return the original mode and warn ABI change if CUM isn't
NULL.
If IN_RETURN is true, warn ABI change if the vector mode isn't
available for function return value.
Each -Wpsabi warning below is guarded by its own static flag, which is
set only once warning () has returned true. */
static machine_mode
type_natural_mode (const_tree type, const CUMULATIVE_ARGS *cum,
bool in_return)
{
machine_mode mode = TYPE_MODE (type);
/* Only vector types that did not get a vector mode need work. */
if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
{
HOST_WIDE_INT size = int_size_in_bytes (type);
if ((size == 8 || size == 16 || size == 32 || size == 64)
/* ??? Generic code allows us to create width 1 vectors. Ignore. */
&& TYPE_VECTOR_SUBPARTS (type) > 1)
{
machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
/* There are no XFmode vector modes. */
if (innermode == XFmode)
return mode;
/* Start the search at the first float or integer vector mode. */
if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
mode = MIN_MODE_VECTOR_FLOAT;
else
mode = MIN_MODE_VECTOR_INT;
/* Get the mode which has this inner mode and number of units. */
FOR_EACH_MODE_FROM (mode, mode)
if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
&& GET_MODE_INNER (mode) == innermode)
{
/* 64-byte vector without AVX512F: warn and return the original
TYPE_MODE instead of the vector mode. */
if (size == 64 && !TARGET_AVX512F && !TARGET_IAMCU)
{
static bool warnedavx512f;
static bool warnedavx512f_ret;
if (cum && cum->warn_avx512f && !warnedavx512f)
{
if (warning (OPT_Wpsabi, "AVX512F vector argument "
"without AVX512F enabled changes the ABI"))
warnedavx512f = true;
}
else if (in_return && !warnedavx512f_ret)
{
if (warning (OPT_Wpsabi, "AVX512F vector return "
"without AVX512F enabled changes the ABI"))
warnedavx512f_ret = true;
}
return TYPE_MODE (type);
}
/* 32-byte vector without AVX: same treatment. */
else if (size == 32 && !TARGET_AVX && !TARGET_IAMCU)
{
static bool warnedavx;
static bool warnedavx_ret;
if (cum && cum->warn_avx && !warnedavx)
{
if (warning (OPT_Wpsabi, "AVX vector argument "
"without AVX enabled changes the ABI"))
warnedavx = true;
}
else if (in_return && !warnedavx_ret)
{
if (warning (OPT_Wpsabi, "AVX vector return "
"without AVX enabled changes the ABI"))
warnedavx_ret = true;
}
return TYPE_MODE (type);
}
/* 16-byte (or 64-bit-target 8-byte) vector without SSE: warn only;
the vector mode found is still returned below. */
else if (((size == 8 && TARGET_64BIT) || size == 16)
&& !TARGET_SSE
&& !TARGET_IAMCU)
{
static bool warnedsse;
static bool warnedsse_ret;
if (cum && cum->warn_sse && !warnedsse)
{
if (warning (OPT_Wpsabi, "SSE vector argument "
"without SSE enabled changes the ABI"))
warnedsse = true;
}
else if (!TARGET_64BIT && in_return && !warnedsse_ret)
{
if (warning (OPT_Wpsabi, "SSE vector return "
"without SSE enabled changes the ABI"))
warnedsse_ret = true;
}
}
/* 8-byte vector on a 32-bit target without MMX, in a TYPE_NORMAL
function (or with no current function): warn only. */
else if ((size == 8 && !TARGET_64BIT)
&& (!cfun
|| cfun->machine->func_type == TYPE_NORMAL)
&& !TARGET_MMX
&& !TARGET_IAMCU)
{
static bool warnedmmx;
static bool warnedmmx_ret;
if (cum && cum->warn_mmx && !warnedmmx)
{
if (warning (OPT_Wpsabi, "MMX vector argument "
"without MMX enabled changes the ABI"))
warnedmmx = true;
}
else if (in_return && !warnedmmx_ret)
{
if (warning (OPT_Wpsabi, "MMX vector return "
"without MMX enabled changes the ABI"))
warnedmmx_ret = true;
}
}
return mode;
}
/* Reached only if the search found no matching vector mode. */
gcc_unreachable ();
}
}
return mode;
}
/* We want to pass a value in REGNO whose "natural" mode is MODE.  However,
   this may not agree with the mode that the type system has chosen for
   the register, which is ORIG_MODE.  If ORIG_MODE is not BLKmode, a plain
   REG in ORIG_MODE is returned.  Otherwise the result is a one-element
   PARALLEL in ORIG_MODE that places a MODE register at offset 0.  */
static rtx
gen_reg_or_parallel (machine_mode mode, machine_mode orig_mode,
                     unsigned int regno)
{
  if (orig_mode != BLKmode)
    return gen_rtx_REG (orig_mode, regno);

  rtx natural_reg = gen_rtx_REG (mode, regno);
  rtx piece = gen_rtx_EXPR_LIST (VOIDmode, natural_reg, const0_rtx);
  return gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, piece));
}
/* x86-64 register passing implementation. See x86-64 ABI for details. Goal
of this code is to classify each 8bytes of incoming argument by the register
class and assign registers accordingly. */
/* Return the union class of CLASS1 and CLASS2.
   See the x86-64 PS ABI for details.  The rules are tried in order and
   the first one that applies decides the result.  */
static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
  /* Rule #1: If both classes are equal, this is the resulting class.  */
  if (class1 == class2)
    return class1;

  /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
     the other class.  */
  if (class1 == X86_64_NO_CLASS)
    return class2;
  if (class2 == X86_64_NO_CLASS)
    return class1;

  /* Rule #3: If one of the classes is MEMORY, the result is MEMORY.  */
  if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
    return X86_64_MEMORY_CLASS;

  /* Rule #4: If one of the classes is INTEGER, the result is INTEGER.
     INTEGERSI merged with SSESF or SSEHF stays INTEGERSI.  */
  const bool small_fp1
    = (class1 == X86_64_SSESF_CLASS || class1 == X86_64_SSEHF_CLASS);
  const bool small_fp2
    = (class2 == X86_64_SSESF_CLASS || class2 == X86_64_SSEHF_CLASS);
  if ((class1 == X86_64_INTEGERSI_CLASS && small_fp2)
      || (class2 == X86_64_INTEGERSI_CLASS && small_fp1))
    return X86_64_INTEGERSI_CLASS;

  const bool int1
    = (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS);
  const bool int2
    = (class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS);
  if (int1 || int2)
    return X86_64_INTEGER_CLASS;

  /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
     MEMORY is used.  */
  const bool x87_1 = (class1 == X86_64_X87_CLASS
                      || class1 == X86_64_X87UP_CLASS
                      || class1 == X86_64_COMPLEX_X87_CLASS);
  const bool x87_2 = (class2 == X86_64_X87_CLASS
                      || class2 == X86_64_X87UP_CLASS
                      || class2 == X86_64_COMPLEX_X87_CLASS);
  if (x87_1 || x87_2)
    return X86_64_MEMORY_CLASS;

  /* Rule #6: Otherwise class SSE is used.  */
  return X86_64_SSE_CLASS;
}
/* Classify the argument of type TYPE and mode MODE.
CLASSES will be filled by the register class used to pass each word
of the operand. The number of words is returned. In case the parameter
should be passed in memory, 0 is returned. As a special case for zero
sized containers, classes[0] will be NO_CLASS and 1 is returned.
BIT_OFFSET is used internally for handling records and specifies the
offset in bits modulo 512 to avoid overflow cases.
See the x86-64 PS ABI for details.
*/
static int
classify_argument (machine_mode mode, const_tree type,
enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
HOST_WIDE_INT bytes
= mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
int words = CEIL (bytes + (bit_offset % 64) / 8, UNITS_PER_WORD);
/* Variable sized entities are always passed/returned in memory. */
if (bytes < 0)
return 0;
if (mode != VOIDmode)
{
/* The value of "named" doesn't matter. */
function_arg_info arg (const_cast<tree> (type), mode, /*named=*/true);
if (targetm.calls.must_pass_in_stack (arg))
return 0;
}
if (type && AGGREGATE_TYPE_P (type))
{
int i;
tree field;
enum x86_64_reg_class subclasses[MAX_CLASSES];
/* On x86-64 we pass structures larger than 64 bytes on the stack. */
if (bytes > 64)
return 0;
for (i = 0; i < words; i++)
classes[i] = X86_64_NO_CLASS;
/* Zero sized arrays or structures are NO_CLASS. We return 0 to
signalize memory class, so handle it as special case. */
if (!words)
{
classes[0] = X86_64_NO_CLASS;
return 1;
}
/* Classify each field of record and merge classes. */
switch (TREE_CODE (type))
{
case RECORD_TYPE:
/* And now merge the fields of structure. */
for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL)
{
int num;
if (TREE_TYPE (field) == error_mark_node)
continue;
/* Bitfields are always classified as integer. Handle them
early, since later code would consider them to be
misaligned integers. */
if (DECL_BIT_FIELD (field))
{
for (i = (int_bit_position (field)
+ (bit_offset % 64)) / 8 / 8;
i < ((int_bit_position (field) + (bit_offset % 64))
+ tree_to_shwi (DECL_SIZE (field))
+ 63) / 8 / 8; i++)
classes[i]
= merge_classes (X86_64_INTEGER_CLASS, classes[i]);
}
else
{
int pos;
type = TREE_TYPE (field);
/* Flexible array member is ignored. */
if (TYPE_MODE (type) == BLKmode
&& TREE_CODE (type) == ARRAY_TYPE
&& TYPE_SIZE (type) == NULL_TREE
&& TYPE_DOMAIN (type) != NULL_TREE
&& (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
== NULL_TREE))
{
static bool warned;
if (!warned && warn_psabi)
{
warned = true;
inform (input_location,
"the ABI of passing struct with"
" a flexible array member has"
" changed in GCC 4.4");
}
continue;
}
num = classify_argument (TYPE_MODE (type), type,
subclasses,
(int_bit_position (field)
+ bit_offset) % 512);
if (!num)
return 0;
pos = (int_bit_position (field)
+ (bit_offset % 64)) / 8 / 8;
for (i = 0; i < num && (i + pos) < words; i++)
classes[i + pos]
= merge_classes (subclasses[i], classes[i + pos]);
}
}
}
break;
case ARRAY_TYPE:
/* Arrays are handled as small records. */
{
int num;
num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
TREE_TYPE (type), subclasses, bit_offset);
if (!num)
return 0;
/* The partial classes are now full classes. */
if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
subclasses[0] = X86_64_SSE_CLASS;
if (subclasses[0] == X86_64_SSEHF_CLASS && bytes != 2)
subclasses[0] = X86_64_SSE_CLASS;
if (subclasses[0] == X86_64_INTEGERSI_CLASS
&& !((bit_offset % 64) == 0 && bytes == 4))
subclasses[0] = X86_64_INTEGER_CLASS;
for (i = 0; i < words; i++)
classes[i] = subclasses[i % num];
break;
}
case UNION_TYPE:
case QUAL_UNION_TYPE:
/* Unions are similar to RECORD_TYPE but offset is always 0.
*/
for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL)
{
int num;
if (TREE_TYPE (field) == error_mark_node)
continue;
num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
TREE_TYPE (field), subclasses,
bit_offset);
if (!num)
return 0;
for (i = 0; i < num && i < words; i++)
classes[i] = merge_classes (subclasses[i], classes[i]);
}
}
break;
default:
gcc_unreachable ();
}
if (words > 2)
{
/* When size > 16 bytes, if the first one isn't
X86_64_SSE_CLASS or any other ones aren't
X86_64_SSEUP_CLASS, everything should be passed in
memory. */
if (classes[0] != X86_64_SSE_CLASS)
return 0;
for (i = 1; i < words; i++)
if (classes[i] != X86_64_SSEUP_CLASS)
return 0;
}
/* Final merger cleanup. */
for (i = 0; i < words; i++)
{
/* If one class is MEMORY, everything should be passed in
memory. */
if (classes[i] == X86_64_MEMORY_CLASS)
return 0;
/* The X86_64_SSEUP_CLASS should be always preceded by
X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
if (classes[i] == X86_64_SSEUP_CLASS
&& classes[i - 1] != X86_64_SSE_CLASS
&& classes[i - 1] != X86_64_SSEUP_CLASS)
{
/* The first one should never be X86_64_SSEUP_CLASS. */
gcc_assert (i != 0);
classes[i] = X86_64_SSE_CLASS;
}
/* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
everything should be passed in memory. */
if (classes[i] == X86_64_X87UP_CLASS
&& (classes[i - 1] != X86_64_X87_CLASS))
{
static bool warned;
/* The first one should never be X86_64_X87UP_CLASS. */
gcc_assert (i != 0);
if (!warned && warn_psabi)
{
warned = true;
inform (input_location,
"the ABI of passing union with %<long double%>"
" has changed in GCC 4.4");
}
return 0;
}
}
return words;
}
/* Compute alignment needed. We align all types to natural boundaries with
exception of XFmode that is aligned to 64bits. */
if (mode != VOIDmode && mode != BLKmode)
{
int mode_alignment = GET_MODE_BITSIZE (mode);
if (mode == XFmode)
mode_alignment = 128;
else if (mode == XCmode)
mode_alignment = 256;
if (COMPLEX_MODE_P (mode))
mode_alignment /= 2;
/* Misaligned fields are always returned in memory. */
if (bit_offset % mode_alignment)
return 0;
}
/* for V1xx modes, just use the base mode */
if (VECTOR_MODE_P (mode) && mode != V1DImode && mode != V1TImode
&& GET_MODE_UNIT_SIZE (mode) == bytes)
mode = GET_MODE_INNER (mode);
/* Classification of atomic types. */
switch (mode)
{
case E_SDmode:
case E_DDmode:
classes[0] = X86_64_SSE_CLASS;
return 1;
case E_TDmode:
classes[0] = X86_64_SSE_CLASS;
classes[1] = X86_64_SSEUP_CLASS;
return 2;
case E_DImode:
case E_SImode:
case E_HImode:
case E_QImode:
case E_CSImode:
case E_CHImode:
case E_CQImode:
{
int size = bit_offset + (int) GET_MODE_BITSIZE (mode);
/* Analyze last 128 bits only. */
size = (size - 1) & 0x7f;
if (size < 32)
{
classes[0] = X86_64_INTEGERSI_CLASS;
return 1;
}
else if (size < 64)
{
classes[0] = X86_64_INTEGER_CLASS;
return 1;
}
else if (size < 64+32)
{
classes[0] = X86_64_INTEGER_CLASS;
classes[1] = X86_64_INTEGERSI_CLASS;
return 2;
}
else if (size < 64+64)
{
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
return 2;
}
else
gcc_unreachable ();
}
case E_CDImode:
case E_TImode:
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
return 2;
case E_COImode:
case E_OImode:
/* OImode shouldn't be used directly. */
gcc_unreachable ();
case E_CTImode:
return 0;
case E_HFmode:
if (!(bit_offset % 64))
classes[0] = X86_64_SSEHF_CLASS;
else
classes[0] = X86_64_SSE_CLASS;
return 1;
case E_SFmode:
if (!(bit_offset % 64))
classes[0] = X86_64_SSESF_CLASS;
else
classes[0] = X86_64_SSE_CLASS;
return 1;
case E_DFmode:
classes[0] = X86_64_SSEDF_CLASS;
return 1;
case E_XFmode:
classes[0] = X86_64_X87_CLASS;
classes[1] = X86_64_X87UP_CLASS;
return 2;
case E_TFmode:
classes[0] = X86_64_SSE_CLASS;
classes[1] = X86_64_SSEUP_CLASS;
return 2;
case E_HCmode:
classes[0] = X86_64_SSE_CLASS;
if (!(bit_offset % 64))
return 1;
else
{
classes[1] = X86_64_SSEHF_CLASS;
return 2;
}
case E_SCmode:
classes[0] = X86_64_SSE_CLASS;
if (!(bit_offset % 64))
return 1;
else
{
static bool warned;
if (!warned && warn_psabi)
{
warned = true;
inform (input_location,
"the ABI of passing structure with %<complex float%>"
" member has changed in GCC 4.4");
}
classes[1] = X86_64_SSESF_CLASS;
return 2;
}
case E_DCmode:
classes[0] = X86_64_SSEDF_CLASS;
classes[1] = X86_64_SSEDF_CLASS;
return 2;
case E_XCmode:
classes[0] = X86_64_COMPLEX_X87_CLASS;
return 1;
case E_TCmode:
/* This mode is larger than 16 bytes. */
return 0;
case E_V8SFmode:
case E_V8SImode:
case E_V32QImode:
case E_V16HFmode:
case E_V16HImode:
case E_V4DFmode:
case E_V4DImode:
classes[0] = X86_64_SSE_CLASS;
classes[1] = X86_64_SSEUP_CLASS;
classes[2] = X86_64_SSEUP_CLASS;
classes[3] = X86_64_SSEUP_CLASS;
return 4;
case E_V8DFmode:
case E_V16SFmode:
case E_V32HFmode:
case E_V8DImode:
case E_V16SImode:
case E_V32HImode:
case E_V64QImode:
classes[0] = X86_64_SSE_CLASS;
classes[1] = X86_64_SSEUP_CLASS;
classes[2] = X86_64_SSEUP_CLASS;
classes[3] = X86_64_SSEUP_CLASS;
classes[4] = X86_64_SSEUP_CLASS;
classes[5] = X86_64_SSEUP_CLASS;
classes[6] = X86_64_SSEUP_CLASS;
classes[7] = X86_64_SSEUP_CLASS;
return 8;
case E_V4SFmode:
case E_V4SImode:
case E_V16QImode:
case E_V8HImode:
case E_V8HFmode:
case E_V2DFmode:
case E_V2DImode:
classes[0] = X86_64_SSE_CLASS;
classes[1] = X86_64_SSEUP_CLASS;
return 2;
case E_V1TImode:
case E_V1DImode:
case E_V2SFmode:
case E_V2SImode:
case E_V4HImode:
case E_V4HFmode:
case E_V2HFmode:
case E_V8QImode:
classes[0] = X86_64_SSE_CLASS;
return 1;
case E_BLKmode:
case E_VOIDmode:
return 0;
default:
gcc_assert (VECTOR_MODE_P (mode));
if (bytes > 16)
return 0;
gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
classes[0] = X86_64_INTEGERSI_CLASS;
else
classes[0] = X86_64_INTEGER_CLASS;
classes[1] = X86_64_INTEGER_CLASS;
return 1 + (bytes > 8);
}
}
/* Classify a value of mode MODE and type TYPE and tally the registers
   its classes call for.  *INT_NREGS and *SSE_NREGS are always reset to
   zero first; when false is returned they hold the number of integer
   and SSE registers required.  IN_RETURN is nonzero when x87 classes
   are acceptable (they are rejected otherwise).
   Return true iff the value should be passed in memory.  */
static bool
examine_argument (machine_mode mode, const_tree type, int in_return,
                  int *int_nregs, int *sse_nregs)
{
  enum x86_64_reg_class classes[MAX_CLASSES];
  int remaining = classify_argument (mode, type, classes, 0);

  *int_nregs = 0;
  *sse_nregs = 0;

  /* classify_argument reporting zero classes means "use memory".  */
  if (remaining == 0)
    return true;

  /* Walk the classes from the last one down to the first.  */
  while (remaining-- > 0)
    {
      switch (classes[remaining])
        {
        case X86_64_INTEGERSI_CLASS:
        case X86_64_INTEGER_CLASS:
          *int_nregs += 1;
          break;

        case X86_64_SSEDF_CLASS:
        case X86_64_SSESF_CLASS:
        case X86_64_SSEHF_CLASS:
        case X86_64_SSE_CLASS:
          *sse_nregs += 1;
          break;

        /* These classes do not consume a register of their own.  */
        case X86_64_SSEUP_CLASS:
        case X86_64_NO_CLASS:
          break;

        /* x87 classes are only tolerated when IN_RETURN is set.  */
        case X86_64_COMPLEX_X87_CLASS:
        case X86_64_X87UP_CLASS:
        case X86_64_X87_CLASS:
          if (in_return)
            break;
          return true;

        /* A MEMORY class is never expected after a nonzero
           classification result.  */
        case X86_64_MEMORY_CLASS:
          gcc_unreachable ();
        }
    }

  return false;
}
/* Construct container for the argument used by GCC interface. See
FUNCTION_ARG for the detailed description.

MODE and TYPE describe the value being placed; ORIG_MODE is only
forwarded to gen_reg_or_parallel. IN_RETURN is nonzero when the
container describes a return value rather than an argument.
NINTREGS and NSSEREGS are the numbers of integer and SSE registers
that may be used. INTREG points to the integer register numbers to
hand out, in order; SSE_REGNO is the index (mapped through
GET_SSE_REGNO) of the first SSE register to hand out.

Return NULL when no register container is built: the value
classifies as memory, needs more registers than allowed, needs a
disabled register file (SSE or x87), or is empty. Otherwise return
a REG, the result of gen_reg_or_parallel, or a PARALLEL whose
entries pair a register with the byte offset it covers. */
static rtx
construct_container (machine_mode mode, machine_mode orig_mode,
const_tree type, int in_return, int nintregs, int nsseregs,
const int *intreg, int sse_regno)
{
/* The following variables hold the static issued_error state.
Each diagnostic below is emitted at most once per compiler run. */
static bool issued_sse_arg_error;
static bool issued_sse_ret_error;
static bool issued_x87_ret_error;
machine_mode tmpmode;
/* Size of the value in bytes; BLKmode values take it from TYPE. */
int bytes
= mode == BLKmode ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
enum x86_64_reg_class regclass[MAX_CLASSES];
/* Number of classes produced by classify_argument. */
int n;
int i;
/* Number of entries collected in EXP for the final PARALLEL. */
int nexps = 0;
int needed_sseregs, needed_intregs;
rtx exp[MAX_CLASSES];
rtx ret;
n = classify_argument (mode, type, regclass, 0);
/* Zero classes: the value is not passed in registers. */
if (!n)
return NULL;
/* examine_argument returns true when the value must go in memory. */
if (examine_argument (mode, type, in_return, &needed_intregs,
&needed_sseregs))
return NULL;
/* Not enough registers of either kind are available. */
if (needed_intregs > nintregs || needed_sseregs > nsseregs)
return NULL;
/* We allowed the user to turn off SSE for kernel mode. Don't crash if
some less clueful developer tries to use floating-point anyway. */
if (needed_sseregs && !TARGET_SSE)
{
/* Return early if we shouldn't raise an error for invalid
calls. */
if (cfun != NULL && cfun->machine->silent_p)
return NULL;
if (in_return)
{
if (!issued_sse_ret_error)
{
error ("SSE register return with SSE disabled");
issued_sse_ret_error = true;
}
}
else if (!issued_sse_arg_error)
{
error ("SSE register argument with SSE disabled");
issued_sse_arg_error = true;
}
return NULL;
}
/* Likewise, error if the ABI requires us to return values in the
x87 registers and the user specified -mno-80387. */
if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
for (i = 0; i < n; i++)
if (regclass[i] == X86_64_X87_CLASS
|| regclass[i] == X86_64_X87UP_CLASS
|| regclass[i] == X86_64_COMPLEX_X87_CLASS)
{
/* Return early if we shouldn't raise an error for invalid
calls. */
if (cfun != NULL && cfun->machine->silent_p)
return NULL;
if (!issued_x87_ret_error)
{
error ("x87 register return with x87 disabled");
issued_x87_ret_error = true;
}
return NULL;
}
/* First construct simple cases. Avoid SCmode and HCmode, since we
want to use single register to pass these types. */
if (n == 1 && mode != SCmode && mode != HCmode)
switch (regclass[0])
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
return gen_rtx_REG (mode, intreg[0]);
case X86_64_SSE_CLASS:
case X86_64_SSEHF_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
/* A BLKmode value leaves the switch here and is handled by the
PARALLEL construction further down. */
if (mode != BLKmode)
return gen_reg_or_parallel (mode, orig_mode,
GET_SSE_REGNO (sse_regno));
break;
case X86_64_X87_CLASS:
case X86_64_COMPLEX_X87_CLASS:
return gen_rtx_REG (mode, FIRST_STACK_REG);
case X86_64_NO_CLASS:
/* Zero sized array, struct or class. */
return NULL;
default:
gcc_unreachable ();
}
/* A non-BLKmode value classified as SSE followed only by SSEUP
(2, 4 or 8 classes) is handed to gen_reg_or_parallel with the first
SSE register. */
if (n == 2
&& regclass[0] == X86_64_SSE_CLASS
&& regclass[1] == X86_64_SSEUP_CLASS
&& mode != BLKmode)
return gen_reg_or_parallel (mode, orig_mode,
GET_SSE_REGNO (sse_regno));
if (n == 4
&& regclass[0] == X86_64_SSE_CLASS
&& regclass[1] == X86_64_SSEUP_CLASS
&& regclass[2] == X86_64_SSEUP_CLASS
&& regclass[3] == X86_64_SSEUP_CLASS
&& mode != BLKmode)
return gen_reg_or_parallel (mode, orig_mode,
GET_SSE_REGNO (sse_regno));
if (n == 8
&& regclass[0] == X86_64_SSE_CLASS
&& regclass[1] == X86_64_SSEUP_CLASS
&& regclass[2] == X86_64_SSEUP_CLASS
&& regclass[3] == X86_64_SSEUP_CLASS
&& regclass[4] == X86_64_SSEUP_CLASS
&& regclass[5] == X86_64_SSEUP_CLASS
&& regclass[6] == X86_64_SSEUP_CLASS
&& regclass[7] == X86_64_SSEUP_CLASS
&& mode != BLKmode)
return gen_reg_or_parallel (mode, orig_mode,
GET_SSE_REGNO (sse_regno));
/* An X87/X87UP pair is always an XFmode value in the first x87 stack
register. */
if (n == 2
&& regclass[0] == X86_64_X87_CLASS
&& regclass[1] == X86_64_X87UP_CLASS)
return gen_rtx_REG (XFmode, FIRST_STACK_REG);
/* Two full INTEGER classes landing in consecutively numbered integer
registers can be described by one register of the whole mode. */
if (n == 2
&& regclass[0] == X86_64_INTEGER_CLASS
&& regclass[1] == X86_64_INTEGER_CLASS
&& (mode == CDImode || mode == TImode || mode == BLKmode)
&& intreg[0] + 1 == intreg[1])
{
if (mode == BLKmode)
{
/* Use TImode for BLKmode values in 2 integer registers. */
exp[0] = gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (TImode, intreg[0]),
GEN_INT (0));
ret = gen_rtx_PARALLEL (mode, rtvec_alloc (1));
XVECEXP (ret, 0, 0) = exp[0];
return ret;
}
else
return gen_rtx_REG (mode, intreg[0]);
}
/* Otherwise figure out the entries of the PARALLEL. Class I
describes the bytes starting at offset I*8; INTREG and SSE_REGNO
are advanced as registers are handed out. */
for (i = 0; i < n; i++)
{
int pos;
switch (regclass[i])
{
case X86_64_NO_CLASS:
/* Nothing to place for this chunk. */
break;
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
/* Merge TImodes on aligned occasions here too. */
/* The value ends inside this chunk: pick an integer mode matching
the bytes that remain. */
if (i * 8 + 8 > bytes)
{
unsigned int tmpbits = (bytes - i * 8) * BITS_PER_UNIT;
if (!int_mode_for_size (tmpbits, 0).exists (&tmpmode))
/* We've requested 24 bytes we
don't have mode for. Use DImode. */
tmpmode = DImode;
}
else if (regclass[i] == X86_64_INTEGERSI_CLASS)
tmpmode = SImode;
else
tmpmode = DImode;
exp [nexps++]
= gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (tmpmode, *intreg),
GEN_INT (i*8));
/* Consume this integer register. */
intreg++;
break;
case X86_64_SSEHF_CLASS:
exp [nexps++]
= gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (HFmode,
GET_SSE_REGNO (sse_regno)),
GEN_INT (i*8));
sse_regno++;
break;
case X86_64_SSESF_CLASS:
exp [nexps++]
= gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (SFmode,
GET_SSE_REGNO (sse_regno)),
GEN_INT (i*8));
sse_regno++;
break;
case X86_64_SSEDF_CLASS:
exp [nexps++]
= gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (DFmode,
GET_SSE_REGNO (sse_regno)),
GEN_INT (i*8));
sse_regno++;
break;
case X86_64_SSE_CLASS:
/* Remember where this entry starts: I is advanced below to skip
the SSEUP classes folded into the same register. */
pos = i;
switch (n)
{
case 1:
tmpmode = DImode;
break;
case 2:
/* SSE followed by SSEUP shares one register as TImode; a lone
SSE chunk is described as DImode. */
if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
{
tmpmode = TImode;
i++;
}
else
tmpmode = DImode;
break;
case 4:
/* With four classes only SSE + 3 * SSEUP is expected here. */
gcc_assert (i == 0
&& regclass[1] == X86_64_SSEUP_CLASS
&& regclass[2] == X86_64_SSEUP_CLASS
&& regclass[3] == X86_64_SSEUP_CLASS);
tmpmode = OImode;
i += 3;
break;
case 8:
/* With eight classes only SSE + 7 * SSEUP is expected here. */
gcc_assert (i == 0
&& regclass[1] == X86_64_SSEUP_CLASS
&& regclass[2] == X86_64_SSEUP_CLASS
&& regclass[3] == X86_64_SSEUP_CLASS
&& regclass[4] == X86_64_SSEUP_CLASS
&& regclass[5] == X86_64_SSEUP_CLASS
&& regclass[6] == X86_64_SSEUP_CLASS
&& regclass[7] == X86_64_SSEUP_CLASS);
tmpmode = XImode;
i += 7;
break;
default:
gcc_unreachable ();
}
exp [nexps++]
= gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (tmpmode,
GET_SSE_REGNO (sse_regno)),
GEN_INT (pos*8));
sse_regno++;
break;
default:
/* No other class is expected at this point. */
gcc_unreachable ();
}
}
/* Empty aligned struct, union or class. */
if (nexps == 0)
return NULL;
/* Wrap the collected (register, offset) entries in a PARALLEL. */
ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
for (i = 0; i < nexps; i++)
XVECEXP (ret, 0, i) = exp [i];
return ret;
}
/* Update the data in CUM to advance over an argument of mode MODE
and data type TYPE. (TYPE is null for libcalls where that information
may not be available.)
Return a number of integer registers advanced over. */
static int
function_arg_advance_32 (CUMULATIVE_ARGS *cum, machine_mode mode,
const_tree type, HOST_WIDE_INT bytes,
HOST_WIDE_INT words)
{
int res = 0;
bool error_p = false;
if (TARGET_IAMCU)
{
/* Intel MCU psABI passes scalars and aggregates no larger than 8
bytes in registers. */
if (!VECTOR_MODE_P (mode) && bytes <= 8)
goto pass_in_reg;
return res;
}
switch (mode)
{
default:
break;
case E_BLKmode:
if (bytes < 0)
break;
/* FALLTHRU */
case E_DImode:
case E_SImode:
case E_HImode:
case E_QImode:
pass_in_reg:
cum->words += words;
cum->nregs -= words;
cum->regno += words;
if (cum->nregs >= 0)
res = words;
if (cum->nregs <= 0)
{
cum->nregs = 0;
cfun->machine->arg_reg_availab