/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif
/* Return index of given mode in mult and division cost tables. */
#define MODE_INDEX(mode) \
((mode) == QImode ? 0 \
: (mode) == HImode ? 1 \
: (mode) == SImode ? 2 \
: (mode) == DImode ? 3 \
: 4)
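/* Example use (a sketch -- cost lookups later in this file follow this
   pattern, assuming the mult_init/divide array fields declared for
   struct processor_costs in i386.h):

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   which selects index 2.  Modes other than QI/HI/SI/DImode fall through
   to index 4, the last column of each 5-entry cost array.  */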
/* Processor costs (relative to an add) */
static const
struct processor_costs size_cost = { /* costs for tuning for size */
2, /* cost of an add instruction */
3, /* cost of a lea instruction */
2, /* variable shift costs */
3, /* constant shift costs */
{3, 3, 3, 3, 5}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
{3, 3, 3, 3, 5}, /* cost of a divide/mod */
3, /* cost of movsx */
3, /* cost of movzx */
0, /* "large" insn */
2, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
{2, 2, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 2, 2}, /* cost of storing integer registers */
2, /* cost of reg,reg fld/fst */
{2, 2, 2}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{2, 2, 2}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
3, /* cost of moving MMX register */
{3, 3}, /* cost of loading MMX registers
in SImode and DImode */
{3, 3}, /* cost of storing MMX registers
in SImode and DImode */
3, /* cost of moving SSE register */
{3, 3, 3}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{3, 3, 3}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
2, /* cost of FADD and FSUB insns. */
2, /* cost of FMUL instruction. */
2, /* cost of FDIV instruction. */
2, /* cost of FABS instruction. */
2, /* cost of FCHS instruction. */
2, /* cost of FSQRT instruction. */
};
/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = { /* 386 specific costs */
1, /* cost of an add instruction */
1, /* cost of a lea instruction */
3, /* variable shift costs */
2, /* constant shift costs */
{6, 6, 6, 6, 6}, /* cost of starting a multiply */
1, /* cost of multiply per each bit set */
{23, 23, 23, 23, 23}, /* cost of a divide/mod */
3, /* cost of movsx */
2, /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
2, /* cost of reg,reg fld/fst */
{8, 8, 8}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{8, 8, 8}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
2, /* cost of moving MMX register */
{4, 8}, /* cost of loading MMX registers
in SImode and DImode */
{4, 8}, /* cost of storing MMX registers
in SImode and DImode */
2, /* cost of moving SSE register */
{4, 8, 16}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{4, 8, 16}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
23, /* cost of FADD and FSUB insns. */
27, /* cost of FMUL instruction. */
88, /* cost of FDIV instruction. */
22, /* cost of FABS instruction. */
24, /* cost of FCHS instruction. */
122, /* cost of FSQRT instruction. */
};
static const
struct processor_costs i486_cost = { /* 486 specific costs */
1, /* cost of an add instruction */
1, /* cost of a lea instruction */
3, /* variable shift costs */
2, /* constant shift costs */
{12, 12, 12, 12, 12}, /* cost of starting a multiply */
1, /* cost of multiply per each bit set */
{40, 40, 40, 40, 40}, /* cost of a divide/mod */
3, /* cost of movsx */
2, /* cost of movzx */
15, /* "large" insn */
3, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
2, /* cost of reg,reg fld/fst */
{8, 8, 8}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{8, 8, 8}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
2, /* cost of moving MMX register */
{4, 8}, /* cost of loading MMX registers
in SImode and DImode */
{4, 8}, /* cost of storing MMX registers
in SImode and DImode */
2, /* cost of moving SSE register */
{4, 8, 16}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{4, 8, 16}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
1, /* Branch cost */
8, /* cost of FADD and FSUB insns. */
16, /* cost of FMUL instruction. */
73, /* cost of FDIV instruction. */
3, /* cost of FABS instruction. */
3, /* cost of FCHS instruction. */
83, /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentium_cost = {
1, /* cost of an add instruction */
1, /* cost of a lea instruction */
4, /* variable shift costs */
1, /* constant shift costs */
{11, 11, 11, 11, 11}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
{25, 25, 25, 25, 25}, /* cost of a divide/mod */
3, /* cost of movsx */
2, /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
6, /* cost for loading QImode using movzbl */
{2, 4, 2}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 4, 2}, /* cost of storing integer registers */
2, /* cost of reg,reg fld/fst */
{2, 2, 6}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{4, 4, 6}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
8, /* cost of moving MMX register */
{8, 8}, /* cost of loading MMX registers
in SImode and DImode */
{8, 8}, /* cost of storing MMX registers
in SImode and DImode */
2, /* cost of moving SSE register */
{4, 8, 16}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{4, 8, 16}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
0, /* size of prefetch block */
0, /* number of parallel prefetches */
2, /* Branch cost */
3, /* cost of FADD and FSUB insns. */
3, /* cost of FMUL instruction. */
39, /* cost of FDIV instruction. */
1, /* cost of FABS instruction. */
1, /* cost of FCHS instruction. */
70, /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentiumpro_cost = {
1, /* cost of an add instruction */
1, /* cost of a lea instruction */
1, /* variable shift costs */
1, /* constant shift costs */
{4, 4, 4, 4, 4}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
{17, 17, 17, 17, 17}, /* cost of a divide/mod */
1, /* cost of movsx */
1, /* cost of movzx */
8, /* "large" insn */
6, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
{4, 4, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 2, 2}, /* cost of storing integer registers */
2, /* cost of reg,reg fld/fst */
{2, 2, 6}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{4, 4, 6}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
2, /* cost of moving MMX register */
{2, 2}, /* cost of loading MMX registers
in SImode and DImode */
{2, 2}, /* cost of storing MMX registers
in SImode and DImode */
2, /* cost of moving SSE register */
{2, 2, 8}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{2, 2, 8}, /* cost of storing SSE registers
in SImode, DImode and TImode */
3, /* MMX or SSE register to integer */
32, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
3, /* cost of FADD and FSUB insns. */
5, /* cost of FMUL instruction. */
56, /* cost of FDIV instruction. */
2, /* cost of FABS instruction. */
2, /* cost of FCHS instruction. */
56, /* cost of FSQRT instruction. */
};
static const
struct processor_costs k6_cost = {
1, /* cost of an add instruction */
2, /* cost of a lea instruction */
1, /* variable shift costs */
1, /* constant shift costs */
{3, 3, 3, 3, 3}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
{18, 18, 18, 18, 18}, /* cost of a divide/mod */
2, /* cost of movsx */
2, /* cost of movzx */
8, /* "large" insn */
4, /* MOVE_RATIO */
3, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */
4, /* cost of reg,reg fld/fst */
{6, 6, 6}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{4, 4, 4}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
2, /* cost of moving MMX register */
{2, 2}, /* cost of loading MMX registers
in SImode and DImode */
{2, 2}, /* cost of storing MMX registers
in SImode and DImode */
2, /* cost of moving SSE register */
{2, 2, 8}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{2, 2, 8}, /* cost of storing SSE registers
in SImode, DImode and TImode */
6, /* MMX or SSE register to integer */
32, /* size of prefetch block */
1, /* number of parallel prefetches */
1, /* Branch cost */
2, /* cost of FADD and FSUB insns. */
2, /* cost of FMUL instruction. */
56, /* cost of FDIV instruction. */
2, /* cost of FABS instruction. */
2, /* cost of FCHS instruction. */
56, /* cost of FSQRT instruction. */
};
static const
struct processor_costs athlon_cost = {
1, /* cost of an add instruction */
2, /* cost of a lea instruction */
1, /* variable shift costs */
1, /* constant shift costs */
{5, 5, 5, 5, 5}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
{18, 26, 42, 74, 74}, /* cost of a divide/mod */
1, /* cost of movsx */
1, /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{3, 4, 3}, /* cost of storing integer registers */
4, /* cost of reg,reg fld/fst */
{4, 4, 12}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{6, 6, 8}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
2, /* cost of moving MMX register */
{4, 4}, /* cost of loading MMX registers
in SImode and DImode */
{4, 4}, /* cost of storing MMX registers
in SImode and DImode */
2, /* cost of moving SSE register */
{4, 4, 6}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{4, 4, 5}, /* cost of storing SSE registers
in SImode, DImode and TImode */
5, /* MMX or SSE register to integer */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
4, /* cost of FADD and FSUB insns. */
4, /* cost of FMUL instruction. */
24, /* cost of FDIV instruction. */
2, /* cost of FABS instruction. */
2, /* cost of FCHS instruction. */
35, /* cost of FSQRT instruction. */
};
static const
struct processor_costs k8_cost = {
1, /* cost of an add instruction */
2, /* cost of a lea instruction */
1, /* variable shift costs */
1, /* constant shift costs */
{3, 4, 3, 4, 5}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
{18, 26, 42, 74, 74}, /* cost of a divide/mod */
1, /* cost of movsx */
1, /* cost of movzx */
8, /* "large" insn */
9, /* MOVE_RATIO */
4, /* cost for loading QImode using movzbl */
{3, 4, 3}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{3, 4, 3}, /* cost of storing integer registers */
4, /* cost of reg,reg fld/fst */
{4, 4, 12}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{6, 6, 8}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
2, /* cost of moving MMX register */
{3, 3}, /* cost of loading MMX registers
in SImode and DImode */
{4, 4}, /* cost of storing MMX registers
in SImode and DImode */
2, /* cost of moving SSE register */
{4, 3, 6}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{4, 4, 5}, /* cost of storing SSE registers
in SImode, DImode and TImode */
5, /* MMX or SSE register to integer */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
4, /* cost of FADD and FSUB insns. */
4, /* cost of FMUL instruction. */
19, /* cost of FDIV instruction. */
2, /* cost of FABS instruction. */
2, /* cost of FCHS instruction. */
35, /* cost of FSQRT instruction. */
};
static const
struct processor_costs pentium4_cost = {
1, /* cost of an add instruction */
1, /* cost of a lea instruction */
4, /* variable shift costs */
4, /* constant shift costs */
{15, 15, 15, 15, 15}, /* cost of starting a multiply */
0, /* cost of multiply per each bit set */
{56, 56, 56, 56, 56}, /* cost of a divide/mod */
1, /* cost of movsx */
1, /* cost of movzx */
16, /* "large" insn */
6, /* MOVE_RATIO */
2, /* cost for loading QImode using movzbl */
{4, 5, 4}, /* cost of loading integer registers
in QImode, HImode and SImode.
Relative to reg-reg move (2). */
{2, 3, 2}, /* cost of storing integer registers */
2, /* cost of reg,reg fld/fst */
{2, 2, 6}, /* cost of loading fp registers
in SFmode, DFmode and XFmode */
{4, 4, 6}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
2, /* cost of moving MMX register */
{2, 2}, /* cost of loading MMX registers
in SImode and DImode */
{2, 2}, /* cost of storing MMX registers
in SImode and DImode */
12, /* cost of moving SSE register */
{12, 12, 12}, /* cost of loading SSE registers
in SImode, DImode and TImode */
{2, 2, 8}, /* cost of storing SSE registers
in SImode, DImode and TImode */
10, /* MMX or SSE register to integer */
64, /* size of prefetch block */
6, /* number of parallel prefetches */
2, /* Branch cost */
5, /* cost of FADD and FSUB insns. */
7, /* cost of FMUL instruction. */
43, /* cost of FDIV instruction. */
2, /* cost of FABS instruction. */
2, /* cost of FCHS instruction. */
43, /* cost of FSQRT instruction. */
};
const struct processor_costs *ix86_cost = &pentium_cost;
/* Processor feature/optimization bitmasks. */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_K6 (1<<PROCESSOR_K6)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
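/* Each `const int' below is a bitset over the processor types.  The
   active tuning is tested against these masks elsewhere; a minimal
   sketch of the pattern, assuming the conventional definitions from
   i386.h (see also the TUNEMASK use in override_options below):

     #define TUNEMASK (1 << ix86_tune)
     ... if (x86_use_leave & TUNEMASK) ... use leave in the epilogue.  */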
const int x86_use_leave = m_386 | m_K6 | m_ATHLON_K8;
const int x86_push_memory = m_386 | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_zero_extend_with_and = m_486 | m_PENT;
const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6;
const int x86_cmove = m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_3dnow_a = m_ATHLON_K8;
const int x86_deep_branch = m_PPRO | m_K6 | m_ATHLON_K8 | m_PENT4;
const int x86_branch_hints = m_PENT4;
const int x86_use_sahf = m_PPRO | m_K6 | m_PENT4;
const int x86_partial_reg_stall = m_PPRO;
const int x86_use_loop = m_K6;
const int x86_use_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6);
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6 | m_PENT | m_386 | m_486 | m_ATHLON_K8;
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4;
const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4;
const int x86_add_esp_4 = m_ATHLON_K8 | m_K6 | m_PENT4;
const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6 | m_386 | m_486 | m_PENT4;
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_PPRO);
const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4;
const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4;
const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PENT4 | m_PPRO;
const int x86_decompose_lea = m_PENT4;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4;
const int x86_sse_partial_reg_dependency = m_PENT4 | m_PPRO;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just the
   lower part of scalar values in the proper format, leaving the upper
   part undefined. */
const int x86_sse_partial_regs = m_ATHLON_K8;
/* Athlon optimizes the partial-register FP special case, thus avoiding
   the need for extra instructions beforehand. */
const int x86_sse_partial_regs_for_cvtsd2ss = 0;
const int x86_sse_typeless_stores = m_ATHLON_K8;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4;
const int x86_use_ffreep = m_ATHLON_K8;
const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6;
const int x86_inter_unit_moves = ~(m_ATHLON_K8);
const int x86_ext_80387_constants = m_K6 | m_ATHLON | m_PENT4 | m_PPRO;
/* If the average insn count for a single function invocation is
   lower than this constant, emit a fast (but longer) prologue and
   epilogue. */
#define FAST_PROLOGUE_INSN_COUNT 20
/* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
/* Array of the smallest class containing reg number REGNO, indexed by
REGNO. Used by REGNO_REG_CLASS in i386.h. */
enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
{
/* ax, dx, cx, bx */
AREG, DREG, CREG, BREG,
/* si, di, bp, sp */
SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
/* FP registers */
FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
/* arg pointer */
NON_Q_REGS,
/* flags, fpsr, dirflag, frame */
NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
SSE_REGS, SSE_REGS,
MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
MMX_REGS, MMX_REGS,
NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
SSE_REGS, SSE_REGS,
};
/* The "default" register map used in 32bit mode. */
int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
-1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
static int const x86_64_int_parameter_registers[6] =
{
5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
};
static int const x86_64_int_return_registers[4] =
{
0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
};
/* The "default" register map used in 64bit mode. */
int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
{
0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
-1, -1, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
8,9,10,11,12,13,14,15, /* extended integer registers */
25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
};
/* Define the register numbers to be used in Dwarf debugging information.
The SVR4 reference port C compiler uses the following register numbers
in its Dwarf output code:
0 for %eax (gcc regno = 0)
1 for %ecx (gcc regno = 2)
2 for %edx (gcc regno = 1)
3 for %ebx (gcc regno = 3)
4 for %esp (gcc regno = 7)
5 for %ebp (gcc regno = 6)
6 for %esi (gcc regno = 4)
7 for %edi (gcc regno = 5)
The following three DWARF register numbers are never generated by
the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
believes these numbers have these meanings.
8 for %eip (no gcc equivalent)
9 for %eflags (gcc regno = 17)
10 for %trapno (no gcc equivalent)
It is not at all clear how we should number the FP stack registers
for the x86 architecture. If the version of SDB on x86/svr4 were
a bit less brain dead with respect to floating-point then we would
have a precedent to follow with respect to DWARF register numbers
for x86 FP registers, but the SDB on x86/svr4 is so completely
broken with respect to FP registers that it is hardly worth thinking
of it as something to strive for compatibility with.
The version of x86/svr4 SDB I have at the moment does (partially)
seem to believe that DWARF register number 11 is associated with
the x86 register %st(0), but that's about all. Higher DWARF
register numbers don't seem to be associated with anything in
particular, and even for DWARF regno 11, SDB only seems to under-
stand that it should say that a variable lives in %st(0) (when
asked via an `=' command) if we said it was in DWARF regno 11,
but SDB still prints garbage when asked for the value of the
variable in question (via a `/' command).
(Also note that the labels SDB prints for various FP stack regs
when doing an `x' command are all wrong.)
Note that these problems generally don't affect the native SVR4
C compiler because it doesn't allow the use of -O with -g and
because when it is *not* optimizing, it allocates a memory
location for each floating-point variable, and the memory
location is what gets described in the DWARF AT_location
attribute for the variable in question.
Regardless of the severe mental illness of the x86/svr4 SDB, we
do something sensible here and we use the following DWARF
register numbers. Note that these are all stack-top-relative
numbers.
11 for %st(0) (gcc regno = 8)
12 for %st(1) (gcc regno = 9)
13 for %st(2) (gcc regno = 10)
14 for %st(3) (gcc regno = 11)
15 for %st(4) (gcc regno = 12)
16 for %st(5) (gcc regno = 13)
17 for %st(6) (gcc regno = 14)
18 for %st(7) (gcc regno = 15)
*/
int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
{
0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
-1, 9, -1, -1, -1, /* arg, flags, fpsr, dir, frame */
21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
-1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
};
/* Test and compare insns in i386.md store the information needed to
generate branch and scc insns here. */
rtx ix86_compare_op0 = NULL_RTX;
rtx ix86_compare_op1 = NULL_RTX;
#define MAX_386_STACK_LOCALS 3
/* Size of the register save area. */
#define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
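/* Worked example, assuming the 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8:
     6 * 8 + 8 * 16 == 48 + 128 == 176 bytes,
   the usual x86-64 varargs register save area.  */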
/* Define the structure for the machine field in struct function. */
struct stack_local_entry GTY(())
{
unsigned short mode;
unsigned short n;
rtx rtl;
struct stack_local_entry *next;
};
/* Structure describing stack frame layout.
Stack grows downward:
[arguments]
<- ARG_POINTER
saved pc
saved frame pointer if frame_pointer_needed
<- HARD_FRAME_POINTER
[saved regs]
[padding1] \
)
[va_arg registers] (
> to_allocate <- FRAME_POINTER
[frame] (
)
[padding2] /
*/
struct ix86_frame
{
int nregs;
int padding1;
int va_arg_size;
HOST_WIDE_INT frame;
int padding2;
int outgoing_arguments_size;
int red_zone_size;
HOST_WIDE_INT to_allocate;
/* The offsets relative to ARG_POINTER. */
HOST_WIDE_INT frame_pointer_offset;
HOST_WIDE_INT hard_frame_pointer_offset;
HOST_WIDE_INT stack_pointer_offset;
/* When save_regs_using_mov is set, emit prologue using
move instead of push instructions. */
bool save_regs_using_mov;
};
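/* Illustrative values, assuming ia32 with frame_pointer_needed (a
   hedged sketch; ix86_compute_frame_layout derives the real numbers):
   the saved return address accounts for the first 4 bytes below
   ARG_POINTER and the saved %ebp for 4 more, so
   hard_frame_pointer_offset comes to 8, with [saved regs], padding1
   and the frame following below it.  */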
/* Used to enable/disable debugging features. */
const char *ix86_debug_arg_string, *ix86_debug_addr_string;
/* Code model option as passed by the user. */
const char *ix86_cmodel_string;
/* Parsed value. */
enum cmodel ix86_cmodel;
/* Asm dialect. */
const char *ix86_asm_string;
enum asm_dialect ix86_asm_dialect = ASM_ATT;
/* TLS dialect. */
const char *ix86_tls_dialect_string;
enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
/* Which unit we are generating floating point math for. */
enum fpmath_unit ix86_fpmath;
/* Which CPU we are scheduling for. */
enum processor_type ix86_tune;
/* Which instruction set architecture to use. */
enum processor_type ix86_arch;
/* Strings to hold which cpu and instruction set architecture to use. */
const char *ix86_tune_string; /* for -mtune=<xxx> */
const char *ix86_arch_string; /* for -march=<xxx> */
const char *ix86_fpmath_string; /* for -mfpmath=<xxx> */
/* # of registers to use to pass arguments. */
const char *ix86_regparm_string;
/* True if the SSE prefetch instruction is not a NOP. */
int x86_prefetch_sse;
/* ix86_regparm_string as a number */
int ix86_regparm;
/* Alignment to use for loops and jumps: */
/* Power of two alignment for loops. */
const char *ix86_align_loops_string;
/* Power of two alignment for non-loop jumps. */
const char *ix86_align_jumps_string;
/* Power of two alignment for stack boundary in bytes. */
const char *ix86_preferred_stack_boundary_string;
/* Preferred alignment for stack boundary in bits. */
int ix86_preferred_stack_boundary;
/* Values 1-5: see jump.c */
int ix86_branch_cost;
const char *ix86_branch_cost_string;
/* Power of two alignment for functions. */
const char *ix86_align_funcs_string;
/* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
static char internal_label_prefix[16];
static int internal_label_prefix_len;
static int local_symbolic_operand (rtx, enum machine_mode);
static int tls_symbolic_operand_1 (rtx, enum tls_model);
static void output_pic_addr_const (FILE *, rtx, int);
static void put_condition_code (enum rtx_code, enum machine_mode,
int, int, FILE *);
static const char *get_some_local_dynamic_name (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static rtx maybe_get_pool_constant (rtx);
static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
rtx *);
static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
enum machine_mode);
static rtx get_thread_pointer (int);
static rtx legitimize_tls_address (rtx, enum tls_model, int);
static void get_pc_thunk_name (char [32], unsigned int);
static rtx gen_push (rtx);
static int memory_address_length (rtx addr);
static int ix86_flags_dependant (rtx, rtx, enum attr_type);
static int ix86_agi_dependant (rtx, rtx, enum attr_type);
static enum attr_ppro_uops ix86_safe_ppro_uops (rtx);
static void ix86_dump_ppro_packet (FILE *);
static void ix86_reorder_insn (rtx *, rtx *);
static struct machine_function * ix86_init_machine_status (void);
static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
static int ix86_nsaved_regs (void);
static void ix86_emit_save_regs (void);
static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ix86_sched_reorder_ppro (rtx *, rtx *);
static HOST_WIDE_INT ix86_GOT_alias_set (void);
static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
static rtx ix86_expand_aligntest (rtx, int);
static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
static int ix86_issue_rate (void);
static int ix86_adjust_cost (rtx, rtx, rtx, int);
static void ix86_sched_init (FILE *, int, int);
static int ix86_sched_reorder (FILE *, int, rtx *, int *, int);
static int ix86_variable_issue (FILE *, int, rtx, int);
static int ia32_use_dfa_pipeline_interface (void);
static int ia32_multipass_dfa_lookahead (void);
static void ix86_init_mmx_sse_builtins (void);
static rtx x86_this_parameter (tree);
static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree);
static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
static void x86_file_start (void);
static void ix86_reorg (void);
static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
static tree ix86_build_builtin_va_list (void);
struct ix86_address
{
rtx base, index, disp;
HOST_WIDE_INT scale;
enum ix86_address_seg { SEG_DEFAULT, SEG_FS, SEG_GS } seg;
};
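/* For instance, the memory address in `movl 12(%ebx,%ecx,4), %eax'
   decomposes (a sketch of what ix86_decompose_address fills in) as:
     base = %ebx, index = %ecx, scale = 4,
     disp = (const_int 12), seg = SEG_DEFAULT.  */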
static int ix86_decompose_address (rtx, struct ix86_address *);
static int ix86_address_cost (rtx);
static bool ix86_cannot_force_const_mem (rtx);
static rtx ix86_delegitimize_address (rtx);
struct builtin_description;
static rtx ix86_expand_sse_comi (const struct builtin_description *,
tree, rtx);
static rtx ix86_expand_sse_compare (const struct builtin_description *,
tree, rtx);
static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx ix86_expand_store_builtin (enum insn_code, tree);
static rtx safe_vector_operand (rtx, enum machine_mode);
static enum rtx_code ix86_fp_compare_code_to_integer (enum rtx_code);
static void ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *,
enum rtx_code *, enum rtx_code *);
static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
static int ix86_fp_comparison_cost (enum rtx_code code);
static unsigned int ix86_select_alt_pic_regnum (void);
static int ix86_save_reg (unsigned int, int);
static void ix86_compute_frame_layout (struct ix86_frame *);
static int ix86_comp_type_attributes (tree, tree);
static int ix86_function_regparm (tree, tree);
const struct attribute_spec ix86_attribute_table[];
static bool ix86_function_ok_for_sibcall (tree, tree);
static tree ix86_handle_cdecl_attribute (tree *, tree, tree, int, bool *);
static tree ix86_handle_regparm_attribute (tree *, tree, tree, int, bool *);
static int ix86_value_regno (enum machine_mode);
static bool contains_128bit_aligned_vector_p (tree);
static bool ix86_ms_bitfield_layout_p (tree);
static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
static int extended_reg_mentioned_1 (rtx *, void *);
static bool ix86_rtx_costs (rtx, int, int, int *);
static int min_insn_size (rtx);
static void k8_avoid_jump_misspredicts (void);
#if defined (DO_GLOBAL_CTORS_BODY) && defined (HAS_INIT_SECTION)
static void ix86_svr3_asm_out_constructor (rtx, int);
#endif
/* Register class used for passing a given 64-bit part of the argument.
   These represent classes as documented by the psABI, with the exception
   of the SSESF and SSEDF classes, which are basically the SSE class,
   except that GCC will use an SFmode or DFmode move instead of a DImode
   move to avoid reformatting penalties.  Similarly, we play games with
   the INTEGERSI_CLASS to use cheaper SImode moves whenever possible
   (the upper half then contains only padding). */
enum x86_64_reg_class
{
X86_64_NO_CLASS,
X86_64_INTEGER_CLASS,
X86_64_INTEGERSI_CLASS,
X86_64_SSE_CLASS,
X86_64_SSESF_CLASS,
X86_64_SSEDF_CLASS,
X86_64_SSEUP_CLASS,
X86_64_X87_CLASS,
X86_64_X87UP_CLASS,
X86_64_MEMORY_CLASS
};
static const char * const x86_64_reg_class_name[] =
{"no", "integer", "integerSI", "sse", "sseSF", "sseDF", "sseup", "x87", "x87up", "no"};
#define MAX_CLASSES 4
static int classify_argument (enum machine_mode, tree,
enum x86_64_reg_class [MAX_CLASSES], int);
static int examine_argument (enum machine_mode, tree, int, int *, int *);
static rtx construct_container (enum machine_mode, tree, int, int, int,
const int *, int);
static enum x86_64_reg_class merge_classes (enum x86_64_reg_class,
enum x86_64_reg_class);
/* Table of constants used by fldpi, fldln2, etc. */
static REAL_VALUE_TYPE ext_80387_constants_table [5];
static bool ext_80387_constants_init = 0;
static void init_ext_80387_constants (void);
/* Initialize the GCC target structure. */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
# undef TARGET_MERGE_DECL_ATTRIBUTES
# define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif
#undef TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ix86_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ix86_expand_builtin
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
#undef TARGET_ASM_OPEN_PAREN
#define TARGET_ASM_OPEN_PAREN ""
#undef TARGET_ASM_CLOSE_PAREN
#define TARGET_ASM_CLOSE_PAREN ""
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
#ifdef ASM_QUAD
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
#endif
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ix86_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ix86_sched_init
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ix86_sched_reorder
#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
ia32_use_dfa_pipeline_interface
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
ia32_multipass_dfa_lookahead
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START x86_file_start
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ix86_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST ix86_address_cost
#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
#undef TARGET_CC_MODES_COMPATIBLE
#define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
struct gcc_target targetm = TARGET_INITIALIZER;
/* The svr4 ABI for the i386 says that records and unions are returned
in memory. */
#ifndef DEFAULT_PCC_STRUCT_RETURN
#define DEFAULT_PCC_STRUCT_RETURN 1
#endif
/* Sometimes certain combinations of command options do not make
sense on a particular target machine. You can define a macro
`OVERRIDE_OPTIONS' to take account of this. This macro, if
defined, is executed once just after all the command options have
been parsed.
Don't use this macro to turn on various extra optimizations for
`-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
void
override_options (void)
{
int i;
/* Comes from final.c -- no real reason to change it. */
#define MAX_CODE_ALIGN 16
static struct ptt
{
const struct processor_costs *cost; /* Processor costs */
const int target_enable; /* Target flags to enable. */
const int target_disable; /* Target flags to disable. */
const int align_loop; /* Default alignments. */
const int align_loop_max_skip;
const int align_jump;
const int align_jump_max_skip;
const int align_func;
}
const processor_target_table[PROCESSOR_max] =
{
{&i386_cost, 0, 0, 4, 3, 4, 3, 4},
{&i486_cost, 0, 0, 16, 15, 16, 15, 16},
{&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
{&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
{&k6_cost, 0, 0, 32, 7, 32, 7, 32},
{&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
{&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
{&k8_cost, 0, 0, 16, 7, 16, 7, 16}
};
static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
static struct pta
{
const char *const name; /* processor name or nickname. */
const enum processor_type processor;
const enum pta_flags
{
PTA_SSE = 1,
PTA_SSE2 = 2,
PTA_SSE3 = 4,
PTA_MMX = 8,
PTA_PREFETCH_SSE = 16,
PTA_3DNOW = 32,
PTA_3DNOW_A = 64,
PTA_64BIT = 128
} flags;
}
const processor_alias_table[] =
{
{"i386", PROCESSOR_I386, 0},
{"i486", PROCESSOR_I486, 0},
{"i586", PROCESSOR_PENTIUM, 0},
{"pentium", PROCESSOR_PENTIUM, 0},
{"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
{"winchip-c6", PROCESSOR_I486, PTA_MMX},
{"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
{"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
{"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
{"i686", PROCESSOR_PENTIUMPRO, 0},
{"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
{"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
{"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
{"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
{"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
{"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
| PTA_MMX | PTA_PREFETCH_SSE},
{"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
| PTA_MMX | PTA_PREFETCH_SSE},
{"prescott", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_MMX | PTA_PREFETCH_SSE},
{"nocona", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
| PTA_MMX | PTA_PREFETCH_SSE},
{"k6", PROCESSOR_K6, PTA_MMX},
{"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
{"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
{"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
| PTA_3DNOW_A},
{"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
| PTA_3DNOW | PTA_3DNOW_A},
{"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
| PTA_3DNOW_A | PTA_SSE},
{"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
| PTA_3DNOW_A | PTA_SSE},
{"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
| PTA_3DNOW_A | PTA_SSE},
{"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
| PTA_SSE | PTA_SSE2 },
{"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
{"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
{"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
{"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
| PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
};
int const pta_size = ARRAY_SIZE (processor_alias_table);
/* Set the default values for switches whose default depends on TARGET_64BIT
in case they weren't overwritten by command line options. */
if (TARGET_64BIT)
{
if (flag_omit_frame_pointer == 2)
flag_omit_frame_pointer = 1;
if (flag_asynchronous_unwind_tables == 2)
flag_asynchronous_unwind_tables = 1;
if (flag_pcc_struct_return == 2)
flag_pcc_struct_return = 0;
}
else
{
if (flag_omit_frame_pointer == 2)
flag_omit_frame_pointer = 0;
if (flag_asynchronous_unwind_tables == 2)
flag_asynchronous_unwind_tables = 0;
if (flag_pcc_struct_return == 2)
flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
}
#ifdef SUBTARGET_OVERRIDE_OPTIONS
SUBTARGET_OVERRIDE_OPTIONS;
#endif
if (!ix86_tune_string && ix86_arch_string)
ix86_tune_string = ix86_arch_string;
if (!ix86_tune_string)
ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
if (!ix86_arch_string)
ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
if (ix86_cmodel_string != 0)
{
if (!strcmp (ix86_cmodel_string, "small"))
ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
else if (flag_pic)
sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
else if (!strcmp (ix86_cmodel_string, "32"))
ix86_cmodel = CM_32;
else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
ix86_cmodel = CM_KERNEL;
else if (!strcmp (ix86_cmodel_string, "medium") && !flag_pic)
ix86_cmodel = CM_MEDIUM;
else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
ix86_cmodel = CM_LARGE;
else
error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
}
else
{
ix86_cmodel = CM_32;
if (TARGET_64BIT)
ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
}
if (ix86_asm_string != 0)
{
if (!strcmp (ix86_asm_string, "intel"))
ix86_asm_dialect = ASM_INTEL;
else if (!strcmp (ix86_asm_string, "att"))
ix86_asm_dialect = ASM_ATT;
else
error ("bad value (%s) for -masm= switch", ix86_asm_string);
}
if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
error ("code model `%s' not supported in the %s bit mode",
ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
if (ix86_cmodel == CM_LARGE)
sorry ("code model `large' not supported yet");
if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
sorry ("%i-bit mode not compiled in",
(target_flags & MASK_64BIT) ? 64 : 32);
for (i = 0; i < pta_size; i++)
if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
{
ix86_arch = processor_alias_table[i].processor;
/* Default cpu tuning to the architecture. */
ix86_tune = ix86_arch;
if (processor_alias_table[i].flags & PTA_MMX
&& !(target_flags_explicit & MASK_MMX))
target_flags |= MASK_MMX;
if (processor_alias_table[i].flags & PTA_3DNOW
&& !(target_flags_explicit & MASK_3DNOW))
target_flags |= MASK_3DNOW;
if (processor_alias_table[i].flags & PTA_3DNOW_A
&& !(target_flags_explicit & MASK_3DNOW_A))
target_flags |= MASK_3DNOW_A;
if (processor_alias_table[i].flags & PTA_SSE
&& !(target_flags_explicit & MASK_SSE))
target_flags |= MASK_SSE;
if (processor_alias_table[i].flags & PTA_SSE2
&& !(target_flags_explicit & MASK_SSE2))
target_flags |= MASK_SSE2;
if (processor_alias_table[i].flags & PTA_SSE3
&& !(target_flags_explicit & MASK_SSE3))
target_flags |= MASK_SSE3;
if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
x86_prefetch_sse = true;
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
error ("CPU you selected does not support x86-64 instruction set");
break;
}
if (i == pta_size)
error ("bad value (%s) for -march= switch", ix86_arch_string);
for (i = 0; i < pta_size; i++)
if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
{
ix86_tune = processor_alias_table[i].processor;
if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
error ("CPU you selected does not support x86-64 instruction set");
/* Intel CPUs have always interpreted SSE prefetch instructions as
NOPs; so, we can enable SSE prefetch instructions even when
-mtune (rather than -march) points us to a processor that has them.
However, the VIA C3 gives a SIGILL, so we only do that for i686 and
higher processors. */
if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
x86_prefetch_sse = true;
break;
}
if (i == pta_size)
error ("bad value (%s) for -mtune= switch", ix86_tune_string);
if (optimize_size)
ix86_cost = &size_cost;
else
ix86_cost = processor_target_table[ix86_tune].cost;
target_flags |= processor_target_table[ix86_tune].target_enable;
target_flags &= ~processor_target_table[ix86_tune].target_disable;
/* Arrange to set up i386_stack_locals for all functions. */
init_machine_status = ix86_init_machine_status;
/* Validate -mregparm= value. */
if (ix86_regparm_string)
{
i = atoi (ix86_regparm_string);
if (i < 0 || i > REGPARM_MAX)
error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
else
ix86_regparm = i;
}
else
if (TARGET_64BIT)
ix86_regparm = REGPARM_MAX;
/* If the user has provided any of the -malign-* options,
warn and use that value only if -falign-* is not set.
Remove this code in GCC 3.2 or later. */
if (ix86_align_loops_string)
{
warning ("-malign-loops is obsolete, use -falign-loops");
if (align_loops == 0)
{
i = atoi (ix86_align_loops_string);
if (i < 0 || i > MAX_CODE_ALIGN)
error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
else
align_loops = 1 << i;
}
}
if (ix86_align_jumps_string)
{
warning ("-malign-jumps is obsolete, use -falign-jumps");
if (align_jumps == 0)
{
i = atoi (ix86_align_jumps_string);
if (i < 0 || i > MAX_CODE_ALIGN)
error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
else
align_jumps = 1 << i;
}
}
if (ix86_align_funcs_string)
{
warning ("-malign-functions is obsolete, use -falign-functions");
if (align_functions == 0)
{
i = atoi (ix86_align_funcs_string);
if (i < 0 || i > MAX_CODE_ALIGN)
error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
else
align_functions = 1 << i;
}
}
/* Default align_* from the processor table. */
if (align_loops == 0)
{
align_loops = processor_target_table[ix86_tune].align_loop;
align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
}
if (align_jumps == 0)
{
align_jumps = processor_target_table[ix86_tune].align_jump;
align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
}
if (align_functions == 0)
{
align_functions = processor_target_table[ix86_tune].align_func;
}
/* Validate -mpreferred-stack-boundary= value, or provide default.
The default of 128 bits is for Pentium III's SSE __m128, but we
don't want additional code to keep the stack aligned when
optimizing for code size. */
ix86_preferred_stack_boundary = (optimize_size
? TARGET_64BIT ? 128 : 32
: 128);
if (ix86_preferred_stack_boundary_string)
{
i = atoi (ix86_preferred_stack_boundary_string);
if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
TARGET_64BIT ? 4 : 2);
else
ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
}
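/* Example: -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
   == 16 * 8 == 128 bits, i.e. a 16-byte-aligned stack.  */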
/* Validate -mbranch-cost= value, or provide default. */
ix86_branch_cost = processor_target_table[ix86_tune].cost->branch_cost;
if (ix86_branch_cost_string)
{
i = atoi (ix86_branch_cost_string);
if (i < 0 || i > 5)
error ("-mbranch-cost=%d is not between 0 and 5", i);
else
ix86_branch_cost = i;
}
if (ix86_tls_dialect_string)
{
if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
ix86_tls_dialect = TLS_DIALECT_GNU;
else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
ix86_tls_dialect = TLS_DIALECT_SUN;
else
error ("bad value (%s) for -mtls-dialect= switch",
ix86_tls_dialect_string);
}
/* Keep nonleaf frame pointers. */
if (TARGET_OMIT_LEAF_FRAME_POINTER)
flag_omit_frame_pointer = 1;
/* If we're doing fast math, we don't care about comparison order
wrt NaNs. This lets us use a shorter comparison sequence. */
if (flag_unsafe_math_optimizations)
target_flags &= ~MASK_IEEE_FP;
/* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
since the insns won't need emulation. */
if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
target_flags &= ~MASK_NO_FANCY_MATH_387;
/* Turn on SSE2 builtins for -msse3. */
if (TARGET_SSE3)
target_flags |= MASK_SSE2;
/* Turn on SSE builtins for -msse2. */
if (TARGET_SSE2)
target_flags |= MASK_SSE;
if (TARGET_64BIT)
{
if (TARGET_ALIGN_DOUBLE)
error ("-malign-double makes no sense in the 64bit mode");
if (TARGET_RTD)
error ("-mrtd calling convention not supported in the 64bit mode");
/* Enable by default the SSE and MMX builtins. */
target_flags |= (MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE);
ix86_fpmath = FPMATH_SSE;
}
else
{
ix86_fpmath = FPMATH_387;
/* The i386 ABI does not specify a red zone.  It still makes sense to
   use one when the programmer takes care to keep the stack from being
   destroyed. */
if (!(target_flags_explicit & MASK_NO_RED_ZONE))
target_flags |= MASK_NO_RED_ZONE;
}
if (ix86_fpmath_string != 0)
{
if (! strcmp (ix86_fpmath_string, "387"))
ix86_fpmath = FPMATH_387;
else if (! strcmp (ix86_fpmath_string, "sse"))
{
if (!TARGET_SSE)
{
warning ("SSE instruction set disabled, using 387 arithmetics");
ix86_fpmath = FPMATH_387;
}
else
ix86_fpmath = FPMATH_SSE;
}
else if (! strcmp (ix86_fpmath_string, "387,sse")
|| ! strcmp (ix86_fpmath_string, "sse,387"))
{
if (!TARGET_SSE)
{
warning ("SSE instruction set disabled, using 387 arithmetics");
ix86_fpmath = FPMATH_387;
}
else if (!TARGET_80387)
{
warning ("387 instruction set disabled, using SSE arithmetics");
ix86_fpmath = FPMATH_SSE;
}
else
ix86_fpmath = FPMATH_SSE | FPMATH_387;
}
else
error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
}
/* It makes no sense to ask for just SSE builtins, so MMX is also turned
on by -msse. */
if (TARGET_SSE)
{
target_flags |= MASK_MMX;
x86_prefetch_sse = true;
}
/* If a CPU has 3DNow! it also has MMX, so MMX is also turned on by -m3dnow. */
if (TARGET_3DNOW)
{
target_flags |= MASK_MMX;
/* If we are targeting the Athlon architecture, enable the 3Dnow/MMX
extensions it adds. */
if (x86_3dnow_a & (1 << ix86_arch))
target_flags |= MASK_3DNOW_A;
}
if ((x86_accumulate_outgoing_args & TUNEMASK)
&& !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
&& !optimize_size)
target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
/* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
{
char *p;
ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
p = strchr (internal_label_prefix, 'X');
internal_label_prefix_len = p - internal_label_prefix;
*p = '\0';
}
}
void
optimization_options (int level, int size ATTRIBUTE_UNUSED)
{
/* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
make the problem with not enough registers even worse. */
#ifdef INSN_SCHEDULING
if (level > 1)
flag_schedule_insns = 0;
#endif
/* The default values of these switches depend on TARGET_64BIT, which is
   not known at this moment.  Mark these values with 2 and let the user
   override them.  In case there is no command line option specifying
   them, we will set the defaults in override_options. */
if (optimize >= 1)
flag_omit_frame_pointer = 2;
flag_pcc_struct_return = 2;
flag_asynchronous_unwind_tables = 2;
}
/* Table of valid machine attributes. */
const struct attribute_spec ix86_attribute_table[] =
{
/* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
/* Stdcall attribute says callee is responsible for popping arguments
if they are not variable. */
{ "stdcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
/* Fastcall attribute says callee is responsible for popping arguments
if they are not variable. */
{ "fastcall", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
/* Cdecl attribute says the callee is a normal C declaration */
{ "cdecl", 0, 0, false, true, true, ix86_handle_cdecl_attribute },
/* Regparm attribute specifies how many integer arguments are to be
passed in registers. */
{ "regparm", 1, 1, false, true, true, ix86_handle_regparm_attribute },
#ifdef TARGET_DLLIMPORT_DECL_ATTRIBUTES
{ "dllimport", 0, 0, false, false, false, ix86_handle_dll_attribute },
{ "dllexport", 0, 0, false, false, false, ix86_handle_dll_attribute },
{ "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
#endif
{ "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
{ "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
{ NULL, 0, 0, false, false, false, NULL }
};
/* Decide whether we can make a sibling call to a function. DECL is the
declaration of the function being targeted by the call and EXP is the
CALL_EXPR representing the call. */
static bool
ix86_function_ok_for_sibcall (tree decl, tree exp)
{
/* If we are generating position-independent code, we cannot sibcall
optimize any indirect call, or a direct call to a global function,
as the PLT requires %ebx be live. */
if (!TARGET_64BIT && flag_pic && (!decl || TREE_PUBLIC (decl)))
return false;
/* If we are returning floats on the 80387 register stack, we cannot
make a sibcall from a function that doesn't return a float to a
function that does or, conversely, from a function that does return
a float to a function that doesn't; the necessary stack adjustment
would not be executed. */
if (STACK_REG_P (ix86_function_value (TREE_TYPE (exp)))
!= STACK_REG_P (ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)))))
return false;
/* If this call is indirect, we'll need to be able to use a call-clobbered
register for the address of the target function. Make sure that all
such registers are not used for passing parameters. */
if (!decl && !TARGET_64BIT)
{
tree type;
/* We're looking at the CALL_EXPR, we need the type of the function. */
type = TREE_OPERAND (exp, 0); /* pointer expression */
type = TREE_TYPE (type); /* pointer type */
type = TREE_TYPE (type); /* function type */
if (ix86_function_regparm (type, NULL) >= 3)
{
/* ??? Need to count the actual number of registers to be used,
not the possible number of registers. Fix later. */
return false;
}
}
/* Otherwise okay. That also includes certain types of indirect calls. */
return true;
}
/* Handle a "cdecl", "stdcall", or "fastcall" attribute;
arguments as in struct attribute_spec.handler. */
static tree
ix86_handle_cdecl_attribute (tree *node, tree name,
tree args ATTRIBUTE_UNUSED,
int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
if (TREE_CODE (*node) != FUNCTION_TYPE
&& TREE_CODE (*node) != METHOD_TYPE
&& TREE_CODE (*node) != FIELD_DECL
&& TREE_CODE (*node) != TYPE_DECL)
{
warning ("`%s' attribute only applies to functions",
IDENTIFIER_POINTER (name));
*no_add_attrs = true;
}
else
{
if (is_attribute_p ("fastcall", name))
{
if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
{
error ("fastcall and stdcall attributes are not compatible");
}
else if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
{
error ("fastcall and regparm attributes are not compatible");
}
}
else if (is_attribute_p ("stdcall", name))
{
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
{
error ("fastcall and stdcall attributes are not compatible");
}
}
}
if (TARGET_64BIT)
{
warning ("`%s' attribute ignored", IDENTIFIER_POINTER (name));
*no_add_attrs = true;
}
return NULL_TREE;
}
/* Handle a "regparm" attribute;
arguments as in struct attribute_spec.handler. */
static tree
ix86_handle_regparm_attribute (tree *node, tree name, tree args,
int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
if (TREE_CODE (*node) != FUNCTION_TYPE
&& TREE_CODE (*node) != METHOD_TYPE
&& TREE_CODE (*node) != FIELD_DECL
&& TREE_CODE (*node) != TYPE_DECL)
{
warning ("`%s' attribute only applies to functions",
IDENTIFIER_POINTER (name));
*no_add_attrs = true;
}
else
{
tree cst;
cst = TREE_VALUE (args);
if (TREE_CODE (cst) != INTEGER_CST)
{
warning ("`%s' attribute requires an integer constant argument",
IDENTIFIER_POINTER (name));
*no_add_attrs = true;
}
else if (compare_tree_int (cst, REGPARM_MAX) > 0)
{
warning ("argument to `%s' attribute larger than %d",
IDENTIFIER_POINTER (name), REGPARM_MAX);
*no_add_attrs = true;
}
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
{
error ("fastcall and regparm attributes are not compatible");
}
}
return NULL_TREE;
}
/* Return 0 if the attributes for two types are incompatible, 1 if they
are compatible, and 2 if they are nearly compatible (which causes a
warning to be generated). */
static int
ix86_comp_type_attributes (tree type1, tree type2)
{
/* Check for mismatch of non-default calling convention. */
const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
if (TREE_CODE (type1) != FUNCTION_TYPE)
return 1;
/* Check for mismatched fastcall types. */
if (!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
!= !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
return 0;
/* Check for mismatched return types (cdecl vs stdcall). */
if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
!= !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
return 0;
if (ix86_function_regparm (type1, NULL)
!= ix86_function_regparm (type2, NULL))
return 0;
return 1;
}
/* Return the regparm value for a function with the indicated TYPE and DECL.
DECL may be NULL when calling the function indirectly
or when considering a libcall. */
static int
ix86_function_regparm (tree type, tree decl)
{
tree attr;
int regparm = ix86_regparm;
bool user_convention = false;
if (!TARGET_64BIT)
{
attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
if (attr)
{
regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
user_convention = true;
}
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
{
regparm = 2;
user_convention = true;
}
/* Use register calling convention for local functions when possible. */
if (!TARGET_64BIT && !user_convention && decl
&& flag_unit_at_a_time && !profile_flag)
{
struct cgraph_local_info *i = cgraph_local_info (decl);
if (i && i->local)
{
/* We can't use regparm(3) for nested functions as these use
static chain pointer in third argument. */
if (DECL_CONTEXT (decl) && !DECL_NO_STATIC_CHAIN (decl))
regparm = 2;
else
regparm = 3;
}
}
}
return regparm;
}
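/* As an illustration of the conventions chosen above (hypothetical
declaration):
int __attribute__((regparm(3))) add3 (int a, int b, int c);
passes a, b and c in EAX, EDX and ECX. A local function may be
promoted to the same regparm(3) convention automatically under
-funit-at-a-time, dropping to regparm(2) when a nested function's
static chain needs the third register. */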
/* Return true if EAX is live at the start of the function. Used by
ix86_expand_prologue to determine if we need special help before
calling allocate_stack_worker. */
static bool
ix86_eax_live_at_start_p (void)
{
/* Cheat. Don't bother working forward from ix86_function_regparm
to the function type to whether an actual argument is located in
eax. Instead just look at cfg info, which is still close enough
to correct at this point. This gives false positives for broken
functions that might use uninitialized data that happens to be
allocated in eax, but who cares? */
return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->global_live_at_end, 0);
}
/* Value is the number of bytes of arguments automatically
popped when returning from a subroutine call.
FUNDECL is the declaration node of the function (as a tree),
FUNTYPE is the data type of the function (as a tree),
or for a library call it is an identifier node for the subroutine name.
SIZE is the number of bytes of arguments passed on the stack.
On the 80386, the RTD insn may be used to pop them if the number
of args is fixed, but if the number is variable then the caller
must pop them all. RTD can't be used for library calls now
because the library is compiled with the Unix compiler.
Use of RTD is a selectable option, since it is incompatible with
standard Unix calling sequences. If the option is not selected,
the caller must always pop the args.
The attribute stdcall is equivalent to RTD on a per module basis. */
int
ix86_return_pops_args (tree fundecl, tree funtype, int size)
{
int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
/* Cdecl functions override -mrtd, and never pop the stack. */
if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
/* Stdcall and fastcall functions will pop the stack if they do not
take variable arguments. */
if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
|| lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
rtd = 1;
if (rtd
&& (TYPE_ARG_TYPES (funtype) == NULL_TREE
|| (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
== void_type_node)))
return size;
}
/* Lose any fake structure return argument if it is passed on the stack. */
if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
&& !TARGET_64BIT)
{
int nregs = ix86_function_regparm (funtype, fundecl);
if (!nregs)
return GET_MODE_SIZE (Pmode);
}
return 0;
}
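/* Concretely (a hypothetical example): for
void __attribute__((stdcall)) f (int a, int b);
this function returns 8 and the callee exits with `ret $8', while a
cdecl or varargs function yields 0 and the caller adjusts %esp itself
after the call. */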
/* Argument support functions. */
/* Return true when register may be used to pass function parameters. */
bool
ix86_function_arg_regno_p (int regno)
{
int i;
if (!TARGET_64BIT)
return (regno < REGPARM_MAX
|| (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
if (SSE_REGNO_P (regno) && TARGET_SSE)
return true;
/* RAX is used as a hidden argument to varargs functions. */
if (!regno)
return true;
for (i = 0; i < REGPARM_MAX; i++)
if (regno == x86_64_int_parameter_registers[i])
return true;
return false;
}
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
For a library call, FNTYPE is 0. */
void
init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
tree fntype, /* tree ptr for function decl */
rtx libname, /* SYMBOL_REF of library name or 0 */
tree fndecl)
{
static CUMULATIVE_ARGS zero_cum;
tree param, next_param;
if (TARGET_DEBUG_ARG)
{
fprintf (stderr, "\ninit_cumulative_args (");
if (fntype)
fprintf (stderr, "fntype code = %s, ret code = %s",
tree_code_name[(int) TREE_CODE (fntype)],
tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
else
fprintf (stderr, "no fntype");
if (libname)
fprintf (stderr, ", libname = %s", XSTR (libname, 0));
}
*cum = zero_cum;
/* Set up the number of registers to use for passing arguments. */
if (fntype)
cum->nregs = ix86_function_regparm (fntype, fndecl);
else
cum->nregs = ix86_regparm;
cum->sse_nregs = SSE_REGPARM_MAX;
cum->mmx_nregs = MMX_REGPARM_MAX;
cum->warn_sse = true;
cum->warn_mmx = true;
cum->maybe_vaarg = false;
/* Use ECX and EDX registers if the function has the fastcall attribute. */
if (fntype && !TARGET_64BIT)
{
if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
{
cum->nregs = 2;
cum->fastcall = 1;
}
}
/* Determine if this function has variable arguments. This is
indicated by the last argument being `void_type_node' if there
are no variable arguments. If there are variable arguments, then
we won't pass anything in registers. */
if (cum->nregs || !TARGET_MMX || !TARGET_SSE)
{
for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
param != 0; param = next_param)
{
next_param = TREE_CHAIN (param);
if (next_param == 0 && TREE_VALUE (param) != void_type_node)
{
if (!TARGET_64BIT)
{
cum->nregs = 0;
cum->sse_nregs = 0;
cum->mmx_nregs = 0;
cum->warn_sse = 0;
cum->warn_mmx = 0;
cum->fastcall = 0;
}
cum->maybe_vaarg = true;
}
}
}
if ((!fntype && !libname)
|| (fntype && !TYPE_ARG_TYPES (fntype)))
cum->maybe_vaarg = 1;
if (TARGET_DEBUG_ARG)
fprintf (stderr, ", nregs=%d )\n", cum->nregs);
return;
}
/* x86-64 register passing implementation. See the x86-64 ABI for details.
The goal of this code is to classify each eightbyte of an incoming
argument by register class and assign registers accordingly. */
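/* For example (an illustrative case): under this scheme
struct s { double d; int i; };
occupies two eightbytes; the first is classified X86_64_SSEDF_CLASS
and the second X86_64_INTEGER_CLASS, so the struct is passed in one
SSE register and one integer register (e.g. %xmm0 and %rdi for the
first such argument). */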
/* Return the union class of CLASS1 and CLASS2.
See the x86-64 PS ABI for details. */
static enum x86_64_reg_class
merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
{
/* Rule #1: If both classes are equal, this is the resulting class. */
if (class1 == class2)
return class1;
/* Rule #2: If one of the classes is NO_CLASS, the resulting class is
the other class. */
if (class1 == X86_64_NO_CLASS)
return class2;
if (class2 == X86_64_NO_CLASS)
return class1;
/* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
return X86_64_MEMORY_CLASS;
/* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
|| (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
return X86_64_INTEGERSI_CLASS;
if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
|| class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
return X86_64_INTEGER_CLASS;
/* Rule #5: If one of the classes is X87 or X87UP class, MEMORY is used. */
if (class1 == X86_64_X87_CLASS || class1 == X86_64_X87UP_CLASS
|| class2 == X86_64_X87_CLASS || class2 == X86_64_X87UP_CLASS)
return X86_64_MEMORY_CLASS;
/* Rule #6: Otherwise class SSE is used. */
return X86_64_SSE_CLASS;
}
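/* A small worked case (hypothetical): for
union u { int i; float f; };
the int member classifies the single eightbyte as INTEGERSI and the
float member as SSESF; rule #4 merges these to INTEGERSI, so the
union travels in an integer register. */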
/* Classify the argument of type TYPE and mode MODE.
CLASSES will be filled by the register class used to pass each word
of the operand. The number of words is returned. If the parameter
should be passed in memory, 0 is returned. As a special case for
zero-sized containers, classes[0] will be NO_CLASS and 1 is returned.
BIT_OFFSET is used internally for handling records and specifies the
offset of the value, in bits modulo 256, to avoid overflow cases.
See the x86-64 PS ABI for details.
*/
static int
classify_argument (enum machine_mode mode, tree type,
enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
{
HOST_WIDE_INT bytes =
(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
/* Variable sized entities are always passed/returned in memory. */
if (bytes < 0)
return 0;
if (mode != VOIDmode
&& MUST_PASS_IN_STACK (mode, type))
return 0;
if (type && AGGREGATE_TYPE_P (type))
{
int i;
tree field;
enum x86_64_reg_class subclasses[MAX_CLASSES];
/* On x86-64 we pass structures larger than 16 bytes on the stack. */
if (bytes > 16)
return 0;
for (i = 0; i < words; i++)
classes[i] = X86_64_NO_CLASS;
/* Zero-sized arrays or structures are NO_CLASS. Since we return 0 to
signal the memory class, handle them as a special case. */
if (!words)
{
classes[0] = X86_64_NO_CLASS;
return 1;
}
/* Classify each field of record and merge classes. */
if (TREE_CODE (type) == RECORD_TYPE)
{
/* For C++ classes, first merge in the fields of the base classes. */
if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
{
tree bases = TYPE_BINFO_BASETYPES (type);
int n_bases = TREE_VEC_LENGTH (bases);
int i, j;
for (i = 0; i < n_bases; ++i)
{
tree binfo = TREE_VEC_ELT (bases, i);
int num;
int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
tree type = BINFO_TYPE (binfo);
num = classify_argument (TYPE_MODE (type),
type, subclasses,
(offset + bit_offset) % 256);
if (!num)
return 0;
for (j = 0; j < num; j++)
{
int pos = (offset + (bit_offset % 64)) / 8 / 8;
classes[j + pos] =
merge_classes (subclasses[j], classes[j + pos]);
}
}
}
/* And now merge the fields of structure. */
for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL)
{
int num;
/* Bitfields are always classified as integer. Handle them
early, since later code would consider them to be
misaligned integers. */
if (DECL_BIT_FIELD (field))
{
for (i = int_bit_position (field) / 8 / 8;
i < (int_bit_position (field)
+ tree_low_cst (DECL_SIZE (field), 0)
+ 63) / 8 / 8; i++)
classes[i] =
merge_classes (X86_64_INTEGER_CLASS,
classes[i]);
}
else
{
num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
TREE_TYPE (field), subclasses,
(int_bit_position (field)
+ bit_offset) % 256);
if (!num)
return 0;
for (i = 0; i < num; i++)
{
int pos =
(int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
classes[i + pos] =
merge_classes (subclasses[i], classes[i + pos]);
}
}
}
}
}
/* Arrays are handled as small records. */
else if (TREE_CODE (type) == ARRAY_TYPE)
{
int num;
num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
TREE_TYPE (type), subclasses, bit_offset);
if (!num)
return 0;
/* The partial classes are now full classes. */
if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
subclasses[0] = X86_64_SSE_CLASS;
if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
subclasses[0] = X86_64_INTEGER_CLASS;
for (i = 0; i < words; i++)
classes[i] = subclasses[i % num];
}
/* Unions are similar to RECORD_TYPE but the offset is always 0. */
else if (TREE_CODE (type) == UNION_TYPE
|| TREE_CODE (type) == QUAL_UNION_TYPE)
{
/* For C++ classes, first merge in the fields of the base classes. */
if (TYPE_BINFO (type) != NULL && TYPE_BINFO_BASETYPES (type) != NULL)
{
tree bases = TYPE_BINFO_BASETYPES (type);
int n_bases = TREE_VEC_LENGTH (bases);
int i, j;
for (i = 0; i < n_bases; ++i)
{
tree binfo = TREE_VEC_ELT (bases, i);
int num;
int offset = tree_low_cst (BINFO_OFFSET (binfo), 0) * 8;
tree type = BINFO_TYPE (binfo);
num = classify_argument (TYPE_MODE (type),
type, subclasses,
(offset + (bit_offset % 64)) % 256);
if (!num)
return 0;
for (j = 0; j < num; j++)
{
int pos = (offset + (bit_offset % 64)) / 8 / 8;
classes[j + pos] =
merge_classes (subclasses[j], classes[j + pos]);
}
}
}
for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL)
{
int num;
num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
TREE_TYPE (field), subclasses,
bit_offset);
if (!num)
return 0;
for (i = 0; i < num; i++)
classes[i] = merge_classes (subclasses[i], classes[i]);
}
}
}
else if (TREE_CODE (type) == SET_TYPE)
{
if (bytes <= 4)
{
classes[0] = X86_64_INTEGERSI_CLASS;
return 1;
}
else if (bytes <= 8)
{
classes[0] = X86_64_INTEGER_CLASS;
return 1;
}
else if (bytes <= 12)
{
classes[0] = X86_64_INTEGER_CLASS;
classes[1] = X86_64_INTEGERSI_CLASS;
return 2;
}
else
{
classes[0] = X86_64_INTEGER_CLASS;
classes[1] = X86_64_INTEGER_CLASS;
return 2;
}
}
else
abort ();
/* Final merger cleanup. */
for (i = 0; i < words; i++)
{
/* If one class is MEMORY, everything should be passed in
memory. */
if (classes[i] == X86_64_MEMORY_CLASS)
return 0;
/* The X86_64_SSEUP_CLASS should be always preceded by
X86_64_SSE_CLASS. */
if (classes[i] == X86_64_SSEUP_CLASS
&& (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
classes[i] = X86_64_SSE_CLASS;
/* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
if (classes[i] == X86_64_X87UP_CLASS
&& (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
classes[i] = X86_64_SSE_CLASS;
}
return words;
}
/* Compute the alignment needed. We align all types to their natural
boundaries, with the exception of XFmode, which is aligned to 128 bits. */
if (mode != VOIDmode && mode != BLKmode)
{
int mode_alignment = GET_MODE_BITSIZE (mode);
if (mode == XFmode)
mode_alignment = 128;
else if (mode == XCmode)
mode_alignment = 256;
if (COMPLEX_MODE_P (mode))
mode_alignment /= 2;
/* Misaligned fields are always returned in memory. */
if (bit_offset % mode_alignment)
return 0;
}
/* Classification of atomic types. */
switch (mode)
{
case DImode:
case SImode:
case HImode:
case QImode:
case CSImode:
case CHImode:
case CQImode:
if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
classes[0] = X86_64_INTEGERSI_CLASS;
else
classes[0] = X86_64_INTEGER_CLASS;
return 1;
case CDImode:
case TImode:
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
return 2;
case CTImode:
classes[0] = classes[1] = X86_64_INTEGER_CLASS;
classes[2] = classes[3] = X86_64_INTEGER_CLASS;
return 4;
case SFmode:
if (!(bit_offset % 64))
classes[0] = X86_64_SSESF_CLASS;
else
classes[0] = X86_64_SSE_CLASS;
return 1;
case DFmode:
classes[0] = X86_64_SSEDF_CLASS;
return 1;
case XFmode:
classes[0] = X86_64_X87_CLASS;
classes[1] = X86_64_X87UP_CLASS;
return 2;
case TFmode:
case TCmode:
return 0;
case XCmode:
classes[0] = X86_64_X87_CLASS;
classes[1] = X86_64_X87UP_CLASS;
classes[2] = X86_64_X87_CLASS;
classes[3] = X86_64_X87UP_CLASS;
return 4;
case DCmode:
classes[0] = X86_64_SSEDF_CLASS;
classes[1] = X86_64_SSEDF_CLASS;
return 2;
case SCmode:
classes[0] = X86_64_SSE_CLASS;
return 1;
case V4SFmode:
case V4SImode:
case V16QImode:
case V8HImode:
case V2DFmode:
case V2DImode:
classes[0] = X86_64_SSE_CLASS;
classes[1] = X86_64_SSEUP_CLASS;
return 2;
case V2SFmode:
case V2SImode:
case V4HImode:
case V8QImode:
return 0;
case BLKmode:
case VOIDmode:
return 0;
default:
abort ();
}
}
/* Examine the argument and set the number of registers required in each
class. Return 0 iff the parameter should be passed in memory. */
static int
examine_argument (enum machine_mode mode, tree type, int in_return,
int *int_nregs, int *sse_nregs)
{
enum x86_64_reg_class class[MAX_CLASSES];
int n = classify_argument (mode, type, class, 0);
*int_nregs = 0;
*sse_nregs = 0;
if (!n)
return 0;
for (n--; n >= 0; n--)
switch (class[n])
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
(*int_nregs)++;
break;
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
(*sse_nregs)++;
break;
case X86_64_NO_CLASS:
case X86_64_SSEUP_CLASS:
break;
case X86_64_X87_CLASS:
case X86_64_X87UP_CLASS:
if (!in_return)
return 0;
break;
case X86_64_MEMORY_CLASS:
abort ();
}
return 1;
}
/* Construct a container for the argument as used by the GCC interface.
See FUNCTION_ARG for the detailed description. */
static rtx
construct_container (enum machine_mode mode, tree type, int in_return,
int nintregs, int nsseregs, const int * intreg,
int sse_regno)
{
enum machine_mode tmpmode;
int bytes =
(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
enum x86_64_reg_class class[MAX_CLASSES];
int n;
int i;
int nexps = 0;
int needed_sseregs, needed_intregs;
rtx exp[MAX_CLASSES];
rtx ret;
n = classify_argument (mode, type, class, 0);
if (TARGET_DEBUG_ARG)
{
if (!n)
fprintf (stderr, "Memory class\n");
else
{
fprintf (stderr, "Classes:");
for (i = 0; i < n; i++)
{
fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
}
fprintf (stderr, "\n");
}
}
if (!n)
return NULL;
if (!examine_argument (mode, type, in_return, &needed_intregs, &needed_sseregs))
return NULL;
if (needed_intregs > nintregs || needed_sseregs > nsseregs)
return NULL;
/* First construct the simple cases. Avoid SCmode, since we want to use
a single register to pass this type. */
if (n == 1 && mode != SCmode)
switch (class[0])
{
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
return gen_rtx_REG (mode, intreg[0]);
case X86_64_SSE_CLASS:
case X86_64_SSESF_CLASS:
case X86_64_SSEDF_CLASS:
return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
case X86_64_X87_CLASS:
return gen_rtx_REG (mode, FIRST_STACK_REG);
case X86_64_NO_CLASS:
/* Zero sized array, struct or class. */
return NULL;
default:
abort ();
}
if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
&& mode != BLKmode)
return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
if (n == 2
&& class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
return gen_rtx_REG (XFmode, FIRST_STACK_REG);
if (n == 2 && class[0] == X86_64_INTEGER_CLASS
&& class[1] == X86_64_INTEGER_CLASS
&& (mode == CDImode || mode == TImode || mode == TFmode)
&& intreg[0] + 1 == intreg[1])
return gen_rtx_REG (mode, intreg[0]);
if (n == 4
&& class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS
&& class[2] == X86_64_X87_CLASS && class[3] == X86_64_X87UP_CLASS
&& mode != BLKmode)
return gen_rtx_REG (XCmode, FIRST_STACK_REG);
/* Otherwise figure out the entries of the PARALLEL. */
for (i = 0; i < n; i++)
{
switch (class[i])
{
case X86_64_NO_CLASS:
break;
case X86_64_INTEGER_CLASS:
case X86_64_INTEGERSI_CLASS:
/* Merge TImodes on aligned occasions here too. */
if (i * 8 + 8 > bytes)
tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
else if (class[i] == X86_64_INTEGERSI_CLASS)
tmpmode = SImode;
else
tmpmode = DImode;
/* We may be left with a size (e.g. 3 bytes) for which no integer mode exists. Use DImode. */
if (tmpmode == BLKmode)
tmpmode = DImode;
exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (tmpmode, *intreg),
GEN_INT (i*8));
intreg++;
break;
case X86_64_SSESF_CLASS:
exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (SFmode,
SSE_REGNO (sse_regno)),
GEN_INT (i*8));
sse_regno++;
break;
case X86_64_SSEDF_CLASS:
exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (DFmode,
SSE_REGNO (sse_regno)),
GEN_INT (i*8));
sse_regno++;
break;
case X86_64_SSE_CLASS:
if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
tmpmode = TImode;
else
tmpmode = DImode;
exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
gen_rtx_REG (tmpmode,
SSE_REGNO (sse_regno)),
GEN_INT (i*8));
if (tmpmode == TImode)
i++;
sse_regno++;
break;
default:
abort ();
}
}
ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
for (i = 0; i < nexps; i++)
XVECEXP (ret, 0, i) = exp [i];
return ret;
}
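/* For the struct { double d; int i; } example above, the PARALLEL built
here would resemble (abbreviated, illustrative RTL):
(parallel [(expr_list (reg:DF xmm0) (const_int 0))
(expr_list (reg:DI di) (const_int 8))])
i.e. bytes 0..7 in an SSE register and bytes 8..15 in an integer
register. */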
/* Update the data in CUM to advance over an argument
of mode MODE and data type TYPE.
(TYPE is null for libcalls where that information may not be available.) */
void
function_arg_advance (CUMULATIVE_ARGS *cum, /* current arg information */
enum machine_mode mode, /* current arg mode */
tree type, /* type of the argument or 0 if lib support */
int named) /* whether or not the argument was named */
{
int bytes =
(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
if (TARGET_DEBUG_ARG)
fprintf (stderr,
"function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, mode=%s, named=%d)\n\n",
words, cum->words, cum->nregs, cum->sse_nregs, GET_MODE_NAME (mode), named);
if (TARGET_64BIT)
{
int int_nregs, sse_nregs;
if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
cum->words += words;
else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
{
cum->nregs -= int_nregs;
cum->sse_nregs -= sse_nregs;
cum->regno += int_nregs;
cum->sse_regno += sse_nregs;
}
else
cum->words += words;
}
else
{
if (TARGET_SSE && SSE_REG_MODE_P (mode)
&& (!type || !AGGREGATE_TYPE_P (type)))
{
cum->sse_words += words;
cum->sse_nregs -= 1;
cum->sse_regno += 1;
if (cum->sse_nregs <= 0)
{
cum->sse_nregs = 0;
cum->sse_regno = 0;
}
}
else if (TARGET_MMX && MMX_REG_MODE_P (mode)
&& (!type || !AGGREGATE_TYPE_P (type)))
{
cum->mmx_words += words;
cum->mmx_nregs -= 1;
cum->mmx_regno += 1;
if (cum->mmx_nregs <= 0)
{
cum->mmx_nregs = 0;
cum->mmx_regno = 0;
}
}
else
{
cum->words += words;
cum->nregs -= words;
cum->regno += words;
if (cum->nregs <= 0)
{
cum->nregs = 0;
cum->regno = 0;
}
}
}
return;
}
/* Define where to put the arguments to a function.
Value is zero to push the argument on the stack,
or a hard register in which to store the argument.
MODE is the argument's machine mode.
TYPE is the data type of the argument (as a tree).
This is null for libcalls where that information may
not be available.
CUM is a variable of type CUMULATIVE_ARGS which gives info about
the preceding args and about the function being called.
NAMED is nonzero if this argument is a named parameter
(otherwise it is an extra parameter matching an ellipsis). */
rtx
function_arg (CUMULATIVE_ARGS *cum, /* current arg information */
enum machine_mode mode, /* current arg mode */
tree type, /* type of the argument or 0 if lib support */
int named) /* != 0 for normal args, == 0 for ... args */
{
rtx ret = NULL_RTX;
int bytes =
(mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
static bool warnedsse, warnedmmx;
/* Handle the hidden AL argument containing the number of SSE registers
used by varargs x86-64 functions. For the i386 ABI just return
constm1_rtx to avoid any AL settings. */
if (mode == VOIDmode)
{
if (TARGET_64BIT)
return GEN_INT (cum->maybe_vaarg
? (cum->sse_nregs < 0
? SSE_REGPARM_MAX
: cum->sse_regno)
: -1);
else
return constm1_rtx;
}
if (TARGET_64BIT)
ret = construct_container (mode, type, 0, cum->nregs, cum->sse_nregs,
&x86_64_int_parameter_registers [cum->regno],
cum->sse_regno);
else
switch (mode)
{
/* For now, pass fp/complex values on the stack. */
default:
break;
case BLKmode:
if (bytes < 0)
break;
/* FALLTHRU */
case DImode:
case SImode:
case HImode:
case QImode:
if (words <= cum->nregs)
{
int regno = cum->regno;
/* Fastcall allocates the first two DWORD (SImode) or
smaller arguments to ECX and EDX. */
if (cum->fastcall)
{
if (mode == BLKmode || mode == DImode)
break;
/* ECX, not EAX, is the first allocated register. */
if (regno == 0)
regno = 2;
}
ret = gen_rtx_REG (mode, regno);
}
break;
case TImode:
case V16QImode:
case V8HImode:
case V4SImode:
case V2DImode:
case V4SFmode:
case V2DFmode:
if (!type || !AGGREGATE_TYPE_P (type))
{
if (!TARGET_SSE && !warnedsse && cum->warn_sse)
{
warnedsse = true;
warning ("SSE vector argument without SSE enabled "
"changes the ABI");
}
if (cum->sse_nregs)
ret = gen_rtx_REG (mode, cum->sse_regno + FIRST_SSE_REG);
}
break;
case V8QImode:
case V4HImode:
case V2SImode:
case V2SFmode:
if (!type || !AGGREGATE_TYPE_P (type))
{
if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
{
warnedmmx = true;
warning ("MMX vector argument without MMX enabled "
"changes the ABI");
}
if (cum->mmx_nregs)
ret = gen_rtx_REG (mode, cum->mmx_regno + FIRST_MMX_REG);
}
break;
}
if (TARGET_DEBUG_ARG)
{
fprintf (stderr,
"function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
if (ret)
print_simple_rtl (stderr, ret);
else
fprintf (stderr, ", stack");
fprintf (stderr, " )\n");
}
return ret;
}
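/* Illustration of the hidden AL argument handled above (hypothetical
x86-64 call): for
extern int fmt (const char *, ...);
... fmt ("%f", 3.14); ...
one value lands in an SSE register, so the caller loads AL with 1
(e.g. `movl $1, %eax') before the call; a call passing no SSE values
would load 0. */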
/* A C expression that indicates when an argument must be passed by
reference. If nonzero for an argument, a copy of that argument is
made in memory and a pointer to the argument is passed instead of
the argument itself. The pointer is passed in whatever way is
appropriate for passing a pointer to that type. */
int
function_arg_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
enum machine_mode mode ATTRIBUTE_UNUSED,
tree type, int named ATTRIBUTE_UNUSED)
{
if (!TARGET_64BIT)
return 0;
if (type && int_size_in_bytes (type) == -1)
{
if (TARGET_DEBUG_ARG)
fprintf (stderr, "function_arg_pass_by_reference\n");
return 1;
}
return 0;
}
/* Return true when TYPE should be 128-bit aligned for the 32-bit
argument passing ABI. */
static bool
contains_128bit_aligned_vector_p (tree type)
{
enum machine_mode mode = TYPE_MODE (type);
if (SSE_REG_MODE_P (mode)
&& (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
return true;
if (TYPE_ALIGN (type) < 128)
return false;
if (AGGREGATE_TYPE_P (type))
{
/* Walk the aggregates recursively. */
if (TREE_CODE (type) == RECORD_TYPE
|| TREE_CODE (type) == UNION_TYPE
|| TREE_CODE (type) == QUAL_UNION_TYPE)
{
tree field;
if (TYPE_BINFO (type) != NULL
&& TYPE_BINFO_BASETYPES (type) != NULL)
{
tree bases = TYPE_BINFO_BASETYPES (type);
int n_bases = TREE_VEC_LENGTH (bases);
int i;
for (i = 0; i < n_bases; ++i)
{
tree binfo = TREE_VEC_ELT (bases, i);
tree type = BINFO_TYPE (binfo);
if (contains_128bit_aligned_vector_p (type))
return true;
}
}
/* And now walk the fields of the structure. */
for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
{
if (TREE_CODE (field) == FIELD_DECL
&& contains_128bit_aligned_vector_p (TREE_TYPE (field)))
return true;
}
}
/* Just in case some language passes arrays by value. */
else if (TREE_CODE (type) == ARRAY_TYPE)
{
if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
return true;
}
else
abort ();
}
return false;
}
/* Gives the alignment boundary, in bits, of an argument with the
specified mode and type. */
int
ix86_function_arg_boundary (enum machine_mode mode, tree type)
{
int align;
if (type)
align = TYPE_ALIGN (type);
else
align = GET_MODE_ALIGNMENT (mode);
if (align < PARM_BOUNDARY)
align = PARM_BOUNDARY;
if (!TARGET_64BIT)
{
/* i386 ABI defines all arguments to be 4 byte aligned. We have to
make an exception for SSE modes since these require 128bit
alignment.
The handling here differs from field_alignment. ICC aligns MMX
arguments to 4 byte boundaries, while structure fields are aligned
to 8 byte boundaries. */
if (!type)
{
if (!SSE_REG_MODE_P (mode))
align = PARM_BOUNDARY;
}
else
{
if (!contains_128bit_aligned_vector_p (type))
align = PARM_BOUNDARY;
}
}
if (align > 128)
align = 128;
return align;
}
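/* For instance (hypothetical 32-bit arguments): a plain `int' is
aligned to PARM_BOUNDARY (32 bits), while a `__m128' argument, or a
struct containing one, is aligned to 128 bits; nothing is aligned
beyond 128. */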
/* Return true if REGNO is a possible register number for a function value. */
bool
ix86_function_value_regno_p (int regno)
{
if (!TARGET_64BIT)
{
return ((regno) == 0
|| ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
|| ((regno) == FIRST_SSE_REG && TARGET_SSE));
}
return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
|| ((regno) == FIRST_SSE_REG && TARGET_SSE)
|| ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
}
/* Define how to find the value returned by a function.
VALTYPE is the data type of the value (as a tree).
If the precise function being called is known, FUNC is its FUNCTION_DECL;
otherwise, FUNC is 0. */
rtx
ix86_function_value (tree valtype)
{
if (TARGET_64BIT)
{
rtx ret = construct_container (TYPE_MODE (valtype), valtype, 1,
REGPARM_MAX, SSE_REGPARM_MAX,
x86_64_int_return_registers, 0);
/* For zero-sized structures, construct_container returns NULL, but we need
to keep the rest of the compiler happy by returning a meaningful value. */
if (!ret)
ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
return ret;
}
else
return gen_rtx_REG (TYPE_MODE (valtype),
ix86_value_regno (TYPE_MODE (valtype)));
}
/* Return nonzero iff TYPE is returned in memory. */
int
ix86_return_in_memory (tree type)
{
int needed_intregs, needed_sseregs, size;
enum machine_mode mode = TYPE_MODE (type);
if (TARGET_64BIT)
return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
if (mode == BLKmode)
return 1;
size = int_size_in_bytes (type);
if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
return 0;
if (VECTOR_MODE_P (mode) || mode == TImode)
{
/* User-created vectors small enough to fit in EAX. */
if (size < 8)
return 0;
/* MMX/3dNow values are returned on the stack, since we have to
execute EMMS/FEMMS before returning. */
if (size == 8)
return 1;
/* SSE values are returned in XMM0. */
/* ??? Except when it doesn't exist? We have a choice of
either (1) being ABI incompatible with a -march switch,
or (2) generating an error here. Given no good solution,
I think the safest thing is one warning. The user won't
be able to use -Werror, but.... */
if (size == 16)
{
static bool warned;
if (TARGET_SSE)
return 0;
if (!warned)
{
warned = true;
warning ("SSE vector return without SSE enabled "
"changes the ABI");
}
return 1;
}
}
if (mode == XFmode)
return 0;
if (size > 12)
return 1;
return 0;
}
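/* Illustrative 32-bit cases of the rules above (hypothetical types):
a 16-byte struct is returned in memory; an 8-byte `__m64' vector is
returned in memory per the EMMS note; a 16-byte `__m128' vector comes
back in XMM0 when SSE is enabled; `long double' (XFmode) comes back
in %st(0). */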
/* Define how to find the value returned by a library function
assuming the value has mode MODE. */
rtx
ix86_libcall_value (enum machine_mode mode)
{
if (TARGET_64BIT)
{
switch (mode)
{
case SFmode:
case SCmode:
case DFmode:
case DCmode:
return gen_rtx_REG (mode, FIRST_SSE_REG);
case XFmode:
case XCmode:
return gen_rtx_REG (mode, FIRST_FLOAT_REG);
case TFmode:
case TCmode:
return NULL;
default:
return gen_rtx_REG (mode, 0);
}
}
else
return gen_rtx_REG (mode, ix86_value_regno (mode));
}
/* Given a mode, return the register to use for a return value. */
static int
ix86_value_regno (enum machine_mode mode)
{
/* Floating point return values in %st(0). */
if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_FLOAT_RETURNS_IN_80387)
return FIRST_FLOAT_REG;
/* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
we prevent this case when SSE is not available. */
if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
return FIRST_SSE_REG;
/* Everything else in %eax. */
return 0;
}
/* Create the va_list data type. */
static tree
ix86_build_builtin_va_list (void)
{
tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
/* For i386 we use a plain pointer to the argument area. */
if (!TARGET_64BIT)
return build_pointer_type (char_type_node);
record = (*lang_hooks.types.make_type) (RECORD_TYPE);
type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
unsigned_type_node);
f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
unsigned_type_node);
f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
ptr_type_node);
f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
ptr_type_node);
DECL_FIELD_CONTEXT (f_gpr) = record;
DECL_FIELD_CONTEXT (f_fpr) = record;
DECL_FIELD_CONTEXT (f_ovf) = record;
DECL_FIELD_CONTEXT (f_sav) = record;
TREE_CHAIN (record) = type_decl;
TYPE_NAME (record) = type_decl;
TYPE_FIELDS (record) = f_gpr;
TREE_CHAIN (f_gpr) = f_fpr;
TREE_CHAIN (f_fpr) = f_ovf;
TREE_CHAIN (f_ovf) = f_sav;
layout_type (record);
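/* The record built above corresponds to the C-level type required by
the x86-64 ABI:
typedef struct __va_list_tag {
unsigned int gp_offset;
unsigned int fp_offset;
void *overflow_arg_area;
void *reg_save_area;
} __va_list_tag; */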
/* The correct type is an array type of one element. */