blob: 6bc6f0ace6ae87bd307835975c3e101e8dd4ae8e [file] [log] [blame]
/* Subroutines for insn-output.c for SPARC.
Copyright (C) 1987-2021 Free Software Foundation, Inc.
Contributed by Michael Tiemann (tiemann@cygnus.com)
64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
at Cygnus Support.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
/* Processor costs.  One instance per -mtune'd processor; each field is an
   operation cost, initialized with COSTS_N_INSNS in the tables below.  */
struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:
	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
     A value of zero indicates that the multiply costs is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;

  /* cost of a (predictable) branch.  */
  const int branch_cost;
};
/* Cost table for the Cypress processor.  */
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};
/* Cost table for the SuperSPARC processor.  */
static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
  3 /* branch cost */
};
/* Cost table for the HyperSPARC processor.  */
static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};
/* Cost table for the LEON processor.  */
static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};
/* Cost table for the LEON3 processor.  */
static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};
/* Cost table for the LEON5 processor.  */
static const
struct processor_costs leon5_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (18), /* fdivd */
  COSTS_N_INSNS (25), /* fsqrts */
  COSTS_N_INSNS (26), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};
/* Cost table for the SPARClet processor.  */
static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};
/* Cost table for the UltraSPARC processor.  */
static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
  2 /* branch cost */
};
/* Cost table for the UltraSPARC III processor.  */
static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};
/* Cost table for the Niagara (UltraSPARC T1) processor.  */
static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  4 /* branch cost */
};
/* Cost table for the Niagara-2 processor.  */
static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};
/* Cost table for the Niagara-3 processor.  */
static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};
/* Cost table for the Niagara-4 processor.  */
static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};
/* Cost table for the Niagara-7 processor.  */
static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};
/* Cost table for the SPARC M8 processor.  */
static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};
/* The cost table currently in effect; defaults to cypress_costs and is
   presumably repointed when a specific CPU is selected — see
   sparc_option_override.  */
static const struct processor_costs *sparc_costs = &cypress_costs;
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
   somebody does not branch between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
/* Otherwise the slot is only reserved when the sibcall sequence needs a
   scratch register anyway (64-bit non-MEDLOW code models, or PIC).  */
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.
   NOTE(review): the row comments below assume the conventional SPARC
   hard-register numbering (%g, %o, %l, %i, then FP registers) — confirm
   against REGISTER_NAMES in sparc.h.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,	/* 0-7:   %g0-%g7 */
  0, 0, 0, 0, 0, 0, 1, 0,	/* 8-15:  %o registers; only %o6 (%sp) */
  0, 0, 0, 0, 0, 0, 0, 0,	/* 16-23: %l registers */
  1, 1, 1, 1, 1, 1, 0, 1,	/* 24-31: %i registers; not %i6 (%fp) */
  1, 1, 1, 1, 1, 1, 1, 1,	/* 32-95: floating-point registers */
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};	/* 96-102: remaining special registers */
/* Per-function machine-specific state, garbage-collected along with
   cfun (hence the GTY marker).  */
struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};
/* Convenience accessors for the machine_function fields of the current
   function.  */
#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;
/* Forward declarations of the static target hook implementations and
   local helper functions defined later in this file.  */
static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);
static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int leon5_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);
static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     const function_arg_info &);
static void sparc_function_arg_advance (cumulative_args_t,
					const function_arg_info &);
static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					const function_arg_info &);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    const function_arg_info &);
static bool sparc_return_in_memory (const_tree, const_tree);
static rtx sparc_struct_value_rtx (tree, int);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static void sparc_conditional_register_usage (void);
static bool sparc_use_pseudo_pic_reg (void);
static void sparc_init_pic_reg (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
					   reg_class_t);
static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
					 reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					    const vec_perm_indices &);
static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
      do_diagnostic, handler, exclude } */
  SUBTARGET_ATTRIBUTE_TABLE,
  /* Sentinel terminating the table.  */
  { NULL,        0, 0, false, false, false, false, NULL, NULL }
};
#endif
/* One flag per global register; presumably records which registers have
   already had assembler directives emitted for them — see sparc_file_end.
   TODO(review): confirm exact semantics against the uses of this array.  */
char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG sparc_init_pic_reg

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const

#undef TARGET_CAN_FOLLOW_JUMP
#define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump

#undef TARGET_ZERO_CALL_USED_REGS
#define TARGET_ZERO_CALL_USED_REGS sparc_zero_call_used_regs
#ifdef SPARC_GCOV_TYPE_SIZE
static HOST_WIDE_INT
sparc_gcov_type_size (void)
{
return SPARC_GCOV_TYPE_SIZE;
}
#undef TARGET_GCOV_TYPE_SIZE
#define TARGET_GCOV_TYPE_SIZE sparc_gcov_type_size
#endif
struct gcc_target targetm = TARGET_INITIALIZER;
/* Strip a possible sign or zero extension from X and return the inner
   memory reference, or NULL_RTX if X contains no MEM.  */

static rtx
mem_ref (rtx x)
{
  rtx inner = x;

  if (GET_CODE (inner) == SIGN_EXTEND || GET_CODE (inner) == ZERO_EXTEND)
    inner = XEXP (inner, 0);

  return MEM_P (inner) ? inner : NULL_RTX;
}
/* Return true if hard register REG appears among the source operands
   (operand 1, and operand 2 for three-operand insns) of INSN.  */

static bool
insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
{
  extract_insn (insn);

  if (REG_P (recog_data.operand[1]) && REGNO (recog_data.operand[1]) == reg)
    return true;

  if (recog_data.n_operands == 3
      && REG_P (recog_data.operand[2])
      && REGNO (recog_data.operand[2]) == reg)
    return true;

  return false;
}
/* Return true if INSN is a single-set floating-point division or
   square-root instruction (single or double precision).  */

static bool
div_sqrt_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  const enum attr_type t = get_attr_type (insn);
  return (t == TYPE_FPDIVS || t == TYPE_FPSQRTS
	  || t == TYPE_FPDIVD || t == TYPE_FPSQRTD);
}
/* Return true if INSN is a single-set floating-point operation: a move,
   conditional move, arithmetic op, compare, multiply, divide or sqrt.  */

static bool
fpop_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  const enum attr_type t = get_attr_type (insn);
  return (t == TYPE_FPMOVE || t == TYPE_FPCMOVE
	  || t == TYPE_FP || t == TYPE_FPCMP || t == TYPE_FPMUL
	  || t == TYPE_FPDIVS || t == TYPE_FPSQRTS
	  || t == TYPE_FPDIVD || t == TYPE_FPSQRTD);
}
/* Return true if INSN is one of the atomic instructions relevant to the
   LEON3 errata: SWAP, LDSTUB or the LEON3 compare-and-swap.  */

static bool
atomic_insn_for_leon3_p (rtx_insn *insn)
{
  const int code = INSN_CODE (insn);

  return (code == CODE_FOR_swapsi
	  || code == CODE_FOR_ldstub
	  || code == CODE_FOR_atomic_compare_and_swap_leon3_1);
}
/* Return true if INSN is a single-set store instruction, either to an
   integer register (TYPE_STORE) or to an FP register (TYPE_FPSTORE).  */

static bool
store_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  const enum attr_type t = get_attr_type (insn);
  return t == TYPE_STORE || t == TYPE_FPSTORE;
}
/* Return true if INSN is a single-set load instruction: an integer load
   (zero- or sign-extending) or an FP load.  */

static bool
load_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  const enum attr_type t = get_attr_type (insn);
  return t == TYPE_LOAD || t == TYPE_SLOAD || t == TYPE_FPLOAD;
}
/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is a md pattern or asm statement, i.e. a real instruction
   rather than a debug insn or a USE/CLOBBER marker.  */
#define USEFUL_INSN_P(INSN)			\
  (NONDEBUG_INSN_P (INSN)			\
   && GET_CODE (PATTERN (INSN)) != USE		\
   && GET_CODE (PATTERN (INSN)) != CLOBBER)
/* Return the next active insn after INSN that will occupy space in the
   output, skipping unspec_volatile patterns, raw ASM_INPUT patterns and
   asm statements whose template is the empty string.  Return NULL when
   the end of the insn stream is reached.  */

rtx_insn *
next_active_non_empty_insn (rtx_insn *insn)
{
  insn = next_active_insn (insn);

  while (insn
	 && (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	     || GET_CODE (PATTERN (insn)) == ASM_INPUT
	     /* An asm statement with an empty template emits nothing.  */
	     || (USEFUL_INSN_P (insn)
		 && (asm_noperands (PATTERN (insn)) >= 0)
		 && !strcmp (decode_asm_operands (PATTERN (insn),
						  NULL, NULL, NULL,
						  NULL, NULL), ""))))
    insn = next_active_insn (insn);

  return insn;
}
/* Main body of the errata workaround pass: scan the (nearly final) insn
   stream and insert NOPs to break the problematic sequences of the
   AT697F, UT699, UT700 and GR712RC errata, as enabled by the
   corresponding -mfix-* options.  Always returns 0 (no TODO flags).  */

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;
  /* True until the first useful insn of the function has been seen.  */
  bool find_first_useful = true;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;
      rtx_insn *jump;
      rtx_sequence *seq;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn)
	  && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
	{
	  jump = seq->insn (0);
	  insn = seq->insn (1);
	}
      else if (JUMP_P (insn))
	jump = insn;
      else
	jump = NULL;

      /* Do not begin function with atomic instruction.  */
      if (sparc_fix_ut700
	  && find_first_useful
	  && USEFUL_INSN_P (insn))
	{
	  find_first_useful = false;
	  if (atomic_insn_for_leon3_p (insn))
	    emit_insn_before (gen_nop (), insn);
	}

      /* Place a NOP at the branch target of an integer branch if it is a
	 floating-point operation or a floating-point branch.  */
      if (sparc_fix_gr712rc
	  && jump
	  && jump_to_label_p (jump)
	  && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
	{
	  rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	  if (target
	      && (fpop_insn_p (target)
		  || (JUMP_P (target)
		      && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
	    emit_insn_before (gen_nop (), target);
	}

      /* Insert a NOP between load instruction and atomic instruction.  Insert
	 a NOP at branch target if there is a load in delay slot and an atomic
	 instruction at branch target.  */
      if (sparc_fix_ut700
	  && NONJUMP_INSN_P (insn)
	  && load_insn_p (insn))
	{
	  if (jump && jump_to_label_p (jump))
	    {
	      rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	      if (target && atomic_insn_for_leon3_p (target))
		emit_insn_before (gen_nop (), target);
	    }

	  next = next_active_non_empty_insn (insn);
	  if (!next)
	    break;

	  if (atomic_insn_for_leon3_p (next))
	    insert_nop = true;
	}

      /* Look for a sequence that starts with a fdiv or fsqrt instruction and
	 ends with another fdiv or fsqrt instruction with no dependencies on
	 the former, along with an appropriate pattern in between.  */
      if (sparc_fix_lost_divsqrt
	  && NONJUMP_INSN_P (insn)
	  && div_sqrt_insn_p (insn))
	{
	  int i;
	  int fp_found = 0;
	  rtx_insn *after;

	  const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  /* Scan a 4-instruction window following the div/sqrt.  */
	  for (after = next, i = 0; i < 4; i++)
	    {
	      /* Count floating-point operations.  */
	      if (i != 3 && fpop_insn_p (after))
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (insn_uses_reg_p (after, dest_reg))
		    break;
		  fp_found++;
		}

	      /* Count floating-point loads.  */
	      if (i != 3
		  && (set = single_set (after)) != NULL_RTX
		  && REG_P (SET_DEST (set))
		  && REGNO (SET_DEST (set)) > 31)
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (REGNO (SET_DEST (set)) == dest_reg)
		    break;
		  fp_found++;
		}

	      /* Check if this is a problematic sequence.  */
	      if (i > 1
		  && fp_found >= 2
		  && div_sqrt_insn_p (after))
		{
		  /* If this is the short version of the problematic
		     sequence we add two NOPs in a row to also prevent
		     the long version.  */
		  if (i == 2)
		    emit_insn_before (gen_nop (), next);
		  insert_nop = true;
		  break;
		}

	      /* No need to scan past a second div/sqrt.  */
	      if (div_sqrt_insn_p (after))
		break;

	      /* Insert NOP before branch.  */
	      if (i < 3
		  && (!NONJUMP_INSN_P (after)
		      || GET_CODE (PATTERN (after)) == SEQUENCE))
		{
		  insert_nop = true;
		  break;
		}

	      after = next_active_insn (after);
	      if (!after)
		break;
	    }
	}

      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && store_insn_p (insn))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_non_empty_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if (store_insn_p (after))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if ((set = single_set (after)) != NULL_RTX
		      && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
		    break;

		  after = next_active_non_empty_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && store_insn_p (after))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && mem_ref (SET_SRC (set))
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31
	       && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
				 ld [address], %fx+1
				 FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
			     ld [address], %fx+1
			     FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
			 ld [address], %fx+1
			 fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;

	      /* GOT accesses uses LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}

	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
		       dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}

	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}
namespace {

/* Static metadata describing the errata workaround pass to the pass
   manager.  */
const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* RTL pass wrapping sparc_do_work_around_errata; it runs late so the
   insn stream is essentially in its final form.  */
class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only if at least one of the errata workarounds is enabled.  */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f
	     || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
	     || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace
/* Factory function registered with the pass manager: create a new
   instance of the errata workaround pass in context CTXT.  */

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  rtl_opt_pass *pass = new pass_work_around_errata (ctxt);
  return pass;
}
/* Helpers for TARGET_DEBUG_OPTIONS. */
static void
dump_target_flag_bits (const int flags)
{
if (flags & MASK_64BIT)
fprintf (stderr, "64BIT ");
if (flags & MASK_APP_REGS)
fprintf (stderr, "APP_REGS ");
if (flags & MASK_FASTER_STRUCTS)
fprintf (stderr, "FASTER_STRUCTS ");
if (flags & MASK_FLAT)
fprintf (stderr, "FLAT ");
if (flags & MASK_FMAF)
fprintf (stderr, "FMAF ");
if (flags & MASK_FSMULD)
fprintf (stderr, "FSMULD ");
if (flags & MASK_FPU)
fprintf (stderr, "FPU ");
if (flags & MASK_HARD_QUAD)
fprintf (stderr, "HARD_QUAD ");
if (flags & MASK_POPC)
fprintf (stderr, "POPC ");
if (flags & MASK_PTR64)
fprintf (stderr, "PTR64 ");
if (flags & MASK_STACK_BIAS)
fprintf (stderr, "STACK_BIAS ");
if (flags & MASK_UNALIGNED_DOUBLES)
fprintf (stderr, "UNALIGNED_DOUBLES ");
if (flags & MASK_V8PLUS)
fprintf (stderr, "V8PLUS ");
if (flags & MASK_VIS)
fprintf (stderr, "VIS ");
if (flags & MASK_VIS2)
fprintf (stderr, "VIS2 ");
if (flags & MASK_VIS3)
fprintf (stderr, "VIS3 ");
if (flags & MASK_VIS4)
fprintf (stderr, "VIS4 ");
if (flags & MASK_VIS4B)
fprintf (stderr, "VIS4B ");
if (flags & MASK_CBCOND)
fprintf (stderr, "CBCOND ");
if (flags & MASK_DEPRECATED_V8_INSNS)
fprintf (stderr, "DEPRECATED_V8_INSNS ");
if (flags & MASK_LEON)
fprintf (stderr, "LEON ");
if (flags & MASK_LEON3)
fprintf (stderr, "LEON3 ");
if (flags & MASK_SPARCLET)
fprintf (stderr, "SPARCLET ");
if (flags & MASK_SPARCLITE)
fprintf (stderr, "SPARCLITE ");
if (flags & MASK_V8)
fprintf (stderr, "V8 ");
if (flags & MASK_V9)
fprintf (stderr, "V9 ");
}
static void
dump_target_flags (const char *prefix, const int flags)
{
fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
dump_target_flag_bits (flags);
fprintf(stderr, "]\n");
}
/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum sparc_processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon5, PROCESSOR_LEON5 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { TARGET_CPU_m8, PROCESSOR_M8 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;

  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8 },
    { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
    { "leon5", MASK_ISA, MASK_V8|MASK_LEON3 },
    { "leon3v7", MASK_ISA, MASK_LEON3 },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
    /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
    /* UltraSPARC M8 */
    { "m8", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
  };
  const struct cpu_table *cpu;
  unsigned int i;

  /* Parse the comma-separated -mdebug= string, with '!' negating an
     individual debug flag.  */
  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
	{
	  bool invert;
	  int mask;

	  p = NULL;
	  if (*q == '!')
	    {
	      invert = true;
	      q++;
	    }
	  else
	    invert = false;

	  if (! strcmp (q, "all"))
	    mask = MASK_DEBUG_ALL;
	  else if (! strcmp (q, "options"))
	    mask = MASK_DEBUG_OPTIONS;
	  else
	    error ("unknown %<-mdebug-%s%> switch", q);

	  if (invert)
	    sparc_debug &= ~mask;
	  else
	    sparc_debug |= mask;
	}
    }

  /* Enable the FsMULd instruction by default if not explicitly specified by
     the user.  It may be later disabled by the CPU (explicitly or not).  */
  if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
    target_flags |= MASK_FSMULD;

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags("Initial target_flags", target_flags);
      dump_target_flags("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (!TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
	   DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64bit archs to use 128 bit long double */
  if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
    {
      error ("%<-mlong-double-64%> not allowed with %<-m64%>");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
	error ("%<-fcall-saved-REG%> is not supported for out registers");
	call_used_regs [i] = 1;
      }

  /* Set the default CPU if no -mcpu option was specified.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
	if (def->cpu == TARGET_CPU_DEFAULT)
	  break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  /* Set the default CPU if no -mtune option was specified.  */
  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  /* Apply the CPU's masks, but never enable a feature the assembler
     cannot handle, nor one explicitly overridden on the command line.  */
  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
		   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
		   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_SPARC5_VIS4
		   & ~(MASK_VIS4 | MASK_SUBXC)
#endif
#ifndef HAVE_AS_SPARC6
		   & ~(MASK_VIS4B)
#endif
#ifndef HAVE_AS_LEON
		   & ~(MASK_LEON | MASK_LEON3)
#endif
		   & ~(target_flags_explicit & MASK_FEATURES)
		   );

  /* FsMULd is a V8 instruction.  */
  if (!TARGET_V8 && !TARGET_V9)
    target_flags &= ~MASK_FSMULD;

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* -mvis4 implies -mvis3, -mvis2 and -mvis.  */
  if (TARGET_VIS4)
    target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;

  /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
  if (TARGET_VIS4B)
    target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
     FPU is disabled.  */
  if (!TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
		      | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available; -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && !TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32-bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9 and makes no sense in 64-bit mode.  */
  if (!TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32-bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Use LRA instead of reload, unless otherwise instructed.  */
  if (!(target_flags_explicit & MASK_LRA))
    target_flags |= MASK_LRA;

  /* Enable applicable errata workarounds for LEON3FT.  */
  if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
    {
      sparc_fix_b2bst = 1;
      sparc_fix_lost_divsqrt = 1;
    }

  /* Disable FsMULd for the UT699 since it doesn't work correctly.  */
  if (sparc_fix_ut699)
    target_flags &= ~MASK_FSMULD;

#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
  if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
    target_flags |= MASK_LONG_DOUBLE_128;
#endif

  if (TARGET_DEBUG_OPTIONS)
    dump_target_flags ("Final target_flags", target_flags);

  /* Set the code model if no -mcmodel option was specified.  */
  if (global_options_set.x_sparc_code_model)
    {
      if (TARGET_ARCH32)
	error ("%<-mcmodel=%> is not supported in 32-bit mode");
    }
  else
    {
      if (TARGET_ARCH32)
	sparc_code_model = CM_32;
      else
	sparc_code_model = SPARC_DEFAULT_CMODEL;
    }

  /* Set the memory model if no -mmemory-model option was specified.  */
  if (!global_options_set.x_sparc_memory_model)
    {
      /* Choose the memory model for the operating system.  */
      enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
      if (os_default != SMM_DEFAULT)
	sparc_memory_model = os_default;

      /* Choose the most relaxed model for the processor.  */
      else if (TARGET_V9)
	sparc_memory_model = SMM_RMO;
      else if (TARGET_LEON3)
	sparc_memory_model = SMM_TSO;
      else if (TARGET_LEON)
	sparc_memory_model = SMM_SC;
      else if (TARGET_V8)
	sparc_memory_model = SMM_PSO;
      else
	sparc_memory_model = SMM_SC;
    }

  /* Supply a default value for align_functions.  */
  if (flag_align_functions && !str_align_functions)
    {
      if (sparc_cpu == PROCESSOR_ULTRASPARC
	  || sparc_cpu == PROCESSOR_ULTRASPARC3
	  || sparc_cpu == PROCESSOR_NIAGARA
	  || sparc_cpu == PROCESSOR_NIAGARA2
	  || sparc_cpu == PROCESSOR_NIAGARA3
	  || sparc_cpu == PROCESSOR_NIAGARA4)
	str_align_functions = "32";
      else if (sparc_cpu == PROCESSOR_NIAGARA7
	       || sparc_cpu == PROCESSOR_M8)
	str_align_functions = "64";
    }

  /* Validate PCC_STRUCT_RETURN.  */
  if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
    flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);

  /* Only use .uaxword when compiling for a 64-bit target.  */
  if (!TARGET_ARCH64)
    targetm.asm_out.unaligned_op.di = NULL;

  /* Set the processor costs.  */
  switch (sparc_cpu)
    {
    case PROCESSOR_V7:
    case PROCESSOR_CYPRESS:
      sparc_costs = &cypress_costs;
      break;
    case PROCESSOR_V8:
    case PROCESSOR_SPARCLITE:
    case PROCESSOR_SUPERSPARC:
      sparc_costs = &supersparc_costs;
      break;
    case PROCESSOR_F930:
    case PROCESSOR_F934:
    case PROCESSOR_HYPERSPARC:
    case PROCESSOR_SPARCLITE86X:
      sparc_costs = &hypersparc_costs;
      break;
    case PROCESSOR_LEON:
      sparc_costs = &leon_costs;
      break;
    case PROCESSOR_LEON3:
    case PROCESSOR_LEON3V7:
      sparc_costs = &leon3_costs;
      break;
    case PROCESSOR_LEON5:
      sparc_costs = &leon5_costs;
      break;
    case PROCESSOR_SPARCLET:
    case PROCESSOR_TSC701:
      sparc_costs = &sparclet_costs;
      break;
    case PROCESSOR_V9:
    case PROCESSOR_ULTRASPARC:
      sparc_costs = &ultrasparc_costs;
      break;
    case PROCESSOR_ULTRASPARC3:
      sparc_costs = &ultrasparc3_costs;
      break;
    case PROCESSOR_NIAGARA:
      sparc_costs = &niagara_costs;
      break;
    case PROCESSOR_NIAGARA2:
      sparc_costs = &niagara2_costs;
      break;
    case PROCESSOR_NIAGARA3:
      sparc_costs = &niagara3_costs;
      break;
    case PROCESSOR_NIAGARA4:
      sparc_costs = &niagara4_costs;
      break;
    case PROCESSOR_NIAGARA7:
      sparc_costs = &niagara7_costs;
      break;
    case PROCESSOR_M8:
      sparc_costs = &m8_costs;
      break;
    case PROCESSOR_NATIVE:
      gcc_unreachable ();
    };

  /* param_simultaneous_prefetches is the number of prefetches that
     can run at the same time.  More important, it is the threshold
     defining when additional prefetches will be dropped by the
     hardware.

     The UltraSPARC-III features a documented prefetch queue with a
     size of 8.  Additional prefetches issued in the cpu are
     dropped.

     Niagara processors are different.  In these processors prefetches
     are handled much like regular loads.  The L1 miss buffer is 32
     entries, but prefetches start getting affected when 30 entries
     become occupied.  That occupation could be a mix of regular loads
     and prefetches though.  And that buffer is shared by all threads.
     Once the threshold is reached, if the core is running a single
     thread the prefetch will retry.  If more than one thread is
     running, the prefetch will be dropped.

     All this makes it very difficult to determine how many
     simultaneous prefetches can be issued simultaneously, even in a
     single-threaded program.  Experimental results show that setting
     this parameter to 32 works well when the number of threads is not
     high.  */
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
		       param_simultaneous_prefetches,
		       ((sparc_cpu == PROCESSOR_ULTRASPARC
			 || sparc_cpu == PROCESSOR_NIAGARA
			 || sparc_cpu == PROCESSOR_NIAGARA2
			 || sparc_cpu == PROCESSOR_NIAGARA3
			 || sparc_cpu == PROCESSOR_NIAGARA4)
			? 2
			: (sparc_cpu == PROCESSOR_ULTRASPARC3
			   ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
				   || sparc_cpu == PROCESSOR_M8)
				  ? 32 : 3))));

  /* param_l1_cache_line_size is the size of the L1 cache line, in
     bytes.

     The Oracle SPARC Architecture (previously the UltraSPARC
     Architecture) specification states that when a PREFETCH[A]
     instruction is executed an implementation-specific amount of data
     is prefetched, and that it is at least 64 bytes long (aligned to
     at least 64 bytes).

     However, this is not correct.  The M7 (and implementations prior
     to that) does not guarantee a 64B prefetch into a cache if the
     line size is smaller.  A single cache line is all that is ever
     prefetched.  So for the M7, where the L1D$ has 32B lines and the
     L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
     L2 and L3, but only 32B are brought into the L1D$.  (Assuming it
     is a read_n prefetch, which is the only type which allocates to
     the L1.)  */
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
		       param_l1_cache_line_size,
		       (sparc_cpu == PROCESSOR_M8 ? 64 : 32));

  /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
     Hardvard level-1 caches) in kilobytes.  Both UltraSPARC and
     Niagara processors feature a L1D$ of 16KB.  */
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
		       param_l1_cache_size,
		       ((sparc_cpu == PROCESSOR_ULTRASPARC
			 || sparc_cpu == PROCESSOR_ULTRASPARC3
			 || sparc_cpu == PROCESSOR_NIAGARA
			 || sparc_cpu == PROCESSOR_NIAGARA2
			 || sparc_cpu == PROCESSOR_NIAGARA3
			 || sparc_cpu == PROCESSOR_NIAGARA4
			 || sparc_cpu == PROCESSOR_NIAGARA7
			 || sparc_cpu == PROCESSOR_M8)
			? 16 : 64));

  /* param_l2_cache_size is the size fo the L2 in kilobytes.  Note
     that 512 is the default in params.def.  */
  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
		       param_l2_cache_size,
		       ((sparc_cpu == PROCESSOR_NIAGARA4
			 || sparc_cpu == PROCESSOR_M8)
			? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
				 ? 256 : 512)));

  /* Disable save slot sharing for call-clobbered registers by default.
     The IRA sharing algorithm works on single registers only and this
     pessimizes for double floating-point registers.  */
  if (!global_options_set.x_flag_ira_share_save_slots)
    flag_ira_share_save_slots = 0;

  /* Only enable REE by default in 64-bit mode where it helps to eliminate
     redundant 32-to-64-bit extensions.  */
  if (!global_options_set.x_flag_ree && TARGET_ARCH32)
    flag_ree = 0;

  /* Do various machine dependent initializations.  */
  sparc_init_modes ();

  /* Set up function hooks.  */
  init_machine_status = sparc_init_machine_status;
}
/* Miscellaneous utilities.  */

/* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
   or branch on register contents instructions.  */

int
v9_regcmp_p (enum rtx_code code)
{
  switch (code)
    {
    case EQ:
    case NE:
    case GE:
    case LT:
    case LE:
    case GT:
      return 1;
    default:
      return 0;
    }
}
/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   sethi instruction.  */

int
fp_sethi_p (rtx op)
{
  if (GET_CODE (op) != CONST_DOUBLE)
    return 0;

  long bits;
  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), bits);

  /* Representable by sethi alone, but not by a simple mov.  */
  return !SPARC_SIMM13_P (bits) && SPARC_SETHI_P (bits);
}
/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a single
   mov instruction.  */

int
fp_mov_p (rtx op)
{
  if (GET_CODE (op) != CONST_DOUBLE)
    return 0;

  long bits;
  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), bits);

  /* Fits in a signed 13-bit immediate.  */
  return SPARC_SIMM13_P (bits);
}
/* Nonzero if OP is a floating point constant which can
   be loaded into an integer register using a high/losum
   instruction sequence.  */

int
fp_high_losum_p (rtx op)
{
  /* The constraints calling this should only be in
     SFmode move insns, so any constant which cannot
     be moved using a single insn will do.  */
  if (GET_CODE (op) != CONST_DOUBLE)
    return 0;

  long bits;
  REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), bits);

  return !SPARC_SIMM13_P (bits) && !SPARC_SETHI_P (bits);
}
/* Return true if the address of LABEL can be loaded by means of the
mov{si,di}_pic_label_ref patterns in PIC mode. */
static bool
can_use_mov_pic_label_ref (rtx label)
{
  /* Three independent vetoes, all about the label possibly ending up in
     a different segment/section than the code referencing it:

     1. VxWorks does not impose a fixed gap between segments; the
	run-time gap can be different from the object-file gap, so
	X - _GLOBAL_OFFSET_TABLE_ need not be a link-time constant and
	GOT-relative accesses are never assumed valid there.

     2. A non-local label might be placed in a different section than
	the current one, while mov_pic_label_ref requires the label and
	the code to be in the same section.

     3. With basic-block reordering into hot and cold partitions, that
	can happen for any label.  */
  return (!TARGET_VXWORKS_RTP
	  && !LABEL_REF_NONLOCAL_P (label)
	  && !flag_reorder_blocks_and_partition);
}
/* Expand a move instruction. Return true if all work is done. */
bool
sparc_expand_move (machine_mode mode, rtx *operands)
{
  /* Handle sets of MEM first.  */
  if (GET_CODE (operands[0]) == MEM)
    {
      /* 0 is a register (or a pair of registers) on SPARC.  */
      if (register_or_zero_operand (operands[1], mode))
	return false;

      /* Any other source must be stored through a register; also make
	 sure the destination address is in valid form.  */
      if (!reload_in_progress)
	{
	  operands[0] = validize_mem (operands[0]);
	  operands[1] = force_reg (mode, operands[1]);
	}
    }

  /* Fix up TLS cases.  */
  if (TARGET_HAVE_TLS
      && CONSTANT_P (operands[1])
      && sparc_tls_referenced_p (operands [1]))
    {
      /* Rewrite the TLS reference into a legitimate address and let the
	 caller emit the resulting (simpler) move.  */
      operands[1] = sparc_legitimize_tls_address (operands[1]);
      return false;
    }

  /* Fix up PIC cases.  */
  if (flag_pic && CONSTANT_P (operands[1]))
    {
      if (pic_address_needs_scratch (operands[1]))
	operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);

      /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases.
	 The source may be either a plain LABEL_REF or LABEL_REF + offset;
	 in both forms the label itself must pass can_use_mov_pic_label_ref.  */
      if ((GET_CODE (operands[1]) == LABEL_REF
	   && can_use_mov_pic_label_ref (operands[1]))
	  || (GET_CODE (operands[1]) == CONST
	      && GET_CODE (XEXP (operands[1], 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
	      && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
	      && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
	{
	  if (mode == SImode)
	    {
	      emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }

	  if (mode == DImode)
	    {
	      emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
	      return true;
	    }
	}

      if (symbolic_operand (operands[1], mode))
	{
	  /* During reload we cannot create pseudos, so reuse the
	     destination register as the scratch for legitimization.  */
	  operands[1]
	    = sparc_legitimize_pic_address (operands[1],
					    reload_in_progress
					    ? operands[0] : NULL_RTX);
	  return false;
	}
    }

  /* If we are trying to toss an integer constant into FP registers,
     or loading a FP or vector constant, force it into memory.  */
  if (CONSTANT_P (operands[1])
      && REG_P (operands[0])
      && (SPARC_FP_REG_P (REGNO (operands[0]))
	  || SCALAR_FLOAT_MODE_P (mode)
	  || VECTOR_MODE_P (mode)))
    {
      /* emit_group_store will send such bogosity to us when it is
	 not storing directly into memory.  So fix this up to avoid
	 crashes in output_constant_pool.  */
      if (operands [1] == const0_rtx)
	operands[1] = CONST0_RTX (mode);

      /* We can clear or set to all-ones FP registers if TARGET_VIS, and
	 always other regs.  */
      if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
	  && (const_zero_operand (operands[1], mode)
	      || const_all_ones_operand (operands[1], mode)))
	return false;

      if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
	  /* We are able to build any SF constant in integer registers
	     with at most 2 instructions.  */
	  && (mode == SFmode
	      /* And any DF constant in integer registers if needed.  */
	      || (mode == DFmode && !can_create_pseudo_p ())))
	return false;

      /* Everything else goes through the constant pool.  */
      operands[1] = force_const_mem (mode, operands[1]);
      if (!reload_in_progress)
	operands[1] = validize_mem (operands[1]);
      return false;
    }

  /* Accept non-constants and valid constants unmodified.  */
  if (!CONSTANT_P (operands[1])
      || GET_CODE (operands[1]) == HIGH
      || input_operand (operands[1], mode))
    return false;

  /* What remains is a constant that needs a multi-insn sequence.  */
  switch (mode)
    {
    case E_QImode:
      /* All QImode constants require only one insn, so proceed.  */
      break;

    case E_HImode:
    case E_SImode:
      sparc_emit_set_const32 (operands[0], operands[1]);
      return true;

    case E_DImode:
      /* input_operand should have filtered out 32-bit mode.  */
      sparc_emit_set_const64 (operands[0], operands[1]);
      return true;

    case E_TImode:
      {
	rtx high, low;

	/* TImode isn't available in 32-bit mode.  */
	split_double (operands[1], &high, &low);
	emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
			      high));
	emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
			      low));
      }
      return true;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Load OP1, a 32-bit constant, into OP0, a register.
We know it can't be done in one insn when we get
here, the move expander guarantees this. */
static void
sparc_emit_set_const32 (rtx op0, rtx op1)
{
  machine_mode mode = GET_MODE (op0);
  rtx temp = op0;

  /* Use a fresh pseudo for the intermediate value when allowed, so the
     two halves of the constant remain visible to CSE.  */
  if (can_create_pseudo_p ())
    temp = gen_reg_rtx (mode);

  if (GET_CODE (op1) == CONST_INT)
    {
      /* Single-insn constants (simm13 or sethi-only) were filtered out
	 by the move expander, see the comment above.  */
      gcc_assert (!small_int_operand (op1, mode)
		  && !const_high_operand (op1, mode));

      /* Emit them as real moves instead of a HIGH/LO_SUM,
	 this way CSE can see everything and reuse intermediate
	 values if it wants.  */
      emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
					     & ~(HOST_WIDE_INT) 0x3ff)));

      emit_insn (gen_rtx_SET (op0,
			      gen_rtx_IOR (mode, temp,
					   GEN_INT (INTVAL (op1) & 0x3ff))));
    }
  else
    {
      /* A symbol, emit in the traditional way.  */
      emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
    }
}
/* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
If TEMP is nonzero, we are forbidden to use any other scratch
registers. Otherwise, we are allowed to generate them as needed.
Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
void
sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
{
  rtx cst, temp1, temp2, temp3, temp4, temp5;
  rtx ti_temp = 0;

  /* Deal with too large offsets: if the offset of a CONST does not fit
     in 32 bits, build the symbol and the offset separately and add
     them, so each piece fits the 32-bit relocations used below.  */
  if (GET_CODE (op1) == CONST
      && GET_CODE (XEXP (op1, 0)) == PLUS
      && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
      && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
    {
      /* This path creates pseudos, hence no fixed scratch allowed.  */
      gcc_assert (!temp);
      temp1 = gen_reg_rtx (DImode);
      temp2 = gen_reg_rtx (DImode);
      sparc_emit_set_const64 (temp2, cst);
      sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
				       NULL_RTX);
      emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
      return;
    }

  /* A TImode TEMP comes from the reload_in/outdi patterns; keep the
     whole pair in TI_TEMP and use its first DImode half as scratch.  */
  if (temp && GET_MODE (temp) == TImode)
    {
      ti_temp = temp;
      temp = gen_rtx_REG (DImode, REGNO (temp));
    }

  /* SPARC-V9 code model support.  */
  switch (sparc_code_model)
    {
    case CM_MEDLOW:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 4TB of the virtual address
	 space.

	 sethi	%hi(symbol), %temp1
	 or	%temp1, %lo(symbol), %reg  */
      if (temp)
	temp1 = temp;  /* op0 is allowed.  */
      else
	temp1 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
      emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
      break;

    case CM_MEDMID:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable must be in the low 16TB of the virtual address
	 space.

	 sethi	%h44(symbol), %temp1
	 or	%temp1, %m44(symbol), %temp2
	 sllx	%temp2, 12, %temp3
	 or	%temp3, %l44(symbol), %reg  */
      if (temp)
	{
	  /* Only the last step must avoid clobbering op0 early, so the
	     intermediates can live in op0 itself.  */
	  temp1 = op0;
	  temp2 = op0;
	  temp3 = temp;  /* op0 is allowed.  */
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_seth44 (temp1, op1));
      emit_insn (gen_setm44 (temp2, temp1, op1));
      emit_insn (gen_rtx_SET (temp3,
			      gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
      emit_insn (gen_setl44 (op0, temp3, op1));
      break;

    case CM_MEDANY:
      /* The range spanned by all instructions in the object is less
	 than 2^31 bytes (2GB) and the distance from any instruction
	 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
	 than 2^31 bytes (2GB).

	 The executable can be placed anywhere in the virtual address
	 space.

	 sethi	%hh(symbol), %temp1
	 sethi	%lm(symbol), %temp2
	 or	%temp1, %hm(symbol), %temp3
	 sllx	%temp3, 32, %temp4
	 or	%temp4, %temp2, %temp5
	 or	%temp5, %lo(symbol), %reg  */
      if (temp)
	{
	  /* It is possible that one of the registers we got for operands[2]
	     might coincide with that of operands[0] (which is why we made
	     it TImode).  Pick the other one to use as our scratch.  */
	  if (rtx_equal_p (temp, op0))
	    {
	      gcc_assert (ti_temp);
	      temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
	    }
	  temp1 = op0;
	  temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	  temp3 = op0;
	  temp4 = op0;
	  temp5 = op0;
	}
      else
	{
	  temp1 = gen_reg_rtx (DImode);
	  temp2 = gen_reg_rtx (DImode);
	  temp3 = gen_reg_rtx (DImode);
	  temp4 = gen_reg_rtx (DImode);
	  temp5 = gen_reg_rtx (DImode);
	}

      emit_insn (gen_sethh (temp1, op1));
      emit_insn (gen_setlm (temp2, op1));
      emit_insn (gen_sethm (temp3, temp1, op1));
      emit_insn (gen_rtx_SET (temp4,
			      gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
      emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
      emit_insn (gen_setlo (op0, temp5, op1));
      break;

    case CM_EMBMEDANY:
      /* Old old old backwards compatibility kruft here.
	 Essentially it is MEDLOW with a fixed 64-bit
	 virtual base added to all data segment addresses.
	 Text-segment stuff is computed like MEDANY, we can't
	 reuse the code above because the relocation knobs
	 look different.

	 Data segment:	sethi	%hi(symbol), %temp1
			add	%temp1, EMBMEDANY_BASE_REG, %temp2
			or	%temp2, %lo(symbol), %reg  */
      if (data_segment_operand (op1, GET_MODE (op1)))
	{
	  if (temp)
	    {
	      temp1 = temp;  /* op0 is allowed.  */
	      temp2 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_sethi (temp1, op1));
	  emit_insn (gen_embmedany_brsum (temp2, temp1));
	  emit_insn (gen_embmedany_losum (op0, temp2, op1));
	}

      /* Text segment:	sethi	%uhi(symbol), %temp1
			sethi	%hi(symbol), %temp2
			or	%temp1, %ulo(symbol), %temp3
			sllx	%temp3, 32, %temp4
			or	%temp4, %temp2, %temp5
			or	%temp5, %lo(symbol), %reg  */
      else
	{
	  if (temp)
	    {
	      /* It is possible that one of the registers we got for operands[2]
		 might coincide with that of operands[0] (which is why we made
		 it TImode).  Pick the other one to use as our scratch.  */
	      if (rtx_equal_p (temp, op0))
		{
		  gcc_assert (ti_temp);
		  temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
		}
	      temp1 = op0;
	      temp2 = temp;  /* op0 is _not_ allowed, see above.  */
	      temp3 = op0;
	      temp4 = op0;
	      temp5 = op0;
	    }
	  else
	    {
	      temp1 = gen_reg_rtx (DImode);
	      temp2 = gen_reg_rtx (DImode);
	      temp3 = gen_reg_rtx (DImode);
	      temp4 = gen_reg_rtx (DImode);
	      temp5 = gen_reg_rtx (DImode);
	    }

	  emit_insn (gen_embmedany_textuhi (temp1, op1));
	  emit_insn (gen_embmedany_texthi  (temp2, op1));
	  emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
	  emit_insn (gen_rtx_SET (temp4,
				  gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
	  emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
	  emit_insn (gen_embmedany_textlo  (op0, temp5, op1));
	}
      break;

    default:
      gcc_unreachable ();
    }
}
/* These avoid problems when cross compiling. If we do not
go through all this hair then the optimizer will see
invalid REG_EQUAL notes or in some cases none at all. */
static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
/* The optimizer is not to assume anything about exactly
which bits are set for a HIGH, they are unspecified.
Unfortunately this leads to many missed optimizations
during CSE. We mask out the non-HIGH bits, and matches
a plain movdi, to alleviate this problem. */
static rtx
gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
{
  /* Clear the low 10 bits (the %lo part) and emit the result as a
     plain constant move, so the optimizer sees exactly which bits are
     set instead of an opaque HIGH.  */
  HOST_WIDE_INT high_part = val & ~(HOST_WIDE_INT) 0x3ff;
  return gen_rtx_SET (dest, GEN_INT (high_part));
}
static rtx
gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
{
  /* A straight move of VAL into DEST.  */
  rtx value = GEN_INT (val);
  return gen_rtx_SET (dest, value);
}
static rtx
gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
{
  /* Build the DImode expression SRC | VAL.  */
  rtx mask = GEN_INT (val);
  return gen_rtx_IOR (DImode, src, mask);
}
static rtx
gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
{
  /* Build the DImode expression SRC ^ VAL.  */
  rtx mask = GEN_INT (val);
  return gen_rtx_XOR (DImode, src, mask);
}
/* Worker routines for 64-bit constant formation on arch64.
One of the key things to be doing in these emissions is
to create as many temp REGs as possible. This makes it
possible for half-built constants to be used later when
such values are similar to something required later on.
Without doing this, the optimizer cannot see such
opportunities. */
static void sparc_emit_set_const64_quick1 (rtx, rtx,
unsigned HOST_WIDE_INT, int);
/* Load into OP0 a 64-bit constant whose significant bits all live in
   LOW_BITS (bits 0-31); IS_NEG says the value is the sign-extension of
   a negative 32-bit value.  Uses TEMP as scratch.  Two insns.  */
static void
sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT low_bits, int is_neg)
{
  unsigned HOST_WIDE_INT high_bits;

  /* For the negative case, sethi the complemented bits and recover the
     value with a XOR (or NOT) below.  */
  if (is_neg)
    high_bits = (~low_bits) & 0xffffffff;
  else
    high_bits = low_bits;

  emit_insn (gen_safe_HIGH64 (temp, high_bits));
  if (!is_neg)
    {
      emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
    }
  else
    {
      /* If we are XOR'ing with -1, then we should emit a one's complement
	 instead.  This way the combiner will notice logical operations
	 such as ANDN later on and substitute.  */
      if ((low_bits & 0x3ff) == 0x3ff)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0,
				  gen_safe_XOR64 (temp,
						  (-(HOST_WIDE_INT)0x400
						   | (low_bits & 0x3ff)))));
	}
    }
}
static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT, int);
/* Load into OP0 the constant HIGH_BITS shifted left by SHIFT_COUNT,
   plus the optional LOW_IMMEDIATE OR'd in afterwards.  Uses TEMP as
   scratch.  Two or three insns.  */
static void
sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
			       unsigned HOST_WIDE_INT high_bits,
			       unsigned HOST_WIDE_INT low_immediate,
			       int shift_count)
{
  rtx temp2 = op0;

  /* Materialize HIGH_BITS with sethi (+or) or a single mov, tracking
     in TEMP2 the register that ends up holding the unshifted value.  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (op0,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	temp2 = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      temp2 = temp;
    }

  /* Now shift it up into place.  */
  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
					       GEN_INT (shift_count))));

  /* If there is a low immediate part piece, finish up by
     putting that in as well.  */
  if (low_immediate != 0)
    emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
}
static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT);
/* Full 64-bit constant decomposition. Even though this is the
'worst' case, we still optimize a few things away. */
/* Full 64-bit constant decomposition.  Even though this is the
   'worst' case, we still optimize a few things away.  */
static void
sparc_emit_set_const64_longway (rtx op0, rtx temp,
				unsigned HOST_WIDE_INT high_bits,
				unsigned HOST_WIDE_INT low_bits)
{
  rtx sub_temp = op0;

  if (can_create_pseudo_p ())
    sub_temp = gen_reg_rtx (DImode);

  /* Build the high 32 bits in TEMP/SUB_TEMP (sethi+or, or a single mov
     when the sethi bits are all zero).  */
  if ((high_bits & 0xfffffc00) != 0)
    {
      emit_insn (gen_safe_HIGH64 (temp, high_bits));
      if ((high_bits & ~0xfffffc00) != 0)
	emit_insn (gen_rtx_SET (sub_temp,
				gen_safe_OR64 (temp, (high_bits & 0x3ff))));
      else
	sub_temp = temp;
    }
  else
    {
      emit_insn (gen_safe_SET64 (temp, high_bits));
      sub_temp = temp;
    }

  if (can_create_pseudo_p ())
    {
      /* With pseudos available, build the low word independently with
	 sethi+or and combine with the shifted high word by addition,
	 keeping every intermediate value visible to CSE.  */
      rtx temp2 = gen_reg_rtx (DImode);
      rtx temp3 = gen_reg_rtx (DImode);
      rtx temp4 = gen_reg_rtx (DImode);

      emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
						     GEN_INT (32))));
      emit_insn (gen_safe_HIGH64 (temp2, low_bits));
      if ((low_bits & ~0xfffffc00) != 0)
	{
	  emit_insn (gen_rtx_SET (temp3,
				  gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
	}
      else
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
	}
    }
  else
    {
      /* No scratch registers: feed the low 32 bits in as three chunks
	 of 12, 12 and 8 bits via interleaved shift/or steps, folding
	 the shifts of zero chunks into the following shift.  */
      rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
      rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
      rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
      int to_shift = 12;

      /* We are in the middle of reload, so this is really
	 painful.  However we do still make an attempt to
	 avoid emitting truly stupid code.  */
      if (low1 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						       GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
	  sub_temp = op0;
	  to_shift = 12;
	}
      else
	{
	  to_shift += 12;
	}
      if (low2 != const0_rtx)
	{
	  emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						       GEN_INT (to_shift))));
	  emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
	  sub_temp = op0;
	  to_shift = 8;
	}
      else
	{
	  to_shift += 8;
	}
      emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
						   GEN_INT (to_shift))));
      if (low3 != const0_rtx)
	emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
      /* phew...  */
    }
}
/* Analyze a 64-bit constant for certain properties. */
static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
int *, int *, int *);
/* Analyze the 64-bit constant formed by HIGH_BITS (bits 32-63) and
   LOW_BITS (bits 0-31).  Return through HBSP and LBSP the positions of
   the highest and lowest set bits, and through ABBASP whether every
   bit between those two extremes is also set.  At least one bit must
   be set: a zero constant is emitted as a single insn before we get
   here.  */
static void
analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
			unsigned HOST_WIDE_INT low_bits,
			int *hbsp, int *lbsp, int *abbasp)
{
  int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
  int i;

  lowest_bit_set = highest_bit_set = -1;

  /* First pass: scan the low word upwards for the lowest set bit and
     the high word downwards for the highest set bit.  */
  i = 0;
  do
    {
      if ((lowest_bit_set == -1)
	  && ((low_bits >> i) & 1))
	lowest_bit_set = i;
      if ((highest_bit_set == -1)
	  && ((high_bits >> (32 - i - 1)) & 1))
	highest_bit_set = (64 - i - 1);
    }
  while (++i < 32
	 && ((highest_bit_set == -1)
	     || (lowest_bit_set == -1)));

  /* Second pass, if needed: whichever extreme was not found above must
     lie in the other word.  */
  if (i == 32)
    {
      i = 0;
      do
	{
	  if ((lowest_bit_set == -1)
	      && ((high_bits >> i) & 1))
	    lowest_bit_set = i + 32;
	  if ((highest_bit_set == -1)
	      && ((low_bits >> (32 - i - 1)) & 1))
	    highest_bit_set = 32 - i - 1;
	}
      while (++i < 32
	     && ((highest_bit_set == -1)
		 || (lowest_bit_set == -1)));
    }

  /* If there are no bits set this should have gone out
     as one instruction!  */
  gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);

  /* Check whether every bit between the extremes is set.  Use an
     unsigned 64-bit one for the mask: the previous plain (1 << i)
     left-shifted a signed int into its sign bit for i == 31, which is
     undefined behavior in C (it only worked via sign-extension against
     the masked 32-bit words the callers pass in).  */
  all_bits_between_are_set = 1;
  for (i = lowest_bit_set; i <= highest_bit_set; i++)
    {
      if (i < 32)
	{
	  if ((low_bits & ((unsigned HOST_WIDE_INT) 1 << i)) != 0)
	    continue;
	}
      else
	{
	  if ((high_bits & ((unsigned HOST_WIDE_INT) 1 << (i - 32))) != 0)
	    continue;
	}
      all_bits_between_are_set = 0;
      break;
    }

  *hbsp = highest_bit_set;
  *lbsp = lowest_bit_set;
  *abbasp = all_bits_between_are_set;
}
static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
static int
const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
		   unsigned HOST_WIDE_INT low_bits)
{
  int highest_bit_set, lowest_bit_set, all_bits_between_are_set;

  /* A zero or all-ones high word: two insns via quick1.  */
  if (high_bits == 0 || high_bits == 0xffffffff)
    return 1;

  analyze_64bit_constant (high_bits, low_bits,
			  &highest_bit_set, &lowest_bit_set,
			  &all_bits_between_are_set);

  /* A solid run of ones reaching either end: mov -1 plus one shift.  */
  if (all_bits_between_are_set != 0
      && (highest_bit_set == 63 || lowest_bit_set == 0))
    return 1;

  /* A narrow field of bits: sethi plus one shift.  */
  return (highest_bit_set - lowest_bit_set) < 21;
}
static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
int, int);
static unsigned HOST_WIDE_INT
create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
			  unsigned HOST_WIDE_INT low_bits,
			  int lowest_bit_set, int shift)
{
  HOST_WIDE_INT hi, lo;

  if (lowest_bit_set >= 32)
    {
      /* Everything of interest lives in the high word; the low word
	 contributes nothing.  */
      hi = (high_bits >> (lowest_bit_set - 32)) << shift;
      lo = 0;
    }
  else
    {
      /* Both words may contribute: align the low word down to the
	 lowest set bit and line the high word up above it.  */
      lo = (low_bits >> lowest_bit_set) << shift;
      hi = (high_bits << (32 - lowest_bit_set)) << shift;
    }

  gcc_assert (! (hi & lo));
  return (hi | lo);
}
/* Here we are sure to be arch64 and this is an integer constant
being loaded into a register. Emit the most efficient
insn sequence possible. Detection of all the 1-insn cases
has been done already. */
static void
sparc_emit_set_const64 (rtx op0, rtx op1)
{
unsigned HOST_WIDE_INT high_bits, low_bits;
int lowest_bit_set, highest_bit_set;
int all_bits_between_are_set;
rtx temp = 0;
/* Sanity check that we know what we are working with. */
gcc_assert (TARGET_ARCH64
&& (GET_CODE (op0) == SUBREG
|| (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
if (! can_create_pseudo_p ())
temp = op0;
if (GET_CODE (op1) != CONST_INT)
{
sparc_emit_set_symbolic_const64 (op0, op1, temp);
return;
}
if (! temp)
temp = gen_reg_rtx (DImode);
high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
low_bits = (INTVAL (op1) & 0xffffffff);
/* low_bits bits 0 --> 31
high_bits bits 32 --> 63 */
analyze_64bit_constant (high_bits, low_bits,
&highest_bit_set, &lowest_bit_set,
&all_bits_between_are_set);
/* First try for a 2-insn sequence. */
/* These situations are preferred because the optimizer can
* do more things with them:
* 1) mov -1, %reg
* sllx %reg, shift, %reg
* 2) mov -1, %reg
* srlx %reg, shift, %reg
* 3) mov some_small_const, %reg
* sllx %reg, shift, %reg
*/
if (((highest_bit_set == 63
|| lowest_bit_set == 0)
&& all_bits_between_are_set != 0)
|| ((highest_bit_set - lowest_bit_set) < 12))
{
HOST_WIDE_INT the_const = -1;
int shift = lowest_bit_set;
if ((highest_bit_set != 63
&& lowest_bit_set != 0)
|| all_bits_between_are_set == 0)
{
the_const =
create_simple_focus_bits (high_bits, low_bits,
lowest_bit_set, 0);
}
else if (lowest_bit_set == 0)
shift = -(63 - highest_bit_set);
gcc_assert (SPARC_SIMM13_P (the_const));
gcc_assert (shift != 0);
emit_insn (gen_safe_SET64 (temp, the_const));
if (shift > 0)
emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
GEN_INT (shift))));
else if (shift < 0)
emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
GEN_INT (-shift))));
return;
}
/* Now a range of 22 or less bits set somewhere.
* 1) sethi %hi(focus_bits), %reg
* sllx %reg, shift, %reg
* 2) sethi %hi(focus_bits), %reg
* srlx %reg, shift, %reg
*/
if ((highest_bit_set - lowest_bit_set) < 21)
{
unsigned HOST_WIDE_INT focus_bits =
create_simple_focus_bits (high_bits, low_bits,
lowest_bit_set, 10);
gcc_assert (SPARC_SETHI_P (focus_bits));
gcc_assert (lowest_bit_set != 10);
emit_insn (gen_safe_HIGH64 (temp, focus_bits));
/* If lowest_bit_set == 10 then a sethi alone could have done it. */
if (lowest_bit_set < 10)
emit_insn (gen_rtx_SET (op0,
gen_rtx_LSHIFTRT (DImode, temp,
GEN_INT (10 - lowest_bit_set))));
else if (lowest_bit_set > 10)
emit_insn (gen_rtx_SET (op0,
gen_rtx_ASHIFT (DImode, temp,
GEN_INT (lowest_bit_set - 10))));
return;
}
/* 1) sethi %hi(low_bits), %reg
* or %reg, %lo(low_bits), %reg
* 2) sethi %hi(~low_bits), %reg
* xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
*/
if (high_bits == 0
|| high_bits == 0xffffffff)
{
sparc_emit_set_const64_quick1 (op0, temp, low_bits,
(high_bits == 0xffffffff));
return;
}
/* Now, try 3-insn sequences. */
/* 1) sethi %hi(high_bits), %reg
* or %reg, %lo(high_bits), %reg
* sllx %reg, 32, %reg
*/
if (low_bits == 0)
{
sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
return;
}
/* We may be able to do something quick
when the constant is negated, so try that. */
if (const64_is_2insns ((~high_bits) & 0xffffffff,
(~low_bits) & 0xfffffc00))
{
/* NOTE: The trailing bits get XOR'd so we need the
non-negated bits, not the negated ones. */
unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
if ((((~high_bits) & 0xffffffff) == 0
&& ((~low_bits) & 0x80000000) == 0)
|| (((~high_bits) & 0xffffffff) == 0xffffffff
&& ((~low_bits) & 0x80000000) != 0))
{