blob: 3db53374c9ea2a880c10e6f260ce95204b77e37b [file] [log] [blame]
/* Subroutines used for code generation on the DEC Alpha.
Copyright (C) 1992-2022 Free Software Foundation, Inc.
Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "stringpool.h"
#include "attribs.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "predict.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "reload.h"
#include "except.h"
#include "common/common-target.h"
#include "debug.h"
#include "langhooks.h"
#include "cfgrtl.h"
#include "tree-pass.h"
#include "context.h"
#include "gimple-iterator.h"
#include "gimplify.h"
#include "tree-stdarg.h"
#include "tm-constrs.h"
#include "libfuncs.h"
#include "builtins.h"
#include "rtl-iter.h"
#include "flags.h"
#include "opts.h"
/* This file should be included last. */
#include "target-def.h"
/* Specify which cpu to schedule for. Set by alpha_option_override from
-mcpu and then, if given, overridden by -mtune. */
enum processor_type alpha_tune;
/* Which cpu we're generating code for. Set by alpha_option_override
from -mcpu. */
enum processor_type alpha_cpu;
/* Printable processor names. alpha_option_override indexes this table
with alpha_tune when it reports an unknown cache latency. */
static const char * const alpha_cpu_name[] =
{
"ev4", "ev5", "ev6"
};
/* Specify how accurate floating-point traps need to be. */
enum alpha_trap_precision alpha_tp;
/* Specify the floating-point rounding mode. */
enum alpha_fp_rounding_mode alpha_fprm;
/* Specify which things cause traps. */
enum alpha_fp_trap_mode alpha_fptm;
/* Nonzero if inside of a function, because the Alpha asm can't
handle .files inside of functions. */
static int inside_function = FALSE;
/* The number of cycles of latency we should assume on memory reads.
The initial value 3 is replaced by alpha_option_override with the value
derived from -mmemory-latency. */
static int alpha_memory_latency = 3;
/* Whether the function needs the GP. */
static int alpha_function_needs_gp;
/* The assembler name of the current function. */
static const char *alpha_fnname;
/* The next explicit relocation sequence number. It starts at 1 and is
post-incremented by each user, e.g. the TLS call sequences built in
alpha_legitimize_address_1. */
extern GTY(()) int alpha_next_sequence_number;
int alpha_next_sequence_number = 1;
/* The literal and gpdisp sequence numbers for this insn, as printed
by %# and %* respectively. */
extern GTY(()) int alpha_this_literal_sequence_number;
extern GTY(()) int alpha_this_gpdisp_sequence_number;
int alpha_this_literal_sequence_number;
int alpha_this_gpdisp_sequence_number;
/* Costs of various operations on the different architectures.
One instance describes one cost model; the initializers in this file
write every entry in terms of COSTS_N_INSNS. All fields are unsigned
char except int_div, which is unsigned short.
NOTE(review): the _sf/_df and _si/_di suffixes presumably denote the
SFmode/DFmode and SImode/DImode variants of the operation -- confirm
against the code that reads these fields. */
struct alpha_rtx_cost_data
{
/* Floating-point add, multiply and the two divide variants. */
unsigned char fp_add;
unsigned char fp_mult;
unsigned char fp_div_sf;
unsigned char fp_div_df;
/* Integer multiply (two variants), shift and conditional move. */
unsigned char int_mult_si;
unsigned char int_mult_di;
unsigned char int_shift;
unsigned char int_cmov;
/* Integer divide; the only field wider than a char. */
unsigned short int_div;
};
/* Per-processor cost table with PROCESSOR_MAX entries; the rows are
given in EV4, EV5, EV6 order and each row lists the fields in the
order they are declared in struct alpha_rtx_cost_data.
NOTE(review): presumably indexed by enum processor_type (alpha_tune or
alpha_cpu) -- confirm at the point of use. */
static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] =
{
{ /* EV4 */
COSTS_N_INSNS (6), /* fp_add */
COSTS_N_INSNS (6), /* fp_mult */
COSTS_N_INSNS (34), /* fp_div_sf */
COSTS_N_INSNS (63), /* fp_div_df */
COSTS_N_INSNS (23), /* int_mult_si */
COSTS_N_INSNS (23), /* int_mult_di */
COSTS_N_INSNS (2), /* int_shift */
COSTS_N_INSNS (2), /* int_cmov */
COSTS_N_INSNS (97), /* int_div */
},
{ /* EV5 */
COSTS_N_INSNS (4), /* fp_add */
COSTS_N_INSNS (4), /* fp_mult */
COSTS_N_INSNS (15), /* fp_div_sf */
COSTS_N_INSNS (22), /* fp_div_df */
COSTS_N_INSNS (8), /* int_mult_si */
COSTS_N_INSNS (12), /* int_mult_di */
/* Note the extra "+ 1": slightly more than one insn. */
COSTS_N_INSNS (1) + 1, /* int_shift */
COSTS_N_INSNS (1), /* int_cmov */
COSTS_N_INSNS (83), /* int_div */
},
{ /* EV6 */
COSTS_N_INSNS (4), /* fp_add */
COSTS_N_INSNS (4), /* fp_mult */
COSTS_N_INSNS (12), /* fp_div_sf */
COSTS_N_INSNS (15), /* fp_div_df */
COSTS_N_INSNS (7), /* int_mult_si */
COSTS_N_INSNS (7), /* int_mult_di */
COSTS_N_INSNS (1), /* int_shift */
COSTS_N_INSNS (2), /* int_cmov */
COSTS_N_INSNS (86), /* int_div */
},
};
/* Similar but tuned for code size instead of execution latency. The
extra +N is fractional cost tuning based on latency. It's used to
encourage use of cheaper insns like shift, but only if there's just
one of them.
Every entry is a single insn except int_div, which is six; the entries
follow the field order of struct alpha_rtx_cost_data. */
static struct alpha_rtx_cost_data const alpha_rtx_cost_size =
{
COSTS_N_INSNS (1), /* fp_add */
COSTS_N_INSNS (1), /* fp_mult */
COSTS_N_INSNS (1), /* fp_div_sf */
COSTS_N_INSNS (1) + 1, /* fp_div_df */
COSTS_N_INSNS (1) + 1, /* int_mult_si */
COSTS_N_INSNS (1) + 2, /* int_mult_di */
COSTS_N_INSNS (1), /* int_shift */
COSTS_N_INSNS (1), /* int_cmov */
COSTS_N_INSNS (6), /* int_div */
};
/* Get the number of args of a function in one of two ways: under the
OpenVMS ABI crtl->args.info has a num_args member, otherwise
crtl->args.info is used directly. */
#if TARGET_ABI_OPEN_VMS
#define NUM_ARGS crtl->args.info.num_args
#else
#define NUM_ARGS crtl->args.info
#endif
/* Hard register numbers referred to by name.
NOTE(review): presumably the procedure-value and return-address
registers -- confirm against the ABI description. */
#define REG_PV 27
#define REG_RA 26
/* Declarations of static functions. */
static struct machine_function *alpha_init_machine_status (void);
static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx);
static void alpha_handle_trap_shadows (void);
static void alpha_align_insns (void);
static void alpha_override_options_after_change (void);
#if TARGET_ABI_OPEN_VMS
static void alpha_write_linkage (FILE *, const char *);
static bool vms_valid_pointer_mode (scalar_int_mode);
#else
/* Outside the OpenVMS ABI any use of vms_patch_builtins expands to
gcc_unreachable (). */
#define vms_patch_builtins() gcc_unreachable()
#endif
/* Worker for the "trap_shadows" pass: invoke alpha_handle_trap_shadows
   and report 0 back to the caller.  */

static unsigned int
rest_of_handle_trap_shadows (void)
{
  const unsigned int result = 0;

  alpha_handle_trap_shadows ();
  return result;
}
namespace {

/* Static description of the "trap_shadows" RTL pass.  */
const pass_data pass_data_handle_trap_shadows =
{
  RTL_PASS,
  "trap_shadows",	/* name */
  OPTGROUP_NONE,	/* optinfo_flags */
  TV_NONE,		/* tv_id */
  0,			/* properties_required */
  0,			/* properties_provided */
  0,			/* properties_destroyed */
  0,			/* todo_flags_start */
  TODO_df_finish,	/* todo_flags_finish */
};

/* Pass object wrapping alpha_handle_trap_shadows.  */
class pass_handle_trap_shadows : public rtl_opt_pass
{
public:
  pass_handle_trap_shadows(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt)
  {}

  /* opt_pass methods: */

  /* Run whenever exceptions are enabled or the trap precision is
     anything other than ALPHA_TP_PROG.  */
  virtual bool gate (function *)
    {
      if (flag_exceptions)
	return true;
      return alpha_tp != ALPHA_TP_PROG;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_handle_trap_shadows ();
    }

}; // class pass_handle_trap_shadows

} // anon namespace
/* Allocate a new instance of the "trap_shadows" pass bound to CTXT.  */

rtl_opt_pass *
make_pass_handle_trap_shadows (gcc::context *ctxt)
{
  rtl_opt_pass *pass = new pass_handle_trap_shadows (ctxt);
  return pass;
}
/* Worker for the "align_insns" pass: invoke alpha_align_insns and report
   0 back to the caller.  */

static unsigned int
rest_of_align_insns (void)
{
  const unsigned int result = 0;

  alpha_align_insns ();
  return result;
}
namespace {

/* Static description of the "align_insns" RTL pass.  */
const pass_data pass_data_align_insns =
{
  RTL_PASS,
  "align_insns",	/* name */
  OPTGROUP_NONE,	/* optinfo_flags */
  TV_NONE,		/* tv_id */
  0,			/* properties_required */
  0,			/* properties_provided */
  0,			/* properties_destroyed */
  0,			/* todo_flags_start */
  TODO_df_finish,	/* todo_flags_finish */
};

/* Pass object wrapping alpha_align_insns.  */
class pass_align_insns : public rtl_opt_pass
{
public:
  pass_align_insns(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_align_insns, ctxt)
  {}

  /* opt_pass methods: */

  virtual bool gate (function *)
    {
      /* Only EV4 and EV5 tuning is handled.  */
      if (alpha_tune != PROCESSOR_EV4 && alpha_tune != PROCESSOR_EV5)
	return false;

      /* Only when optimizing, and not for size.  */
      if (!optimize || optimize_size)
	return false;

      /* Due to the number of extra trapb insns, don't bother fixing up
	 alignment when trap precision is instruction.  */
      if (alpha_tp == ALPHA_TP_INSN)
	return false;

      /* Moreover, we can only do our job when sched2 is run.  */
      return flag_schedule_insns_after_reload != 0;
    }

  virtual unsigned int execute (function *)
    {
      return rest_of_align_insns ();
    }

}; // class pass_align_insns

} // anon namespace
/* Allocate a new instance of the "align_insns" pass bound to CTXT.  */

rtl_opt_pass *
make_pass_align_insns (gcc::context *ctxt)
{
  rtl_opt_pass *pass = new pass_align_insns (ctxt);
  return pass;
}
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
/* Implement TARGET_MANGLE_TYPE.  Return "g" for long double when
   TARGET_LONG_DOUBLE_128 is in effect; return NULL for every other type
   so that the normal C++ mangling is used.  */

static const char *
alpha_mangle_type (const_tree type)
{
  const bool is_long_double
    = TYPE_MAIN_VARIANT (type) == long_double_type_node;

  if (is_long_double && TARGET_LONG_DOUBLE_128)
    return "g";

  return NULL;
}
#endif
/* Parse target option strings.
Establishes, in this order: the floating-point defaults (possibly
raised by the IEEE flags and then overridden by the explicit
-mtrap-precision / -mfp-rounding-mode / -mfp-trap-mode strings), the
cpu and tuning selection with the matching cache parameters, a few
consistency fix-ups, the assumed memory latency, the small-data
defaults, and finally the per-function init hook and VAX float formats.
Bad option values are reported through error () or warning (). */
static void
alpha_option_override (void)
{
/* One row per accepted -mcpu/-mtune name: the processor it maps to, the
target_flags bits it implies, and its cache parameters. */
static const struct cpu_table {
const char *const name;
const enum processor_type processor;
const int flags;
const unsigned short line_size; /* in bytes */
const unsigned short l1_size; /* in kb. */
const unsigned short l2_size; /* in kb. */
} cpu_table[] = {
/* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches.
EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45
had 64k to 8M 8-byte direct Bcache. */
{ "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
{ "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 },
{ "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 },
/* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2,
and 1M to 16M 64 byte L3 (not modeled).
PCA56 had 16k 64-byte cache; PCA57 had 32k Icache.
PCA56 had 8k 64-byte cache; PCA57 had 16k Dcache. */
{ "ev5", PROCESSOR_EV5, 0, 32, 8, 96 },
{ "21164", PROCESSOR_EV5, 0, 32, 8, 96 },
{ "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
{ "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 },
{ "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
{ "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
{ "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 },
/* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */
{ "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
{ "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 },
{ "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
64, 64, 16*1024 },
{ "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX,
64, 64, 16*1024 }
};
int const ct_size = ARRAY_SIZE (cpu_table);
/* Cache parameters of the selected cpu; zero means "none selected", in
which case the corresponding param is left alone below. */
int line_size = 0, l1_size = 0, l2_size = 0;
int i;
#ifdef SUBTARGET_OVERRIDE_OPTIONS
SUBTARGET_OVERRIDE_OPTIONS;
#endif
/* Default to full IEEE compliance mode for Go language. */
if (strcmp (lang_hooks.name, "GNU Go") == 0
&& !(target_flags_explicit & MASK_IEEE))
target_flags |= MASK_IEEE;
/* Baseline floating-point settings. */
alpha_fprm = ALPHA_FPRM_NORM;
alpha_tp = ALPHA_TP_PROG;
alpha_fptm = ALPHA_FPTM_N;
/* The IEEE flags raise the trap precision and trap mode; the
with-inexact variant is tested second and therefore wins when both
are set. */
if (TARGET_IEEE)
{
alpha_tp = ALPHA_TP_INSN;
alpha_fptm = ALPHA_FPTM_SU;
}
if (TARGET_IEEE_WITH_INEXACT)
{
alpha_tp = ALPHA_TP_INSN;
alpha_fptm = ALPHA_FPTM_SUI;
}
/* Explicit option strings override whatever was chosen above. */
if (alpha_tp_string)
{
if (! strcmp (alpha_tp_string, "p"))
alpha_tp = ALPHA_TP_PROG;
else if (! strcmp (alpha_tp_string, "f"))
alpha_tp = ALPHA_TP_FUNC;
else if (! strcmp (alpha_tp_string, "i"))
alpha_tp = ALPHA_TP_INSN;
else
error ("bad value %qs for %<-mtrap-precision%> switch",
alpha_tp_string);
}
if (alpha_fprm_string)
{
if (! strcmp (alpha_fprm_string, "n"))
alpha_fprm = ALPHA_FPRM_NORM;
else if (! strcmp (alpha_fprm_string, "m"))
alpha_fprm = ALPHA_FPRM_MINF;
else if (! strcmp (alpha_fprm_string, "c"))
alpha_fprm = ALPHA_FPRM_CHOP;
else if (! strcmp (alpha_fprm_string,"d"))
alpha_fprm = ALPHA_FPRM_DYN;
else
error ("bad value %qs for %<-mfp-rounding-mode%> switch",
alpha_fprm_string);
}
if (alpha_fptm_string)
{
if (strcmp (alpha_fptm_string, "n") == 0)
alpha_fptm = ALPHA_FPTM_N;
else if (strcmp (alpha_fptm_string, "u") == 0)
alpha_fptm = ALPHA_FPTM_U;
else if (strcmp (alpha_fptm_string, "su") == 0)
alpha_fptm = ALPHA_FPTM_SU;
else if (strcmp (alpha_fptm_string, "sui") == 0)
alpha_fptm = ALPHA_FPTM_SUI;
else
error ("bad value %qs for %<-mfp-trap-mode%> switch",
alpha_fptm_string);
}
/* -mcpu selects both the code-generation cpu and the tuning cpu, and
replaces the BWX/MAX/FIX/CIX bits of target_flags with the ones listed
for that cpu. */
if (alpha_cpu_string)
{
for (i = 0; i < ct_size; i++)
if (! strcmp (alpha_cpu_string, cpu_table [i].name))
{
alpha_tune = alpha_cpu = cpu_table[i].processor;
line_size = cpu_table[i].line_size;
l1_size = cpu_table[i].l1_size;
l2_size = cpu_table[i].l2_size;
target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX);
target_flags |= cpu_table[i].flags;
break;
}
/* The loop ran off the end of the table: no name matched. */
if (i == ct_size)
error ("bad value %qs for %<-mcpu%> switch", alpha_cpu_string);
}
/* -mtune overrides only the tuning cpu and the cache parameters; the
target_flags and alpha_cpu are left untouched. */
if (alpha_tune_string)
{
for (i = 0; i < ct_size; i++)
if (! strcmp (alpha_tune_string, cpu_table [i].name))
{
alpha_tune = cpu_table[i].processor;
line_size = cpu_table[i].line_size;
l1_size = cpu_table[i].l1_size;
l2_size = cpu_table[i].l2_size;
break;
}
if (i == ct_size)
error ("bad value %qs for %<-mtune%> switch", alpha_tune_string);
}
/* Publish the cache parameters through SET_OPTION_IF_UNSET. */
if (line_size)
SET_OPTION_IF_UNSET (&global_options, &global_options_set,
param_l1_cache_line_size, line_size);
if (l1_size)
SET_OPTION_IF_UNSET (&global_options, &global_options_set,
param_l1_cache_size, l1_size);
if (l2_size)
SET_OPTION_IF_UNSET (&global_options, &global_options_set,
param_l2_cache_size, l2_size);
/* Do some sanity checks on the above options. */
if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI)
&& alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6)
{
warning (0, "fp software completion requires %<-mtrap-precision=i%>");
alpha_tp = ALPHA_TP_INSN;
}
if (alpha_cpu == PROCESSOR_EV6)
{
/* Except for EV6 pass 1 (not released), we always have precise
arithmetic traps. Which means we can do software completion
without minding trap shadows. */
alpha_tp = ALPHA_TP_PROG;
}
/* VAX floats: downgrade unsupported rounding/trap modes with a warning
and always clear MASK_LONG_DOUBLE_128 (warning only if it was given
explicitly). */
if (TARGET_FLOAT_VAX)
{
if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN)
{
warning (0, "rounding mode not supported for VAX floats");
alpha_fprm = ALPHA_FPRM_NORM;
}
if (alpha_fptm == ALPHA_FPTM_SUI)
{
warning (0, "trap mode not supported for VAX floats");
alpha_fptm = ALPHA_FPTM_SU;
}
if (target_flags_explicit & MASK_LONG_DOUBLE_128)
warning (0, "128-bit %<long double%> not supported for VAX floats");
target_flags &= ~MASK_LONG_DOUBLE_128;
}
/* Work out alpha_memory_latency from the -mmemory-latency string, which
may be a decimal number, "L<digit>" / "l<digit>" for a cache level, or
"main". A missing string is treated as "L1". */
{
char *end;
int lat;
if (!alpha_mlat_string)
alpha_mlat_string = "L1";
/* A plain number: the comma expression stores the parsed value in LAT
and then requires that strtol consumed the whole string. */
if (ISDIGIT ((unsigned char)alpha_mlat_string[0])
&& (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0'))
;
else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l')
&& ISDIGIT ((unsigned char)alpha_mlat_string[1])
&& alpha_mlat_string[2] == '\0')
{
/* Rows are indexed by alpha_tune, columns by cache level minus one;
-1 marks a level with no known latency. Only columns 0..2 are ever
read because LAT is range-checked to 1..3 first. */
static int const cache_latency[][4] =
{
{ 3, 30, -1 }, /* ev4 -- Bcache is a guess */
{ 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */
{ 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */
};
lat = alpha_mlat_string[1] - '0';
if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1)
{
warning (0, "L%d cache latency unknown for %s",
lat, alpha_cpu_name[alpha_tune]);
lat = 3;
}
else
lat = cache_latency[alpha_tune][lat-1];
}
else if (! strcmp (alpha_mlat_string, "main"))
{
/* Most current memories have about 370ns latency. This is
a reasonable guess for a fast cpu. */
lat = 150;
}
else
{
/* Unrecognized value: warn and fall back to 3 cycles. */
warning (0, "bad value %qs for %<-mmemory-latency%>",
alpha_mlat_string);
lat = 3;
}
alpha_memory_latency = lat;
}
/* Default the definition of "small data" to 8 bytes. */
if (!OPTION_SET_P (g_switch_value))
g_switch_value = 8;
/* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */
if (flag_pic == 1)
target_flags |= MASK_SMALL_DATA;
else if (flag_pic == 2)
target_flags &= ~MASK_SMALL_DATA;
/* Apply the alignment defaults that depend on the optimization level. */
alpha_override_options_after_change ();
/* Register variables and functions with the garbage collector. */
/* NOTE(review): no statement follows the comment above in this
function -- it looks like a leftover; confirm before removing. */
/* Set up function hooks. */
init_machine_status = alpha_init_machine_status;
/* Tell the compiler when we're using VAX floating point. */
if (TARGET_FLOAT_VAX)
{
REAL_MODE_FORMAT (SFmode) = &vax_f_format;
REAL_MODE_FORMAT (DFmode) = &vax_g_format;
REAL_MODE_FORMAT (TFmode) = NULL;
}
/* NOTE(review): this runs after the TARGET_FLOAT_VAX block above has
cleared MASK_LONG_DOUBLE_128, so when TARGET_DEFAULT_LONG_DOUBLE_128 is
defined and the flag was not given explicitly the bit is set again even
for VAX floats -- confirm this ordering is intended. */
#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
target_flags |= MASK_LONG_DOUBLE_128;
#endif
}
/* Implement targetm.override_options_after_change.  Supply "16" as the
   alignment string for functions, loops and jumps wherever the
   corresponding -falign-* flag is on but no explicit string was given.  */

static void
alpha_override_options_after_change (void)
{
  /* Function alignment is defaulted regardless of optimization level.  */
  if (flag_align_functions && !str_align_functions)
    str_align_functions = "16";

  /* Align labels and loops for optimal branching.  */
  /* ??? Kludge these by not doing anything if we don't optimize.  */
  if (optimize <= 0)
    return;

  if (flag_align_loops && !str_align_loops)
    str_align_loops = "16";

  if (flag_align_jumps && !str_align_jumps)
    str_align_jumps = "16";
}
/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones,
   i.e. every 8-bit group of VALUE is either 0x00 or 0xff; returns 0 as
   soon as a group with mixed bits is found.  */

int
zap_mask (HOST_WIDE_INT value)
{
  const int nbytes = HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR;
  int remaining = nbytes;

  while (remaining-- > 0)
    {
      const HOST_WIDE_INT low_byte = value & 0xff;

      if (low_byte != 0 && low_byte != 0xff)
	return 0;

      /* Move the next byte into the low position.  */
      value >>= 8;
    }

  return 1;
}
/* Return true if OP is valid for a particular TLS relocation.
   We are already guaranteed that OP is a CONST.

   OP must wrap an UNSPEC whose number is UNSPEC and whose first vector
   element is a SYMBOL_REF.  The symbol's TLS model then decides which
   UNSPEC/SIZE pair is acceptable.  */

int
tls_symbolic_operand_1 (rtx op, int size, int unspec)
{
  rtx wrapped = XEXP (op, 0);

  if (GET_CODE (wrapped) != UNSPEC || XINT (wrapped, 1) != unspec)
    return 0;

  rtx symbol = XVECEXP (wrapped, 0, 0);

  if (GET_CODE (symbol) != SYMBOL_REF)
    return 0;

  /* The relocation kind and size required by the symbol's TLS model.  */
  int wanted_unspec;
  int wanted_size;

  switch (SYMBOL_REF_TLS_MODEL (symbol))
    {
    case TLS_MODEL_LOCAL_DYNAMIC:
      wanted_unspec = UNSPEC_DTPREL;
      wanted_size = alpha_tls_size;
      break;

    case TLS_MODEL_INITIAL_EXEC:
      wanted_unspec = UNSPEC_TPREL;
      wanted_size = 64;
      break;

    case TLS_MODEL_LOCAL_EXEC:
      wanted_unspec = UNSPEC_TPREL;
      wanted_size = alpha_tls_size;
      break;

    default:
      gcc_unreachable ();
    }

  return unspec == wanted_unspec && size == wanted_size;
}
/* Used by aligned_memory_operand and unaligned_memory_operand to
   resolve what reload is going to do with OP if it's a register.

   Outside of reload, or when OP (looking through a SUBREG) is not a
   pseudo register, OP is returned unchanged.  Otherwise the pseudo's
   equivalent memory location is returned, which may be 0.  */

rtx
resolve_reload_operand (rtx op)
{
  if (!reload_in_progress)
    return op;

  rtx inner = SUBREG_P (op) ? SUBREG_REG (op) : op;

  if (!REG_P (inner) || REGNO (inner) < FIRST_PSEUDO_REGISTER)
    return op;

  return reg_equiv_memory_loc (REGNO (inner));
}
/* The scalar modes supported differs from the default check-what-c-supports
   version in that sometimes TFmode is available even when long double
   indicates only DFmode.  TFmode is supported exactly when
   TARGET_HAS_XFLOATING_LIBS; the integer modes up to TImode and the
   SFmode/DFmode float modes are always supported.  */

static bool
alpha_scalar_mode_supported_p (scalar_mode mode)
{
  switch (mode)
    {
    case E_TFmode:
      return TARGET_HAS_XFLOATING_LIBS;

    case E_QImode:
    case E_HImode:
    case E_SImode:
    case E_DImode:
    case E_TImode: /* via optabs.cc */
    case E_SFmode:
    case E_DFmode:
      return true;

    default:
      return false;
    }
}
/* Alpha implements a couple of integer vector mode operations when
   TARGET_MAX is enabled.  We do not check TARGET_MAX here, however,
   which allows the vectorizer to operate on e.g. move instructions,
   or when expand_vector_operations can do something useful.

   The supported modes are V8QImode, V4HImode and V2SImode.  */

static bool
alpha_vector_mode_supported_p (machine_mode mode)
{
  if (mode == V8QImode)
    return true;
  if (mode == V4HImode)
    return true;
  return mode == V2SImode;
}
/* Return the TLS model to use for SYMBOL.  Anything that is not a
   SYMBOL_REF yields TLS_MODEL_NONE.  */

static enum tls_model
tls_symbolic_operand_type (rtx symbol)
{
  if (GET_CODE (symbol) != SYMBOL_REF)
    return TLS_MODEL_NONE;

  const enum tls_model declared = SYMBOL_REF_TLS_MODEL (symbol);

  /* Local-exec with a 64-bit size is the same code as initial-exec.  */
  if (declared == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64)
    return TLS_MODEL_INITIAL_EXEC;

  return declared;
}
/* Return true if the function DECL will share the same GP as any
   function in the current unit of translation.  */

static bool
decl_has_samegp (const_tree decl)
{
  /* Functions that are not local can be overridden, and thus may
     not share the same gp.  */
  const bool binds_locally = targetm.binds_local_p (decl);
  if (!binds_locally)
    return false;

  /* If -msmall-data is in effect, assume that there is only one GP
     for the module, and so any local symbol has this property.  We
     need explicit relocations to be able to enforce this for symbols
     not defined in this unit of translation, however.  */
  if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA)
    return true;

  /* Functions that are not external are defined in this UoT.  */
  /* ??? Irritatingly, static functions not yet emitted are still
     marked "external".  Apply this to non-static functions only.  */
  return !(TREE_PUBLIC (decl) && DECL_EXTERNAL (decl));
}
/* Return true if EXP should be placed in the small data section.  */

static bool
alpha_in_small_data_p (const_tree exp)
{
  switch (TREE_CODE (exp))
    {
    case STRING_CST:
      /* We want to merge strings, so we never consider them small data.  */
      return false;

    case FUNCTION_DECL:
      /* Functions are never in the small data area.  Duh.  */
      return false;

    default:
      break;
    }

  const bool is_variable = TREE_CODE (exp) == VAR_DECL;

  /* COMMON symbols are never small data.  */
  if (is_variable && DECL_COMMON (exp))
    return false;

  /* A variable with an explicit section is small data only when that
     section is one of the small data sections; its size is not
     consulted.  */
  if (is_variable && DECL_SECTION_NAME (exp))
    {
      const char *name = DECL_SECTION_NAME (exp);
      return strcmp (name, ".sdata") == 0 || strcmp (name, ".sbss") == 0;
    }

  /* If this is an incomplete type with size 0, then we can't put it
     in sdata because it might be too big when completed.  */
  const HOST_WIDE_INT nbytes = int_size_in_bytes (TREE_TYPE (exp));
  return nbytes > 0 && nbytes <= g_switch_value;
}
#if TARGET_ABI_OPEN_VMS
/* Return true if MODE is usable as a pointer mode: SImode or DImode.  */

static bool
vms_valid_pointer_mode (scalar_int_mode mode)
{
  if (mode == SImode)
    return true;
  return mode == DImode;
}
/* Return true if SYMNAME is a linkage symbol name, i.e. a non-empty
   name followed by the suffix "..lk".  The bare suffix on its own does
   not qualify.  */

static bool
alpha_linkage_symbol_p (const char *symname)
{
  static const char suffix[] = "..lk";
  const size_t suffix_len = sizeof suffix - 1;

  /* Keep the length in size_t; storing strlen's result in an int
     silently narrows it.  */
  const size_t symlen = strlen (symname);

  if (symlen <= suffix_len)
    return false;

  return strcmp (symname + (symlen - suffix_len), suffix) == 0;
}
/* Nonzero if X refers to a linkage symbol: either a SYMBOL_REF whose
name satisfies alpha_linkage_symbol_p, or a CONST wrapping a PLUS whose
first operand is such a SYMBOL_REF. X is evaluated more than once. */
#define LINKAGE_SYMBOL_REF_P(X) \
((GET_CODE (X) == SYMBOL_REF \
&& alpha_linkage_symbol_p (XSTR (X, 0))) \
|| (GET_CODE (X) == CONST \
&& GET_CODE (XEXP (X, 0)) == PLUS \
&& GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \
&& alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0))))
#endif
/* legitimate_address_p recognizes an RTL expression that is a valid
memory address for an instruction. The MODE argument is the
machine mode for the MEM expression that wants to use this address.
For Alpha, we have either a constant address or the sum of a
register and a constant address, or just a register. For DImode,
any of those forms can be surrounded with an AND that clear the
low-order three bits; this is an "unaligned" access. */
static bool
alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict)
{
/* If this is an ldq_u type address, discard the outer AND. */
if (mode == DImode
&& GET_CODE (x) == AND
&& CONST_INT_P (XEXP (x, 1))
&& INTVAL (XEXP (x, 1)) == -8)
x = XEXP (x, 0);
/* Discard non-paradoxical subregs. */
if (SUBREG_P (x)
&& (GET_MODE_SIZE (GET_MODE (x))
< GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
x = SUBREG_REG (x);
/* Unadorned general registers are valid. */
if (REG_P (x)
&& (strict
? STRICT_REG_OK_FOR_BASE_P (x)
: NONSTRICT_REG_OK_FOR_BASE_P (x)))
return true;
/* Constant addresses (i.e. +/- 32k) are valid. */
if (CONSTANT_ADDRESS_P (x))
return true;
#if TARGET_ABI_OPEN_VMS
if (LINKAGE_SYMBOL_REF_P (x))
return true;
#endif
/* Register plus a small constant offset is valid. */
if (GET_CODE (x) == PLUS)
{
rtx ofs = XEXP (x, 1);
x = XEXP (x, 0);
/* Discard non-paradoxical subregs. */
if (SUBREG_P (x)
&& (GET_MODE_SIZE (GET_MODE (x))
< GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
x = SUBREG_REG (x);
if (REG_P (x))
{
if (! strict
&& NONSTRICT_REG_OK_FP_BASE_P (x)
&& CONST_INT_P (ofs))
return true;
if ((strict
? STRICT_REG_OK_FOR_BASE_P (x)
: NONSTRICT_REG_OK_FOR_BASE_P (x))
&& CONSTANT_ADDRESS_P (ofs))
return true;
}
}
/* If we're managing explicit relocations, LO_SUM is valid, as are small
data symbols. Avoid explicit relocations of modes larger than word
mode since i.e. $LC0+8($1) can fold around +/- 32k offset. */
else if (TARGET_EXPLICIT_RELOCS
&& GET_MODE_SIZE (mode) <= UNITS_PER_WORD)
{
if (small_symbolic_operand (x, Pmode))
return true;
if (GET_CODE (x) == LO_SUM)
{
rtx ofs = XEXP (x, 1);
x = XEXP (x, 0);
/* Discard non-paradoxical subregs. */
if (SUBREG_P (x)
&& (GET_MODE_SIZE (GET_MODE (x))
< GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)))))
x = SUBREG_REG (x);
/* Must have a valid base register. */
if (! (REG_P (x)
&& (strict
? STRICT_REG_OK_FOR_BASE_P (x)
: NONSTRICT_REG_OK_FOR_BASE_P (x))))
return false;
/* The symbol must be local. */
if (local_symbolic_operand (ofs, Pmode)
|| dtp32_symbolic_operand (ofs, Pmode)
|| tp32_symbolic_operand (ofs, Pmode))
return true;
}
}
return false;
}
/* Build the SYMBOL_REF for __tls_get_addr.  The libfunc is created by
   the first call and cached in tls_get_addr_libfunc for later ones.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (tls_get_addr_libfunc == NULL_RTX)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");

  return tls_get_addr_libfunc;
}
/* Try machine-dependent ways of modifying an illegitimate address
to be legitimate. If we find one, return the new, valid address.
X is the address to fix up. SCRATCH is the register handed to
expand_simple_binop as the target, and used for the HIGH part of a
local symbol, when new pseudos cannot be created; several TLS paths
overwrite it with a fresh pseudo. MODE is the mode of the memory
access and only gates the explicit-relocation handling.
Returns NULL when none of the strategies below applies. May emit
insns into the current sequence. */
static rtx
alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode)
{
/* Constant displacement still to be folded in; set by whichever of the
three cases below jumps to split_addend. */
HOST_WIDE_INT addend;
/* (1) If the address is (plus reg const_int) and the CONST_INT is not a
valid offset, compute the high part of the constant and add it to
the register. Then our address is (plus temp low-part-const). */
if (GET_CODE (x) == PLUS
&& REG_P (XEXP (x, 0))
&& CONST_INT_P (XEXP (x, 1))
&& ! CONSTANT_ADDRESS_P (XEXP (x, 1)))
{
addend = INTVAL (XEXP (x, 1));
x = XEXP (x, 0);
goto split_addend;
}
/* (2) If the address is (const (plus FOO const_int)), find the low-order
part of the CONST_INT. Then load FOO plus any high-order part of the
CONST_INT into a register. Our address is (plus reg low-part-const).
This is done to reduce the number of GOT entries. */
if (can_create_pseudo_p ()
&& GET_CODE (x) == CONST
&& GET_CODE (XEXP (x, 0)) == PLUS
&& CONST_INT_P (XEXP (XEXP (x, 0), 1)))
{
addend = INTVAL (XEXP (XEXP (x, 0), 1));
x = force_reg (Pmode, XEXP (XEXP (x, 0), 0));
goto split_addend;
}
/* (3) If we have a (plus reg const), emit the load as in (2), then add
the two registers, and finally generate (plus reg low-part-const) as
our address. */
if (can_create_pseudo_p ()
&& GET_CODE (x) == PLUS
&& REG_P (XEXP (x, 0))
&& GET_CODE (XEXP (x, 1)) == CONST
&& GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS
&& CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1)))
{
addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1));
x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0),
XEXP (XEXP (XEXP (x, 1), 0), 0),
NULL_RTX, 1, OPTAB_LIB_WIDEN);
goto split_addend;
}
/* If this is a local symbol, split the address into HIGH/LO_SUM parts.
Avoid modes larger than word mode since i.e. $LC0+8($1) can fold
around +/- 32k offset. */
if (TARGET_EXPLICIT_RELOCS
&& GET_MODE_SIZE (mode) <= UNITS_PER_WORD
&& symbolic_operand (x, Pmode))
{
rtx r0, r16, eqv, tga, tp, dest, seq;
rtx_insn *insn;
/* TLS symbols are handled per model and always return from inside the
switch; only TLS_MODEL_NONE falls through to the local-symbol code. */
switch (tls_symbolic_operand_type (x))
{
case TLS_MODEL_NONE:
break;
case TLS_MODEL_GLOBAL_DYNAMIC:
{
/* Build the call to __tls_get_addr in its own sequence: the argument
goes in hard register 16, the result comes back in hard register 0,
and both insns carry the same fresh sequence number. */
start_sequence ();
r0 = gen_rtx_REG (Pmode, 0);
r16 = gen_rtx_REG (Pmode, 16);
tga = get_tls_get_addr ();
dest = gen_reg_rtx (Pmode);
seq = GEN_INT (alpha_next_sequence_number++);
emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq));
rtx val = gen_call_value_osf_tlsgd (r0, tga, seq);
insn = emit_call_insn (val);
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
insn = get_insns ();
end_sequence ();
/* Emit the sequence as a libcall block copying r0 into DEST, with X as
the equivalent value. */
emit_libcall_block (insn, dest, r0, x);
return dest;
}
case TLS_MODEL_LOCAL_DYNAMIC:
{
/* Same call shape as above, but using the tlsldm patterns; the result
lands in SCRATCH and the symbol's DTPREL offset is applied
afterwards. */
start_sequence ();
r0 = gen_rtx_REG (Pmode, 0);
r16 = gen_rtx_REG (Pmode, 16);
tga = get_tls_get_addr ();
scratch = gen_reg_rtx (Pmode);
seq = GEN_INT (alpha_next_sequence_number++);
emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq));
rtx val = gen_call_value_osf_tlsldm (r0, tga, seq);
insn = emit_call_insn (val);
RTL_CONST_CALL_P (insn) = 1;
use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16);
insn = get_insns ();
end_sequence ();
/* The equivalence for the call result does not mention X. */
eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
UNSPEC_TLSLDM_CALL);
emit_libcall_block (insn, scratch, r0, eqv);
/* From here on EQV is (const (unspec [X] UNSPEC_DTPREL)). */
eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL);
eqv = gen_rtx_CONST (Pmode, eqv);
/* 64-bit offsets: load the offset into a register and add the base. */
if (alpha_tls_size == 64)
{
dest = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (dest, eqv));
emit_insn (gen_adddi3 (dest, dest, scratch));
return dest;
}
/* 32-bit offsets: add the HIGH part first; the LO_SUM below supplies
the rest. Any other size uses the LO_SUM alone. */
if (alpha_tls_size == 32)
{
rtx temp = gen_rtx_HIGH (Pmode, eqv);
temp = gen_rtx_PLUS (Pmode, scratch, temp);
scratch = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (scratch, temp));
}
return gen_rtx_LO_SUM (Pmode, scratch, eqv);
}
case TLS_MODEL_INITIAL_EXEC:
/* Thread pointer plus the TPREL offset loaded into a register. */
eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
eqv = gen_rtx_CONST (Pmode, eqv);
tp = gen_reg_rtx (Pmode);
scratch = gen_reg_rtx (Pmode);
dest = gen_reg_rtx (Pmode);
emit_insn (gen_get_thread_pointerdi (tp));
emit_insn (gen_rtx_SET (scratch, eqv));
emit_insn (gen_adddi3 (dest, tp, scratch));
return dest;
case TLS_MODEL_LOCAL_EXEC:
/* Thread pointer with the TPREL offset applied through LO_SUM, preceded
by a HIGH adjustment when the offset size is 32 bits. */
eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL);
eqv = gen_rtx_CONST (Pmode, eqv);
tp = gen_reg_rtx (Pmode);
emit_insn (gen_get_thread_pointerdi (tp));
if (alpha_tls_size == 32)
{
rtx temp = gen_rtx_HIGH (Pmode, eqv);
temp = gen_rtx_PLUS (Pmode, tp, temp);
tp = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (tp, temp));
}
return gen_rtx_LO_SUM (Pmode, tp, eqv);
default:
gcc_unreachable ();
}
if (local_symbolic_operand (x, Pmode))
{
/* Small-data symbols are returned unchanged. */
if (small_symbolic_operand (x, Pmode))
return x;
else
{
/* Use a fresh pseudo for the HIGH part when one can be created;
otherwise the caller-supplied SCRATCH is used as is. */
if (can_create_pseudo_p ())
scratch = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x)));
return gen_rtx_LO_SUM (Pmode, scratch, x);
}
}
}
return NULL;
split_addend:
{
/* Split ADDEND into three pieces that sum to the original value: LOW is
the low 16 bits sign-extended, HIGH is the low 32 bits of what remains,
sign-extended, and ADDEND keeps whatever is left over. The non-zero
upper pieces are added to X; LOW stays as the displacement. */
HOST_WIDE_INT low, high;
low = ((addend & 0xffff) ^ 0x8000) - 0x8000;
addend -= low;
high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000;
addend -= high;
if (addend)
x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend),
(!can_create_pseudo_p () ? scratch : NULL_RTX),
1, OPTAB_LIB_WIDEN);
if (high)
x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high),
(!can_create_pseudo_p () ? scratch : NULL_RTX),
1, OPTAB_LIB_WIDEN);
return plus_constant (Pmode, x, low);
}
}
/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  Return X or the new, valid address.

   This is a thin wrapper around alpha_legitimize_address_1 that passes
   no scratch register and falls back to X when the worker returns
   NULL.  */

static rtx
alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			  machine_mode mode)
{
  rtx legitimized = alpha_legitimize_address_1 (x, NULL_RTX, mode);

  if (legitimized == NULL_RTX)
    return x;

  return legitimized;
}
/* Return true if ADDR has an effect that depends on the machine mode it
   is used for.  On the Alpha this is true only for the unaligned modes.
   We can simplify the test since we know that the address must be valid:
   an AND at the top level is the only thing checked.  */

static bool
alpha_mode_dependent_address_p (const_rtx addr,
				addr_space_t as ATTRIBUTE_UNUSED)
{
  const bool top_level_is_and = GET_CODE (addr) == AND;

  return top_level_is_and;
}
/* Primarily this is required for TLS symbols, but given that our move
   patterns *ought* to be able to handle any symbol at any time, we
   should never be spilling symbolic operands to the constant pool, ever.

   Return true for SYMBOL_REF, LABEL_REF and CONST; false otherwise.  */

static bool
alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
    case LABEL_REF:
    case CONST:
      return true;

    default:
      return false;
    }
}
/* We do not allow indirect calls to be optimized into sibling calls, nor
   can we allow a call to a function with a different GP to be optimized
   into a sibcall.  */

static bool
alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  /* A null DECL means an indirect call: can't do indirect tail calls,
     since we don't know if the target uses the same GP.  Otherwise, we
     can make a tail call if the target function shares the same GP.  */
  return decl != NULL_TREE && decl_has_samegp (decl);
}
/* Return true if some sub-rtx of X satisfies small_symbolic_operand.
   Anything underneath a LO_SUM is not examined.  */

bool
some_small_symbolic_operand_int (rtx x)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, x, ALL)
    {
      rtx sub = *iter;

      /* Don't re-split.  */
      if (GET_CODE (sub) == LO_SUM)
	{
	  iter.skip_subrtxes ();
	  continue;
	}

      if (small_symbolic_operand (sub, Pmode))
	return true;
    }

  return false;
}
/* Return a copy of X in which every sub-rtx satisfying
   small_symbolic_operand has been wrapped in a LO_SUM with
   pic_offset_table_rtx.  Existing LO_SUMs are left alone, and the walk
   does not descend into them or into a freshly built LO_SUM.  */

rtx
split_small_symbolic_operand (rtx x)
{
  x = copy_insn (x);
  subrtx_ptr_iterator::array_type array;
  FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL)
    {
      rtx *loc = *iter;
      rtx sub = *loc;

      /* Don't re-split.  */
      const bool already_split = GET_CODE (sub) == LO_SUM;

      if (!already_split)
	{
	  if (!small_symbolic_operand (sub, Pmode))
	    continue;
	  *loc = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, sub);
	}

      iter.skip_subrtxes ();
    }

  return x;
}
/* Indicate that INSN cannot be duplicated.  This is true for any insn
   that we've marked with gpdisp relocs, since those have to stay in
   1-1 correspondence with one another.

   Technically we could copy them if we could set up a mapping from one
   sequence number to another, across the set of insns to be duplicated.
   This seems overly complicated and error-prone since interblock motion
   from sched-ebb could move one of the pair of insns to a different block.

   Also cannot allow jsr insns to be duplicated.  If they throw exceptions,
   then they'll be in a different block from their ldgp.  Which could lead
   the bb reorder code to think that it would be ok to copy just the block
   containing the call and branch to the block containing the ldgp.  */

static bool
alpha_cannot_copy_insn_p (rtx_insn *insn)
{
  /* Nothing is restricted before reload has completed, without explicit
     relocations, or for an insn that is not recognized.  */
  if (!reload_completed
      || !TARGET_EXPLICIT_RELOCS
      || recog_memoized (insn) < 0)
    return false;

  /* Otherwise the insn's cannot_copy attribute decides.  */
  return get_attr_cannot_copy (insn);
}
/* Try a machine-dependent way of reloading the illegitimate address X.
   OPNUM and TYPE are handed on to push_reload; MODE and IND_LEVELS are
   unused.  If a reload was pushed, return the (possibly rewritten)
   address; otherwise return NULL_RTX.  */

rtx
alpha_legitimize_reload_address (rtx x,
                                 machine_mode mode ATTRIBUTE_UNUSED,
                                 int opnum, int type,
                                 int ind_levels ATTRIBUTE_UNUSED)
{
  /* Every form we handle is (plus BASE (const_int)).  */
  if (GET_CODE (x) != PLUS || !CONST_INT_P (XEXP (x, 1)))
    return NULL_RTX;

  rtx base = XEXP (x, 0);

  /* We must recognize output that we have already generated ourselves:
     (plus (plus REG (const_int)) (const_int)).  */
  const bool already_split = (GET_CODE (base) == PLUS
                              && REG_P (XEXP (base, 0))
                              && CONST_INT_P (XEXP (base, 1)));

  if (!already_split)
    {
      /* We wish to handle large displacements off a base register by
         splitting the addend across an ldah and the mem insn.  This
         cuts number of extra insns needed from 3 to 1.  */
      if (!(REG_P (base)
            && REGNO (base) < FIRST_PSEUDO_REGISTER
            && REGNO_OK_FOR_BASE_P (REGNO (base))))
        return NULL_RTX;

      const HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
      const HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
      const HOST_WIDE_INT high
        = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;

      /* Check for 32-bit overflow.  */
      if (high + low != val)
        return NULL_RTX;

      /* The high part gets reloaded into a base reg; the low part
         stays in the mem directly.  */
      const machine_mode addr_mode = GET_MODE (x);
      rtx high_sum = gen_rtx_PLUS (addr_mode, base, GEN_INT (high));
      x = gen_rtx_PLUS (addr_mode, high_sum, GEN_INT (low));
    }

  /* Reload the inner sum into a base register.  */
  push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
               BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
               opnum, (enum reload_type) type);
  return x;
}
/* Return the cost of moving a value from a register of class FROM to
   one of class TO.  A move that crosses between FLOAT_REGS and any
   other class is deliberately made quite expensive, because we really
   don't want such moves unless they are clearly worth it.
   Optimizations may reduce the impact of not being able to allocate a
   pseudo to a hard register.  */

static int
alpha_register_move_cost (machine_mode /*mode*/,
                          reg_class_t from, reg_class_t to)
{
  const bool from_fp = from == FLOAT_REGS;
  const bool to_fp = to == FLOAT_REGS;

  /* Both ends on the same side of the FP/non-FP divide.  */
  if (from_fp == to_fp)
    return 2;

  /* Without TARGET_FIX the cost is derived from the memory latency.  */
  if (!TARGET_FIX)
    return 4 + 2 * alpha_memory_latency;

  return from_fp ? 6 : 8;
}
/* Return the cost of moving data between a register and memory.  The
   mode, register class and direction are all ignored; on the Alpha we
   bump the cost up a bit by charging twice alpha_memory_latency.  */

static int
alpha_memory_move_cost (machine_mode /*mode*/, reg_class_t /*regclass*/,
                        bool /*in*/)
{
  const int cost = alpha_memory_latency * 2;
  return cost;
}
/* Compute a (partial) cost for rtx X. MODE decides between the
floating-point and integer entries of the cost table, OUTER_CODE is the
code of the expression containing X, and OPNO is passed through to the
recursive rtx_cost calls. When SPEED is false the size table
alpha_rtx_cost_size is used, otherwise the alpha_rtx_cost_data entry
for alpha_tune.
Return true if the complete cost has been computed, and false if
subexpressions should be scanned. Cases that know a cost store it in
*TOTAL; the default case and the plain integer PLUS/MINUS case leave
*TOTAL untouched. */
static bool
alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total,
bool speed)
{
int code = GET_CODE (x);
bool float_mode_p = FLOAT_MODE_P (mode);
const struct alpha_rtx_cost_data *cost_data;
/* Pick the cost table: one table when optimizing for size, a per-CPU
table (indexed by alpha_tune) when optimizing for speed. */
if (!speed)
cost_data = &alpha_rtx_cost_size;
else
cost_data = &alpha_rtx_cost_data[alpha_tune];
switch (code)
{
case CONST_INT:
/* If this is an 8-bit constant, return zero since it can be used
nearly anywhere with no cost. If it is a valid operand for an
ADD or AND, likewise return 0 if we know it will be used in that
context. Otherwise, return 2 since it might be used there later.
All other constants take at least two insns. */
if (INTVAL (x) >= 0 && INTVAL (x) < 256)
{
*total = 0;
return true;
}
/* FALLTHRU */
case CONST_DOUBLE:
case CONST_WIDE_INT:
if (x == CONST0_RTX (mode))
*total = 0;
else if ((outer_code == PLUS && add_operand (x, VOIDmode))
|| (outer_code == AND && and_operand (x, VOIDmode)))
*total = 0;
else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode))
*total = 2;
else
*total = COSTS_N_INSNS (2);
return true;
case CONST:
case SYMBOL_REF:
case LABEL_REF:
/* Symbolic constants: under explicit relocs, small symbols cost one
insn unless used directly in a MEM (then zero), and local symbols
cost one insn more than that. */
if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode))
*total = COSTS_N_INSNS (outer_code != MEM);
else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode))
*total = COSTS_N_INSNS (1 + (outer_code != MEM));
else if (tls_symbolic_operand_type (x))
/* Estimate of cost for call_pal rduniq. */
/* ??? How many insns do we emit here? More than one... */
*total = COSTS_N_INSNS (15);
else
/* Otherwise we do a load from the GOT. */
*total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
return true;
case HIGH:
/* This is effectively an add_operand. */
*total = 2;
return true;
case PLUS:
case MINUS:
if (float_mode_p)
*total = cost_data->fp_add;
else if (GET_CODE (XEXP (x, 0)) == ASHIFT
&& const23_operand (XEXP (XEXP (x, 0), 1), VOIDmode))
{
/* First operand is a shift by an amount accepted by
const23_operand: cost the shifted value and the second operand
plus a single insn, and report the cost as complete. */
*total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode,
(enum rtx_code) outer_code, opno, speed)
+ rtx_cost (XEXP (x, 1), mode,
(enum rtx_code) outer_code, opno, speed)
+ COSTS_N_INSNS (1));
return true;
}
return false;
case MULT:
if (float_mode_p)
*total = cost_data->fp_mult;
else if (mode == DImode)
*total = cost_data->int_mult_di;
else
*total = cost_data->int_mult_si;
return false;
case ASHIFT:
/* A left shift by a constant of at most 3 is costed as a single
insn; every other shift uses the table's int_shift entry. */
if (CONST_INT_P (XEXP (x, 1))
&& INTVAL (XEXP (x, 1)) <= 3)
{
*total = COSTS_N_INSNS (1);
return false;
}
/* FALLTHRU */
case ASHIFTRT:
case LSHIFTRT:
*total = cost_data->int_shift;
return false;
case IF_THEN_ELSE:
if (float_mode_p)
*total = cost_data->fp_add;
else
*total = cost_data->int_cmov;
return false;
case DIV:
case UDIV:
case MOD:
case UMOD:
if (!float_mode_p)
*total = cost_data->int_div;
else if (mode == SFmode)
*total = cost_data->fp_div_sf;
else
*total = cost_data->fp_div_df;
return false;
case MEM:
*total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency);
return true;
case NEG:
/* Integer NEG is one insn. Floating-point NEG falls through the
ABS case (whose integer test then fails) to the fp_add cost. */
if (! float_mode_p)
{
*total = COSTS_N_INSNS (1);
return false;
}
/* FALLTHRU */
case ABS:
if (! float_mode_p)
{
*total = COSTS_N_INSNS (1) + cost_data->int_cmov;
return false;
}
/* FALLTHRU */
case FLOAT:
case UNSIGNED_FLOAT:
case FIX:
case UNSIGNED_FIX:
case FLOAT_TRUNCATE:
*total = cost_data->fp_add;
return false;
case FLOAT_EXTEND:
/* Extending a value that comes straight from memory is costed at
zero here; otherwise use the fp_add cost. */
if (MEM_P (XEXP (x, 0)))
*total = 0;
else
*total = cost_data->fp_add;
return false;
default:
return false;
}
}
/* REF is an alignable memory location.  Store an aligned SImode
   reference covering it into *PALIGNED_MEM, and the number of bits by
   which REF is offset within that word into *PBITNUM.  */

void
get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum)
{
  gcc_assert (MEM_P (ref));

  /* During reload, use the replacement for the address and insist
     that it is a valid memory address for REF's mode.  */
  rtx addr;
  if (!reload_in_progress)
    addr = XEXP (ref, 0);
  else
    {
      addr = find_replacement (&XEXP (ref, 0));
      gcc_assert (memory_address_p (GET_MODE (ref), addr));
    }

  HOST_WIDE_INT disp = 0;
  if (GET_CODE (addr) == PLUS)
    disp = INTVAL (XEXP (addr, 1));

  /* Find the byte offset within an aligned word.  If the memory itself
     is claimed to be aligned, believe it.  Otherwise,
     aligned_memory_operand will have examined the base register and
     determined it is aligned, and thus displacements from it are
     naturally alignable.  */
  HOST_WIDE_INT byte_ofs = MEM_ALIGN (ref) >= 32 ? 0 : (disp & 3);

  /* The location should not cross aligned word boundary.  */
  gcc_assert (byte_ofs + GET_MODE_SIZE (GET_MODE (ref))
              <= GET_MODE_SIZE (SImode));

  /* Access the entire aligned word.  */
  *paligned_mem = widen_memory_access (ref, SImode, -byte_ofs);

  /* Report the offset within the word in bits rather than bytes.  */
  *pbitnum = GEN_INT (byte_ofs * BITS_PER_UNIT);
}
/* Similar to get_aligned_mem, but only return the address of the MEM
   REF.  During reload the replacement for the address is used.  An
   address of the form (plus BASE (const_int)) is rebuilt with
   plus_constant from its base and displacement.  */

rtx
get_unaligned_address (rtx ref)
{
  gcc_assert (MEM_P (ref));

  rtx addr;
  if (!reload_in_progress)
    addr = XEXP (ref, 0);
  else
    {
      addr = find_replacement (&XEXP (ref, 0));
      gcc_assert (memory_address_p (GET_MODE (ref), addr));
    }

  /* Anything other than a PLUS is used as the base with no
     displacement.  */
  if (GET_CODE (addr) != PLUS)
    return plus_constant (Pmode, addr, 0);

  const HOST_WIDE_INT disp = INTVAL (XEXP (addr, 1));
  return plus_constant (Pmode, XEXP (addr, 0), disp);
}
/* Compute a value X such that X & 7 == (ADDR + OFS) & 7.  When ADDR is
   a PLUS, its second operand is folded into OFS and only its first
   operand is used as the base.  The result is produced by
   expand_simple_binop.  */

rtx
get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs)
{
  rtx base = addr;
  HOST_WIDE_INT total = ofs;

  if (GET_CODE (addr) == PLUS)
    {
      total += INTVAL (XEXP (addr, 1));
      base = XEXP (addr, 0);
    }

  /* Only the low three bits of the combined offset matter.  */
  rtx low_bits = GEN_INT (total & 7);
  return expand_simple_binop (Pmode, PLUS, base, low_bits,
                              NULL_RTX, 1, OPTAB_LIB_WIDEN);
}
/* Return the class to prefer when reloading X into a register of class
   RCLASS.  On the Alpha, all (non-symbolic) constants except zero go
   into a floating-point register via memory.  Note that we cannot
   return anything that is not a subset of RCLASS, and that some
   symbolic constants cannot be dropped to memory.  */

enum reg_class
alpha_preferred_reload_class(rtx x, enum reg_class rclass)
{
  /* Zero is present in any register class.  */
  if (x == CONST0_RTX (GET_MODE (x)))
    return rclass;

  /* These sorts of constants we can easily drop to memory.  */
  const bool droppable = (CONST_SCALAR_INT_P (x)
                          || CONST_DOUBLE_P (x)
                          || GET_CODE (x) == CONST_VECTOR);

  /* Non-constants are left in whatever class was asked for.  */
  if (!droppable && !CONSTANT_P (x))
    return rclass;

  /* A droppable constant headed for the FP registers gets NO_REGS.  */
  if (droppable && rclass == FLOAT_REGS)
    return NO_REGS;

  /* All other kinds of constants should not (and in the case of HIGH
     cannot) be dropped to memory -- instead we use a GENERAL_REGS
     secondary reload.  For every constant, ALL_REGS is narrowed to
     GENERAL_REGS.  */
  return rclass == ALL_REGS ? GENERAL_REGS : rclass;
}
/* Inform reload about cases where moving X with mode MODE to (IN_P
   true) or from (IN_P false) a register in RCLASS_I requires an extra
   scratch or intermediate register.  Return the class needed for the
   intermediate register, or NO_REGS; when a special reload pattern is
   wanted instead, its insn code is stored in SRI->icode.  */

static reg_class_t
alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
                        machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  /* Loading and storing HImode or QImode values to and from memory
     usually requires a scratch register.  */
  const bool subword = mode == QImode || mode == HImode || mode == CQImode;
  if (!TARGET_BWX && subword && any_memory_operand (x, mode))
    {
      if (!in_p)
        sri->icode = direct_optab_handler (reload_out_optab, mode);
      else if (!aligned_memory_operand (x, mode))
        /* Aligned loads get no special pattern.  */
        sri->icode = direct_optab_handler (reload_in_optab, mode);
      return NO_REGS;
    }

  /* Everything that remains concerns FLOAT_REGS only.  */
  if (rclass != FLOAT_REGS)
    return NO_REGS;

  /* A MEM whose address is an AND has to go through GENERAL_REGS; any
     other MEM needs nothing.  */
  if (MEM_P (x))
    return GET_CODE (XEXP (x, 0)) == AND ? GENERAL_REGS : NO_REGS;

  /* We also cannot do integral arithmetic into FP regs, as might
     result from register elimination into a DImode fp register.  */
  if (in_p && INTEGRAL_MODE_P (mode) && !REG_P (x) && !CONST_INT_P (x))
    return GENERAL_REGS;

  return NO_REGS;
}
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.
   Copying between a general and an FP register needs a memory
   location unless the FIX extension is available.  */

static bool
alpha_secondary_memory_needed (machine_mode, reg_class_t class1,
                               reg_class_t class2)
{
  if (TARGET_FIX)
    return false;

  /* Memory is needed exactly when one class is FLOAT_REGS and the
     other is not.  */
  const bool fp1 = class1 == FLOAT_REGS;
  const bool fp2 = class2 == FLOAT_REGS;
  return fp1 != fp2;
}
/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.  A MODE_FLOAT mode, or
   any mode at least 4 bytes in size, is used unchanged.  Smaller
   modes are widened to a word like the default.  This is needed
   because we always store integers in FP registers in quadword
   format.  This whole area is very tricky!  */

static machine_mode
alpha_secondary_memory_needed_mode (machine_mode mode)
{
  if (GET_MODE_CLASS (mode) == MODE_FLOAT || GET_MODE_SIZE (mode) >= 4)
    return mode;

  /* Pick the word-sized mode of the same class.  */
  return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require ();
}
/* Given SEQ, which is an INSN list, find every MEM in the patterns of
   its insns and copy the volatile, no-trap and read-only flags from
   REF into each of them.  If REF is not a MEM, or has none of those
   flags set, don't do anything.  */

void
alpha_set_memflags (rtx seq, rtx ref)
{
  if (!MEM_P (ref))
    return;

  /* This is only called from alpha.md, after having had something
     generated from one of the insn patterns.  So if everything is
     zero, the pattern is already up-to-date.  */
  if (!(MEM_VOLATILE_P (ref) || MEM_NOTRAP_P (ref) || MEM_READONLY_P (ref)))
    return;

  subrtx_var_iterator::array_type worklist;
  for (rtx_insn *insn = as_a <rtx_insn *> (seq); insn;
       insn = NEXT_INSN (insn))
    {
      /* Every element of SEQ is expected to be a real insn.  */
      if (!INSN_P (insn))
        gcc_unreachable ();

      FOR_EACH_SUBRTX_VAR (it, worklist, PATTERN (insn), NONCONST)
        {
          rtx sub = *it;
          if (!MEM_P (sub))
            continue;

          MEM_VOLATILE_P (sub) = MEM_VOLATILE_P (ref);
          MEM_NOTRAP_P (sub) = MEM_NOTRAP_P (ref);
          MEM_READONLY_P (sub) = MEM_READONLY_P (ref);

          /* Sadly, we cannot use alias sets because the extra
             aliasing produced by the AND interferes.  Given that
             two-byte quantities are the only thing we would be
             able to differentiate anyway, there does not seem to
             be any point in convoluting the early out of the
             alias check.  */
          it.skip_subrtxes ();
        }
    }
}
/* Forward declaration: alpha_emit_set_const_1 below calls
alpha_emit_set_const, which is only defined after it. */
static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT,
int, bool);
/* Internal routine for alpha_emit_set_const to check for N or below insns.
TARGET is the suggested destination (it may be null), MODE is the mode
the computation is done in and C is the constant wanted.
If NO_OUTPUT is true, then we only check to see if N insns are possible,
and return pc_rtx if successful. Otherwise the insns are emitted and
the rtx holding the value is returned. Return 0 if none of the
strategies below fits within N insns. */
static rtx
alpha_emit_set_const_1 (rtx target, machine_mode mode,
HOST_WIDE_INT c, int n, bool no_output)
{
HOST_WIDE_INT new_const;
int i, bits;
/* Use a pseudo if highly optimizing and still generating RTL. */
rtx subtarget
= (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target);
rtx temp, insn;
/* If this is a sign-extended 32-bit constant, we can do this in at most
three insns, so do it if we have enough insns left. */
if (c >> 31 == -1 || c >> 31 == 0)
{
/* LOW is the sign-extended low 16 bits of C; HIGH is the
sign-extended next 16 bits of what remains once LOW is removed. */
HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000;
HOST_WIDE_INT tmp1 = c - low;
HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000;
HOST_WIDE_INT extra = 0;
/* If HIGH will be interpreted as negative but the constant is
positive, we must adjust it to do two ldah insns. */
if ((high & 0x8000) != 0 && c >= 0)
{
extra = 0x4000;
tmp1 -= 0x40000000;
high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000);
}
if (c == low || (low == 0 && extra == 0))
{
/* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode)
but that meant that we can't handle INT_MIN on 32-bit machines
(like NT/Alpha), because we recurse indefinitely through
emit_move_insn to gen_movdi. So instead, since we know exactly
what we want, create it explicitly. */
if (no_output)
return pc_rtx;
if (target == NULL)
target = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (target, GEN_INT (c)));
return target;
}
else if (n >= 2 + (extra != 0))
{
/* Two insns are needed, or three when the EXTRA adjustment is
in play. */
if (no_output)
return pc_rtx;
if (!can_create_pseudo_p ())
{
emit_insn (gen_rtx_SET (target, GEN_INT (high << 16)));
temp = target;
}
else
temp = copy_to_suggested_reg (GEN_INT (high << 16),
subtarget, mode);
/* As of 2002-02-23, addsi3 is only available when not optimizing.
This means that if we go through expand_binop, we'll try to
generate extensions, etc, which will require new pseudos, which
will fail during some split phases. The SImode add patterns
still exist, but are not named. So build the insns by hand. */
if (extra != 0)
{
if (! subtarget)
subtarget = gen_reg_rtx (mode);
insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16));
insn = gen_rtx_SET (subtarget, insn);
emit_insn (insn);
temp = subtarget;
}
if (target == NULL)
target = gen_reg_rtx (mode);
insn = gen_rtx_PLUS (mode, temp, GEN_INT (low));
insn = gen_rtx_SET (target, insn);
emit_insn (insn);
return target;
}
}
/* If we couldn't do it that way, try some other methods. But if we have
no instructions left, don't bother. Likewise, if this is SImode and
we can't make pseudos, we can't do anything since the expand_binop
and expand_unop calls will widen and try to make pseudos. */
if (n == 1 || (mode == SImode && !can_create_pseudo_p ()))
return 0;
/* Next, see if we can load a related constant and then shift and possibly
negate it to get the constant we want. Try this once each increasing
numbers of insns. */
for (i = 1; i < n; i++)
{
/* First, see if minus some low bits, we've an easy load of
high bits. */
new_const = ((c & 0xffff) ^ 0x8000) - 0x8000;
if (new_const != 0)
{
temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output);
if (temp)
{
if (no_output)
return temp;
return expand_binop (mode, add_optab, temp, GEN_INT (new_const),
target, 0, OPTAB_WIDEN);
}
}
/* Next try complementing. */
temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output);
if (temp)
{
if (no_output)
return temp;
return expand_unop (mode, one_cmpl_optab, temp, target, 0);
}
/* Next try to form a constant and do a left shift. We can do this
if some low-order bits are zero; the exact_log2 call below tells
us that information. The bits we are shifting out could be any
value, but here we'll just try the 0- and sign-extended forms of
the constant. To try to increase the chance of having the same
constant in more than one insn, start at the highest number of
bits to shift, but try all possibilities in case a ZAPNOT will
be useful. */
bits = exact_log2 (c & -c);
if (bits > 0)
for (; bits > 0; bits--)
{
new_const = c >> bits;
temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
if (!temp && c < 0)
{
new_const = (unsigned HOST_WIDE_INT)c >> bits;
temp = alpha_emit_set_const (subtarget, mode, new_const,
i, no_output);
}
if (temp)
{
if (no_output)
return temp;
return expand_binop (mode, ashl_optab, temp, GEN_INT (bits),
target, 0, OPTAB_WIDEN);
}
}
/* Now try high-order zero bits. Here we try the shifted-in bits as
all zero and all ones. Be careful to avoid shifting outside the
mode and to avoid shifting outside the host wide int size. */
bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
- floor_log2 (c) - 1);
if (bits > 0)
for (; bits > 0; bits--)
{
new_const = c << bits;
temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
if (!temp)
{
new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
temp = alpha_emit_set_const (subtarget, mode, new_const,
i, no_output);
}
if (temp)
{
if (no_output)
return temp;
return expand_binop (mode, lshr_optab, temp, GEN_INT (bits),
target, 1, OPTAB_WIDEN);
}
}
/* Now try high-order 1 bits. We get that with a sign-extension.
But one bit isn't enough here. Be careful to avoid shifting outside
the mode and to avoid shifting outside the host wide int size. */
bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8)
- floor_log2 (~ c) - 2);
if (bits > 0)
for (; bits > 0; bits--)
{
new_const = c << bits;
temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output);
if (!temp)
{
new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1);
temp = alpha_emit_set_const (subtarget, mode, new_const,
i, no_output);
}
if (temp)
{
if (no_output)
return temp;
return expand_binop (mode, ashr_optab, temp, GEN_INT (bits),
target, 0, OPTAB_WIDEN);
}
}
}
/* Finally, see if can load a value into the target that is the same as the
constant except that all bytes that are 0 are changed to be 0xff. If we
can, then we can do a ZAPNOT to obtain the desired constant. */
new_const = c;
for (i = 0; i < 64; i += 8)
if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0)
new_const |= (HOST_WIDE_INT) 0xff << i;
/* We are only called for SImode and DImode. If this is SImode, ensure that
we are sign extended to a full word. */
if (mode == SImode)
new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000;
if (new_const != c)
{
/* One insn of the budget is kept back for the final AND. */
temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output);
if (temp)
{
if (no_output)
return temp;
return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const),
target, 0, OPTAB_WIDEN);
}
}
return 0;
}
/* Try to output insns to set TARGET equal to the constant C, trying
   sequences of 1 insn, then 2, and so on up to N.  Do all computations
   in MODE.  If a sequence is found and NO_OUTPUT is false, the insns
   are emitted and the place where the value has been put is returned.
   If NO_OUTPUT is true nothing is emitted here and the result of
   alpha_emit_set_const_1 is returned as is.  If no sequence of at most
   N insns is found, zero is returned and no insns are emitted.  */

static rtx
alpha_emit_set_const (rtx target, machine_mode mode,
                      HOST_WIDE_INT c, int n, bool no_output)
{
  const machine_mode orig_mode = mode;
  rtx orig_target = target;
  bool use_dimode = false;

  /* If we can't make any pseudos, TARGET is an SImode hard register,
     and we can't load this constant in one insn, do this in DImode.  */
  if (!can_create_pseudo_p () && mode == SImode
      && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER)
    {
      rtx one_insn = alpha_emit_set_const_1 (target, mode, c, 1, no_output);
      if (one_insn)
        return one_insn;
      use_dimode = true;
    }
  else if (mode == V8QImode || mode == V4HImode || mode == V2SImode)
    /* These vector modes are always handled as DImode.  */
    use_dimode = true;

  if (use_dimode)
    {
      target = no_output ? NULL : gen_lowpart (DImode, target);
      mode = DImode;
    }

  /* Try 1 insn, then 2, then up to N.  */
  rtx result = NULL_RTX;
  for (int budget = 1; budget <= n && !result; budget++)
    result = alpha_emit_set_const_1 (target, mode, c, budget, no_output);

  if (!result || no_output)
    return result;

  /* Unless the final insn already sets its destination from a
     constant, record the value with a REG_EQUAL note.  */
  rtx_insn *last = get_last_insn ();
  rtx set = single_set (last);
  if (! CONSTANT_P (SET_SRC (set)))
    set_unique_reg_note (last, REG_EQUAL, GEN_INT (c));

  /* Allow for the case where we changed the mode of TARGET.  */
  if (result == target)
    return orig_target;
  if (mode != orig_mode)
    return gen_lowpart (orig_mode, result);
  return result;
}
/* Having failed to find a 3 insn sequence in alpha_emit_set_const,
   fall back to a straightforward decomposition of C1 into TARGET.  We
   do this to avoid exponential run times encountered when looking for
   longer sequences with alpha_emit_set_const.  Returns TARGET.  */

static rtx
alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1)
{
  /* Decompose the entire word.  Each half contributes a sign-extended
     16-bit piece and a sign-extended 32-bit remainder.  */
  const HOST_WIDE_INT lo16 = ((c1 & 0xffff) ^ 0x8000) - 0x8000;
  HOST_WIDE_INT rest = c1 - lo16;
  const HOST_WIDE_INT lo32 = ((rest & 0xffffffff) ^ 0x80000000) - 0x80000000;
  rest = (rest - lo32) >> 32;
  const HOST_WIDE_INT hi16 = ((rest & 0xffff) ^ 0x8000) - 0x8000;
  rest -= hi16;
  const HOST_WIDE_INT hi32 = ((rest & 0xffffffff) ^ 0x80000000) - 0x80000000;
  gcc_assert (rest == hi32);

  /* Construct the high word.  */
  if (hi32 == 0)
    emit_move_insn (target, GEN_INT (hi16));
  else
    {
      emit_move_insn (target, GEN_INT (hi32));
      if (hi16 != 0)
        emit_move_insn (target,
                        gen_rtx_PLUS (DImode, target, GEN_INT (hi16)));
    }

  /* Shift it into place.  */
  emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32)));

  /* Add in the low bits, skipping pieces that are zero.  */
  if (lo32 != 0)
    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (lo32)));
  if (lo16 != 0)
    emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (lo16)));

  return target;
}
/* Given an integral CONST_INT or CONST_VECTOR X, return the low 64
   bits.  A CONST_VECTOR is first converted with simplify_subreg to
   DImode; the value must be a CONST_INT by then.  */

static HOST_WIDE_INT
alpha_extract_integer (rtx x)
{
  rtx value = (GET_CODE (x) == CONST_VECTOR
               ? simplify_subreg (DImode, x, GET_MODE (x), 0)
               : x);

  gcc_assert (CONST_INT_P (value));
  return INTVAL (value);
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.  This is all constants for
   which we are willing to load the value into a register via a move
   pattern.  Normally this is all symbolic constants other than TLS
   symbols, integral constants that take three or fewer instructions,
   and zero of floating-point or vector mode.  With
   TARGET_BUILD_CONSTANTS every integral constant that reaches the
   integer cases is accepted.  */

bool
alpha_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case LABEL_REF:
    case HIGH:
      return true;

    case CONST:
      /* Only (const (plus SYMBOL_REF (const_int))) needs a closer
         look; every other CONST is fine.  */
      if (!(GET_CODE (XEXP (x, 0)) == PLUS
            && CONST_INT_P (XEXP (XEXP (x, 0), 1))))
        return true;
      x = XEXP (XEXP (x, 0), 0);
      if (GET_CODE (x) != SYMBOL_REF)
        return true;
      /* FALLTHRU */

    case SYMBOL_REF:
      /* TLS symbols are never valid.  */
      return SYMBOL_REF_TLS_MODEL (x) == 0;

    case CONST_WIDE_INT:
      if (TARGET_BUILD_CONSTANTS || x == CONST0_RTX (mode))
        return true;
      gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2);
      /* Element 1 and then element 0 must each be loadable as a
         DImode constant in at most three insns.  */
      return (alpha_emit_set_const_1 (NULL_RTX, DImode,
                                      CONST_WIDE_INT_ELT (x, 1),
                                      3, true) != NULL
              && alpha_emit_set_const_1 (NULL_RTX, DImode,
                                         CONST_WIDE_INT_ELT (x, 0),
                                         3, true) != NULL);

    case CONST_DOUBLE:
      /* Only zero is accepted.  */
      return x == CONST0_RTX (mode);

    case CONST_VECTOR:
      if (x == CONST0_RTX (mode))
        return true;
      /* Only 8-byte integer vectors are treated as integers below.  */
      if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT
          || GET_MODE_SIZE (mode) != 8)
        return false;
      /* FALLTHRU */

    case CONST_INT:
      if (TARGET_BUILD_CONSTANTS)
        return true;
      return alpha_emit_set_const_1 (NULL_RTX, mode,
                                     alpha_extract_integer (x),
                                     3, true) != NULL;

    default:
      return false;
    }
}
/* Operand 1 is known to be a constant, and should require more than
   one instruction to load.  Emit that multi-part load into operand 0,
   in MODE.  Return true if a sequence was emitted and false if none
   could be found.  */

bool
alpha_split_const_mov (machine_mode mode, rtx *operands)
{
  const HOST_WIDE_INT value = alpha_extract_integer (operands[1]);

  /* First look for a sequence of at most three insns; failing that,
     use the long decomposition when TARGET_BUILD_CONSTANTS allows.  */
  rtx result = alpha_emit_set_const (operands[0], mode, value, 3, false);
  if (result == NULL_RTX && TARGET_BUILD_CONSTANTS)
    result = alpha_emit_set_long_const (operands[0], value);

  if (result == NULL_RTX)
    return false;

  /* The value may have ended up somewhere other than operand 0.  */
  if (!rtx_equal_p (operands[0], result))
    emit_move_insn (operands[0], result);
  return true;
}
/* Expand a move instruction in MODE from operands[1] to operands[0];
   return true if all work is done.  When false is returned,
   operands[1] may have been replaced by a form the caller is expected
   to move.  We don't handle non-bwx subword loads here.  */

bool
alpha_expand_mov (machine_mode mode, rtx *operands)
{
  /* If the output is not a register, the input must be.  */
  if (MEM_P (operands[0]) && !reg_or_0_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);

  /* Allow legitimize_address to perform some simplifications.  */
  if (mode == Pmode && symbolic_operand (operands[1], mode))
    {
      rtx legit = alpha_legitimize_address_1 (operands[1], operands[0], mode);
      if (legit)
        {
          /* Getting operand 0 back means the move has been done.  */
          if (legit == operands[0])
            return true;
          operands[1] = legit;
          return false;
        }
    }

  /* Early out for non-constants and valid constants.  */
  if (!CONSTANT_P (operands[1]) || input_operand (operands[1], mode))
    return false;

  /* Split large integers.  */
  const bool splittable = (CONST_INT_P (operands[1])
                           || GET_CODE (operands[1]) == CONST_VECTOR);
  if (splittable && alpha_split_const_mov (mode, operands))
    return true;

  /* Otherwise we've nothing left but to drop the thing to memory.  */
  rtx pool_mem = force_const_mem (mode, operands[1]);
  if (pool_mem == NULL_RTX)
    return false;

  if (!reload_in_progress)
    operands[1] = validize_mem (pool_mem);
  else
    {
      /* During reload, load the address into operand 0 and then
         address the constant through it.  */
      emit_move_insn (operands[0], XEXP (pool_mem, 0));
      operands[1] = replace_equiv_address (pool_mem, operands[0]);
    }

  return false;
}
/* Expand a non-bwx QImode or HImode move instruction from operands[1]
to operands[0]; return true if all work is done. False is returned
when neither operand is a memory operand, in which case operands[1]
may still have been forced into a register. */
bool
alpha_expand_mov_nobwx (machine_mode mode, rtx *operands)
{
rtx seq;
/* If the output is not a register, the input must be. */
if (MEM_P (operands[0]))
operands[1] = force_reg (mode, operands[1]);
/* Handle four memory cases, unaligned and aligned for either the input
or the output. The only case where we can be called during reload is
for aligned loads; all other cases require temporaries. */
if (any_memory_operand (operands[1], mode))
{
/* Loads. */
if (aligned_memory_operand (operands[1], mode))
{
if (reload_in_progress)
{
seq = gen_reload_in_aligned (mode, operands[0], operands[1]);
emit_insn (seq);
}
else
{
rtx aligned_mem, bitnum;
rtx scratch = gen_reg_rtx (SImode);
rtx subtarget;
bool copyout;
get_aligned_mem (operands[1], &aligned_mem, &bitnum);
/* Load into the DImode view of the destination when it is a
register; otherwise load into a fresh DImode pseudo and copy
the low part out afterwards. */
subtarget = operands[0];
if (REG_P (subtarget))
subtarget = gen_lowpart (DImode, subtarget), copyout = false;
else
subtarget = gen_reg_rtx (DImode), copyout = true;
if (mode == QImode)
seq = gen_aligned_loadqi (subtarget, aligned_mem,
bitnum, scratch);
else
seq = gen_aligned_loadhi (subtarget, aligned_mem,
bitnum, scratch);
emit_insn (seq);
if (copyout)
emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
}
}
else
{
/* Don't pass these as parameters since that makes the generated
code depend on parameter evaluation order which will cause
bootstrap failures. */
rtx temp1, temp2, subtarget, ua;
bool copyout;
temp1 = gen_reg_rtx (DImode);
temp2 = gen_reg_rtx (DImode);
/* Same choice of destination as for the aligned load above. */
subtarget = operands[0];
if (REG_P (subtarget))
subtarget = gen_lowpart (DImode, subtarget), copyout = false;
else
subtarget = gen_reg_rtx (DImode), copyout = true;
ua = get_unaligned_address (operands[1]);
if (mode == QImode)
seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2);
else
seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2);
/* Propagate the memory flags of the source onto the MEMs of the
generated sequence. */
alpha_set_memflags (seq, operands[1]);
emit_insn (seq);
if (copyout)
emit_move_insn (operands[0], gen_lowpart (mode, subtarget));
}
return true;
}
if (any_memory_operand (operands[0], mode))
{
/* Stores. */
if (aligned_memory_operand (operands[0], mode))
{
rtx aligned_mem, bitnum;
rtx temp1 = gen_reg_rtx (SImode);
rtx temp2 = gen_reg_rtx (SImode);
get_aligned_mem (operands[0], &aligned_mem, &bitnum);
emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum,
temp1, temp2));
}
else
{
rtx temp1 = gen_reg_rtx (DImode);
rtx temp2 = gen_reg_rtx (DImode);
rtx temp3 = gen_reg_rtx (DImode);
rtx ua = get_unaligned_address (operands[0]);
seq = gen_unaligned_store
(mode, ua, operands[1], temp1, temp2, temp3);
/* Propagate the memory flags of the destination onto the MEMs of
the generated sequence. */
alpha_set_memflags (seq, operands[0]);
emit_insn (seq);
}
return true;
}
return false;
}
/* Implement the movmisalign patterns.  One of the operands is a memory
   that is not naturally aligned.  Emit instructions to load from it
   when operands[1] is the MEM, or to store to it when operands[0] is;
   any other combination is not expected.  */

void
alpha_expand_movmisalign (machine_mode mode, rtx *operands)
{
  /* Honor misaligned loads, for those we promised to do so.  */
  if (MEM_P (operands[1]))
    {
      /* Load straight into the destination when it is a register
         operand, otherwise through a fresh pseudo.  */
      rtx dest = (register_operand (operands[0], mode)
                  ? operands[0]
                  : gen_reg_rtx (mode));

      alpha_expand_unaligned_load (dest, operands[1], 8, 0, 0);
      if (dest != operands[0])
        emit_move_insn (operands[0], dest);
      return;
    }

  if (!MEM_P (operands[0]))
    gcc_unreachable ();

  /* Misaligned store: the value has to be a register or zero.  */
  if (!reg_or_0_operand (operands[1], mode))
    operands[1] = force_reg (mode, operands[1]);
  alpha_expand_unaligned_store (operands[0], operands[1], 8, 0);
}
/* Generate an unsigned DImode to FP conversion. This is the same code
optabs would emit if we didn't have TFmode patterns.
For SFmode, this is the only construction I've found that can pass
gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode
intermediates will work, because you'll get intermediate rounding
that ruins the end result. Some of this could be fixed by turning
on round-to-positive-infinity, but that requires diddling the fpsr,
which kills performance. I tried turning this around and converting
to a negative number, so that I could turn on /m, but either I did
it wrong or there's something else cause I wound up with the exact
same single-bit error. There is a branch-less form of this same code:
srl $16,1,$1
and $16,1,$2
cmplt $16,0,$3
or $1,$2,$2
cmovge $16,$16,$2
itoft $3,$f10
itoft $2,$f11
cvtqs $f11,$f11
adds $f11,$f11,$f0
fcmoveq $f10,$f11,$f0
I'm not using it because it's the same number of instructions as
this branch-full form, and it has more serialized long latency
instructions on the critical path.
For DFmode, we can avoid rounding errors by breaking up the word
into two pieces, converting them separately, and adding them back:
LC0: .long 0,0x5f800000
itoft $16,$f11
lda $2,LC0
cmplt $16,0,$1
cpyse $f11,$f31,$f10
cpyse $f31,$f11,$f11
s4addq $1,$2,$1
lds $f12,0($1)
cvtqt $f10,$f10
cvtqt $f11,$f11
addt $f12,$f10,$f0
addt $f0,$f11,$f0
This doesn't seem to be a clear-cut win over the optabs form.
It probably all depends on the distribution of numbers being
converted -- in the optabs form, all but high-bit-set has a
much lower minimum execution time. */
void
alpha_emit_floatuns (rtx operands[2])
{
  /* operands[0] is the floating-point destination, operands[1] the
     DImode source, which is forced into a register.  */
  rtx out = operands[0];
  rtx in = force_reg (DImode, operands[1]);
  machine_mode mode = GET_MODE (out);

  rtx negative_label = gen_label_rtx ();
  rtx done_label = gen_label_rtx ();
  rtx halved = gen_reg_rtx (DImode);
  rtx low_bit = gen_reg_rtx (DImode);
  rtx half_fp = gen_reg_rtx (mode);

  /* When the input is not negative as a signed value, a plain FLOAT
     conversion is all that is needed.  */
  emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0,
                           negative_label);

  rtx direct = gen_rtx_SET (out, gen_rtx_FLOAT (mode, in));
  emit_insn (direct);
  emit_jump_insn (gen_jump (done_label));
  emit_barrier ();

  /* Otherwise shift the input right by one, OR the bit shifted out
     back into the low bit, convert that, and add the result to
     itself.  */
  emit_label (negative_label);

  emit_insn (gen_lshrdi3 (halved, in, const1_rtx));
  emit_insn (gen_anddi3 (low_bit, in, const1_rtx));
  emit_insn (gen_iordi3 (halved, halved, low_bit));

  rtx convert_half = gen_rtx_SET (half_fp, gen_rtx_FLOAT (mode, halved));
  emit_insn (convert_half);

  rtx doubled = gen_rtx_SET (out, gen_rtx_PLUS (mode, half_fp, half_fp));
  emit_insn (doubled);

  emit_label (done_label);
}
/* Expand a conditional branch.  OPERANDS[0] is the comparison rtx,
   OPERANDS[1] and OPERANDS[2] are the values being compared and
   OPERANDS[3] is the branch target label.  CMP_MODE is the mode in which
   the comparison is done.  A separate compare insn is emitted first
   unless the branch can test its operand against zero directly.  */
void
alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode)
{
  enum rtx_code cmp_code, branch_code;
  machine_mode branch_mode = VOIDmode;
  enum rtx_code code = GET_CODE (operands[0]);
  rtx lhs = operands[1];
  rtx rhs = operands[2];

  /* A TFmode comparison is replaced by the DImode value returned from
     alpha_emit_xfloating_compare, tested against zero.  */
  if (cmp_mode == TFmode)
    {
      lhs = alpha_emit_xfloating_compare (&code, lhs, rhs);
      rhs = const0_rtx;
      cmp_mode = DImode;
    }

  /* Map the requested comparison onto one of the compares we have and
     pick the branch sense that goes with it.  */
  switch (code)
    {
    case EQ:
    case LE:
    case LT:
    case LEU:
    case LTU:
    case UNORDERED:
      /* Available as is.  */
      cmp_code = code;
      branch_code = NE;
      break;

    case NE:
    case ORDERED:
      /* Use the reversed compare and branch on a zero result.  */
      cmp_code = reverse_condition (code);
      branch_code = EQ;
      break;

    case GE:
    case GT:
    case GEU:
    case GTU:
      /* Integer compares are reversed; FP compares have their operands
         swapped instead.  */
      if (cmp_mode != DFmode)
        {
          cmp_code = reverse_condition (code);
          branch_code = EQ;
        }
      else
        {
          cmp_code = swap_condition (code);
          branch_code = NE;
          std::swap (lhs, rhs);
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (cmp_mode == DFmode)
    {
      if (!flag_unsafe_math_optimizations || cmp_code == UNORDERED)
        {
          /* ??? Mark the branch as CCmode so that the compare and the
             branch are not combined: the compare insn follows IEEE rules
             that the branch does not.  */
          branch_mode = CCmode;
        }
      else if (rhs == CONST0_RTX (DFmode))
        {
          /* Non-finite values are not a concern here and we compare
             against zero, so branch on the operand directly.  */
          cmp_code = UNKNOWN;
          branch_code = code;
        }
      else if (lhs == CONST0_RTX (DFmode))
        {
          /* Same, after undoing the swap probably done above.  */
          std::swap (lhs, rhs);
          branch_code = swap_condition (cmp_code);
          cmp_code = UNKNOWN;
        }
    }
  else
    {
      /* The shortcuts below are only valid for signed comparisons.  */
      const bool unsigned_cmp
        = code == LEU || code == LTU || code == GEU || code == GTU;

      if (!unsigned_cmp)
        {
          if (rhs == const0_rtx)
            {
              /* Compare and branch against zero directly.  */
              cmp_code = UNKNOWN;
              branch_code = code;
            }
          /* If the constant does not fit an immediate but its negation
             can be generated by lda/ldah, adjust the argument and test
             against zero so beq/bne can be used directly.  */
          /* ??? Not done when comparing against symbols, otherwise
             (&x == 0x1234) would become (&x-0x1234 == 0), which will be
             declared false out of hand (at least for non-weak).  */
          else if (CONST_INT_P (rhs)
                   && (code == EQ || code == NE)
                   && !symbolic_operand (lhs, VOIDmode)
                   && !(REG_P (lhs) && REG_POINTER (lhs)))
            {
              rtx negated = GEN_INT (-INTVAL (rhs));

              if (!satisfies_constraint_I (rhs)
                  && (satisfies_constraint_K (negated)
                      || satisfies_constraint_L (negated)))
                {
                  cmp_code = PLUS;
                  branch_code = code;
                  rhs = negated;
                }
            }
        }

      if (!reg_or_0_operand (lhs, DImode))
        lhs = force_reg (DImode, lhs);
      if (cmp_code != PLUS && !reg_or_8bit_operand (rhs, DImode))
        rhs = force_reg (DImode, rhs);
    }

  /* Emit the preliminary compare (or add) when one is required; the
     branch then tests its result instead of LHS.  */
  rtx tested = lhs;
  if (cmp_code != UNKNOWN)
    {
      tested = gen_reg_rtx (cmp_mode);
      emit_move_insn (tested,
                      gen_rtx_fmt_ee (cmp_code, cmp_mode, lhs, rhs));
    }

  /* Emit the branch itself.  */
  rtx jump
    = gen_rtx_SET (pc_rtx,
                   gen_rtx_IF_THEN_ELSE (VOIDmode,
                                         gen_rtx_fmt_ee (branch_code,
                                                         branch_mode,
                                                         tested,
                                                         CONST0_RTX (cmp_mode)),
                                         gen_rtx_LABEL_REF (VOIDmode,
                                                            operands[3]),
                                         pc_rtx));
  emit_jump_insn (jump);
}
/* Expand a setcc, applying the simplifications needed to turn otherwise
   invalid setcc operations into valid ones.  OPERANDS[0] is the
   destination, OPERANDS[1] the comparison rtx, and OPERANDS[2] and
   OPERANDS[3] the values being compared; CMP_MODE is the mode of the
   comparison.  Return true after emitting the insns, or false (emitting
   nothing) for a DFmode comparison when TARGET_FIX is not available.  */
bool
alpha_emit_setcc (rtx operands[], machine_mode cmp_mode)
{
  enum rtx_code code = GET_CODE (operands[1]);
  rtx lhs = operands[2];
  rtx rhs = operands[3];

  /* A TFmode comparison is replaced by the DImode value returned from
     alpha_emit_xfloating_compare, tested against zero.  */
  if (cmp_mode == TFmode)
    {
      lhs = alpha_emit_xfloating_compare (&code, lhs, rhs);
      rhs = const0_rtx;
      cmp_mode = DImode;
    }

  if (cmp_mode == DFmode && !TARGET_FIX)
    return false;

  /* Fold the comparison code to the compares that we have.  CMP_CODE
     stays UNKNOWN when no preliminary compare insn is needed.  */
  enum rtx_code cmp_code = UNKNOWN;
  const bool int_against_zero = cmp_mode == DImode && rhs == const0_rtx;

  switch (code)
    {
    case EQ:
    case LE:
    case LT:
    case LEU:
    case LTU:
    case UNORDERED:
      /* Available as is; for DFmode the compare result is then tested
         for being nonzero.  */
      if (cmp_mode == DFmode)
        {
          cmp_code = code;
          code = NE;
        }
      break;

    case NE:
    case ORDERED:
      /* An integer NE against zero is emitted directly; everything else
         here is done as the reversed compare tested for zero.  */
      if (code == ORDERED || !int_against_zero)
        {
          cmp_code = reverse_condition (code);
          code = EQ;
        }
      break;

    case GE:
    case GT:
    case GEU:
    case GTU:
      /* These normally need swapping, but for integer zero we have
         special patterns that recognize swapped operands.  */
      if (!int_against_zero)
        {
          code = swap_condition (code);
          if (cmp_mode == DFmode)
            {
              cmp_code = code;
              code = NE;
            }
          std::swap (lhs, rhs);
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (cmp_mode == DImode)
    {
      if (!register_operand (lhs, DImode))
        lhs = force_reg (DImode, lhs);
      if (!reg_or_8bit_operand (rhs, DImode))
        rhs = force_reg (DImode, rhs);
    }

  /* Emit the preliminary compare when one is required; the setcc then
     tests its DImode result against zero.  */
  if (cmp_code != UNKNOWN)
    {
      rtx flag = gen_reg_rtx (cmp_mode);
      emit_insn (gen_rtx_SET (flag,
                              gen_rtx_fmt_ee (cmp_code, cmp_mode, lhs, rhs)));

      if (cmp_mode != DImode)
        lhs = gen_lowpart (DImode, flag);
      else
        lhs = flag;
      rhs = const0_rtx;
    }

  /* Emit the setcc instruction.  */
  rtx setcc = gen_rtx_fmt_ee (code, DImode, lhs, rhs);
  emit_insn (gen_rtx_SET (operands[0], setcc));
  return true;
}
/* Rewrite the comparison CMP so that it can be used as the condition of
   a conditional move (if_then_else CMP ...) whose data mode is MODE.

   When one operand is zero and the comparison is a signed one that the
   conditional move can test itself, the comparison is returned in that
   form.  Otherwise a compare insn is emitted and the returned rtx tests
   its result against zero.  Return a null rtx when the comparison and
   the move are in different register classes but TARGET_FIX is not
   available, or when a compare insn is needed but pseudos can no longer
   be created.  */
rtx
alpha_emit_conditional_move (rtx cmp, machine_mode mode)
{
  enum rtx_code code = GET_CODE (cmp);
  enum rtx_code cmov_code = NE;
  rtx lhs = XEXP (cmp, 0);
  rtx rhs = XEXP (cmp, 1);
  machine_mode cmp_mode = GET_MODE (lhs);
  if (cmp_mode == VOIDmode)
    cmp_mode = DImode;
  machine_mode cmov_mode = VOIDmode;
  int relaxed_fp = flag_unsafe_math_optimizations;
  rtx cmp_result;

  /* A TFmode comparison is replaced by the DImode value returned from
     alpha_emit_xfloating_compare, tested against zero.  */
  if (cmp_mode == TFmode)
    {
      lhs = alpha_emit_xfloating_compare (&code, lhs, rhs);
      rhs = const0_rtx;
      cmp_mode = DImode;
    }

  gcc_assert (cmp_mode == DFmode || cmp_mode == DImode);

  if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode))
    {
      enum rtx_code cmp_code;

      if (!TARGET_FIX)
        return NULL_RTX;

      /* If we have fp<->int register move instructions, do a cmov by
         performing the comparison in fp registers, and move the
         zero/nonzero value to integer registers, where we can then
         use a normal cmov, or vice-versa.  */
      switch (code)
        {
        case EQ:
        case LE:
        case LT:
        case LEU:
        case LTU:
        case UNORDERED:
          /* Available as is.  */
          cmp_code = code;
          code = NE;
          break;

        case NE:
        case ORDERED:
          /* Use the reversed compare and test for a zero result.  */
          cmp_code = reverse_condition (code);
          code = EQ;
          break;

        case GE:
        case GT:
        case GEU:
        case GTU:
          /* These normally need swapping, but for integer zero we have
             special patterns that recognize swapped operands.  */
          if (cmp_mode != DImode || rhs != const0_rtx)
            {
              cmp_code = swap_condition (code);
              std::swap (lhs, rhs);
            }
          else
            cmp_code = code;
          code = NE;
          break;

        default:
          gcc_unreachable ();
        }

      if (cmp_mode == DImode)
        {
          if (!reg_or_0_operand (lhs, DImode))
            lhs = force_reg (DImode, lhs);
          if (!reg_or_8bit_operand (rhs, DImode))
            rhs = force_reg (DImode, rhs);
        }

      cmp_result = gen_reg_rtx (cmp_mode);
      emit_insn (gen_rtx_SET (cmp_result,
                              gen_rtx_fmt_ee (cmp_code, cmp_mode, lhs, rhs)));

      /* From here on the comparison is "compare result vs. zero", viewed
         in the other register class's mode.  */
      if (cmp_mode == DImode)
        cmp_mode = E_DFmode;
      else
        cmp_mode = E_DImode;
      lhs = gen_lowpart (cmp_mode, cmp_result);
      rhs = CONST0_RTX (cmp_mode);
      cmp = gen_rtx_fmt_ee (code, VOIDmode, lhs, rhs);
      relaxed_fp = 1;
    }

  if (cmp_mode == DImode)
    {
      if (!reg_or_0_operand (lhs, DImode))
        lhs = force_reg (DImode, lhs);
      if (!reg_or_8bit_operand (rhs, DImode))
        rhs = force_reg (DImode, rhs);
    }

  /* We may be able to use a conditional move directly.
     This avoids emitting spurious compares.  */
  const bool has_zero_operand
    = lhs == CONST0_RTX (cmp_mode) || rhs == CONST0_RTX (cmp_mode);
  if (signed_comparison_operator (cmp, VOIDmode)
      && (cmp_mode == DImode || relaxed_fp)
      && has_zero_operand)
    return gen_rtx_fmt_ee (code, VOIDmode, lhs, rhs);

  /* We can't put the comparison inside the conditional move; emit a
     compare instruction and put that inside the conditional move.
     Make sure we emit only comparisons we have; swap or reverse as
     necessary.  */
  if (!can_create_pseudo_p ())
    return NULL_RTX;

  switch (code)
    {
    case EQ:
    case LE:
    case LT:
    case LEU:
    case LTU:
    case UNORDERED:
      /* Available as is.  */
      break;

    case NE:
    case ORDERED:
      /* Use the reversed compare and let the cmov test for zero.  */
      code = reverse_condition (code);
      cmov_code = EQ;
      break;

    case GE:
    case GT:
    case GEU:
    case GTU:
      /* These normally need swapping, but for integer zero we have
         special patterns that recognize swapped operands.  */
      if (cmp_mode != DImode || rhs != const0_rtx)
        {
          code = swap_condition (code);
          std::swap (lhs, rhs);
        }
      break;

    default:
      gcc_unreachable ();
    }

  if (cmp_mode == DImode)
    {
      if (!reg_or_0_operand (lhs, DImode))
        lhs = force_reg (DImode, lhs);
      if (!reg_or_8bit_operand (rhs, DImode))
        rhs = force_reg (DImode, rhs);
    }

  /* ??? We mark the branch mode to be CCmode to prevent the compare
     and cmov from being combined, since the compare insn follows IEEE
     rules that the cmov does not.  */
  if (cmp_mode == DFmode && !relaxed_fp)
    cmov_mode = CCmode;

  cmp_result = gen_reg_rtx (cmp_mode);
  emit_move_insn (cmp_result, gen_rtx_fmt_ee (code, cmp_mode, lhs, rhs));
  return gen_rtx_fmt_ee (cmov_code, cmov_mode, cmp_result,
                         CONST0_RTX (cmp_mode));
}
/* Simplify a conditional move of two constants into a setcc with
arithmetic. This is done with a splitter since combine would
just undo the work if done during code generation. It also catches
cases we wouldn't have before cse. */
int
alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
rtx t_rtx, rtx f_rtx)
{
HOST_WIDE_INT t, f, diff;
machine_mode mode;
rtx target, subtarget, tmp;
mode = GET_MODE (dest);
t = INTVAL (t_rtx);
f = INTVAL (f_rtx);
diff = t - f;
if (((code == NE || code == EQ) && diff < 0)
|| (code == GE || code == GT))
{
code = reverse_condition (code);
std::swap (t, f);
diff = -diff;
}
subtarget = target = dest;
if (mode != DImode)
{
target = gen_lowpart (DImode, dest);
if (can_create_pseudo_p ())
subtarget = gen_reg_rtx (DImode);
else
subtarget = target;
}
/* Below, we must be careful to use copy_rtx on target and subtarget
in intermediate insns, as they may be a subreg rtx, which may not
be shared. */
if (f == 0 && exact_log2 (diff) > 0
/* On EV6, we've got enough shifters to make non-arithmetic shifts
viable over a longer latency cmove. On EV5, the E0 slot is a
scarce resource, and on EV4 shift has the same latency as a cmove. */
&& (diff <= 8 || alpha_tune == PROCESSOR_EV6))
{
tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
GEN_INT (exact_log2 (t)));
emit_insn (gen_rtx_SET (target, tmp));
}
else if (f == 0 && t == -1)
{
tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
emit_insn (gen_negdi2 (target, copy_rtx (subtarget)));
}
else if (diff == 1 || diff == 4 || diff == 8)
{
rtx add_op;
tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx);
emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp));
if (diff == 1)
emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f)));
else
{
add_op = GEN_INT (f);
if (sext_add_operand (add_op, mode))
{
tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget),
GEN_INT (exact_log2 (diff)));
tmp = gen_rtx_PLUS (DImode, tmp, add_op);
emit_insn (gen_rtx_SET (target, tmp));
}
else
return 0;
}
}
else
return 0;
return 1;
}
/* Description of one X_floating library routine, as used to look up
the library function name for a given operation.  */
struct GTY(()) xfloating_op
{
/* RTX code of the operation; this is the key matched during lookup.  */
const enum rtx_code code;
/* Routine name used when not targeting the OpenVMS ABI.  */
const char *const GTY((skip)) osf_func;
/* Routine name used when TARGET_ABI_OPEN_VMS.  */
const char *const GTY((skip)) vms_func;
/* Libfunc rtx for the routine; zero until the entry is first looked up,
then cached here.  */
rtx libcall;
};
/* Main table of X_floating library routines.  Each row gives the rtx
code, the OSF routine name, the VMS routine name, and the libcall slot,
which starts out as zero.  */
static GTY(()) struct xfloating_op xfloating_ops[] =
{
{ PLUS, "_OtsAddX", "OTS$ADD_X", 0 },
{ MINUS, "_OtsSubX", "OTS$SUB_X", 0 },
{ MULT, "_OtsMulX", "OTS$MUL_X", 0 },
{ DIV, "_OtsDivX", "OTS$DIV_X", 0 },
{ EQ, "_OtsEqlX", "OTS$EQL_X", 0 },
{ NE, "_OtsNeqX", "OTS$NEQ_X", 0 },
{ LT, "_OtsLssX", "OTS$LSS_X", 0 },
{ LE, "_OtsLeqX", "OTS$LEQ_X", 0 },
{ GT, "_OtsGtrX", "OTS$GTR_X", 0 },
{ GE, "_OtsGeqX", "OTS$GEQ_X", 0 },
{ FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 },
{ FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 },
{ UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 },
{ FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 },
{ FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 }
};
/* Conversion routines searched instead of the main table for
FLOAT_EXTEND and FLOAT_TRUNCATE when TARGET_FLOAT_VAX is set.  */
static GTY(()) struct xfloating_op vax_cvt_ops[] =
{
{ FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 },
{ FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 }
};
/* Return the libfunc rtx of the X_floating library routine that
   implements CODE, creating and caching it on first use.  CODE must be
   present in the table that is searched.  */
static rtx
alpha_lookup_xfloating_lib_func (enum rtx_code code)
{
  gcc_assert (TARGET_HAS_XFLOATING_LIBS);

  /* How irritating.  Nothing to key off for the main table, so the VAX
     conversions live in a table of their own.  */
  struct xfloating_op *table;
  long count;
  if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE))
    {
      table = vax_cvt_ops;
      count = ARRAY_SIZE (vax_cvt_ops);
    }
  else
    {
      table = xfloating_ops;
      count = ARRAY_SIZE (xfloating_ops);
    }

  for (long idx = 0; idx < count; idx++)
    {
      struct xfloating_op *entry = &table[idx];
      if (entry->code != code)
        continue;

      /* Create the libfunc the first time this entry is requested.  */
      if (!entry->libcall)
        {
          const char *name
            = TARGET_ABI_OPEN_VMS ? entry->vms_func : entry->osf_func;
          entry->libcall = init_one_libfunc (name);
        }
      return entry->libcall;
    }

  gcc_unreachable ();
}
/* Most X_floating operations take the rounding mode as an argument.
   Return that argument for operation CODE under rounding mode ROUND.  */
static int
alpha_compute_xfloating_mode_arg (enum rtx_code code,
                                  enum alpha_fp_rounding_mode round)
{
  int rounding_arg;

  /* XXX For reference, round to +inf is mode = 3.  */
  switch (round)
    {
    case ALPHA_FPRM_CHOP:
      rounding_arg = 0;
      break;
    case ALPHA_FPRM_MINF:
      rounding_arg = 1;
      break;
    case ALPHA_FPRM_NORM:
      rounding_arg = 2;
      break;
    case ALPHA_FPRM_DYN:
      rounding_arg = 4;
      break;
    default:
      gcc_unreachable ();
    }

  /* FLOAT_TRUNCATE with alpha_fptm == ALPHA_FPTM_N additionally sets
     bit 16.  NOTE(review): the meaning of this bit to the library
     routine is not visible here -- confirm against the OTS docs.  */
  const bool set_bit_16
    = code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N;

  return set_bit_16 ? (rounding_arg | 0x10000) : rounding_arg;
}
/* Emit an X_floating library function call.

   Note that these functions do not follow normal calling conventions:
   TFmode arguments are passed in two integer registers (as opposed to
   indirect); TFmode return values appear in R16+R17.

   FUNC is the function to call.
   TARGET is where the output belongs.
   OPERANDS are the inputs.
   NOPERANDS is the count of inputs.
   EQUIV is the expression equivalent for the function.  */
static void
alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[],
                              int noperands, rtx equiv)
{
  rtx usage = NULL_RTX;
  int next_regno = 16;

  start_sequence ();

  /* Load each argument into its hard register, numbering from 16, and
     record the register as used by the call.  */
  for (int argno = 0; argno < noperands; argno++)
    {
      rtx arg = operands[argno];
      rtx arg_reg;

      switch (GET_MODE (arg))
        {
        case E_TFmode:
          /* Takes two consecutive register numbers.  */
          arg_reg = gen_rtx_REG (TFmode, next_regno);
          next_regno += 2;
          break;

        case E_DFmode:
          /* Uses the register 32 above the current argument number.  */
          arg_reg = gen_rtx_REG (DFmode, next_regno + 32);
          next_regno += 1;
          break;

        case E_VOIDmode:
          gcc_assert (CONST_INT_P (arg));
          /* FALLTHRU */
        case E_DImode:
          arg_reg = gen_rtx_REG (DImode, next_regno);
          next_regno += 1;
          break;

        default:
          gcc_unreachable ();
        }

      emit_move_insn (arg_reg, arg);
      use_reg (&usage, arg_reg);
    }

  /* Choose the hard register in which the value comes back, based on
     the mode of TARGET.  */
  rtx value_reg;
  switch (GET_MODE (target))
    {
    case E_TFmode:
      value_reg = gen_rtx_REG (TFmode, 16);
      break;
    case E_DFmode:
      value_reg = gen_rtx_REG (DFmode, 32);
      break;
    case E_DImode:
      value_reg = gen_rtx_REG (DImode, 0);
      break;
    default:
      gcc_unreachable ();
    }

  /* Emit the call, attach the argument-register uses and flag it as a
     const call.  */
  rtx callee = gen_rtx_MEM (QImode, func);
  rtx_insn *call_insn
    = emit_call_insn (gen_call_value (value_reg, callee, const0_rtx,
                                      const0_rtx, const0_rtx));
  CALL_INSN_FUNCTION_USAGE (call_insn) = usage;
  RTL_CONST_CALL_P (call_insn) = 1;

  /* Close the sequence and emit it as a libcall block that copies
     VALUE_REG into TARGET, with EQUIV as the equivalent expression.  */
  rtx_insn *sequence = get_insns ();
  end_sequence ();

  emit_libcall_block (sequence, target, value_reg, equiv);
}