| /* Subroutines used for code generation on the DEC Alpha. |
| Copyright (C) 1992-2022 Free Software Foundation, Inc. |
| Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| |
| #define IN_TARGET_CODE 1 |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "stringpool.h" |
| #include "attribs.h" |
| #include "memmodel.h" |
| #include "gimple.h" |
| #include "df.h" |
| #include "predict.h" |
| #include "tm_p.h" |
| #include "ssa.h" |
| #include "expmed.h" |
| #include "optabs.h" |
| #include "regs.h" |
| #include "emit-rtl.h" |
| #include "recog.h" |
| #include "diagnostic-core.h" |
| #include "alias.h" |
| #include "fold-const.h" |
| #include "stor-layout.h" |
| #include "calls.h" |
| #include "varasm.h" |
| #include "output.h" |
| #include "insn-attr.h" |
| #include "explow.h" |
| #include "expr.h" |
| #include "reload.h" |
| #include "except.h" |
| #include "common/common-target.h" |
| #include "debug.h" |
| #include "langhooks.h" |
| #include "cfgrtl.h" |
| #include "tree-pass.h" |
| #include "context.h" |
| #include "gimple-iterator.h" |
| #include "gimplify.h" |
| #include "tree-stdarg.h" |
| #include "tm-constrs.h" |
| #include "libfuncs.h" |
| #include "builtins.h" |
| #include "rtl-iter.h" |
| #include "flags.h" |
| #include "opts.h" |
| |
| /* This file should be included last. */ |
| #include "target-def.h" |
| |
| /* Specify which cpu to schedule for. */ |
| enum processor_type alpha_tune; |
| |
| /* Which cpu we're generating code for. */ |
| enum processor_type alpha_cpu; |
| |
| static const char * const alpha_cpu_name[] = |
| { |
| "ev4", "ev5", "ev6" |
| }; |
| |
| /* Specify how accurate floating-point traps need to be. */ |
| |
| enum alpha_trap_precision alpha_tp; |
| |
| /* Specify the floating-point rounding mode. */ |
| |
| enum alpha_fp_rounding_mode alpha_fprm; |
| |
| /* Specify which things cause traps. */ |
| |
| enum alpha_fp_trap_mode alpha_fptm; |
| |
| /* Nonzero if inside of a function, because the Alpha asm can't |
| handle .files inside of functions. */ |
| |
| static int inside_function = FALSE; |
| |
| /* The number of cycles of latency we should assume on memory reads. */ |
| |
| static int alpha_memory_latency = 3; |
| |
| /* Whether the function needs the GP. */ |
| |
| static int alpha_function_needs_gp; |
| |
| /* The assembler name of the current function. */ |
| |
| static const char *alpha_fnname; |
| |
| /* The next explicit relocation sequence number. */ |
| extern GTY(()) int alpha_next_sequence_number; |
| int alpha_next_sequence_number = 1; |
| |
| /* The literal and gpdisp sequence numbers for this insn, as printed |
| by %# and %* respectively. */ |
| extern GTY(()) int alpha_this_literal_sequence_number; |
| extern GTY(()) int alpha_this_gpdisp_sequence_number; |
| int alpha_this_literal_sequence_number; |
| int alpha_this_gpdisp_sequence_number; |
| |
| /* Costs of various operations on the different architectures. */ |
| |
| struct alpha_rtx_cost_data |
| { |
| unsigned char fp_add; |
| unsigned char fp_mult; |
| unsigned char fp_div_sf; |
| unsigned char fp_div_df; |
| unsigned char int_mult_si; |
| unsigned char int_mult_di; |
| unsigned char int_shift; |
| unsigned char int_cmov; |
| unsigned short int_div; |
| }; |
| |
| static struct alpha_rtx_cost_data const alpha_rtx_cost_data[PROCESSOR_MAX] = |
| { |
| { /* EV4 */ |
| COSTS_N_INSNS (6), /* fp_add */ |
| COSTS_N_INSNS (6), /* fp_mult */ |
| COSTS_N_INSNS (34), /* fp_div_sf */ |
| COSTS_N_INSNS (63), /* fp_div_df */ |
| COSTS_N_INSNS (23), /* int_mult_si */ |
| COSTS_N_INSNS (23), /* int_mult_di */ |
| COSTS_N_INSNS (2), /* int_shift */ |
| COSTS_N_INSNS (2), /* int_cmov */ |
| COSTS_N_INSNS (97), /* int_div */ |
| }, |
| { /* EV5 */ |
| COSTS_N_INSNS (4), /* fp_add */ |
| COSTS_N_INSNS (4), /* fp_mult */ |
| COSTS_N_INSNS (15), /* fp_div_sf */ |
| COSTS_N_INSNS (22), /* fp_div_df */ |
| COSTS_N_INSNS (8), /* int_mult_si */ |
| COSTS_N_INSNS (12), /* int_mult_di */ |
| COSTS_N_INSNS (1) + 1, /* int_shift */ |
| COSTS_N_INSNS (1), /* int_cmov */ |
| COSTS_N_INSNS (83), /* int_div */ |
| }, |
| { /* EV6 */ |
| COSTS_N_INSNS (4), /* fp_add */ |
| COSTS_N_INSNS (4), /* fp_mult */ |
| COSTS_N_INSNS (12), /* fp_div_sf */ |
| COSTS_N_INSNS (15), /* fp_div_df */ |
| COSTS_N_INSNS (7), /* int_mult_si */ |
| COSTS_N_INSNS (7), /* int_mult_di */ |
| COSTS_N_INSNS (1), /* int_shift */ |
| COSTS_N_INSNS (2), /* int_cmov */ |
| COSTS_N_INSNS (86), /* int_div */ |
| }, |
| }; |
| |
| /* Similar but tuned for code size instead of execution latency. The |
| extra +N is fractional cost tuning based on latency. It's used to |
| encourage use of cheaper insns like shift, but only if there's just |
| one of them. */ |
| |
| static struct alpha_rtx_cost_data const alpha_rtx_cost_size = |
| { |
| COSTS_N_INSNS (1), /* fp_add */ |
| COSTS_N_INSNS (1), /* fp_mult */ |
| COSTS_N_INSNS (1), /* fp_div_sf */ |
| COSTS_N_INSNS (1) + 1, /* fp_div_df */ |
| COSTS_N_INSNS (1) + 1, /* int_mult_si */ |
| COSTS_N_INSNS (1) + 2, /* int_mult_di */ |
| COSTS_N_INSNS (1), /* int_shift */ |
| COSTS_N_INSNS (1), /* int_cmov */ |
| COSTS_N_INSNS (6), /* int_div */ |
| }; |
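| |
| /* For example, int_mult_si above is COSTS_N_INSNS (1) + 1, so, roughly, |
| when optimizing for size a multiply is still replaced by a single |
| cheaper insn such as a shift, but not by a two-insn sequence. */ |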
| |
| /* Get the number of args of a function in one of two ways. */ |
| #if TARGET_ABI_OPEN_VMS |
| #define NUM_ARGS crtl->args.info.num_args |
| #else |
| #define NUM_ARGS crtl->args.info |
| #endif |
| |
| #define REG_PV 27 |
| #define REG_RA 26 |
| |
| /* Declarations of static functions. */ |
| static struct machine_function *alpha_init_machine_status (void); |
| static rtx alpha_emit_xfloating_compare (enum rtx_code *, rtx, rtx); |
| static void alpha_handle_trap_shadows (void); |
| static void alpha_align_insns (void); |
| static void alpha_override_options_after_change (void); |
| |
| #if TARGET_ABI_OPEN_VMS |
| static void alpha_write_linkage (FILE *, const char *); |
| static bool vms_valid_pointer_mode (scalar_int_mode); |
| #else |
| #define vms_patch_builtins() gcc_unreachable() |
| #endif |
| |
| static unsigned int |
| rest_of_handle_trap_shadows (void) |
| { |
| alpha_handle_trap_shadows (); |
| return 0; |
| } |
| |
| namespace { |
| |
| const pass_data pass_data_handle_trap_shadows = |
| { |
| RTL_PASS, |
| "trap_shadows", /* name */ |
| OPTGROUP_NONE, /* optinfo_flags */ |
| TV_NONE, /* tv_id */ |
| 0, /* properties_required */ |
| 0, /* properties_provided */ |
| 0, /* properties_destroyed */ |
| 0, /* todo_flags_start */ |
| TODO_df_finish, /* todo_flags_finish */ |
| }; |
| |
| class pass_handle_trap_shadows : public rtl_opt_pass |
| { |
| public: |
| pass_handle_trap_shadows(gcc::context *ctxt) |
| : rtl_opt_pass(pass_data_handle_trap_shadows, ctxt) |
| {} |
| |
| /* opt_pass methods: */ |
| virtual bool gate (function *) |
| { |
| return alpha_tp != ALPHA_TP_PROG || flag_exceptions; |
| } |
| |
| virtual unsigned int execute (function *) |
| { |
| return rest_of_handle_trap_shadows (); |
| } |
| |
| }; // class pass_handle_trap_shadows |
| |
| } // anon namespace |
| |
| rtl_opt_pass * |
| make_pass_handle_trap_shadows (gcc::context *ctxt) |
| { |
| return new pass_handle_trap_shadows (ctxt); |
| } |
| |
| static unsigned int |
| rest_of_align_insns (void) |
| { |
| alpha_align_insns (); |
| return 0; |
| } |
| |
| namespace { |
| |
| const pass_data pass_data_align_insns = |
| { |
| RTL_PASS, |
| "align_insns", /* name */ |
| OPTGROUP_NONE, /* optinfo_flags */ |
| TV_NONE, /* tv_id */ |
| 0, /* properties_required */ |
| 0, /* properties_provided */ |
| 0, /* properties_destroyed */ |
| 0, /* todo_flags_start */ |
| TODO_df_finish, /* todo_flags_finish */ |
| }; |
| |
| class pass_align_insns : public rtl_opt_pass |
| { |
| public: |
| pass_align_insns(gcc::context *ctxt) |
| : rtl_opt_pass(pass_data_align_insns, ctxt) |
| {} |
| |
| /* opt_pass methods: */ |
| virtual bool gate (function *) |
| { |
| /* Due to the number of extra trapb insns, don't bother fixing up |
| alignment when trap precision is instruction. Moreover, we can |
| only do our job when sched2 is run. */ |
| return ((alpha_tune == PROCESSOR_EV4 |
| || alpha_tune == PROCESSOR_EV5) |
| && optimize && !optimize_size |
| && alpha_tp != ALPHA_TP_INSN |
| && flag_schedule_insns_after_reload); |
| } |
| |
| virtual unsigned int execute (function *) |
| { |
| return rest_of_align_insns (); |
| } |
| |
| }; // class pass_align_insns |
| |
| } // anon namespace |
| |
| rtl_opt_pass * |
| make_pass_align_insns (gcc::context *ctxt) |
| { |
| return new pass_align_insns (ctxt); |
| } |
| |
| #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING |
| /* Implement TARGET_MANGLE_TYPE. */ |
| |
| static const char * |
| alpha_mangle_type (const_tree type) |
| { |
| if (TYPE_MAIN_VARIANT (type) == long_double_type_node |
| && TARGET_LONG_DOUBLE_128) |
| return "g"; |
| |
| /* For all other types, use normal C++ mangling. */ |
| return NULL; |
| } |
| #endif |
| |
| /* Parse target option strings. */ |
| |
| static void |
| alpha_option_override (void) |
| { |
| static const struct cpu_table { |
| const char *const name; |
| const enum processor_type processor; |
| const int flags; |
| const unsigned short line_size; /* in bytes */ |
| const unsigned short l1_size; /* in kb. */ |
| const unsigned short l2_size; /* in kb. */ |
| } cpu_table[] = { |
| /* EV4/LCA45 had 8k L1 caches; EV45 had 16k L1 caches. |
| EV4/EV45 had 128k to 16M 32-byte direct Bcache. LCA45 |
| had 64k to 8M 8-byte direct Bcache. */ |
| { "ev4", PROCESSOR_EV4, 0, 32, 8, 8*1024 }, |
| { "21064", PROCESSOR_EV4, 0, 32, 8, 8*1024 }, |
| { "ev45", PROCESSOR_EV4, 0, 32, 16, 16*1024 }, |
| |
| /* EV5 or EV56 had 8k 32 byte L1, 96k 32 or 64 byte L2, |
| and 1M to 16M 64 byte L3 (not modeled). |
| PCA56 had a 16k 64-byte Icache; PCA57 had a 32k Icache. |
| PCA56 had an 8k 64-byte Dcache; PCA57 had a 16k Dcache. */ |
| { "ev5", PROCESSOR_EV5, 0, 32, 8, 96 }, |
| { "21164", PROCESSOR_EV5, 0, 32, 8, 96 }, |
| { "ev56", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 }, |
| { "21164a", PROCESSOR_EV5, MASK_BWX, 32, 8, 96 }, |
| { "pca56", PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 }, |
| { "21164PC",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 }, |
| { "21164pc",PROCESSOR_EV5, MASK_BWX|MASK_MAX, 64, 16, 4*1024 }, |
| |
| /* EV6 had 64k 64 byte L1, 1M to 16M Bcache. */ |
| { "ev6", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 }, |
| { "21264", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX, 64, 64, 16*1024 }, |
| { "ev67", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX, |
| 64, 64, 16*1024 }, |
| { "21264a", PROCESSOR_EV6, MASK_BWX|MASK_MAX|MASK_FIX|MASK_CIX, |
| 64, 64, 16*1024 } |
| }; |
| |
| int const ct_size = ARRAY_SIZE (cpu_table); |
| int line_size = 0, l1_size = 0, l2_size = 0; |
| int i; |
| |
| #ifdef SUBTARGET_OVERRIDE_OPTIONS |
| SUBTARGET_OVERRIDE_OPTIONS; |
| #endif |
| |
| /* Default to full IEEE compliance mode for Go language. */ |
| if (strcmp (lang_hooks.name, "GNU Go") == 0 |
| && !(target_flags_explicit & MASK_IEEE)) |
| target_flags |= MASK_IEEE; |
| |
| alpha_fprm = ALPHA_FPRM_NORM; |
| alpha_tp = ALPHA_TP_PROG; |
| alpha_fptm = ALPHA_FPTM_N; |
| |
| if (TARGET_IEEE) |
| { |
| alpha_tp = ALPHA_TP_INSN; |
| alpha_fptm = ALPHA_FPTM_SU; |
| } |
| if (TARGET_IEEE_WITH_INEXACT) |
| { |
| alpha_tp = ALPHA_TP_INSN; |
| alpha_fptm = ALPHA_FPTM_SUI; |
| } |
| |
| if (alpha_tp_string) |
| { |
| if (! strcmp (alpha_tp_string, "p")) |
| alpha_tp = ALPHA_TP_PROG; |
| else if (! strcmp (alpha_tp_string, "f")) |
| alpha_tp = ALPHA_TP_FUNC; |
| else if (! strcmp (alpha_tp_string, "i")) |
| alpha_tp = ALPHA_TP_INSN; |
| else |
| error ("bad value %qs for %<-mtrap-precision%> switch", |
| alpha_tp_string); |
| } |
| |
| if (alpha_fprm_string) |
| { |
| if (! strcmp (alpha_fprm_string, "n")) |
| alpha_fprm = ALPHA_FPRM_NORM; |
| else if (! strcmp (alpha_fprm_string, "m")) |
| alpha_fprm = ALPHA_FPRM_MINF; |
| else if (! strcmp (alpha_fprm_string, "c")) |
| alpha_fprm = ALPHA_FPRM_CHOP; |
| else if (! strcmp (alpha_fprm_string,"d")) |
| alpha_fprm = ALPHA_FPRM_DYN; |
| else |
| error ("bad value %qs for %<-mfp-rounding-mode%> switch", |
| alpha_fprm_string); |
| } |
| |
| if (alpha_fptm_string) |
| { |
| if (strcmp (alpha_fptm_string, "n") == 0) |
| alpha_fptm = ALPHA_FPTM_N; |
| else if (strcmp (alpha_fptm_string, "u") == 0) |
| alpha_fptm = ALPHA_FPTM_U; |
| else if (strcmp (alpha_fptm_string, "su") == 0) |
| alpha_fptm = ALPHA_FPTM_SU; |
| else if (strcmp (alpha_fptm_string, "sui") == 0) |
| alpha_fptm = ALPHA_FPTM_SUI; |
| else |
| error ("bad value %qs for %<-mfp-trap-mode%> switch", |
| alpha_fptm_string); |
| } |
| |
| if (alpha_cpu_string) |
| { |
| for (i = 0; i < ct_size; i++) |
| if (! strcmp (alpha_cpu_string, cpu_table [i].name)) |
| { |
| alpha_tune = alpha_cpu = cpu_table[i].processor; |
| line_size = cpu_table[i].line_size; |
| l1_size = cpu_table[i].l1_size; |
| l2_size = cpu_table[i].l2_size; |
| target_flags &= ~ (MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX); |
| target_flags |= cpu_table[i].flags; |
| break; |
| } |
| if (i == ct_size) |
| error ("bad value %qs for %<-mcpu%> switch", alpha_cpu_string); |
| } |
| |
| if (alpha_tune_string) |
| { |
| for (i = 0; i < ct_size; i++) |
| if (! strcmp (alpha_tune_string, cpu_table [i].name)) |
| { |
| alpha_tune = cpu_table[i].processor; |
| line_size = cpu_table[i].line_size; |
| l1_size = cpu_table[i].l1_size; |
| l2_size = cpu_table[i].l2_size; |
| break; |
| } |
| if (i == ct_size) |
| error ("bad value %qs for %<-mtune%> switch", alpha_tune_string); |
| } |
| |
| if (line_size) |
| SET_OPTION_IF_UNSET (&global_options, &global_options_set, |
| param_l1_cache_line_size, line_size); |
| if (l1_size) |
| SET_OPTION_IF_UNSET (&global_options, &global_options_set, |
| param_l1_cache_size, l1_size); |
| if (l2_size) |
| SET_OPTION_IF_UNSET (&global_options, &global_options_set, |
| param_l2_cache_size, l2_size); |
| |
| /* Do some sanity checks on the above options. */ |
| |
| if ((alpha_fptm == ALPHA_FPTM_SU || alpha_fptm == ALPHA_FPTM_SUI) |
| && alpha_tp != ALPHA_TP_INSN && alpha_cpu != PROCESSOR_EV6) |
| { |
| warning (0, "fp software completion requires %<-mtrap-precision=i%>"); |
| alpha_tp = ALPHA_TP_INSN; |
| } |
| |
| if (alpha_cpu == PROCESSOR_EV6) |
| { |
| /* Except for EV6 pass 1 (not released), we always have precise |
| arithmetic traps, which means we can do software completion |
| without minding trap shadows. */ |
| alpha_tp = ALPHA_TP_PROG; |
| } |
| |
| if (TARGET_FLOAT_VAX) |
| { |
| if (alpha_fprm == ALPHA_FPRM_MINF || alpha_fprm == ALPHA_FPRM_DYN) |
| { |
| warning (0, "rounding mode not supported for VAX floats"); |
| alpha_fprm = ALPHA_FPRM_NORM; |
| } |
| if (alpha_fptm == ALPHA_FPTM_SUI) |
| { |
| warning (0, "trap mode not supported for VAX floats"); |
| alpha_fptm = ALPHA_FPTM_SU; |
| } |
| if (target_flags_explicit & MASK_LONG_DOUBLE_128) |
| warning (0, "128-bit %<long double%> not supported for VAX floats"); |
| target_flags &= ~MASK_LONG_DOUBLE_128; |
| } |
| |
| { |
| char *end; |
| int lat; |
| |
| if (!alpha_mlat_string) |
| alpha_mlat_string = "L1"; |
| |
| if (ISDIGIT ((unsigned char)alpha_mlat_string[0]) |
| && (lat = strtol (alpha_mlat_string, &end, 10), *end == '\0')) |
| ; |
| else if ((alpha_mlat_string[0] == 'L' || alpha_mlat_string[0] == 'l') |
| && ISDIGIT ((unsigned char)alpha_mlat_string[1]) |
| && alpha_mlat_string[2] == '\0') |
| { |
| static int const cache_latency[][4] = |
| { |
| { 3, 30, -1 }, /* ev4 -- Bcache is a guess */ |
| { 2, 12, 38 }, /* ev5 -- Bcache from PC164 LMbench numbers */ |
| { 3, 12, 30 }, /* ev6 -- Bcache from DS20 LMbench. */ |
| }; |
| |
| lat = alpha_mlat_string[1] - '0'; |
| if (lat <= 0 || lat > 3 || cache_latency[alpha_tune][lat-1] == -1) |
| { |
| warning (0, "L%d cache latency unknown for %s", |
| lat, alpha_cpu_name[alpha_tune]); |
| lat = 3; |
| } |
| else |
| lat = cache_latency[alpha_tune][lat-1]; |
| } |
| else if (! strcmp (alpha_mlat_string, "main")) |
| { |
| /* Most current memories have about 370ns latency. This is |
| a reasonable guess for a fast cpu. */ |
| lat = 150; |
| } |
| else |
| { |
| warning (0, "bad value %qs for %<-mmemory-latency%>", |
| alpha_mlat_string); |
| lat = 3; |
| } |
| |
| alpha_memory_latency = lat; |
| } |
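| |
| /* For example, -mtune=ev6 -mmemory-latency=L2 yields a latency of 12 |
| cycles from the table above, while a plain number such as |
| -mmemory-latency=5 is used directly. */ |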
| |
| /* Default the definition of "small data" to 8 bytes. */ |
| if (!OPTION_SET_P (g_switch_value)) |
| g_switch_value = 8; |
| |
| /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */ |
| if (flag_pic == 1) |
| target_flags |= MASK_SMALL_DATA; |
| else if (flag_pic == 2) |
| target_flags &= ~MASK_SMALL_DATA; |
| |
| alpha_override_options_after_change (); |
| |
| /* Register variables and functions with the garbage collector. */ |
| |
| /* Set up function hooks. */ |
| init_machine_status = alpha_init_machine_status; |
| |
| /* Tell the compiler when we're using VAX floating point. */ |
| if (TARGET_FLOAT_VAX) |
| { |
| REAL_MODE_FORMAT (SFmode) = &vax_f_format; |
| REAL_MODE_FORMAT (DFmode) = &vax_g_format; |
| REAL_MODE_FORMAT (TFmode) = NULL; |
| } |
| |
| #ifdef TARGET_DEFAULT_LONG_DOUBLE_128 |
| if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) |
| target_flags |= MASK_LONG_DOUBLE_128; |
| #endif |
| |
| } |
| |
| /* Implement targetm.override_options_after_change. */ |
| |
| static void |
| alpha_override_options_after_change (void) |
| { |
| /* Align labels and loops for optimal branching. */ |
| /* ??? Kludge these by not doing anything if we don't optimize. */ |
| if (optimize > 0) |
| { |
| if (flag_align_loops && !str_align_loops) |
| str_align_loops = "16"; |
| if (flag_align_jumps && !str_align_jumps) |
| str_align_jumps = "16"; |
| } |
| if (flag_align_functions && !str_align_functions) |
| str_align_functions = "16"; |
| } |
| |
| /* Returns 1 if VALUE is a mask that contains full bytes of zeros or ones. */ |
| |
| int |
| zap_mask (HOST_WIDE_INT value) |
| { |
| int i; |
| |
| for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; |
| i++, value >>= 8) |
| if ((value & 0xff) != 0 && (value & 0xff) != 0xff) |
| return 0; |
| |
| return 1; |
| } |
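| |
| /* For example, 0xffffffff00000000 and 0x00ff00ff00ff00ff are zap masks, |
| while 0x0000000000001234 is not, since the bytes 0x12 and 0x34 are |
| neither 0x00 nor 0xff. */ |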
| |
| /* Return true if OP is valid for a particular TLS relocation. |
| We are already guaranteed that OP is a CONST. */ |
| |
| int |
| tls_symbolic_operand_1 (rtx op, int size, int unspec) |
| { |
| op = XEXP (op, 0); |
| |
| if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec) |
| return 0; |
| op = XVECEXP (op, 0, 0); |
| |
| if (GET_CODE (op) != SYMBOL_REF) |
| return 0; |
| |
| switch (SYMBOL_REF_TLS_MODEL (op)) |
| { |
| case TLS_MODEL_LOCAL_DYNAMIC: |
| return unspec == UNSPEC_DTPREL && size == alpha_tls_size; |
| case TLS_MODEL_INITIAL_EXEC: |
| return unspec == UNSPEC_TPREL && size == 64; |
| case TLS_MODEL_LOCAL_EXEC: |
| return unspec == UNSPEC_TPREL && size == alpha_tls_size; |
| default: |
| gcc_unreachable (); |
| } |
| } |
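| |
| /* For example, an initial-exec or local-exec reference arrives here as |
| (const (unspec [(symbol_ref "foo")] UNSPEC_TPREL)), and a local-dynamic |
| reference as (const (unspec [(symbol_ref "foo")] UNSPEC_DTPREL)). */ |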
| |
| /* Used by aligned_memory_operand and unaligned_memory_operand to |
| resolve what reload is going to do with OP if it's a register. */ |
| |
| rtx |
| resolve_reload_operand (rtx op) |
| { |
| if (reload_in_progress) |
| { |
| rtx tmp = op; |
| if (SUBREG_P (tmp)) |
| tmp = SUBREG_REG (tmp); |
| if (REG_P (tmp) |
| && REGNO (tmp) >= FIRST_PSEUDO_REGISTER) |
| { |
| op = reg_equiv_memory_loc (REGNO (tmp)); |
| if (op == 0) |
| return 0; |
| } |
| } |
| return op; |
| } |
| |
| /* The set of scalar modes we support differs from the default |
| check-what-c-supports version in that TFmode is sometimes available even |
| when long double indicates only DFmode. */ |
| |
| static bool |
| alpha_scalar_mode_supported_p (scalar_mode mode) |
| { |
| switch (mode) |
| { |
| case E_QImode: |
| case E_HImode: |
| case E_SImode: |
| case E_DImode: |
| case E_TImode: /* via optabs.cc */ |
| return true; |
| |
| case E_SFmode: |
| case E_DFmode: |
| return true; |
| |
| case E_TFmode: |
| return TARGET_HAS_XFLOATING_LIBS; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* Alpha implements a couple of integer vector mode operations when |
| TARGET_MAX is enabled. We do not check TARGET_MAX here, however, |
| which allows the vectorizer to operate on e.g. move instructions, |
| or when expand_vector_operations can do something useful. */ |
| |
| static bool |
| alpha_vector_mode_supported_p (machine_mode mode) |
| { |
| return mode == V8QImode || mode == V4HImode || mode == V2SImode; |
| } |
| |
| /* Return the TLS model to use for SYMBOL. */ |
| |
| static enum tls_model |
| tls_symbolic_operand_type (rtx symbol) |
| { |
| enum tls_model model; |
| |
| if (GET_CODE (symbol) != SYMBOL_REF) |
| return TLS_MODEL_NONE; |
| model = SYMBOL_REF_TLS_MODEL (symbol); |
| |
| /* Local-exec with a 64-bit size is the same code as initial-exec. */ |
| if (model == TLS_MODEL_LOCAL_EXEC && alpha_tls_size == 64) |
| model = TLS_MODEL_INITIAL_EXEC; |
| |
| return model; |
| } |
| |
| /* Return true if the function DECL will share the same GP as any |
| function in the current unit of translation. */ |
| |
| static bool |
| decl_has_samegp (const_tree decl) |
| { |
| /* Functions that are not local can be overridden, and thus may |
| not share the same gp. */ |
| if (!(*targetm.binds_local_p) (decl)) |
| return false; |
| |
| /* If -msmall-data is in effect, assume that there is only one GP |
| for the module, and so any local symbol has this property. We |
| need explicit relocations to be able to enforce this for symbols |
| not defined in this unit of translation, however. */ |
| if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) |
| return true; |
| |
| /* Functions that are not external are defined in this UoT. */ |
| /* ??? Irritatingly, static functions not yet emitted are still |
| marked "external". Apply this to non-static functions only. */ |
| return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl); |
| } |
| |
| /* Return true if EXP should be placed in the small data section. */ |
| |
| static bool |
| alpha_in_small_data_p (const_tree exp) |
| { |
| /* We want to merge strings, so we never consider them small data. */ |
| if (TREE_CODE (exp) == STRING_CST) |
| return false; |
| |
| /* Functions are never in the small data area. Duh. */ |
| if (TREE_CODE (exp) == FUNCTION_DECL) |
| return false; |
| |
| /* COMMON symbols are never small data. */ |
| if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp)) |
| return false; |
| |
| if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) |
| { |
| const char *section = DECL_SECTION_NAME (exp); |
| if (strcmp (section, ".sdata") == 0 |
| || strcmp (section, ".sbss") == 0) |
| return true; |
| } |
| else |
| { |
| HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); |
| |
| /* If this is an incomplete type with size 0, then we can't put it |
| in sdata because it might be too big when completed. */ |
| if (size > 0 && size <= g_switch_value) |
| return true; |
| } |
| |
| return false; |
| } |
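| |
| /* For example, with the default small-data limit of 8 bytes set in |
| alpha_option_override, a 4-byte scalar goes into the small data area |
| while a 16-byte aggregate does not; objects explicitly placed in .sdata |
| or .sbss always qualify. */ |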
| |
| #if TARGET_ABI_OPEN_VMS |
| static bool |
| vms_valid_pointer_mode (scalar_int_mode mode) |
| { |
| return (mode == SImode || mode == DImode); |
| } |
| |
| static bool |
| alpha_linkage_symbol_p (const char *symname) |
| { |
| int symlen = strlen (symname); |
| |
| if (symlen > 4) |
| return strcmp (&symname [symlen - 4], "..lk") == 0; |
| |
| return false; |
| } |
| |
| #define LINKAGE_SYMBOL_REF_P(X) \ |
| ((GET_CODE (X) == SYMBOL_REF \ |
| && alpha_linkage_symbol_p (XSTR (X, 0))) \ |
| || (GET_CODE (X) == CONST \ |
| && GET_CODE (XEXP (X, 0)) == PLUS \ |
| && GET_CODE (XEXP (XEXP (X, 0), 0)) == SYMBOL_REF \ |
| && alpha_linkage_symbol_p (XSTR (XEXP (XEXP (X, 0), 0), 0)))) |
| #endif |
| |
| /* legitimate_address_p recognizes an RTL expression that is a valid |
| memory address for an instruction. The MODE argument is the |
| machine mode for the MEM expression that wants to use this address. |
| |
| For Alpha, we have either a constant address or the sum of a |
| register and a constant address, or just a register. For DImode, |
| any of those forms can be surrounded with an AND that clears the |
| low-order three bits; this is an "unaligned" access. */ |
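| |
| /* For example, (reg $16), (const_int 4096) and |
| (plus (reg $16) (const_int 64)) are all valid, and for DImode the ldq_u |
| form (and (plus (reg $16) (const_int 64)) (const_int -8)) is also |
| accepted. */ |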
| |
| static bool |
| alpha_legitimate_address_p (machine_mode mode, rtx x, bool strict) |
| { |
| /* If this is an ldq_u type address, discard the outer AND. */ |
| if (mode == DImode |
| && GET_CODE (x) == AND |
| && CONST_INT_P (XEXP (x, 1)) |
| && INTVAL (XEXP (x, 1)) == -8) |
| x = XEXP (x, 0); |
| |
| /* Discard non-paradoxical subregs. */ |
| if (SUBREG_P (x) |
| && (GET_MODE_SIZE (GET_MODE (x)) |
| < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) |
| x = SUBREG_REG (x); |
| |
| /* Unadorned general registers are valid. */ |
| if (REG_P (x) |
| && (strict |
| ? STRICT_REG_OK_FOR_BASE_P (x) |
| : NONSTRICT_REG_OK_FOR_BASE_P (x))) |
| return true; |
| |
| /* Constant addresses (i.e. +/- 32k) are valid. */ |
| if (CONSTANT_ADDRESS_P (x)) |
| return true; |
| |
| #if TARGET_ABI_OPEN_VMS |
| if (LINKAGE_SYMBOL_REF_P (x)) |
| return true; |
| #endif |
| |
| /* Register plus a small constant offset is valid. */ |
| if (GET_CODE (x) == PLUS) |
| { |
| rtx ofs = XEXP (x, 1); |
| x = XEXP (x, 0); |
| |
| /* Discard non-paradoxical subregs. */ |
| if (SUBREG_P (x) |
| && (GET_MODE_SIZE (GET_MODE (x)) |
| < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) |
| x = SUBREG_REG (x); |
| |
| if (REG_P (x)) |
| { |
| if (! strict |
| && NONSTRICT_REG_OK_FP_BASE_P (x) |
| && CONST_INT_P (ofs)) |
| return true; |
| if ((strict |
| ? STRICT_REG_OK_FOR_BASE_P (x) |
| : NONSTRICT_REG_OK_FOR_BASE_P (x)) |
| && CONSTANT_ADDRESS_P (ofs)) |
| return true; |
| } |
| } |
| |
| /* If we're managing explicit relocations, LO_SUM is valid, as are small |
| data symbols. Avoid explicit relocations of modes larger than word |
| mode since e.g. $LC0+8($1) can fold around +/- 32k offset. */ |
| else if (TARGET_EXPLICIT_RELOCS |
| && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) |
| { |
| if (small_symbolic_operand (x, Pmode)) |
| return true; |
| |
| if (GET_CODE (x) == LO_SUM) |
| { |
| rtx ofs = XEXP (x, 1); |
| x = XEXP (x, 0); |
| |
| /* Discard non-paradoxical subregs. */ |
| if (SUBREG_P (x) |
| && (GET_MODE_SIZE (GET_MODE (x)) |
| < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) |
| x = SUBREG_REG (x); |
| |
| /* Must have a valid base register. */ |
| if (! (REG_P (x) |
| && (strict |
| ? STRICT_REG_OK_FOR_BASE_P (x) |
| : NONSTRICT_REG_OK_FOR_BASE_P (x)))) |
| return false; |
| |
| /* The symbol must be local. */ |
| if (local_symbolic_operand (ofs, Pmode) |
| || dtp32_symbolic_operand (ofs, Pmode) |
| || tp32_symbolic_operand (ofs, Pmode)) |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /* Build the SYMBOL_REF for __tls_get_addr. */ |
| |
| static GTY(()) rtx tls_get_addr_libfunc; |
| |
| static rtx |
| get_tls_get_addr (void) |
| { |
| if (!tls_get_addr_libfunc) |
| tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); |
| return tls_get_addr_libfunc; |
| } |
| |
| /* Try machine-dependent ways of modifying an illegitimate address |
| to be legitimate. If we find one, return the new, valid address. */ |
| |
| static rtx |
| alpha_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode) |
| { |
| HOST_WIDE_INT addend; |
| |
| /* If the address is (plus reg const_int) and the CONST_INT is not a |
| valid offset, compute the high part of the constant and add it to |
| the register. Then our address is (plus temp low-part-const). */ |
| if (GET_CODE (x) == PLUS |
| && REG_P (XEXP (x, 0)) |
| && CONST_INT_P (XEXP (x, 1)) |
| && ! CONSTANT_ADDRESS_P (XEXP (x, 1))) |
| { |
| addend = INTVAL (XEXP (x, 1)); |
| x = XEXP (x, 0); |
| goto split_addend; |
| } |
| |
| /* If the address is (const (plus FOO const_int)), find the low-order |
| part of the CONST_INT. Then load FOO plus any high-order part of the |
| CONST_INT into a register. Our address is (plus reg low-part-const). |
| This is done to reduce the number of GOT entries. */ |
| if (can_create_pseudo_p () |
| && GET_CODE (x) == CONST |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1))) |
| { |
| addend = INTVAL (XEXP (XEXP (x, 0), 1)); |
| x = force_reg (Pmode, XEXP (XEXP (x, 0), 0)); |
| goto split_addend; |
| } |
| |
| /* If we have a (plus reg const), emit the load as in (2), then add |
| the two registers, and finally generate (plus reg low-part-const) as |
| our address. */ |
| if (can_create_pseudo_p () |
| && GET_CODE (x) == PLUS |
| && REG_P (XEXP (x, 0)) |
| && GET_CODE (XEXP (x, 1)) == CONST |
| && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS |
| && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1))) |
| { |
| addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1)); |
| x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0), |
| XEXP (XEXP (XEXP (x, 1), 0), 0), |
| NULL_RTX, 1, OPTAB_LIB_WIDEN); |
| goto split_addend; |
| } |
| |
| /* If this is a local symbol, split the address into HIGH/LO_SUM parts. |
| Avoid modes larger than word mode since e.g. $LC0+8($1) can fold |
| around +/- 32k offset. */ |
| if (TARGET_EXPLICIT_RELOCS |
| && GET_MODE_SIZE (mode) <= UNITS_PER_WORD |
| && symbolic_operand (x, Pmode)) |
| { |
| rtx r0, r16, eqv, tga, tp, dest, seq; |
| rtx_insn *insn; |
| |
| switch (tls_symbolic_operand_type (x)) |
| { |
| case TLS_MODEL_NONE: |
| break; |
| |
| case TLS_MODEL_GLOBAL_DYNAMIC: |
| { |
| start_sequence (); |
| |
| r0 = gen_rtx_REG (Pmode, 0); |
| r16 = gen_rtx_REG (Pmode, 16); |
| tga = get_tls_get_addr (); |
| dest = gen_reg_rtx (Pmode); |
| seq = GEN_INT (alpha_next_sequence_number++); |
| |
| emit_insn (gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq)); |
| rtx val = gen_call_value_osf_tlsgd (r0, tga, seq); |
| insn = emit_call_insn (val); |
| RTL_CONST_CALL_P (insn) = 1; |
| use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); |
| |
| insn = get_insns (); |
| end_sequence (); |
| |
| emit_libcall_block (insn, dest, r0, x); |
| return dest; |
| } |
| |
| case TLS_MODEL_LOCAL_DYNAMIC: |
| { |
| start_sequence (); |
| |
| r0 = gen_rtx_REG (Pmode, 0); |
| r16 = gen_rtx_REG (Pmode, 16); |
| tga = get_tls_get_addr (); |
| scratch = gen_reg_rtx (Pmode); |
| seq = GEN_INT (alpha_next_sequence_number++); |
| |
| emit_insn (gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq)); |
| rtx val = gen_call_value_osf_tlsldm (r0, tga, seq); |
| insn = emit_call_insn (val); |
| RTL_CONST_CALL_P (insn) = 1; |
| use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); |
| |
| insn = get_insns (); |
| end_sequence (); |
| |
| eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), |
| UNSPEC_TLSLDM_CALL); |
| emit_libcall_block (insn, scratch, r0, eqv); |
| |
| eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL); |
| eqv = gen_rtx_CONST (Pmode, eqv); |
| |
| if (alpha_tls_size == 64) |
| { |
| dest = gen_reg_rtx (Pmode); |
| emit_insn (gen_rtx_SET (dest, eqv)); |
| emit_insn (gen_adddi3 (dest, dest, scratch)); |
| return dest; |
| } |
| if (alpha_tls_size == 32) |
| { |
| rtx temp = gen_rtx_HIGH (Pmode, eqv); |
| temp = gen_rtx_PLUS (Pmode, scratch, temp); |
| scratch = gen_reg_rtx (Pmode); |
| emit_insn (gen_rtx_SET (scratch, temp)); |
| } |
| return gen_rtx_LO_SUM (Pmode, scratch, eqv); |
| } |
| |
| case TLS_MODEL_INITIAL_EXEC: |
| eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); |
| eqv = gen_rtx_CONST (Pmode, eqv); |
| tp = gen_reg_rtx (Pmode); |
| scratch = gen_reg_rtx (Pmode); |
| dest = gen_reg_rtx (Pmode); |
| |
| emit_insn (gen_get_thread_pointerdi (tp)); |
| emit_insn (gen_rtx_SET (scratch, eqv)); |
| emit_insn (gen_adddi3 (dest, tp, scratch)); |
| return dest; |
| |
| case TLS_MODEL_LOCAL_EXEC: |
| eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); |
| eqv = gen_rtx_CONST (Pmode, eqv); |
| tp = gen_reg_rtx (Pmode); |
| |
| emit_insn (gen_get_thread_pointerdi (tp)); |
| if (alpha_tls_size == 32) |
| { |
| rtx temp = gen_rtx_HIGH (Pmode, eqv); |
| temp = gen_rtx_PLUS (Pmode, tp, temp); |
| tp = gen_reg_rtx (Pmode); |
| emit_insn (gen_rtx_SET (tp, temp)); |
| } |
| return gen_rtx_LO_SUM (Pmode, tp, eqv); |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (local_symbolic_operand (x, Pmode)) |
| { |
| if (small_symbolic_operand (x, Pmode)) |
| return x; |
| else |
| { |
| if (can_create_pseudo_p ()) |
| scratch = gen_reg_rtx (Pmode); |
| emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x))); |
| return gen_rtx_LO_SUM (Pmode, scratch, x); |
| } |
| } |
| } |
| |
| return NULL; |
| |
| split_addend: |
| { |
| HOST_WIDE_INT low, high; |
| |
| low = ((addend & 0xffff) ^ 0x8000) - 0x8000; |
| addend -= low; |
| high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000; |
| addend -= high; |
| |
| if (addend) |
| x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend), |
| (!can_create_pseudo_p () ? scratch : NULL_RTX), |
| 1, OPTAB_LIB_WIDEN); |
| if (high) |
| x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high), |
| (!can_create_pseudo_p () ? scratch : NULL_RTX), |
| 1, OPTAB_LIB_WIDEN); |
| |
| return plus_constant (Pmode, x, low); |
| } |
| } |
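| |
| /* As an illustration of the split_addend case above: legitimizing |
| (plus (reg) (const_int 0x1234abcd)) computes low = -0x5433 and |
| high = 0x12350000; the high part is added to the register first (a |
| single ldah, since its low 16 bits are zero) and the address returned |
| is (plus (reg tmp) (const_int -0x5433)). */ |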
| |
| |
| /* Try machine-dependent ways of modifying an illegitimate address |
| to be legitimate. Return X or the new, valid address. */ |
| |
| static rtx |
| alpha_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, |
| machine_mode mode) |
| { |
| rtx new_x = alpha_legitimize_address_1 (x, NULL_RTX, mode); |
| return new_x ? new_x : x; |
| } |
| |
| /* Return true if ADDR has an effect that depends on the machine mode it |
| is used for. On the Alpha this is true only for the unaligned modes. |
| We can simplify the test since we know that the address must be valid. */ |
| |
| static bool |
| alpha_mode_dependent_address_p (const_rtx addr, |
| addr_space_t as ATTRIBUTE_UNUSED) |
| { |
| return GET_CODE (addr) == AND; |
| } |
| |
| /* Primarily this is required for TLS symbols, but given that our move |
| patterns *ought* to be able to handle any symbol at any time, we |
| should never be spilling symbolic operands to the constant pool, ever. */ |
| |
| static bool |
| alpha_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) |
| { |
| enum rtx_code code = GET_CODE (x); |
| return code == SYMBOL_REF || code == LABEL_REF || code == CONST; |
| } |
| |
| /* We do not allow indirect calls to be optimized into sibling calls, nor |
| can we allow a call to a function with a different GP to be optimized |
| into a sibcall. */ |
| |
| static bool |
| alpha_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) |
| { |
| /* Can't do indirect tail calls, since we don't know if the target |
| uses the same GP. */ |
| if (!decl) |
| return false; |
| |
| /* Otherwise, we can make a tail call if the target function shares |
| the same GP. */ |
| return decl_has_samegp (decl); |
| } |
| |
| bool |
| some_small_symbolic_operand_int (rtx x) |
| { |
| subrtx_var_iterator::array_type array; |
| FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) |
| { |
| rtx x = *iter; |
| /* Don't re-split. */ |
| if (GET_CODE (x) == LO_SUM) |
| iter.skip_subrtxes (); |
| else if (small_symbolic_operand (x, Pmode)) |
| return true; |
| } |
| return false; |
| } |
| |
| rtx |
| split_small_symbolic_operand (rtx x) |
| { |
| x = copy_insn (x); |
| subrtx_ptr_iterator::array_type array; |
| FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL) |
| { |
| rtx *ptr = *iter; |
| rtx x = *ptr; |
| /* Don't re-split. */ |
| if (GET_CODE (x) == LO_SUM) |
| iter.skip_subrtxes (); |
| else if (small_symbolic_operand (x, Pmode)) |
| { |
| *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x); |
| iter.skip_subrtxes (); |
| } |
| } |
| return x; |
| } |
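| |
| /* For example, (mem (symbol_ref "x")) for a small-data symbol is |
| rewritten as (mem (lo_sum (reg $29) (symbol_ref "x"))), where $29 is |
| pic_offset_table_rtx, i.e. the GP. */ |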
| |
| /* Indicate that INSN cannot be duplicated. This is true for any insn |
| that we've marked with gpdisp relocs, since those have to stay in |
| 1-1 correspondence with one another. |
| |
| Technically we could copy them if we could set up a mapping from one |
| sequence number to another, across the set of insns to be duplicated. |
| This seems overly complicated and error-prone since interblock motion |
| from sched-ebb could move one of the pair of insns to a different block. |
| |
| Also cannot allow jsr insns to be duplicated. If they throw exceptions, |
| then they'll be in a different block from their ldgp. Which could lead |
| the bb reorder code to think that it would be ok to copy just the block |
| containing the call and branch to the block containing the ldgp. */ |
| |
| static bool |
| alpha_cannot_copy_insn_p (rtx_insn *insn) |
| { |
| if (!reload_completed || !TARGET_EXPLICIT_RELOCS) |
| return false; |
| if (recog_memoized (insn) >= 0) |
| return get_attr_cannot_copy (insn); |
| else |
| return false; |
| } |
| |
| |
| /* Try a machine-dependent way of reloading an illegitimate address |
| operand. If we find one, push the reload and return the new rtx. */ |
| |
| rtx |
| alpha_legitimize_reload_address (rtx x, |
| machine_mode mode ATTRIBUTE_UNUSED, |
| int opnum, int type, |
| int ind_levels ATTRIBUTE_UNUSED) |
| { |
| /* We must recognize output that we have already generated ourselves. */ |
| if (GET_CODE (x) == PLUS |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && REG_P (XEXP (XEXP (x, 0), 0)) |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1)) |
| && CONST_INT_P (XEXP (x, 1))) |
| { |
| push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, |
| BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, |
| opnum, (enum reload_type) type); |
| return x; |
| } |
| |
| /* We wish to handle large displacements off a base register by |
| splitting the addend across an ldah and the mem insn. This |
| cuts the number of extra insns needed from 3 to 1. |
| if (GET_CODE (x) == PLUS |
| && REG_P (XEXP (x, 0)) |
| && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER |
| && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0))) |
| && CONST_INT_P (XEXP (x, 1))) |
| { |
| HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); |
| HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; |
| HOST_WIDE_INT high |
| = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000; |
| |
| /* Check for 32-bit overflow. */ |
| if (high + low != val) |
| return NULL_RTX; |
| |
| /* Reload the high part into a base reg; leave the low part |
| in the mem directly. */ |
| x = gen_rtx_PLUS (GET_MODE (x), |
| gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), |
| GEN_INT (high)), |
| GEN_INT (low)); |
| |
| push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, |
| BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0, |
| opnum, (enum reload_type) type); |
| return x; |
| } |
| |
| return NULL_RTX; |
| } |
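| |
| /* For example, a frame access at (plus (reg $30) (const_int 0x12345)) is |
| rewritten above as (plus (plus (reg $30) (const_int 0x10000)) |
| (const_int 0x2345)); reload materializes the inner PLUS with a single |
| ldah and the low part 0x2345 stays in the memory displacement. */ |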
| |
| /* Return the cost of moving between registers of various classes. Moving |
| between FLOAT_REGS and anything else except float regs is expensive. |
| In fact, we make it quite expensive because we really don't want to |
| do these moves unless it is clearly worth it. Optimizations may |
| reduce the impact of not being able to allocate a pseudo to a |
| hard register. */ |
| |
| static int |
| alpha_register_move_cost (machine_mode /*mode*/, |
| reg_class_t from, reg_class_t to) |
| { |
| if ((from == FLOAT_REGS) == (to == FLOAT_REGS)) |
| return 2; |
| |
| if (TARGET_FIX) |
| return (from == FLOAT_REGS) ? 6 : 8; |
| |
| return 4 + 2 * alpha_memory_latency; |
| } |
| |
| /* Return the cost of moving data of MODE from a register to |
| or from memory. On the Alpha, bump this up a bit. */ |
| |
| static int |
| alpha_memory_move_cost (machine_mode /*mode*/, reg_class_t /*regclass*/, |
| bool /*in*/) |
| { |
| return 2 * alpha_memory_latency; |
| } |
| |
| /* Compute a (partial) cost for rtx X. Return true if the complete |
| cost has been computed, and false if subexpressions should be |
| scanned. In either case, *TOTAL contains the cost result. */ |
| |
| static bool |
| alpha_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total, |
| bool speed) |
| { |
| int code = GET_CODE (x); |
| bool float_mode_p = FLOAT_MODE_P (mode); |
| const struct alpha_rtx_cost_data *cost_data; |
| |
| if (!speed) |
| cost_data = &alpha_rtx_cost_size; |
| else |
| cost_data = &alpha_rtx_cost_data[alpha_tune]; |
| |
| switch (code) |
| { |
| case CONST_INT: |
| /* If this is an 8-bit constant, return zero since it can be used |
| nearly anywhere with no cost. If it is a valid operand for an |
| ADD or AND, likewise return 0 if we know it will be used in that |
| context. Otherwise, return 2 since it might be used there later. |
| All other constants take at least two insns. */ |
| if (INTVAL (x) >= 0 && INTVAL (x) < 256) |
| { |
| *total = 0; |
| return true; |
| } |
| /* FALLTHRU */ |
| |
| case CONST_DOUBLE: |
| case CONST_WIDE_INT: |
| if (x == CONST0_RTX (mode)) |
| *total = 0; |
| else if ((outer_code == PLUS && add_operand (x, VOIDmode)) |
| || (outer_code == AND && and_operand (x, VOIDmode))) |
| *total = 0; |
| else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode)) |
| *total = 2; |
| else |
| *total = COSTS_N_INSNS (2); |
| return true; |
| |
| case CONST: |
| case SYMBOL_REF: |
| case LABEL_REF: |
| if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode)) |
| *total = COSTS_N_INSNS (outer_code != MEM); |
| else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode)) |
| *total = COSTS_N_INSNS (1 + (outer_code != MEM)); |
| else if (tls_symbolic_operand_type (x)) |
| /* Estimate of cost for call_pal rduniq. */ |
| /* ??? How many insns do we emit here? More than one... */ |
| *total = COSTS_N_INSNS (15); |
| else |
| /* Otherwise we do a load from the GOT. */ |
| *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency); |
| return true; |
| |
| case HIGH: |
| /* This is effectively an add_operand. */ |
| *total = 2; |
| return true; |
| |
| case PLUS: |
| case MINUS: |
| if (float_mode_p) |
| *total = cost_data->fp_add; |
| else if (GET_CODE (XEXP (x, 0)) == ASHIFT |
| && const23_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) |
| { |
| *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode, |
| (enum rtx_code) outer_code, opno, speed) |
| + rtx_cost (XEXP (x, 1), mode, |
| (enum rtx_code) outer_code, opno, speed) |
| + COSTS_N_INSNS (1)); |
| return true; |
| } |
| return false; |
| |
| case MULT: |
| if (float_mode_p) |
| *total = cost_data->fp_mult; |
| else if (mode == DImode) |
| *total = cost_data->int_mult_di; |
| else |
| *total = cost_data->int_mult_si; |
| return false; |
| |
| case ASHIFT: |
| if (CONST_INT_P (XEXP (x, 1)) |
| && INTVAL (XEXP (x, 1)) <= 3) |
| { |
| *total = COSTS_N_INSNS (1); |
| return false; |
| } |
| /* FALLTHRU */ |
| |
| case ASHIFTRT: |
| case LSHIFTRT: |
| *total = cost_data->int_shift; |
| return false; |
| |
| case IF_THEN_ELSE: |
| if (float_mode_p) |
| *total = cost_data->fp_add; |
| else |
| *total = cost_data->int_cmov; |
| return false; |
| |
| case DIV: |
| case UDIV: |
| case MOD: |
| case UMOD: |
| if (!float_mode_p) |
| *total = cost_data->int_div; |
| else if (mode == SFmode) |
| *total = cost_data->fp_div_sf; |
| else |
| *total = cost_data->fp_div_df; |
| return false; |
| |
| case MEM: |
| *total = COSTS_N_INSNS (!speed ? 1 : alpha_memory_latency); |
| return true; |
| |
| case NEG: |
| if (! float_mode_p) |
| { |
| *total = COSTS_N_INSNS (1); |
| return false; |
| } |
| /* FALLTHRU */ |
| |
| case ABS: |
| if (! float_mode_p) |
| { |
| *total = COSTS_N_INSNS (1) + cost_data->int_cmov; |
| return false; |
| } |
| /* FALLTHRU */ |
| |
| case FLOAT: |
| case UNSIGNED_FLOAT: |
| case FIX: |
| case UNSIGNED_FIX: |
| case FLOAT_TRUNCATE: |
| *total = cost_data->fp_add; |
| return false; |
| |
| case FLOAT_EXTEND: |
| if (MEM_P (XEXP (x, 0))) |
| *total = 0; |
| else |
| *total = cost_data->fp_add; |
| return false; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* REF is an alignable memory location. Place an aligned SImode |
| reference into *PALIGNED_MEM and the number of bits to shift into |
| *PBITNUM. */ |
| |
| void |
| get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum) |
| { |
| rtx base; |
| HOST_WIDE_INT disp, offset; |
| |
| gcc_assert (MEM_P (ref)); |
| |
| if (reload_in_progress) |
| { |
| base = find_replacement (&XEXP (ref, 0)); |
| gcc_assert (memory_address_p (GET_MODE (ref), base)); |
| } |
| else |
| base = XEXP (ref, 0); |
| |
| if (GET_CODE (base) == PLUS) |
| disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0); |
| else |
| disp = 0; |
| |
| /* Find the byte offset within an aligned word. If the memory itself is |
| claimed to be aligned, believe it. Otherwise, aligned_memory_operand |
| will have examined the base register and determined it is aligned, and |
| thus displacements from it are naturally alignable. */ |
| if (MEM_ALIGN (ref) >= 32) |
| offset = 0; |
| else |
| offset = disp & 3; |
| |
| /* The location should not cross an aligned word boundary. */ |
| gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref)) |
| <= GET_MODE_SIZE (SImode)); |
| |
| /* Access the entire aligned word. */ |
| *paligned_mem = widen_memory_access (ref, SImode, -offset); |
| |
| /* Convert the byte offset within the word to a bit offset. */ |
| offset *= BITS_PER_UNIT; |
| *pbitnum = GEN_INT (offset); |
| } |
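| |
| /* For example, for an HImode reference at 6($16) whose base register is |
| known to be aligned, OFFSET is 2, *PALIGNED_MEM is the SImode word at |
| 4($16) and *PBITNUM is 16. */ |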
| |
| /* Similar, but just get the address. Handle the two reload cases. */ |
| |
| rtx |
| get_unaligned_address (rtx ref) |
| { |
| rtx base; |
| HOST_WIDE_INT offset = 0; |
| |
| gcc_assert (MEM_P (ref)); |
| |
| if (reload_in_progress) |
| { |
| base = find_replacement (&XEXP (ref, 0)); |
| gcc_assert (memory_address_p (GET_MODE (ref), base)); |
| } |
| else |
| base = XEXP (ref, 0); |
| |
| if (GET_CODE (base) == PLUS) |
| offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0); |
| |
| return plus_constant (Pmode, base, offset); |
| } |
| |
| /* Compute a value X, such that X & 7 == (ADDR + OFS) & 7. |
| X is always returned in a register. */ |
| |
| rtx |
| get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs) |
| { |
| if (GET_CODE (addr) == PLUS) |
| { |
| ofs += INTVAL (XEXP (addr, 1)); |
| addr = XEXP (addr, 0); |
| } |
| |
| return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7), |
| NULL_RTX, 1, OPTAB_LIB_WIDEN); |
| } |
| |
| /* On the Alpha, all (non-symbolic) constants except zero go into |
| a floating-point register via memory. Note that we cannot |
| return anything that is not a subset of RCLASS, and that some |
| symbolic constants cannot be dropped to memory. */ |
| |
| enum reg_class |
| alpha_preferred_reload_class(rtx x, enum reg_class rclass) |
| { |
| /* Zero is present in any register class. */ |
| if (x == CONST0_RTX (GET_MODE (x))) |
| return rclass; |
| |
| /* These sorts of constants we can easily drop to memory. */ |
| if (CONST_SCALAR_INT_P (x) |
| || CONST_DOUBLE_P (x) |
| || GET_CODE (x) == CONST_VECTOR) |
| { |
| if (rclass == FLOAT_REGS) |
| return NO_REGS; |
| if (rclass == ALL_REGS) |
| return GENERAL_REGS; |
| return rclass; |
| } |
| |
| /* All other kinds of constants should not (and in the case of HIGH |
| cannot) be dropped to memory -- instead we use a GENERAL_REGS |
| secondary reload. */ |
| if (CONSTANT_P (x)) |
| return (rclass == ALL_REGS ? GENERAL_REGS : rclass); |
| |
| return rclass; |
| } |
| |
| /* Inform reload about cases where moving X with a mode MODE to a register in |
| RCLASS requires an extra scratch or immediate register. Return the class |
| needed for the immediate register. */ |
| |
| static reg_class_t |
| alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, |
| machine_mode mode, secondary_reload_info *sri) |
| { |
| enum reg_class rclass = (enum reg_class) rclass_i; |
| |
| /* Loading and storing HImode or QImode values to and from memory |
| usually requires a scratch register. */ |
| if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode)) |
| { |
| if (any_memory_operand (x, mode)) |
| { |
| if (in_p) |
| { |
| if (!aligned_memory_operand (x, mode)) |
| sri->icode = direct_optab_handler (reload_in_optab, mode); |
| } |
| else |
| sri->icode = direct_optab_handler (reload_out_optab, mode); |
| return NO_REGS; |
| } |
| } |
| |
| /* We also cannot do integral arithmetic into FP regs, as might result |
| from register elimination into a DImode fp register. */ |
| if (rclass == FLOAT_REGS) |
| { |
| if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) |
| return GENERAL_REGS; |
| if (in_p && INTEGRAL_MODE_P (mode) |
| && !MEM_P (x) && !REG_P (x) && !CONST_INT_P (x)) |
| return GENERAL_REGS; |
| } |
| |
| return NO_REGS; |
| } |
| |
| /* Implement TARGET_SECONDARY_MEMORY_NEEDED. |
| |
| If we are copying between general and FP registers, we need a memory |
| location unless the FIX extension is available. */ |
| |
| static bool |
| alpha_secondary_memory_needed (machine_mode, reg_class_t class1, |
| reg_class_t class2) |
| { |
| return (!TARGET_FIX |
| && ((class1 == FLOAT_REGS && class2 != FLOAT_REGS) |
| || (class2 == FLOAT_REGS && class1 != FLOAT_REGS))); |
| } |
| |
| /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. If MODE is |
| floating-point, use it. Otherwise, widen to a word like the default. |
| This is needed because we always store integers in FP registers in |
| quadword format. This whole area is very tricky! */ |
| |
| static machine_mode |
| alpha_secondary_memory_needed_mode (machine_mode mode) |
| { |
| if (GET_MODE_CLASS (mode) == MODE_FLOAT) |
| return mode; |
| if (GET_MODE_SIZE (mode) >= 4) |
| return mode; |
| return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require (); |
| } |
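| |
| /* For example, QImode and HImode widen to DImode here (BITS_PER_WORD is |
| 64), while SImode, DImode and the floating-point modes are returned |
| unchanged. */ |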
| |
| /* Given SEQ, which is an INSN list, look for any MEMs in either |
| a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and |
| volatile flags from REF into each of the MEMs found. If REF is not |
| a MEM, don't do anything. */ |
| |
| void |
| alpha_set_memflags (rtx seq, rtx ref) |
| { |
| rtx_insn *insn; |
| |
| if (!MEM_P (ref)) |
| return; |
| |
| /* This is only called from alpha.md, after having had something |
| generated from one of the insn patterns. So if everything is |
| zero, the pattern is already up-to-date. */ |
| if (!MEM_VOLATILE_P (ref) |
| && !MEM_NOTRAP_P (ref) |
| && !MEM_READONLY_P (ref)) |
| return; |
| |
| subrtx_var_iterator::array_type array; |
| for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn)) |
| if (INSN_P (insn)) |
| FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST) |
| { |
| rtx x = *iter; |
| if (MEM_P (x)) |
| { |
| MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref); |
| MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref); |
| MEM_READONLY_P (x) = MEM_READONLY_P (ref); |
| /* Sadly, we cannot use alias sets because the extra |
| aliasing produced by the AND interferes. Given that |
| two-byte quantities are the only thing we would be |
| able to differentiate anyway, there does not seem to |
| be any point in convoluting the early out of the |
| alias check. */ |
| iter.skip_subrtxes (); |
| } |
| } |
| else |
| gcc_unreachable (); |
| } |
| |
| static rtx alpha_emit_set_const (rtx, machine_mode, HOST_WIDE_INT, |
| int, bool); |
| |
| /* Internal routine for alpha_emit_set_const to check for N or fewer insns. |
| If NO_OUTPUT is true, then we only check to see if N insns are possible, |
| and return pc_rtx if successful. */ |
| |
| static rtx |
| alpha_emit_set_const_1 (rtx target, machine_mode mode, |
| HOST_WIDE_INT c, int n, bool no_output) |
| { |
| HOST_WIDE_INT new_const; |
| int i, bits; |
| /* Use a pseudo if highly optimizing and still generating RTL. */ |
| rtx subtarget |
| = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target); |
| rtx temp, insn; |
| |
| /* If this is a sign-extended 32-bit constant, we can do this in at most |
| three insns, so do it if we have enough insns left. */ |
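| |
| /* For example, c = 0x12345678 splits into high = 0x1234 and low = 0x5678, |
| i.e. one ldah plus one lda; c = 0x7fff8000 triggers the EXTRA adjustment |
| below and needs two ldah insns plus an lda. */ |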
| |
| if (c >> 31 == -1 || c >> 31 == 0) |
| { |
| HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000; |
| HOST_WIDE_INT tmp1 = c - low; |
| HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000; |
| HOST_WIDE_INT extra = 0; |
| |
| /* If HIGH will be interpreted as negative but the constant is |
| positive, we must adjust it to do two ldah insns. */ |
| |
| if ((high & 0x8000) != 0 && c >= 0) |
| { |
| extra = 0x4000; |
| tmp1 -= 0x40000000; |
| high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000); |
| } |
| |
| if (c == low || (low == 0 && extra == 0)) |
| { |
| /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode) |
| but that meant that we can't handle INT_MIN on 32-bit machines |
| (like NT/Alpha), because we recurse indefinitely through |
| emit_move_insn to gen_movdi. So instead, since we know exactly |
| what we want, create it explicitly. */ |
| |
| if (no_output) |
| return pc_rtx; |
| if (target == NULL) |
| target = gen_reg_rtx (mode); |
| emit_insn (gen_rtx_SET (target, GEN_INT (c))); |
| return target; |
| } |
| else if (n >= 2 + (extra != 0)) |
| { |
| if (no_output) |
| return pc_rtx; |
| if (!can_create_pseudo_p ()) |
| { |
| emit_insn (gen_rtx_SET (target, GEN_INT (high << 16))); |
| temp = target; |
| } |
| else |
| temp = copy_to_suggested_reg (GEN_INT (high << 16), |
| subtarget, mode); |
| |
| /* As of 2002-02-23, addsi3 is only available when not optimizing. |
| This means that if we go through expand_binop, we'll try to |
| generate extensions, etc, which will require new pseudos, which |
| will fail during some split phases. The SImode add patterns |
| still exist, but are not named. So build the insns by hand. */ |
| |
| if (extra != 0) |
| { |
| if (! subtarget) |
| subtarget = gen_reg_rtx (mode); |
| insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16)); |
| insn = gen_rtx_SET (subtarget, insn); |
| emit_insn (insn); |
| temp = subtarget; |
| } |
| |
| if (target == NULL) |
| target = gen_reg_rtx (mode); |
| insn = gen_rtx_PLUS (mode, temp, GEN_INT (low)); |
| insn = gen_rtx_SET (target, insn); |
| emit_insn (insn); |
| return target; |
| } |
| } |
| |
| /* If we couldn't do it that way, try some other methods. But if we have |
| no instructions left, don't bother. Likewise, if this is SImode and |
| we can't make pseudos, we can't do anything since the expand_binop |
| and expand_unop calls will widen and try to make pseudos. */ |
| |
| if (n == 1 || (mode == SImode && !can_create_pseudo_p ())) |
| return 0; |
| |
| /* Next, see if we can load a related constant and then shift and possibly |
| negate it to get the constant we want. Try this once each increasing |
| numbers of insns. */ |
| |
| for (i = 1; i < n; i++) |
| { |
| /* First, see if, minus some low bits, we have an easy load of the |
| high bits. */ |
| |
| new_const = ((c & 0xffff) ^ 0x8000) - 0x8000; |
| if (new_const != 0) |
| { |
| temp = alpha_emit_set_const (subtarget, mode, c - new_const, i, no_output); |
| if (temp) |
| { |
| if (no_output) |
| return temp; |
| return expand_binop (mode, add_optab, temp, GEN_INT (new_const), |
| target, 0, OPTAB_WIDEN); |
| } |
| } |
| |
| /* Next try complementing. */ |
| temp = alpha_emit_set_const (subtarget, mode, ~c, i, no_output); |
| if (temp) |
| { |
| if (no_output) |
| return temp; |
| return expand_unop (mode, one_cmpl_optab, temp, target, 0); |
| } |
| |
| /* Next try to form a constant and do a left shift. We can do this |
| if some low-order bits are zero; the exact_log2 call below tells |
| us that information. The bits we are shifting out could be any |
| value, but here we'll just try the 0- and sign-extended forms of |
| the constant. To try to increase the chance of having the same |
| constant in more than one insn, start at the highest number of |
| bits to shift, but try all possibilities in case a ZAPNOT will |
| be useful. */ |
| |
| bits = exact_log2 (c & -c); |
| if (bits > 0) |
| for (; bits > 0; bits--) |
| { |
| new_const = c >> bits; |
| temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); |
| if (!temp && c < 0) |
| { |
| new_const = (unsigned HOST_WIDE_INT)c >> bits; |
| temp = alpha_emit_set_const (subtarget, mode, new_const, |
| i, no_output); |
| } |
| if (temp) |
| { |
| if (no_output) |
| return temp; |
| return expand_binop (mode, ashl_optab, temp, GEN_INT (bits), |
| target, 0, OPTAB_WIDEN); |
| } |
| } |
| |
| /* Now try high-order zero bits. Here we try the shifted-in bits as |
| all zero and all ones. Be careful to avoid shifting outside the |
| mode and to avoid shifting outside the host wide int size. */ |
| |
| bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) |
| - floor_log2 (c) - 1); |
| if (bits > 0) |
| for (; bits > 0; bits--) |
| { |
| new_const = c << bits; |
| temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); |
| if (!temp) |
| { |
| new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1); |
| temp = alpha_emit_set_const (subtarget, mode, new_const, |
| i, no_output); |
| } |
| if (temp) |
| { |
| if (no_output) |
| return temp; |
| return expand_binop (mode, lshr_optab, temp, GEN_INT (bits), |
| target, 1, OPTAB_WIDEN); |
| } |
| } |
| |
| /* Now try high-order 1 bits. We get that with a sign-extension. |
| But one bit isn't enough here. Be careful to avoid shifting outside |
| the mode and to avoid shifting outside the host wide int size. */ |
| |
| bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) |
| - floor_log2 (~ c) - 2); |
| if (bits > 0) |
| for (; bits > 0; bits--) |
| { |
| new_const = c << bits; |
| temp = alpha_emit_set_const (subtarget, mode, new_const, i, no_output); |
| if (!temp) |
| { |
| new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1); |
| temp = alpha_emit_set_const (subtarget, mode, new_const, |
| i, no_output); |
| } |
| if (temp) |
| { |
| if (no_output) |
| return temp; |
| return expand_binop (mode, ashr_optab, temp, GEN_INT (bits), |
| target, 0, OPTAB_WIDEN); |
| } |
| } |
| } |
| |
| /* Finally, see if we can load a value into the target that is the same as the |
| constant except that all bytes that are 0 are changed to be 0xff. If we |
| can, then we can do a ZAPNOT to obtain the desired constant. */ |
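| /* Illustrative example: for c = 0xff00 the zero bytes become 0xff, |
| giving new_const = -0x100, a 1-insn lda constant; ANDing that with |
| c | ~new_const = 0xffff (a ZAPNOT mask keeping the low two bytes) |
| recovers c. */ |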
| |
| new_const = c; |
| for (i = 0; i < 64; i += 8) |
| if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0) |
| new_const |= (HOST_WIDE_INT) 0xff << i; |
| |
| /* We are only called for SImode and DImode. If this is SImode, ensure that |
| we are sign extended to a full word. */ |
| |
| if (mode == SImode) |
| new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000; |
| |
| if (new_const != c) |
| { |
| temp = alpha_emit_set_const (subtarget, mode, new_const, n - 1, no_output); |
| if (temp) |
| { |
| if (no_output) |
| return temp; |
| return expand_binop (mode, and_optab, temp, GEN_INT (c | ~ new_const), |
| target, 0, OPTAB_WIDEN); |
| } |
| } |
| |
| return 0; |
| } |
| |
| /* Try to output insns to set TARGET equal to the constant C if it can be |
| done in less than N insns. Do all computations in MODE. Returns the place |
| where the output has been placed if it can be done and the insns have been |
| emitted. If it would take more than N insns, zero is returned and no |
| insns are emitted. */ |
| |
| static rtx |
| alpha_emit_set_const (rtx target, machine_mode mode, |
| HOST_WIDE_INT c, int n, bool no_output) |
| { |
| machine_mode orig_mode = mode; |
| rtx orig_target = target; |
| rtx result = 0; |
| int i; |
| |
| /* If we can't make any pseudos, TARGET is an SImode hard register, and we |
| can't load this constant in one insn, do this in DImode. */ |
| if (!can_create_pseudo_p () && mode == SImode |
| && REG_P (target) && REGNO (target) < FIRST_PSEUDO_REGISTER) |
| { |
| result = alpha_emit_set_const_1 (target, mode, c, 1, no_output); |
| if (result) |
| return result; |
| |
| target = no_output ? NULL : gen_lowpart (DImode, target); |
| mode = DImode; |
| } |
| else if (mode == V8QImode || mode == V4HImode || mode == V2SImode) |
| { |
| target = no_output ? NULL : gen_lowpart (DImode, target); |
| mode = DImode; |
| } |
| |
| /* Try 1 insn, then 2, then up to N. */ |
| for (i = 1; i <= n; i++) |
| { |
| result = alpha_emit_set_const_1 (target, mode, c, i, no_output); |
| if (result) |
| { |
| rtx_insn *insn; |
| rtx set; |
| |
| if (no_output) |
| return result; |
| |
| insn = get_last_insn (); |
| set = single_set (insn); |
| if (! CONSTANT_P (SET_SRC (set))) |
| set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c)); |
| break; |
| } |
| } |
| |
| /* Allow for the case where we changed the mode of TARGET. */ |
| if (result) |
| { |
| if (result == target) |
| result = orig_target; |
| else if (mode != orig_mode) |
| result = gen_lowpart (orig_mode, result); |
| } |
| |
| return result; |
| } |
| |
| /* Having failed to find a 3 insn sequence in alpha_emit_set_const, |
| fall back to a straightforward decomposition. We do this to avoid |
| exponential run times encountered when looking for longer sequences |
| with alpha_emit_set_const. */ |
| |
| static rtx |
| alpha_emit_set_long_const (rtx target, HOST_WIDE_INT c1) |
| { |
| HOST_WIDE_INT d1, d2, d3, d4; |
| |
| /* Decompose the entire word. */ |
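| /* For example (illustrative only), c1 = 0x123456789abcdef0 yields |
| d1 = -0x2110, d2 = -0x65430000, d3 = 0x5679, d4 = 0x12340000, and |
| ((d4 + d3) << 32) + d2 + d1 reconstructs the original constant. */ |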
| |
| d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; |
| c1 -= d1; |
| d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; |
| c1 = (c1 - d2) >> 32; |
| d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; |
| c1 -= d3; |
| d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; |
| gcc_assert (c1 == d4); |
| |
| /* Construct the high word. */ |
| if (d4) |
| { |
| emit_move_insn (target, GEN_INT (d4)); |
| if (d3) |
| emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3))); |
| } |
| else |
| emit_move_insn (target, GEN_INT (d3)); |
| |
| /* Shift it into place. */ |
| emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32))); |
| |
| /* Add in the low bits. */ |
| if (d2) |
| emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2))); |
| if (d1) |
| emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1))); |
| |
| return target; |
| } |
| |
| /* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */ |
| |
| static HOST_WIDE_INT |
| alpha_extract_integer (rtx x) |
| { |
| if (GET_CODE (x) == CONST_VECTOR) |
| x = simplify_subreg (DImode, x, GET_MODE (x), 0); |
| |
| gcc_assert (CONST_INT_P (x)); |
| |
| return INTVAL (x); |
| } |
| |
| /* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which |
| we are willing to load the value into a register via a move pattern. |
| Normally this is all symbolic constants, integral constants that |
| take three or fewer instructions, and floating-point zero. */ |
| |
| bool |
| alpha_legitimate_constant_p (machine_mode mode, rtx x) |
| { |
| HOST_WIDE_INT i0; |
| |
| switch (GET_CODE (x)) |
| { |
| case LABEL_REF: |
| case HIGH: |
| return true; |
| |
| case CONST: |
| if (GET_CODE (XEXP (x, 0)) == PLUS |
| && CONST_INT_P (XEXP (XEXP (x, 0), 1))) |
| x = XEXP (XEXP (x, 0), 0); |
| else |
| return true; |
| |
| if (GET_CODE (x) != SYMBOL_REF) |
| return true; |
| /* FALLTHRU */ |
| |
| case SYMBOL_REF: |
| /* TLS symbols are never valid. */ |
| return SYMBOL_REF_TLS_MODEL (x) == 0; |
| |
| case CONST_WIDE_INT: |
| if (TARGET_BUILD_CONSTANTS) |
| return true; |
| if (x == CONST0_RTX (mode)) |
| return true; |
| mode = DImode; |
| gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2); |
| i0 = CONST_WIDE_INT_ELT (x, 1); |
| if (alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL) |
| return false; |
| i0 = CONST_WIDE_INT_ELT (x, 0); |
| goto do_integer; |
| |
| case CONST_DOUBLE: |
| if (x == CONST0_RTX (mode)) |
| return true; |
| return false; |
| |
| case CONST_VECTOR: |
| if (x == CONST0_RTX (mode)) |
| return true; |
| if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) |
| return false; |
| if (GET_MODE_SIZE (mode) != 8) |
| return false; |
| /* FALLTHRU */ |
| |
| case CONST_INT: |
| if (TARGET_BUILD_CONSTANTS) |
| return true; |
| i0 = alpha_extract_integer (x); |
| do_integer: |
| return alpha_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL; |
| |
| default: |
| return false; |
| } |
| } |
| |
| /* Operand 1 is known to be a constant, and should require more than one |
| instruction to load. Emit that multi-part load. */ |
| |
| bool |
| alpha_split_const_mov (machine_mode mode, rtx *operands) |
| { |
| HOST_WIDE_INT i0; |
| rtx temp = NULL_RTX; |
| |
| i0 = alpha_extract_integer (operands[1]); |
| |
| temp = alpha_emit_set_const (operands[0], mode, i0, 3, false); |
| |
| if (!temp && TARGET_BUILD_CONSTANTS) |
| temp = alpha_emit_set_long_const (operands[0], i0); |
| |
| if (temp) |
| { |
| if (!rtx_equal_p (operands[0], temp)) |
| emit_move_insn (operands[0], temp); |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Expand a move instruction; return true if all work is done. |
| We don't handle non-bwx subword loads here. */ |
| |
| bool |
| alpha_expand_mov (machine_mode mode, rtx *operands) |
| { |
| rtx tmp; |
| |
| /* If the output is not a register, the input must be. */ |
| if (MEM_P (operands[0]) |
| && ! reg_or_0_operand (operands[1], mode)) |
| operands[1] = force_reg (mode, operands[1]); |
| |
| /* Allow legitimize_address to perform some simplifications. */ |
| if (mode == Pmode && symbolic_operand (operands[1], mode)) |
| { |
| tmp = alpha_legitimize_address_1 (operands[1], operands[0], mode); |
| if (tmp) |
| { |
| if (tmp == operands[0]) |
| return true; |
| operands[1] = tmp; |
| return false; |
| } |
| } |
| |
| /* Early out for non-constants and valid constants. */ |
| if (! CONSTANT_P (operands[1]) || input_operand (operands[1], mode)) |
| return false; |
| |
| /* Split large integers. */ |
| if (CONST_INT_P (operands[1]) |
| || GET_CODE (operands[1]) == CONST_VECTOR) |
| { |
| if (alpha_split_const_mov (mode, operands)) |
| return true; |
| } |
| |
| /* Otherwise we've nothing left but to drop the thing to memory. */ |
| tmp = force_const_mem (mode, operands[1]); |
| |
| if (tmp == NULL_RTX) |
| return false; |
| |
| if (reload_in_progress) |
| { |
| emit_move_insn (operands[0], XEXP (tmp, 0)); |
| operands[1] = replace_equiv_address (tmp, operands[0]); |
| } |
| else |
| operands[1] = validize_mem (tmp); |
| return false; |
| } |
| |
| /* Expand a non-bwx QImode or HImode move instruction; |
| return true if all work is done. */ |
| |
| bool |
| alpha_expand_mov_nobwx (machine_mode mode, rtx *operands) |
| { |
| rtx seq; |
| |
| /* If the output is not a register, the input must be. */ |
| if (MEM_P (operands[0])) |
| operands[1] = force_reg (mode, operands[1]); |
| |
| /* Handle four memory cases, unaligned and aligned for either the input |
| or the output. The only case where we can be called during reload is |
| for aligned loads; all other cases require temporaries. */ |
| |
| if (any_memory_operand (operands[1], mode)) |
| { |
| if (aligned_memory_operand (operands[1], mode)) |
| { |
| if (reload_in_progress) |
| { |
| seq = gen_reload_in_aligned (mode, operands[0], operands[1]); |
| emit_insn (seq); |
| } |
| else |
| { |
| rtx aligned_mem, bitnum; |
| rtx scratch = gen_reg_rtx (SImode); |
| rtx subtarget; |
| bool copyout; |
| |
| get_aligned_mem (operands[1], &aligned_mem, &bitnum); |
| |
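| /* The aligned load patterns produce a DImode value. If the |
| destination is a register, load straight into its DImode lowpart; |
| otherwise load into a temporary and copy the subword out below. */ |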
| subtarget = operands[0]; |
| if (REG_P (subtarget)) |
| subtarget = gen_lowpart (DImode, subtarget), copyout = false; |
| else |
| subtarget = gen_reg_rtx (DImode), copyout = true; |
| |
| if (mode == QImode) |
| seq = gen_aligned_loadqi (subtarget, aligned_mem, |
| bitnum, scratch); |
| else |
| seq = gen_aligned_loadhi (subtarget, aligned_mem, |
| bitnum, scratch); |
| emit_insn (seq); |
| |
| if (copyout) |
| emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); |
| } |
| } |
| else |
| { |
| /* Don't pass these as parameters since that makes the generated |
| code depend on parameter evaluation order which will cause |
| bootstrap failures. */ |
| |
| rtx temp1, temp2, subtarget, ua; |
| bool copyout; |
| |
| temp1 = gen_reg_rtx (DImode); |
| temp2 = gen_reg_rtx (DImode); |
| |
| subtarget = operands[0]; |
| if (REG_P (subtarget)) |
| subtarget = gen_lowpart (DImode, subtarget), copyout = false; |
| else |
| subtarget = gen_reg_rtx (DImode), copyout = true; |
| |
| ua = get_unaligned_address (operands[1]); |
| if (mode == QImode) |
| seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2); |
| else |
| seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2); |
| |
| alpha_set_memflags (seq, operands[1]); |
| emit_insn (seq); |
| |
| if (copyout) |
| emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); |
| } |
| return true; |
| } |
| |
| if (any_memory_operand (operands[0], mode)) |
| { |
| if (aligned_memory_operand (operands[0], mode)) |
| { |
| rtx aligned_mem, bitnum; |
| rtx temp1 = gen_reg_rtx (SImode); |
| rtx temp2 = gen_reg_rtx (SImode); |
| |
| get_aligned_mem (operands[0], &aligned_mem, &bitnum); |
| |
| emit_insn (gen_aligned_store (aligned_mem, operands[1], bitnum, |
| temp1, temp2)); |
| } |
| else |
| { |
| rtx temp1 = gen_reg_rtx (DImode); |
| rtx temp2 = gen_reg_rtx (DImode); |
| rtx temp3 = gen_reg_rtx (DImode); |
| rtx ua = get_unaligned_address (operands[0]); |
| |
| seq = gen_unaligned_store |
| (mode, ua, operands[1], temp1, temp2, temp3); |
| |
| alpha_set_memflags (seq, operands[0]); |
| emit_insn (seq); |
| } |
| return true; |
| } |
| |
| return false; |
| } |
| |
| /* Implement the movmisalign patterns. One of the operands is a memory |
| that is not naturally aligned. Emit instructions to perform the move. */ |
| |
| void |
| alpha_expand_movmisalign (machine_mode mode, rtx *operands) |
| { |
| /* Honor misaligned loads; these are the cases we promised to handle. */ |
| if (MEM_P (operands[1])) |
| { |
| rtx tmp; |
| |
| if (register_operand (operands[0], mode)) |
| tmp = operands[0]; |
| else |
| tmp = gen_reg_rtx (mode); |
| |
| alpha_expand_unaligned_load (tmp, operands[1], 8, 0, 0); |
| if (tmp != operands[0]) |
| emit_move_insn (operands[0], tmp); |
| } |
| else if (MEM_P (operands[0])) |
| { |
| if (!reg_or_0_operand (operands[1], mode)) |
| operands[1] = force_reg (mode, operands[1]); |
| alpha_expand_unaligned_store (operands[0], operands[1], 8, 0); |
| } |
| else |
| gcc_unreachable (); |
| } |
| |
| /* Generate an unsigned DImode to FP conversion. This is the same code |
| optabs would emit if we didn't have TFmode patterns. |
| |
| For SFmode, this is the only construction I've found that can pass |
| gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode |
| intermediates will work, because you'll get intermediate rounding |
| that ruins the end result. Some of this could be fixed by turning |
| on round-to-positive-infinity, but that requires diddling the fpsr, |
| which kills performance. I tried turning this around and converting |
| to a negative number, so that I could turn on /m, but either I did |
| it wrong or there's something else, because I wound up with the exact |
| same single-bit error. There is a branch-less form of this same code: |
| |
| srl $16,1,$1 |
| and $16,1,$2 |
| cmplt $16,0,$3 |
| or $1,$2,$2 |
| cmovge $16,$16,$2 |
| itoft $3,$f10 |
| itoft $2,$f11 |
| cvtqs $f11,$f11 |
| adds $f11,$f11,$f0 |
| fcmoveq $f10,$f11,$f0 |
| |
| I'm not using it because it's the same number of instructions as |
| this branch-full form, and it has more serialized long latency |
| instructions on the critical path. |
| |
| For DFmode, we can avoid rounding errors by breaking up the word |
| into two pieces, converting them separately, and adding them back: |
| |
| LC0: .long 0,0x5f800000 |
| |
| itoft $16,$f11 |
| lda $2,LC0 |
| cmplt $16,0,$1 |
| cpyse $f11,$f31,$f10 |
| cpyse $f31,$f11,$f11 |
| s4addq $1,$2,$1 |
| lds $f12,0($1) |
| cvtqt $f10,$f10 |
| cvtqt $f11,$f11 |
| addt $f12,$f10,$f0 |
| addt $f0,$f11,$f0 |
| |
| This doesn't seem to be a clear-cut win over the optabs form. |
| It probably all depends on the distribution of numbers being |
| converted -- in the optabs form, all but the high-bit-set case have |
| a much lower minimum execution time. */ |
| |
| void |
| alpha_emit_floatuns (rtx operands[2]) |
| { |
| rtx neglab, donelab, i0, i1, f0, in, out; |
| machine_mode mode; |
| |
| out = operands[0]; |
| in = force_reg (DImode, operands[1]); |
| mode = GET_MODE (out); |
| neglab = gen_label_rtx (); |
| donelab = gen_label_rtx (); |
| i0 = gen_reg_rtx (DImode); |
| i1 = gen_reg_rtx (DImode); |
| f0 = gen_reg_rtx (mode); |
| |
| emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); |
| |
| emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in))); |
| emit_jump_insn (gen_jump (donelab)); |
| emit_barrier (); |
| |
| emit_label (neglab); |
| |
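| /* IN has the high bit set, so it is >= 2^63 when viewed as unsigned. |
| Halve it, ORing the discarded low bit back in so that rounding still |
| sees it, convert the halved value, and then double the result. */ |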
| emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); |
| emit_insn (gen_anddi3 (i1, in, const1_rtx)); |
| emit_insn (gen_iordi3 (i0, i0, i1)); |
| emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0))); |
| emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); |
| |
| emit_label (donelab); |
| } |
| |
| /* Generate the comparison for a conditional branch. */ |
| |
| void |
| alpha_emit_conditional_branch (rtx operands[], machine_mode cmp_mode) |
| { |
| enum rtx_code cmp_code, branch_code; |
| machine_mode branch_mode = VOIDmode; |
| enum rtx_code code = GET_CODE (operands[0]); |
| rtx op0 = operands[1], op1 = operands[2]; |
| rtx tem; |
| |
| if (cmp_mode == TFmode) |
| { |
| op0 = alpha_emit_xfloating_compare (&code, op0, op1); |
| op1 = const0_rtx; |
| cmp_mode = DImode; |
| } |
| |
| /* The general case: fold the comparison code to the types of compares |
| that we have, choosing the branch as necessary. */ |
| switch (code) |
| { |
| case EQ: case LE: case LT: case LEU: case LTU: |
| case UNORDERED: |
| /* We have these compares. */ |
| cmp_code = code, branch_code = NE; |
| break; |
| |
| case NE: |
| case ORDERED: |
| /* These must be reversed. */ |
| cmp_code = reverse_condition (code), branch_code = EQ; |
| break; |
| |
| case GE: case GT: case GEU: case GTU: |
| /* For FP, we swap them, for INT, we reverse them. */ |
| if (cmp_mode == DFmode) |
| { |
| cmp_code = swap_condition (code); |
| branch_code = NE; |
| std::swap (op0, op1); |
| } |
| else |
| { |
| cmp_code = reverse_condition (code); |
| branch_code = EQ; |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (cmp_mode == DFmode) |
| { |
| if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) |
| { |
| /* When we are not as concerned about non-finite values, and we |
| are comparing against zero, we can branch directly. */ |
| if (op1 == CONST0_RTX (DFmode)) |
| cmp_code = UNKNOWN, branch_code = code; |
| else if (op0 == CONST0_RTX (DFmode)) |
| { |
| /* Undo the swap we probably did just above. */ |
| std::swap (op0, op1); |
| branch_code = swap_condition (cmp_code); |
| cmp_code = UNKNOWN; |
| } |
| } |
| else |
| { |
| /* ??? We mark the branch mode to be CCmode to prevent the |
| compare and branch from being combined, since the compare |
| insn follows IEEE rules that the branch does not. */ |
| branch_mode = CCmode; |
| } |
| } |
| else |
| { |
| /* The following optimizations are only for signed compares. */ |
| if (code != LEU && code != LTU && code != GEU && code != GTU) |
| { |
| /* Whee. Compare and branch against 0 directly. */ |
| if (op1 == const0_rtx) |
| cmp_code = UNKNOWN, branch_code = code; |
| |
| /* If the constant doesn't fit into an immediate, but can |
| be generated by lda/ldah, we adjust the argument and |
| compare against zero, so we can use beq/bne directly. */ |
| /* ??? Don't do this when comparing against symbols, otherwise |
| we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will |
| be declared false out of hand (at least for non-weak). */ |
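| /* For instance (illustrative), x == 0x12340000 can be rewritten as |
| x + (-0x12340000) == 0; the adjustment is a single ldah and the |
| branch becomes a beq against zero. */ |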
| else if (CONST_INT_P (op1) |
| && (code == EQ || code == NE) |
| && !(symbolic_operand (op0, VOIDmode) |
| || (REG_P (op0) && REG_POINTER (op0)))) |
| { |
| rtx n_op1 = GEN_INT (-INTVAL (op1)); |
| |
| if (! satisfies_constraint_I (op1) |
| && (satisfies_constraint_K (n_op1) |
| || satisfies_constraint_L (n_op1))) |
| cmp_code = PLUS, branch_code = code, op1 = n_op1; |
| } |
| } |
| |
| if (!reg_or_0_operand (op0, DImode)) |
| op0 = force_reg (DImode, op0); |
| if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode)) |
| op1 = force_reg (DImode, op1); |
| } |
| |
| /* Emit an initial compare instruction, if necessary. */ |
| tem = op0; |
| if (cmp_code != UNKNOWN) |
| { |
| tem = gen_reg_rtx (cmp_mode); |
| emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)); |
| } |
| |
| /* Emit the branch instruction. */ |
| tem = gen_rtx_SET (pc_rtx, |
| gen_rtx_IF_THEN_ELSE (VOIDmode, |
| gen_rtx_fmt_ee (branch_code, |
| branch_mode, tem, |
| CONST0_RTX (cmp_mode)), |
| gen_rtx_LABEL_REF (VOIDmode, |
| operands[3]), |
| pc_rtx)); |
| emit_jump_insn (tem); |
| } |
| |
| /* Certain simplifications can be done to make invalid setcc operations |
| valid. Return true if the setcc has been emitted, false if we can't. */ |
| |
| bool |
| alpha_emit_setcc (rtx operands[], machine_mode cmp_mode) |
| { |
| enum rtx_code cmp_code; |
| enum rtx_code code = GET_CODE (operands[1]); |
| rtx op0 = operands[2], op1 = operands[3]; |
| rtx tmp; |
| |
| if (cmp_mode == TFmode) |
| { |
| op0 = alpha_emit_xfloating_compare (&code, op0, op1); |
| op1 = const0_rtx; |
| cmp_mode = DImode; |
| } |
| |
| if (cmp_mode == DFmode && !TARGET_FIX) |
| return 0; |
| |
| /* The general case: fold the comparison code to the types of compares |
| that we have, choosing the branch as necessary. */ |
| |
| cmp_code = UNKNOWN; |
| switch (code) |
| { |
| case EQ: case LE: case LT: case LEU: case LTU: |
| case UNORDERED: |
| /* We have these compares. */ |
| if (cmp_mode == DFmode) |
| cmp_code = code, code = NE; |
| break; |
| |
| case NE: |
| if (cmp_mode == DImode && op1 == const0_rtx) |
| break; |
| /* FALLTHRU */ |
| |
| case ORDERED: |
| cmp_code = reverse_condition (code); |
| code = EQ; |
| break; |
| |
| case GE: case GT: case GEU: case GTU: |
| /* These normally need swapping, but for integer zero we have |
| special patterns that recognize swapped operands. */ |
| if (cmp_mode == DImode && op1 == const0_rtx) |
| break; |
| code = swap_condition (code); |
| if (cmp_mode == DFmode) |
| cmp_code = code, code = NE; |
| std::swap (op0, op1); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (cmp_mode == DImode) |
| { |
| if (!register_operand (op0, DImode)) |
| op0 = force_reg (DImode, op0); |
| if (!reg_or_8bit_operand (op1, DImode)) |
| op1 = force_reg (DImode, op1); |
| } |
| |
| /* Emit an initial compare instruction, if necessary. */ |
| if (cmp_code != UNKNOWN) |
| { |
| tmp = gen_reg_rtx (cmp_mode); |
| emit_insn (gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode, |
| op0, op1))); |
| |
| op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp; |
| op1 = const0_rtx; |
| } |
| |
| /* Emit the setcc instruction. */ |
| emit_insn (gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode, |
| op0, op1))); |
| return true; |
| } |
| |
| |
| /* Rewrite a comparison CMP of the form (CODE op0 op1) into a comparison |
| against zero that can be written validly in a conditional move |
| (if_then_else CMP ...). If both operands of the comparison are nonzero |
| we must emit an insn to perform the compare (it can't be done within |
| the conditional move). */ |
| |
| rtx |
| alpha_emit_conditional_move (rtx cmp, machine_mode mode) |
| { |
| enum rtx_code code = GET_CODE (cmp); |
| enum rtx_code cmov_code = NE; |
| rtx op0 = XEXP (cmp, 0); |
| rtx op1 = XEXP (cmp, 1); |
| machine_mode cmp_mode |
| = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0)); |
| machine_mode cmov_mode = VOIDmode; |
| int local_fast_math = flag_unsafe_math_optimizations; |
| rtx tem; |
| |
| if (cmp_mode == TFmode) |
| { |
| op0 = alpha_emit_xfloating_compare (&code, op0, op1); |
| op1 = const0_rtx; |
| cmp_mode = DImode; |
| } |
| |
| gcc_assert (cmp_mode == DFmode || cmp_mode == DImode); |
| |
| if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode)) |
| { |
| enum rtx_code cmp_code; |
| |
| if (! TARGET_FIX) |
| return 0; |
| |
| /* If we have fp<->int register move instructions, do a cmov by |
| performing the comparison in fp registers, and move the |
| zero/nonzero value to integer registers, where we can then |
| use a normal cmov, or vice-versa. */ |
| |
| switch (code) |
| { |
| case EQ: case LE: case LT: case LEU: case LTU: |
| case UNORDERED: |
| /* We have these compares. */ |
| cmp_code = code, code = NE; |
| break; |
| |
| case NE: |
| case ORDERED: |
| /* These must be reversed. */ |
| cmp_code = reverse_condition (code), code = EQ; |
| break; |
| |
| case GE: case GT: case GEU: case GTU: |
| /* These normally need swapping, but for integer zero we have |
| special patterns that recognize swapped operands. */ |
| if (cmp_mode == DImode && op1 == const0_rtx) |
| cmp_code = code, code = NE; |
| else |
| { |
| cmp_code = swap_condition (code); |
| code = NE; |
| std::swap (op0, op1); |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (cmp_mode == DImode) |
| { |
| if (!reg_or_0_operand (op0, DImode)) |
| op0 = force_reg (DImode, op0); |
| if (!reg_or_8bit_operand (op1, DImode)) |
| op1 = force_reg (DImode, op1); |
| } |
| |
| tem = gen_reg_rtx (cmp_mode); |
| emit_insn (gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, |
| op0, op1))); |
| |
| cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode; |
| op0 = gen_lowpart (cmp_mode, tem); |
| op1 = CONST0_RTX (cmp_mode); |
| cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); |
| local_fast_math = 1; |
| } |
| |
| if (cmp_mode == DImode) |
| { |
| if (!reg_or_0_operand (op0, DImode)) |
| op0 = force_reg (DImode, op0); |
| if (!reg_or_8bit_operand (op1, DImode)) |
| op1 = force_reg (DImode, op1); |
| } |
| |
| /* We may be able to use a conditional move directly. |
| This avoids emitting spurious compares. */ |
| if (signed_comparison_operator (cmp, VOIDmode) |
| && (cmp_mode == DImode || local_fast_math) |
| && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode))) |
| return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); |
| |
| /* We can't put the comparison inside the conditional move; |
| emit a compare instruction and put that inside the |
| conditional move. Make sure we emit only comparisons we have; |
| swap or reverse as necessary. */ |
| |
| if (!can_create_pseudo_p ()) |
| return NULL_RTX; |
| |
| switch (code) |
| { |
| case EQ: case LE: case LT: case LEU: case LTU: |
| case UNORDERED: |
| /* We have these compares: */ |
| break; |
| |
| case NE: |
| case ORDERED: |
| /* These must be reversed. */ |
| code = reverse_condition (code); |
| cmov_code = EQ; |
| break; |
| |
| case GE: case GT: case GEU: case GTU: |
| /* These normally need swapping, but for integer zero we have |
| special patterns that recognize swapped operands. */ |
| if (cmp_mode == DImode && op1 == const0_rtx) |
| break; |
| code = swap_condition (code); |
| std::swap (op0, op1); |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| if (cmp_mode == DImode) |
| { |
| if (!reg_or_0_operand (op0, DImode)) |
| op0 = force_reg (DImode, op0); |
| if (!reg_or_8bit_operand (op1, DImode)) |
| op1 = force_reg (DImode, op1); |
| } |
| |
| /* ??? We mark the branch mode to be CCmode to prevent the compare |
| and cmov from being combined, since the compare insn follows IEEE |
| rules that the cmov does not. */ |
| if (cmp_mode == DFmode && !local_fast_math) |
| cmov_mode = CCmode; |
| |
| tem = gen_reg_rtx (cmp_mode); |
| emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1)); |
| return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode)); |
| } |
| |
| /* Simplify a conditional move of two constants into a setcc with |
| arithmetic. This is done with a splitter since combine would |
| just undo the work if done during code generation. It also catches |
| cases we wouldn't have before cse. */ |
| |
| int |
| alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond, |
| rtx t_rtx, rtx f_rtx) |
| { |
| HOST_WIDE_INT t, f, diff; |
| machine_mode mode; |
| rtx target, subtarget, tmp; |
| |
| mode = GET_MODE (dest); |
| t = INTVAL (t_rtx); |
| f = INTVAL (f_rtx); |
| diff = t - f; |
| |
| if (((code == NE || code == EQ) && diff < 0) |
| || (code == GE || code == GT)) |
| { |
| code = reverse_condition (code); |
| std::swap (t, f); |
| diff = -diff; |
| } |
| |
| subtarget = target = dest; |
| if (mode != DImode) |
| { |
| target = gen_lowpart (DImode, dest); |
| if (can_create_pseudo_p ()) |
| subtarget = gen_reg_rtx (DImode); |
| else |
| subtarget = target; |
| } |
| /* Below, we must be careful to use copy_rtx on target and subtarget |
| in intermediate insns, as they may be a subreg rtx, which may not |
| be shared. */ |
| |
| if (f == 0 && exact_log2 (diff) > 0 |
| /* On EV6, we've got enough shifters to make non-arithmetic shifts |
| viable over a longer latency cmove. On EV5, the E0 slot is a |
| scarce resource, and on EV4 shift has the same latency as a cmove. */ |
| && (diff <= 8 || alpha_tune == PROCESSOR_EV6)) |
| { |
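| /* E.g. (cond ? 8 : 0): emit the 0/1 comparison result and shift it |
| left by 3, giving 0 or 8 without a conditional move. */ |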
| tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); |
| emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); |
| |
| tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), |
| GEN_INT (exact_log2 (t))); |
| emit_insn (gen_rtx_SET (target, tmp)); |
| } |
| else if (f == 0 && t == -1) |
| { |
| tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); |
| emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); |
| |
| emit_insn (gen_negdi2 (target, copy_rtx (subtarget))); |
| } |
| else if (diff == 1 || diff == 4 || diff == 8) |
| { |
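| /* E.g. (cond ? 5 : 1): diff == 4, so the 0/1 comparison result |
| feeds a scaled add (s4addq) with 1, producing 1 or 5. */ |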
| rtx add_op; |
| |
| tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); |
| emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); |
| |
| if (diff == 1) |
| emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f))); |
| else |
| { |
| add_op = GEN_INT (f); |
| if (sext_add_operand (add_op, mode)) |
| { |
| tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), |
| GEN_INT (exact_log2 (diff))); |
| tmp = gen_rtx_PLUS (DImode, tmp, add_op); |
| emit_insn (gen_rtx_SET (target, tmp)); |
| } |
| else |
| return 0; |
| } |
| } |
| else |
| return 0; |
| |
| return 1; |
| } |
| |
| /* Look up the X_floating library function for the given operation. */ |
| |
| struct GTY(()) xfloating_op |
| { |
| const enum rtx_code code; |
| const char *const GTY((skip)) osf_func; |
| const char *const GTY((skip)) vms_func; |
| rtx libcall; |
| }; |
| |
| static GTY(()) struct xfloating_op xfloating_ops[] = |
| { |
| { PLUS, "_OtsAddX", "OTS$ADD_X", 0 }, |
| { MINUS, "_OtsSubX", "OTS$SUB_X", 0 }, |
| { MULT, "_OtsMulX", "OTS$MUL_X", 0 }, |
| { DIV, "_OtsDivX", "OTS$DIV_X", 0 }, |
| { EQ, "_OtsEqlX", "OTS$EQL_X", 0 }, |
| { NE, "_OtsNeqX", "OTS$NEQ_X", 0 }, |
| { LT, "_OtsLssX", "OTS$LSS_X", 0 }, |
| { LE, "_OtsLeqX", "OTS$LEQ_X", 0 }, |
| { GT, "_OtsGtrX", "OTS$GTR_X", 0 }, |
| { GE, "_OtsGeqX", "OTS$GEQ_X", 0 }, |
| { FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0 }, |
| { FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0 }, |
| { UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0 }, |
| { FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0 }, |
| { FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0 } |
| }; |
| |
| static GTY(()) struct xfloating_op vax_cvt_ops[] = |
| { |
| { FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0 }, |
| { FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0 } |
| }; |
| |
| static rtx |
| alpha_lookup_xfloating_lib_func (enum rtx_code code) |
| { |
| struct xfloating_op *ops = xfloating_ops; |
| long n = ARRAY_SIZE (xfloating_ops); |
| long i; |
| |
| gcc_assert (TARGET_HAS_XFLOATING_LIBS); |
| |
| /* How irritating. Nothing to key off for the main table. */ |
| if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE)) |
| { |
| ops = vax_cvt_ops; |
| n = ARRAY_SIZE (vax_cvt_ops); |
| } |
| |
| for (i = 0; i < n; ++i, ++ops) |
| if (ops->code == code) |
| { |
| rtx func = ops->libcall; |
| if (!func) |
| { |
| func = init_one_libfunc (TARGET_ABI_OPEN_VMS |
| ? ops->vms_func : ops->osf_func); |
| ops->libcall = func; |
| } |
| return func; |
| } |
| |
| gcc_unreachable (); |
| } |
| |
| /* Most X_floating operations take the rounding mode as an argument. |
| Compute that here. */ |
| |
| static int |
| alpha_compute_xfloating_mode_arg (enum rtx_code code, |
| enum alpha_fp_rounding_mode round) |
| { |
| int mode; |
| |
| switch (round) |
| { |
| case ALPHA_FPRM_NORM: |
| mode = 2; |
| break; |
| case ALPHA_FPRM_MINF: |
| mode = 1; |
| break; |
| case ALPHA_FPRM_CHOP: |
| mode = 0; |
| break; |
| case ALPHA_FPRM_DYN: |
| mode = 4; |
| break; |
| default: |
| gcc_unreachable (); |
| |
| /* XXX For reference, round to +inf is mode = 3. */ |
| } |
| |
| if (code == FLOAT_TRUNCATE && alpha_fptm == ALPHA_FPTM_N) |
| mode |= 0x10000; |
| |
| return mode; |
| } |
| |
| /* Emit an X_floating library function call. |
| |
| Note that these functions do not follow normal calling conventions: |
| TFmode arguments are passed in two integer registers (as opposed to |
| indirect); TFmode return values appear in R16+R17. |
| |
| FUNC is the function to call. |
| TARGET is where the output belongs. |
| OPERANDS are the inputs. |
| NOPERANDS is the count of inputs. |
| EQUIV is the expression equivalent for the function. |
| */ |
| |
| static void |
| alpha_emit_xfloating_libcall (rtx func, rtx target, rtx operands[], |
| int noperands, rtx equiv) |
| { |
| rtx usage = NULL_RTX, reg; |
| int regno = 16, i; |
| |
| start_sequence (); |
| |
| for (i = 0; i < noperands; ++i) |
| { |
| switch (GET_MODE (operands[i])) |
| { |
| case E_TFmode: |
| reg = gen_rtx_REG (TFmode, regno); |
| regno += 2; |
| break; |
| |
| case E_DFmode: |
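| /* Floating-point arguments go in the FP register file, which |
| starts 32 registers above the integer file. */ |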
| reg = gen_rtx_REG (DFmode, regno + 32); |
| regno += 1; |
| break; |
| |
| case E_VOIDmode: |
| gcc_assert (CONST_INT_P (operands[i])); |
| /* FALLTHRU */ |
| case E_DImode: |
| reg = gen_rtx_REG (DImode, regno); |
| regno += 1; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| emit_move_insn (reg, operands[i]); |
| use_reg (&usage, reg); |
| } |
| |
| switch (GET_MODE (target)) |
| { |
| case E_TFmode: |
| reg = gen_rtx_REG (TFmode, 16); |
| break; |
| case E_DFmode: |
| reg = gen_rtx_REG (DFmode, 32); |
| break; |
| case E_DImode: |
| reg = gen_rtx_REG (DImode, 0); |
| break; |
| default: |
| gcc_unreachable (); |
| } |
| |
| rtx mem = gen_rtx_MEM (QImode, func); |
| rtx_insn *tmp = emit_call_insn (gen_call_value (reg, mem, const0_rtx, |
| const0_rtx, const0_rtx)); |
| CALL_INSN_FUNCTION_USAGE (tmp) = usage; |
| RTL_CONST_CALL_P (tmp) = 1; |
| |
| tmp = get_insns (); |
| end_sequence (); |
| |
| emit_libcall_block (tmp, target, reg, equiv); |
| } |
|