/* blob: f6eae1997692078dc447b6b40bb2530a7c89d11b [file] [log] [blame] */
/* Definitions of target machine for GNU compiler.
Copyright (C) 1999-2015 Free Software Foundation, Inc.
Contributed by James E. Wilson <wilson@cygnus.com> and
David Mosberger <davidm@hpl.hp.com>.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "ggc.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "libfuncs.h"
#include "diagnostic-core.h"
#include "sched-int.h"
#include "timevar.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "tm_p.h"
#include "hash-table.h"
#include "langhooks.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "intl.h"
#include "df.h"
#include "debug.h"
#include "params.h"
#include "dbgcnt.h"
#include "tm-constrs.h"
#include "sel-sched.h"
#include "reload.h"
#include "opts.h"
#include "dumpfile.h"
#include "builtins.h"
/* This is used for communication between ASM_OUTPUT_LABEL and
ASM_OUTPUT_LABELREF. It has external linkage and starts out as 0.
NOTE(review): the two macros are not defined in this part of the file;
presumably they live in the target headers -- confirm. */
int ia64_asm_output_label = 0;
/* Register names for ia64_expand_prologue.  The table holds the 96
   names "r32" through "r127" in ascending order, so entry I is the
   name of register number I + 32.  */
static const char * const ia64_reg_numbers[96] =
{
  "r32",  "r33",  "r34",  "r35",  "r36",  "r37",
  "r38",  "r39",  "r40",  "r41",  "r42",  "r43",
  "r44",  "r45",  "r46",  "r47",  "r48",  "r49",
  "r50",  "r51",  "r52",  "r53",  "r54",  "r55",
  "r56",  "r57",  "r58",  "r59",  "r60",  "r61",
  "r62",  "r63",  "r64",  "r65",  "r66",  "r67",
  "r68",  "r69",  "r70",  "r71",  "r72",  "r73",
  "r74",  "r75",  "r76",  "r77",  "r78",  "r79",
  "r80",  "r81",  "r82",  "r83",  "r84",  "r85",
  "r86",  "r87",  "r88",  "r89",  "r90",  "r91",
  "r92",  "r93",  "r94",  "r95",  "r96",  "r97",
  "r98",  "r99",  "r100", "r101", "r102", "r103",
  "r104", "r105", "r106", "r107", "r108", "r109",
  "r110", "r111", "r112", "r113", "r114", "r115",
  "r116", "r117", "r118", "r119", "r120", "r121",
  "r122", "r123", "r124", "r125", "r126", "r127"
};
/* Names "in0" through "in7", in ascending order.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{
  "in0", "in1", "in2", "in3",
  "in4", "in5", "in6", "in7"
};
/* Names "loc0" through "loc79", in ascending order.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{
  "loc0",  "loc1",  "loc2",  "loc3",  "loc4",
  "loc5",  "loc6",  "loc7",  "loc8",  "loc9",
  "loc10", "loc11", "loc12", "loc13", "loc14",
  "loc15", "loc16", "loc17", "loc18", "loc19",
  "loc20", "loc21", "loc22", "loc23", "loc24",
  "loc25", "loc26", "loc27", "loc28", "loc29",
  "loc30", "loc31", "loc32", "loc33", "loc34",
  "loc35", "loc36", "loc37", "loc38", "loc39",
  "loc40", "loc41", "loc42", "loc43", "loc44",
  "loc45", "loc46", "loc47", "loc48", "loc49",
  "loc50", "loc51", "loc52", "loc53", "loc54",
  "loc55", "loc56", "loc57", "loc58", "loc59",
  "loc60", "loc61", "loc62", "loc63", "loc64",
  "loc65", "loc66", "loc67", "loc68", "loc69",
  "loc70", "loc71", "loc72", "loc73", "loc74",
  "loc75", "loc76", "loc77", "loc78", "loc79"
};
/* Names "out0" through "out7", in ascending order.
   ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{
  "out0", "out1", "out2", "out3",
  "out4", "out5", "out6", "out7"
};
/* Variables which are this size or smaller are put in the sdata/sbss
sections. No initializer is given here, so the value starts as zero.
NOTE(review): the unit is presumably bytes and the value is presumably
set during option processing -- confirm against the code that writes it. */
unsigned int ia64_section_threshold;
/* The following variable is used by the DFA insn scheduler. The value is
TRUE if we do insn bundling instead of insn scheduling. It starts out
as 0, i.e. plain insn scheduling. */
int bundling_p = 0;
/* Identifies each of the frame-related registers tracked per function.
   The enumerators start at zero and are consecutive;
   number_of_ia64_frame_regs is the number of real entries and sizes
   the R array of struct ia64_frame_info as well as
   emitted_frame_related_regs.  */
enum ia64_frame_regs
{
  reg_fp = 0,
  reg_save_b0,
  reg_save_pr,
  reg_save_ar_pfs,
  reg_save_ar_unat,
  reg_save_ar_lc,
  reg_save_gp,

  /* Not a register: the count of the entries above.  Must stay last.  */
  number_of_ia64_frame_regs
};
/* Structure to be filled in by ia64_compute_frame_size with register
save masks and offsets for the current function. The one instance used
by this file is the static variable current_frame_info. */
struct ia64_frame_info
{
HOST_WIDE_INT total_size; /* size of the stack frame, not including
the caller's scratch area. */
HOST_WIDE_INT spill_cfa_off; /* top of the reg spill area from the cfa. */
HOST_WIDE_INT spill_size; /* size of the gr/br/fr spill area. */
HOST_WIDE_INT extra_spill_size; /* size of spill area for others. */
HARD_REG_SET mask; /* mask of saved registers. */
unsigned int gr_used_mask; /* mask of registers in use as gr spill
registers or long-term scratches. */
int n_spilled; /* number of spilled registers. */
int r[number_of_ia64_frame_regs]; /* Frame related registers; one slot
per enum ia64_frame_regs value. */
int n_input_regs; /* number of input registers used. */
int n_local_regs; /* number of local registers used. */
int n_output_regs; /* number of output registers used. */
int n_rotate_regs; /* number of rotating registers used. */
char need_regstk; /* true if a .regstk directive needed. */
char initialized; /* true if the data is finalized. */
};
/* Current frame information calculated by ia64_compute_frame_size. */
static struct ia64_frame_info current_frame_info;
/* The actual registers that are emitted. One slot per enum
ia64_frame_regs value, the same shape as current_frame_info.r. */
static int emitted_frame_related_regs[number_of_ia64_frame_regs];
/* Forward declarations of the static functions of this file. Many of
them are named by the TARGET_* hook macros further down, which is why
they have to be declared before their definitions. */

/* Scheduler support; most of these are installed as TARGET_SCHED_*
hooks below. */
static int ia64_first_cycle_multipass_dfa_lookahead (void);
static void ia64_dependencies_evaluation_hook (rtx_insn *, rtx_insn *);
static void ia64_init_dfa_pre_cycle_insn (void);
static rtx ia64_dfa_pre_cycle_insn (void);
static int ia64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static int ia64_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *);
static void ia64_h_i_d_extended (void);
static void * ia64_alloc_sched_context (void);
static void ia64_init_sched_context (void *, bool);
static void ia64_set_sched_context (void *);
static void ia64_clear_sched_context (void *);
static void ia64_free_sched_context (void *);
static int ia64_mode_to_int (machine_mode);
static void ia64_set_sched_flags (spec_info_t);
static ds_t ia64_get_insn_spec_ds (rtx_insn *);
static ds_t ia64_get_insn_checked_ds (rtx_insn *);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx_insn *, ds_t, rtx *);
static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx_insn *, rtx_insn *, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
/* Helpers defined later in the file (TLS, frame layout and register
spilling, judging by their names). */
static rtx gen_tls_get_addr (void);
static rtx gen_thread_pointer (void);
static int find_gr_spill (enum ia64_frame_regs, int);
static int next_scratch_gr_reg (void);
static void mark_reg_gr_used_mask (rtx, void *);
static void ia64_compute_frame_size (HOST_WIDE_INT);
static void setup_spill_pointers (int, rtx, HOST_WIDE_INT);
static void finish_spill_pointers (void);
static rtx spill_restore_mem (rtx, HOST_WIDE_INT);
static void do_spill (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT, rtx);
static void do_restore (rtx (*)(rtx, rtx, rtx), rtx, HOST_WIDE_INT);
static rtx gen_movdi_x (rtx, rtx, rtx);
static rtx gen_fr_spill_x (rtx, rtx, rtx);
static rtx gen_fr_restore_x (rtx, rtx, rtx);
/* Option handling, argument passing, return values and costs; most are
installed as target hooks below. */
static void ia64_option_override (void);
static bool ia64_can_eliminate (const int, const int);
static machine_mode hfa_element_mode (const_tree, bool);
static void ia64_setup_incoming_varargs (cumulative_args_t, machine_mode,
tree, int *, int);
static int ia64_arg_partial_bytes (cumulative_args_t, machine_mode,
tree, bool);
static rtx ia64_function_arg_1 (cumulative_args_t, machine_mode,
const_tree, bool, bool);
static rtx ia64_function_arg (cumulative_args_t, machine_mode,
const_tree, bool);
static rtx ia64_function_incoming_arg (cumulative_args_t,
machine_mode, const_tree, bool);
static void ia64_function_arg_advance (cumulative_args_t, machine_mode,
const_tree, bool);
static unsigned int ia64_function_arg_boundary (machine_mode,
const_tree);
static bool ia64_function_ok_for_sibcall (tree, tree);
static bool ia64_return_in_memory (const_tree, const_tree);
static rtx ia64_function_value (const_tree, const_tree, bool);
static rtx ia64_libcall_value (machine_mode, const_rtx);
static bool ia64_function_value_regno_p (const unsigned int);
static int ia64_register_move_cost (machine_mode, reg_class_t,
reg_class_t);
static int ia64_memory_move_cost (machine_mode mode, reg_class_t,
bool);
static bool ia64_rtx_costs (rtx, int, int, int, int *, bool);
static int ia64_unspec_may_trap_p (const_rtx, unsigned);
static void fix_range (const char *);
static struct machine_function * ia64_init_machine_status (void);
static void emit_insn_group_barriers (FILE *);
static void emit_all_insn_group_barriers (FILE *);
static void final_emit_insn_group_barriers (FILE *);
static void emit_predicate_relation_info (void);
static void ia64_reorg (void);
static bool ia64_in_small_data_p (const_tree);
static void process_epilogue (FILE *, rtx, bool, bool);
/* Assembly output, operand printing and the remaining scheduler
entry points. */
static bool ia64_assemble_integer (rtx, unsigned int, int);
static void ia64_output_function_prologue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void ia64_output_function_end_prologue (FILE *);
static void ia64_print_operand (FILE *, rtx, int);
static void ia64_print_operand_address (FILE *, rtx);
static bool ia64_print_operand_punct_valid_p (unsigned char code);
static int ia64_issue_rate (void);
static int ia64_adjust_cost_2 (rtx_insn *, int, rtx_insn *, int, dw_t);
static void ia64_sched_init (FILE *, int, int);
static void ia64_sched_init_global (FILE *, int, int);
static void ia64_sched_finish_global (FILE *, int);
static void ia64_sched_finish (FILE *, int);
static int ia64_dfa_sched_reorder (FILE *, int, rtx_insn **, int *, int, int);
static int ia64_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int ia64_sched_reorder2 (FILE *, int, rtx_insn **, int *, int);
static int ia64_variable_issue (FILE *, int, rtx_insn *, int);
static void ia64_asm_unwind_emit (FILE *, rtx_insn *);
static void ia64_asm_emit_except_personality (rtx);
static void ia64_asm_init_sections (void);
static enum unwind_info_type ia64_debug_unwind_info (void);
/* Insn bundling machinery. struct bundle_state is only named here; its
definition is not in this part of the file. */
static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
static int try_issue_nops (struct bundle_state *, int);
static int try_issue_insn (struct bundle_state *, rtx);
static void issue_nops_and_insn (struct bundle_state *, int, rtx_insn *,
int, int);
static int get_max_pos (state_t);
static int get_template (state_t, int);
static rtx_insn *get_next_important_insn (rtx_insn *, rtx_insn *);
static bool important_for_bundling_p (rtx_insn *);
static bool unknown_for_bundling_p (rtx_insn *);
static void bundling (FILE *, int, rtx_insn *, rtx_insn *);
static void ia64_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree);
static void ia64_file_start (void);
static void ia64_globalize_decl_name (FILE *, tree);
/* The declarations below that carry ATTRIBUTE_UNUSED are, presumably,
only referenced in some target configurations (NOTE(review): confirm);
the attribute keeps unused-function warnings quiet otherwise. */
static int ia64_hpux_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static int ia64_reloc_rw_mask (void) ATTRIBUTE_UNUSED;
static section *ia64_select_rtx_section (machine_mode, rtx,
unsigned HOST_WIDE_INT);
static void ia64_output_dwarf_dtprel (FILE *, int, rtx)
ATTRIBUTE_UNUSED;
static unsigned int ia64_section_type_flags (tree, const char *, int);
static void ia64_init_libfuncs (void)
ATTRIBUTE_UNUSED;
static void ia64_hpux_init_libfuncs (void)
ATTRIBUTE_UNUSED;
static void ia64_sysv4_init_libfuncs (void)
ATTRIBUTE_UNUSED;
static void ia64_vms_init_libfuncs (void)
ATTRIBUTE_UNUSED;
static void ia64_soft_fp_init_libfuncs (void)
ATTRIBUTE_UNUSED;
static bool ia64_vms_valid_pointer_mode (machine_mode mode)
ATTRIBUTE_UNUSED;
static tree ia64_vms_common_object_attribute (tree *, tree, tree, int, bool *)
ATTRIBUTE_UNUSED;
/* Attribute handlers and miscellaneous target hooks. */
static bool ia64_attribute_takes_identifier_p (const_tree);
static tree ia64_handle_model_attribute (tree *, tree, tree, int, bool *);
static tree ia64_handle_version_id_attribute (tree *, tree, tree, int, bool *);
static void ia64_encode_section_info (tree, rtx, int);
static rtx ia64_struct_value_rtx (tree, int);
static tree ia64_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool ia64_scalar_mode_supported_p (machine_mode mode);
static bool ia64_vector_mode_supported_p (machine_mode mode);
static bool ia64_libgcc_floating_mode_supported_p (machine_mode mode);
static bool ia64_legitimate_constant_p (machine_mode, rtx);
static bool ia64_legitimate_address_p (machine_mode, rtx, bool);
static bool ia64_cannot_force_const_mem (machine_mode, rtx);
static const char *ia64_mangle_type (const_tree);
static const char *ia64_invalid_conversion (const_tree, const_tree);
static const char *ia64_invalid_unary_op (int, const_tree);
static const char *ia64_invalid_binary_op (int, const_tree, const_tree);
static machine_mode ia64_c_mode_for_suffix (char);
static void ia64_trampoline_init (rtx, tree, rtx);
static void ia64_override_options_after_change (void);
static bool ia64_member_type_forces_blk (const_tree, machine_mode);
static tree ia64_builtin_decl (unsigned, bool);
static reg_class_t ia64_preferred_reload_class (rtx, reg_class_t);
static machine_mode ia64_get_reg_raw_mode (int regno);
static section * ia64_hpux_function_section (tree, enum node_frequency,
bool, bool);
static bool ia64_vectorize_vec_perm_const_ok (machine_mode vmode,
const unsigned char *sel);
/* Number of entries in the PERM array of struct expand_vec_perm_d. */
#define MAX_VECT_LEN 8
/* Argument block taken by ia64_expand_vec_perm_const_1.
NOTE(review): the per-field notes below are inferred from the field
names only; confirm them against the code that fills in and reads the
structure. */
struct expand_vec_perm_d
{
rtx target, op0, op1; /* presumably the result and the two inputs. */
unsigned char perm[MAX_VECT_LEN]; /* up to MAX_VECT_LEN selector entries. */
machine_mode vmode; /* presumably the vector mode being permuted. */
unsigned char nelt; /* presumably how many PERM entries are in use. */
bool one_operand_p; /* presumably set when there is a single input. */
bool testing_p; /* presumably set when only checking feasibility. */
};
static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
/* Table of valid machine attributes. Installed as TARGET_ATTRIBUTE_TABLE
below. The "common_object" entry exists only when TARGET_ABI_OPEN_VMS is
nonzero, and the all-NULL entry terminates the table. "syscall_linkage"
has no handler function. */
static const struct attribute_spec ia64_attribute_table[] =
{
/* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
affects_type_identity } */
{ "syscall_linkage", 0, 0, false, true, true, NULL, false },
{ "model", 1, 1, true, false, false, ia64_handle_model_attribute,
false },
#if TARGET_ABI_OPEN_VMS
{ "common_object", 1, 1, true, false, false,
ia64_vms_common_object_attribute, false },
#endif
{ "version_id", 1, 1, true, false, false,
ia64_handle_version_id_attribute, false },
{ NULL, 0, 0, false, false, false, NULL, false }
};
/* Initialize the GCC target structure. */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE ia64_attribute_table
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS ia64_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN ia64_expand_builtin
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL ia64_builtin_decl
#undef TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tdata1\t"
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tdata2\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tdata4\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\tdata8\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\tdata2.ua\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\tdata4.ua\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\tdata8.ua\t"
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER ia64_assemble_integer
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE ia64_option_override
#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE ia64_output_function_prologue
#undef TARGET_ASM_FUNCTION_END_PROLOGUE
#define TARGET_ASM_FUNCTION_END_PROLOGUE ia64_output_function_end_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE ia64_output_function_epilogue
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND ia64_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS ia64_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P ia64_print_operand_punct_valid_p
#undef TARGET_IN_SMALL_DATA_P
#define TARGET_IN_SMALL_DATA_P ia64_in_small_data_p
/* Instruction scheduler hooks. Every TARGET_SCHED_* macro in this group
is bound to a static function declared near the top of this file. */
#undef TARGET_SCHED_ADJUST_COST_2
#define TARGET_SCHED_ADJUST_COST_2 ia64_adjust_cost_2
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE ia64_issue_rate
#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE ia64_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT ia64_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH ia64_sched_finish
#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL ia64_sched_init_global
#undef TARGET_SCHED_FINISH_GLOBAL
#define TARGET_SCHED_FINISH_GLOBAL ia64_sched_finish_global
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER ia64_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 ia64_sched_reorder2
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK ia64_dependencies_evaluation_hook
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ia64_first_cycle_multipass_dfa_lookahead
#undef TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_INIT_DFA_PRE_CYCLE_INSN ia64_init_dfa_pre_cycle_insn
#undef TARGET_SCHED_DFA_PRE_CYCLE_INSN
#define TARGET_SCHED_DFA_PRE_CYCLE_INSN ia64_dfa_pre_cycle_insn
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD\
ia64_first_cycle_multipass_dfa_lookahead_guard
#undef TARGET_SCHED_DFA_NEW_CYCLE
#define TARGET_SCHED_DFA_NEW_CYCLE ia64_dfa_new_cycle
#undef TARGET_SCHED_H_I_D_EXTENDED
#define TARGET_SCHED_H_I_D_EXTENDED ia64_h_i_d_extended
/* Scheduling context management. */
#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT ia64_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT ia64_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT ia64_set_sched_context
#undef TARGET_SCHED_CLEAR_SCHED_CONTEXT
#define TARGET_SCHED_CLEAR_SCHED_CONTEXT ia64_clear_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT ia64_free_sched_context
/* Speculation support. */
#undef TARGET_SCHED_SET_SCHED_FLAGS
#define TARGET_SCHED_SET_SCHED_FLAGS ia64_set_sched_flags
#undef TARGET_SCHED_GET_INSN_SPEC_DS
#define TARGET_SCHED_GET_INSN_SPEC_DS ia64_get_insn_spec_ds
#undef TARGET_SCHED_GET_INSN_CHECKED_DS
#define TARGET_SCHED_GET_INSN_CHECKED_DS ia64_get_insn_checked_ds
#undef TARGET_SCHED_SPECULATE_INSN
#define TARGET_SCHED_SPECULATE_INSN ia64_speculate_insn
#undef TARGET_SCHED_NEEDS_BLOCK_P
#define TARGET_SCHED_NEEDS_BLOCK_P ia64_needs_block_p
#undef TARGET_SCHED_GEN_SPEC_CHECK
#define TARGET_SCHED_GEN_SPEC_CHECK ia64_gen_spec_check
#undef TARGET_SCHED_SKIP_RTX_P
#define TARGET_SCHED_SKIP_RTX_P ia64_skip_rtx_p
/* Calls and argument passing. */
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL ia64_function_ok_for_sibcall
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES ia64_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG ia64_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG ia64_function_incoming_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE ia64_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY ia64_function_arg_boundary
/* Thunks, file start and symbol globalization. */
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK ia64_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START ia64_file_start
#undef TARGET_ASM_GLOBALIZE_DECL_NAME
#define TARGET_ASM_GLOBALIZE_DECL_NAME ia64_globalize_decl_name
/* Cost model. TARGET_ADDRESS_COST uses a generic hook rather than an
ia64-specific function. */
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST ia64_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST ia64_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS ia64_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_UNSPEC_MAY_TRAP_P
#define TARGET_UNSPEC_MAY_TRAP_P ia64_unspec_may_trap_p
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG ia64_reorg
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO ia64_encode_section_info
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS ia64_section_type_flags
/* The DTP-relative DWARF output hook is installed only when HAVE_AS_TLS
is defined. */
#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL ia64_output_dwarf_dtprel
#endif
/* ??? Investigate. The override below is compiled out by #if 0. */
#if 0
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
#endif
/* Return values and varargs. */
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE ia64_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE ia64_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P ia64_function_value_regno_p
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX ia64_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY ia64_return_in_memory
#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS ia64_setup_incoming_varargs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
/* Both raw-mode hooks share one implementation. */
#undef TARGET_GET_RAW_RESULT_MODE
#define TARGET_GET_RAW_RESULT_MODE ia64_get_reg_raw_mode
#undef TARGET_GET_RAW_ARG_MODE
#define TARGET_GET_RAW_ARG_MODE ia64_get_reg_raw_mode
#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK ia64_member_type_forces_blk
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR ia64_gimplify_va_arg
/* Unwind information and output sections. */
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT ia64_asm_unwind_emit
#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY ia64_asm_emit_except_personality
#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS ia64_asm_init_sections
#undef TARGET_DEBUG_UNWIND_INFO
#define TARGET_DEBUG_UNWIND_INFO ia64_debug_unwind_info
/* Supported machine modes. */
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P ia64_scalar_mode_supported_p
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P ia64_vector_mode_supported_p
#undef TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P
#define TARGET_LIBGCC_FLOATING_MODE_SUPPORTED_P \
ia64_libgcc_floating_mode_supported_p
/* ia64 architecture manual 4.4.7: ... reads, writes, and flushes may occur
in an order different from the specified program order. */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true
/* Constants and addresses. */
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ia64_legitimate_constant_p
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
/* Front-end facing hooks: mangling and conversion/operator diagnostics. */
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ia64_mangle_type
#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION ia64_invalid_conversion
#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP ia64_invalid_unary_op
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP ia64_invalid_binary_op
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX ia64_c_mode_for_suffix
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE ia64_can_eliminate
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT ia64_trampoline_init
/* Doloop hooks; both use generic implementations. */
#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
#undef TARGET_INVALID_WITHIN_DOLOOP
#define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ia64_override_options_after_change
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS ia64_preferred_reload_class
#undef TARGET_DELAY_SCHED2
#define TARGET_DELAY_SCHED2 true
/* Variable tracking should be run after all optimizations which
change order of insns. It also needs a valid CFG. */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true
#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK ia64_vectorize_vec_perm_const_ok
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P ia64_attribute_takes_identifier_p
/* The target hook vector itself, built from the TARGET_* macros in
effect at this point. */
struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P.  Return true when the
   target attribute named by ATTR_ID takes a plain identifier as its
   argument, in which case the front end must not look that identifier
   up.  That holds for "model" and, when TARGET_ABI_OPEN_VMS is
   nonzero, for "common_object" as well.  */

static bool
ia64_attribute_takes_identifier_p (const_tree attr_id)
{
  bool takes_identifier = is_attribute_p ("model", attr_id);

#if TARGET_ABI_OPEN_VMS
  if (!takes_identifier)
    takes_identifier = is_attribute_p ("common_object", attr_id);
#endif

  return takes_identifier;
}
/* Address area a variable can be assigned to through the "model"
   attribute.  */
typedef enum
{
  /* Normal address area.  */
  ADDR_AREA_NORMAL = 0,

  /* Addressable by "addl" (-2MB < addr < 2MB).  */
  ADDR_AREA_SMALL = 1
} ia64_addr_area;
/* Identifier nodes for the two accepted spellings of the small address
   area: "small" and "__small__".  Both are created by init_idents.  */
static GTY(()) tree small_ident1;
static GTY(()) tree small_ident2;

/* Create SMALL_IDENT1 and SMALL_IDENT2 on the first call.  Later calls
   find SMALL_IDENT1 already set and do nothing.  */

static void
init_idents (void)
{
  if (small_ident1 != 0)
    return;

  small_ident1 = get_identifier ("small");
  small_ident2 = get_identifier ("__small__");
}
/* Retrieve the address area that has been chosen for the given decl.
   DECL is in ADDR_AREA_SMALL when it carries a "model" attribute whose
   argument is one of the two "small" identifiers; in every other case
   the result is ADDR_AREA_NORMAL.  */

static ia64_addr_area
ia64_get_addr_area (tree decl)
{
  tree model_attr = lookup_attribute ("model", DECL_ATTRIBUTES (decl));
  tree id;

  if (!model_attr)
    return ADDR_AREA_NORMAL;

  /* The identifiers are only needed once an attribute was found.  */
  init_idents ();

  id = TREE_VALUE (TREE_VALUE (model_attr));
  if (id != small_ident1 && id != small_ident2)
    return ADDR_AREA_NORMAL;

  return ADDR_AREA_SMALL;
}
/* Handler for the "model" attribute (see ia64_attribute_table).

   NODE points to the entity the attribute is applied to, NAME is the
   attribute name used in diagnostics and ARGS is the argument list,
   whose first value must be one of the two "small" identifiers.
   *NO_ADD_ATTRS is set to true whenever the attribute is rejected:
   for an invalid argument (warning), for a non-static variable whose
   context is a function (error), for a variable already assigned to a
   different address area (error), for a function (error) and for any
   other kind of node (warning).  Always returns NULL_TREE.

   Note that an invalid argument does not stop processing; the checks
   on the declaration below are still performed.  */

static tree
ia64_handle_model_attribute (tree *node, tree name, tree args,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  tree decl = *node;
  tree arg;
  ia64_addr_area addr_area;
  ia64_addr_area area;

  init_idents ();

  /* Decode the requested address area.  */
  arg = TREE_VALUE (args);
  if (arg != small_ident1 && arg != small_ident2)
    {
      warning (OPT_Wattributes, "invalid argument of %qE attribute",
	       name);
      *no_add_attrs = true;
      addr_area = ADDR_AREA_NORMAL;
    }
  else
    addr_area = ADDR_AREA_SMALL;

  if (TREE_CODE (decl) == VAR_DECL)
    {
      /* Automatic variables have no address area.  */
      if (DECL_CONTEXT (decl)
	  && TREE_CODE (DECL_CONTEXT (decl)) == FUNCTION_DECL
	  && !TREE_STATIC (decl))
	{
	  error_at (DECL_SOURCE_LOCATION (decl),
		    "an address area attribute cannot be specified for "
		    "local variables");
	  *no_add_attrs = true;
	}

      /* An area recorded by an earlier declaration must agree with the
	 one requested now.  */
      area = ia64_get_addr_area (decl);
      if (area != ADDR_AREA_NORMAL && addr_area != area)
	{
	  error ("address area of %q+D conflicts with previous "
		 "declaration", decl);
	  *no_add_attrs = true;
	}
    }
  else if (TREE_CODE (decl) == FUNCTION_DECL)
    {
      error_at (DECL_SOURCE_LOCATION (decl),
		"address area attribute cannot be specified for "
		"functions");
      *no_add_attrs = true;
    }
  else
    {
      warning (OPT_Wattributes, "%qE attribute ignored",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object which
   enables the shared use of variables stored in overlaid linker areas
   corresponding to the use of Fortran COMMON.

   Handler for the "common_object" attribute.  *NODE must be a
   declaration; it is marked DECL_COMMON.  The first value of ARGS must
   be an IDENTIFIER_NODE or a STRING_CST, otherwise an error mentioning
   NAME is reported and *NO_ADD_ATTRS is set.  Always returns
   NULL_TREE.  Note that DECL_COMMON is set before the argument is
   checked, so it is set even when the attribute is rejected.  */

static tree
ia64_vms_common_object_attribute (tree *node, tree name, tree args,
				  int flags ATTRIBUTE_UNUSED,
				  bool *no_add_attrs)
{
  tree decl = *node;
  tree id;

  gcc_assert (DECL_P (decl));
  DECL_COMMON (decl) = 1;

  id = TREE_VALUE (args);
  if (TREE_CODE (id) == IDENTIFIER_NODE || TREE_CODE (id) == STRING_CST)
    return NULL_TREE;

  error ("%qE attribute requires a string constant argument", name);
  *no_add_attrs = true;
  return NULL_TREE;
}
/* Part of the low level implementation of DEC Ada pragma Common_Object.

   Write to FILE the assembler directive that defines DECL as a common
   object called NAME, SIZE units large and aligned to ALIGN bits (the
   value printed is ALIGN / BITS_PER_UNIT).

   If DECL carries a "common_object" attribute the line starts with a
   .vms_common directive naming the attribute's argument (an
   IDENTIFIER_NODE or a STRING_CST; anything else aborts); otherwise
   it starts with COMMON_ASM_OP.  */

void
ia64_vms_output_aligned_decl_common (FILE *file, tree decl, const char *name,
				     unsigned HOST_WIDE_INT size,
				     unsigned int align)
{
  tree attr = DECL_ATTRIBUTES (decl);

  if (attr)
    attr = lookup_attribute ("common_object", attr);

  if (attr)
    {
      tree id = TREE_VALUE (TREE_VALUE (attr));
      /* Kept distinct from the NAME parameter, which is still needed
	 for assemble_name below.  */
      const char *common_name;

      if (TREE_CODE (id) == IDENTIFIER_NODE)
	common_name = IDENTIFIER_POINTER (id);
      else if (TREE_CODE (id) == STRING_CST)
	common_name = TREE_STRING_POINTER (id);
      else
	abort ();

      fprintf (file, "\t.vms_common\t\"%s\",", common_name);
    }
  else
    fprintf (file, "%s", COMMON_ASM_OP);

  /* Code from elfos.h.  */
  assemble_name (file, name);
  /* The spaces around the macro matter: in C++11 a string literal
     immediately followed by an identifier is lexed as a literal with a
     user-defined suffix.  */
  fprintf (file, "," HOST_WIDE_INT_PRINT_UNSIGNED ",%u",
	   size, align / BITS_PER_UNIT);

  fputc ('\n', file);
}
/* Record the address area of DECL in the flags of SYMBOL:
   SYMBOL_FLAG_SMALL_ADDR is added for ADDR_AREA_SMALL and the flags
   are stored back unchanged for ADDR_AREA_NORMAL.  Any other area is
   a bug.  */

static void
ia64_encode_addr_area (tree decl, rtx symbol)
{
  int flags = SYMBOL_REF_FLAGS (symbol);
  int area = ia64_get_addr_area (decl);

  if (area == ADDR_AREA_SMALL)
    flags |= SYMBOL_FLAG_SMALL_ADDR;
  else if (area != ADDR_AREA_NORMAL)
    gcc_unreachable ();

  SYMBOL_REF_FLAGS (symbol) = flags;
}
/* Implement TARGET_ENCODE_SECTION_INFO.  After the default processing,
   encode the address area of DECL in its symbol, but only when DECL is
   a static or external variable whose DECL_RTL is a MEM of a
   SYMBOL_REF.  The tests look at DECL_RTL (DECL), while the symbol
   that gets updated is the address inside RTL.  */

static void
ia64_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  if (TREE_CODE (decl) != VAR_DECL)
    return;

  /* Careful not to prod global register variables.  */
  if (GET_CODE (DECL_RTL (decl)) != MEM
      || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF)
    return;

  if (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
    ia64_encode_addr_area (decl, XEXP (rtl, 0));
}
/* Return 1 if the operands of a move are ok, i.e. if SRC may be moved
   into DST.  Anything may be moved into a non-memory DST.  A store to
   memory needs a source that is not itself memory and is either a
   register or one of a few constants.  */

int
ia64_move_ok (rtx dst, rtx src)
{
  int ok;

  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    ok = 1;
  else if (GET_CODE (src) == MEM)
    ok = 0;
  else if (register_operand (src, VOIDmode))
    ok = 1;
  /* Otherwise, this must be a constant, and that either 0 or 0.0 or 1.0.  */
  else if (INTEGRAL_MODE_P (GET_MODE (dst)))
    ok = (src == const0_rtx);
  else
    ok = satisfies_constraint_G (src);

  return ok;
}
/* Return 1 if the operands are ok for a floating point load pair.
   DST must be a register such that both it and the following register
   number are FP registers.  SRC must be a non-volatile MEM whose
   address is a plain register, a POST_INC, or a POST_MODIFY whose
   adjustment is a CONST_INT equal to the size of SRC's mode.
   POST_DEC addresses are rejected and any other address form
   aborts.  */

int
ia64_load_pair_ok (rtx dst, rtx src)
{
  rtx addr;
  rtx adjust;

  /* ??? There is a thinko in the implementation of the "x" constraint and the
     FP_REGS class.  The constraint will also reject (reg f30:TI) so we must
     also return false for it.  */
  if (GET_CODE (dst) != REG)
    return 0;
  if (!FP_REGNO_P (REGNO (dst)) || !FP_REGNO_P (REGNO (dst) + 1))
    return 0;

  if (GET_CODE (src) != MEM)
    return 0;
  if (MEM_VOLATILE_P (src))
    return 0;

  addr = XEXP (src, 0);
  if (GET_CODE (addr) == REG || GET_CODE (addr) == POST_INC)
    return 1;
  if (GET_CODE (addr) == POST_DEC)
    return 0;
  if (GET_CODE (addr) != POST_MODIFY)
    abort ();

  /* For POST_MODIFY the increment has to match the access size.  */
  adjust = XEXP (XEXP (addr, 1), 1);
  if (GET_CODE (adjust) == CONST_INT
      && INTVAL (adjust) == GET_MODE_SIZE (GET_MODE (src)))
    return 1;
  return 0;
}
/* Return nonzero when basereg_operand gives different answers for OP1
   and OP2 (each tested in its own mode), i.e. when exactly one of the
   two operands is accepted by that predicate.  */

int
addp4_optimize_ok (rtx op1, rtx op2)
{
  int op1_is_basereg = basereg_operand (op1, GET_MODE (op1));
  int op2_is_basereg = basereg_operand (op2, GET_MODE (op2));

  return op1_is_basereg != op2_is_basereg;
}
/* Check if ROP is a mask suitable for use with RSHIFT in a dep.z
   instruction.  Return the length of the field, or <= 0 on failure.
   Both operands are read with INTVAL.  No range check is made on the
   shift count here.  */

int
ia64_depz_field_mask (rtx rop, rtx rshift)
{
  unsigned HOST_WIDE_INT mask = INTVAL (rop);
  unsigned HOST_WIDE_INT count = INTVAL (rshift);

  /* Get rid of the zero bits we're shifting in.  What remains must be
     a solid block of 1's at bit 0, so adding one gives a power of two
     whose logarithm is the field length.  */
  return exact_log2 ((mask >> count) + 1);
}
/* Return the TLS model to use for ADDR.  ADDR is either a SYMBOL_REF,
   or a CONST wrapping a PLUS whose first operand is a SYMBOL_REF; the
   model is read from that symbol.  For anything else the result is
   TLS_MODEL_NONE.  */

static enum tls_model
tls_symbolic_operand_type (rtx addr)
{
  if (GET_CODE (addr) == SYMBOL_REF)
    return SYMBOL_REF_TLS_MODEL (addr);

  if (GET_CODE (addr) != CONST)
    return TLS_MODEL_NONE;

  /* Look through (const (plus (symbol_ref ...) ...)).  */
  if (GET_CODE (XEXP (addr, 0)) != PLUS
      || GET_CODE (XEXP (XEXP (addr, 0), 0)) != SYMBOL_REF)
    return TLS_MODEL_NONE;

  return SYMBOL_REF_TLS_MODEL (XEXP (XEXP (addr, 0), 0));
}
/* Returns true if REG (assumed to be a `reg' RTX) is valid for use
   as a base register.  With STRICT set the decision is left to
   REGNO_OK_FOR_BASE_P; otherwise any general register and any
   register that is not a hard register is accepted.  */

static inline bool
ia64_reg_ok_for_base_p (const_rtx reg, bool strict)
{
  if (strict)
    return REGNO_OK_FOR_BASE_P (REGNO (reg));

  return GENERAL_REGNO_P (REGNO (reg)) || !HARD_REGISTER_P (reg);
}
/* Return true if REG is a register, or a SUBREG whose operand 0 is a
   register, and that register passes ia64_reg_ok_for_base_p under
   STRICT.  */

static bool
ia64_legitimate_address_reg (const_rtx reg, bool strict)
{
  const_rtx inner = reg;

  /* Look through one level of SUBREG.  */
  if (GET_CODE (reg) == SUBREG)
    inner = XEXP (reg, 0);

  return REG_P (inner) && ia64_reg_ok_for_base_p (inner, strict);
}
/* Return true if DISP has the form (plus REG X), where X is either a
   register acceptable to ia64_legitimate_address_reg under STRICT or
   a CONST_INT in the range [-256, 255].  */

static bool
ia64_legitimate_address_disp (const_rtx reg, const_rtx disp, bool strict)
{
  if (GET_CODE (disp) != PLUS)
    return false;

  /* The first operand of the sum has to be the base register itself.  */
  if (!rtx_equal_p (reg, XEXP (disp, 0)))
    return false;

  if (ia64_legitimate_address_reg (XEXP (disp, 1), strict))
    return true;

  return (CONST_INT_P (XEXP (disp, 1))
          && IN_RANGE (INTVAL (XEXP (disp, 1)), -256, 255));
}
/* Implement TARGET_LEGITIMATE_ADDRESS_P.  Accepted forms of X are a
   base register (possibly inside a SUBREG), a POST_INC or POST_DEC of
   a base register, and a POST_MODIFY of a base register with a
   displacement accepted by ia64_legitimate_address_disp.  The
   auto-modify forms are refused for arg_pointer_rtx.  */

static bool
ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED,
                           rtx x, bool strict)
{
  if (ia64_legitimate_address_reg (x, strict))
    return true;

  switch (GET_CODE (x))
    {
    case POST_INC:
    case POST_DEC:
      return (ia64_legitimate_address_reg (XEXP (x, 0), strict)
              && XEXP (x, 0) != arg_pointer_rtx);

    case POST_MODIFY:
      return (ia64_legitimate_address_reg (XEXP (x, 0), strict)
              && XEXP (x, 0) != arg_pointer_rtx
              && ia64_legitimate_address_disp (XEXP (x, 0), XEXP (x, 1),
                                               strict));

    default:
      return false;
    }
}
/* Return true if X is a constant that is valid for some immediate
   field in an instruction.  MODE is the mode in which X is used.  */

static bool
ia64_legitimate_constant_p (machine_mode mode, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
    case LABEL_REF:
      return true;

    case CONST_DOUBLE:
      return (GET_MODE (x) == VOIDmode
              || mode == SFmode
              || mode == DFmode
              || satisfies_constraint_G (x));

    case CONST:
    case SYMBOL_REF:
      {
        HOST_WIDE_INT offset = 0;
        rtx sym = x;

        /* ??? Short term workaround for PR 28490.  We must make the
           code here match the code in ia64_expand_move and
           move_operand, even though they are both technically wrong.  */
        if (tls_symbolic_operand_type (x) != 0)
          return false;

        /* Separate a (const (plus SYM (const_int N))) into SYM and N.  */
        if (GET_CODE (sym) == CONST
            && GET_CODE (XEXP (sym, 0)) == PLUS
            && GET_CODE (XEXP (XEXP (sym, 0), 1)) == CONST_INT)
          {
            offset = INTVAL (XEXP (XEXP (sym, 0), 1));
            sym = XEXP (XEXP (sym, 0), 0);
          }

        if (any_offset_symbol_operand (sym, mode)
            || function_operand (sym, mode))
          return true;

        /* For these symbols the low 14 bits of the offset must be
           clear.  */
        if (aligned_offset_symbol_operand (sym, mode))
          return (offset & 0x3fff) == 0;

        return false;
      }

    case CONST_VECTOR:
      if (mode == V2SFmode)
        return satisfies_constraint_Y (x);

      return (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
              && GET_MODE_SIZE (mode) <= 8);

    default:
      return false;
    }
}
/* Return true if constant X of mode MODE must not be forced into
   memory.  That is the case for every RFmode constant and for TLS
   addresses, which must not get spilled to memory.  */

static bool
ia64_cannot_force_const_mem (machine_mode mode, rtx x)
{
  return mode == RFmode || tls_symbolic_operand_type (x) != 0;
}
/* Expand a symbolic constant load: emit insns that load the address SRC
into the register DEST. Returns false, having emitted nothing, when
TARGET_NO_PIC is set or SRC satisfies small_addr_symbolic_operand;
otherwise a load sequence is emitted and true is returned. */
bool
ia64_expand_load_address (rtx dest, rtx src)
{
gcc_assert (GET_CODE (dest) == REG);
/* ILP32 mode still loads 64-bits of data from the GOT. This avoids
having to pointer-extend the value afterward. Other forms of address
computation below are also more natural to compute as 64-bit quantities.
If we've been given an SImode destination register, change it. */
if (GET_MODE (dest) != Pmode)
dest = gen_rtx_REG_offset (dest, Pmode, REGNO (dest),
byte_lowpart_offset (Pmode, GET_MODE (dest)));
/* These two cases are not expanded here; the caller is told so through
the false return value. */
if (TARGET_NO_PIC)
return false;
if (small_addr_symbolic_operand (src, VOIDmode))
return false;
/* Pick the load pattern from the target options and the kind of symbol. */
if (TARGET_AUTO_PIC)
emit_insn (gen_load_gprel64 (dest, src));
else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (src))
emit_insn (gen_load_fptr (dest, src));
else if (sdata_symbolic_operand (src, VOIDmode))
emit_insn (gen_load_gprel (dest, src));
else if (local_symbolic_operand64 (src, VOIDmode))
{
/* We want to use @gprel rather than @ltoff relocations for local
symbols:
- @gprel does not require dynamic linker
- and does not use .sdata section
https://gcc.gnu.org/bugzilla/60465 */
emit_insn (gen_load_gprel64 (dest, src));
}
else
{
HOST_WIDE_INT addend = 0;
rtx tmp;
/* We did split constant offsets in ia64_expand_move, and we did try
to keep them split in move_operand, but we also allowed reload to
rematerialize arbitrary constants rather than spill the value to
the stack and reload it. So we have to be prepared here to split
them apart again. */
if (GET_CODE (src) == CONST)
{
HOST_WIDE_INT hi, lo;
/* NOTE(review): this reads SRC as (const (plus X (const_int N)))
without checking the inner codes -- confirm callers guarantee it. */
hi = INTVAL (XEXP (XEXP (src, 0), 1));
/* LO is the low 14 bits of the offset, sign-extended; HI is the
remainder, so HI + LO equals the original offset. */
lo = ((hi & 0x3fff) ^ 0x2000) - 0x2000;
hi = hi - lo;
/* When LO is zero SRC is left untouched, full offset included. */
if (lo != 0)
{
addend = lo;
src = plus_constant (Pmode, XEXP (XEXP (src, 0), 0), hi);
}
}
/* First insn: DEST = HIGH (SRC) + pic_offset_table_rtx. */
tmp = gen_rtx_HIGH (Pmode, src);
tmp = gen_rtx_PLUS (Pmode, tmp, pic_offset_table_rtx);
emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
/* Second insn: DEST = LO_SUM of a constant MEM addressed by DEST
and SRC. */
tmp = gen_rtx_LO_SUM (Pmode, gen_const_mem (Pmode, dest), src);
emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
/* Finally add back the low part of the offset that was split off. */
if (addend)
{
tmp = gen_rtx_PLUS (Pmode, dest, GEN_INT (addend));
emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
}
}
return true;
}
/* Cached libfunc rtx for "__tls_get_addr"; created on first use.  */
static GTY(()) rtx gen_tls_tga;

/* Return the libfunc rtx for "__tls_get_addr", creating and caching it
   the first time it is requested.  */

static rtx
gen_tls_get_addr (void)
{
  if (gen_tls_tga)
    return gen_tls_tga;

  gen_tls_tga = init_one_libfunc ("__tls_get_addr");
  return gen_tls_tga;
}
/* Cached Pmode rtx for hard register 13; created on first use.  */
static GTY(()) rtx thread_pointer_rtx;

/* Return the rtx for the thread pointer, which is hard register 13 in
   Pmode.  The rtx is built once and reused afterwards.  */

static rtx
gen_thread_pointer (void)
{
  if (thread_pointer_rtx)
    return thread_pointer_rtx;

  thread_pointer_rtx = gen_rtx_REG (Pmode, 13);
  return thread_pointer_rtx;
}
/* Emit insns that compute the address of the TLS symbol OP1 plus ADDEND
according to the TLS model TLS_KIND, with OP0 as the requested
destination. ORIG_OP1 replaces OP1 in the local-exec case, where ADDEND
is also dropped (presumably because ORIG_OP1 still carries the offset
-- confirm against the caller).
Returns NULL_RTX when the final value ended up in the OP0 that was passed
in. Otherwise returns the rtx holding the value, passed through
gen_lowpart when the original OP0 is not in Pmode. */
static rtx
ia64_expand_tls_address (enum tls_model tls_kind, rtx op0, rtx op1,
rtx orig_op1, HOST_WIDE_INT addend)
{
rtx tga_op1, tga_op2, tga_ret, tga_eqv, tmp;
rtx_insn *insns;
/* Remember the caller's destination so the result can be compared with
it at the end. */
rtx orig_op0 = op0;
HOST_WIDE_INT addend_lo, addend_hi;
switch (tls_kind)
{
case TLS_MODEL_GLOBAL_DYNAMIC:
/* Build the call to __tls_get_addr (dtpmod, dtprel) in its own
sequence so that it can be wrapped as a libcall block. */
start_sequence ();
tga_op1 = gen_reg_rtx (Pmode);
emit_insn (gen_load_dtpmod (tga_op1, op1));
tga_op2 = gen_reg_rtx (Pmode);
emit_insn (gen_load_dtprel (tga_op2, op1));
tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
LCT_CONST, Pmode, 2, tga_op1,
Pmode, tga_op2, Pmode);
insns = get_insns ();
end_sequence ();
/* A destination that is not in Pmode cannot be the block's target; use
the call's return rtx instead and convert at the end. */
if (GET_MODE (op0) != Pmode)
op0 = tga_ret;
emit_libcall_block (insns, op0, tga_ret, op1);
break;
case TLS_MODEL_LOCAL_DYNAMIC:
/* ??? This isn't the completely proper way to do local-dynamic
If the call to __tls_get_addr is used only by a single symbol,
then we should (somehow) move the dtprel to the second arg
to avoid the extra add. */
start_sequence ();
tga_op1 = gen_reg_rtx (Pmode);
emit_insn (gen_load_dtpmod (tga_op1, op1));
/* The second argument is zero here; the symbol's dtprel offset is
added after the call. */
tga_op2 = const0_rtx;
tga_ret = emit_library_call_value (gen_tls_get_addr (), NULL_RTX,
LCT_CONST, Pmode, 2, tga_op1,
Pmode, tga_op2, Pmode);
insns = get_insns ();
end_sequence ();
/* The libcall block is given an UNSPEC_LD_BASE expression, not the
symbol, as its equivalent value. */
tga_eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
UNSPEC_LD_BASE);
tmp = gen_reg_rtx (Pmode);
emit_libcall_block (insns, tmp, tga_ret, tga_eqv);
if (!register_operand (op0, Pmode))
op0 = gen_reg_rtx (Pmode);
/* Add the symbol's dtprel offset to the value returned by the call. */
if (TARGET_TLS64)
{
emit_insn (gen_load_dtprel (op0, op1));
emit_insn (gen_adddi3 (op0, tmp, op0));
}
else
emit_insn (gen_add_dtprel (op0, op1, tmp));
break;
case TLS_MODEL_INITIAL_EXEC:
/* Split ADDEND: ADDEND_LO is its low 14 bits sign-extended and
ADDEND_HI the remainder. The high part is folded into the symbol;
the low part is added after the switch. */
addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
addend_hi = addend - addend_lo;
op1 = plus_constant (Pmode, op1, addend_hi);
addend = addend_lo;
tmp = gen_reg_rtx (Pmode);
emit_insn (gen_load_tprel (tmp, op1));
if (!register_operand (op0, Pmode))
op0 = gen_reg_rtx (Pmode);
emit_insn (gen_adddi3 (op0, tmp, gen_thread_pointer ()));
break;
case TLS_MODEL_LOCAL_EXEC:
if (!register_operand (op0, Pmode))
op0 = gen_reg_rtx (Pmode);
/* Use the original operand and clear ADDEND so that nothing is added
after the switch. */
op1 = orig_op1;
addend = 0;
if (TARGET_TLS64)
{
emit_insn (gen_load_tprel (op0, op1));
emit_insn (gen_adddi3 (op0, op0, gen_thread_pointer ()));
}
else
emit_insn (gen_add_tprel (op0, op1, gen_thread_pointer ()));
break;
default:
gcc_unreachable ();
}
/* Add whatever part of the offset is still outstanding, with the
caller's destination as the suggested target. */
if (addend)
op0 = expand_simple_binop (Pmode, PLUS, op0, GEN_INT (addend),
orig_op0, 1, OPTAB_DIRECT);
/* NULL_RTX signals that the value is already in the caller's OP0. */
if (orig_op0 == op0)
return NULL_RTX;
if (GET_MODE (orig_op0) == Pmode)
return op0;
return gen_lowpart (GET_MODE (orig_op0), op0);
}
/* Prepare a move of OP1 into OP0, emitting helper insns where needed.
Returns NULL_RTX when everything required has already been emitted here.
Otherwise returns the source operand to use, which may differ from the
OP1 passed in (presumably the caller then emits the move itself). */
rtx
ia64_expand_move (rtx op0, rtx op1)
{
machine_mode mode = GET_MODE (op0);
/* Outside of reload, a source that ia64_move_ok rejects is copied into a
register first. */
if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
op1 = force_reg (mode, op1);
if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
{
HOST_WIDE_INT addend = 0;
enum tls_model tls_kind;
rtx sym = op1;
/* Separate (const (plus SYM (const_int N))) into SYM and ADDEND. */
if (GET_CODE (op1) == CONST
&& GET_CODE (XEXP (op1, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (op1, 0), 1)) == CONST_INT)
{
addend = INTVAL (XEXP (XEXP (op1, 0), 1));
sym = XEXP (XEXP (op1, 0), 0);
}
/* TLS symbols are handed over to ia64_expand_tls_address entirely. */
tls_kind = tls_symbolic_operand_type (sym);
if (tls_kind)
return ia64_expand_tls_address (tls_kind, op0, sym, op1, addend);
/* For these symbols OP1 keeps its full offset and nothing is added
afterwards. */
if (any_offset_symbol_operand (sym, mode))
addend = 0;
else if (aligned_offset_symbol_operand (sym, mode))
{
HOST_WIDE_INT addend_lo, addend_hi;
/* ADDEND_LO is the low 14 bits of the offset, sign-extended, and
ADDEND_HI the remainder. */
addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
addend_hi = addend - addend_lo;
if (addend_lo != 0)
{
/* Keep the high part with the symbol; add the low part below. */
op1 = plus_constant (mode, sym, addend_hi);
addend = addend_lo;
}
else
addend = 0;
}
else
/* Any other symbol: load the bare symbol and add the whole offset
separately. */
op1 = sym;
if (reload_completed)
{
/* We really should have taken care of this offset earlier. */
gcc_assert (addend == 0);
if (ia64_expand_load_address (op0, op1))
return NULL_RTX;
}
if (addend)
{
/* Load the symbolic part into SUBTARGET (OP0 itself when no new
pseudo may be created), then add the outstanding offset. */
rtx subtarget = !can_create_pseudo_p () ? op0 : gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (VOIDmode, subtarget, op1));
op1 = expand_simple_binop (mode, PLUS, subtarget,
GEN_INT (addend), op0, 1, OPTAB_DIRECT);
/* The sum landed directly in OP0, so nothing is left to move. */
if (op0 == op1)
return NULL_RTX;
}
}
return op1;
}
/* Emit a move from OP1 to OP0 that is conditional on COND.  The move
   is expanded in the normal way and every insn it produced is then
   wrapped in a COND_EXEC that uses a copy of COND.  */

void
ia64_emit_cond_move (rtx op0, rtx op1, rtx cond)
{
  /* The last insn that existed before the move was emitted; the
     backwards walk below stops there.  */
  rtx_insn *stop = get_last_insn ();
  rtx_insn *cur;

  emit_move_insn (op0, op1);

  cur = get_last_insn ();
  while (cur != stop)
    {
      if (INSN_P (cur))
        PATTERN (cur) = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond),
                                           PATTERN (cur));
      cur = PREV_INSN (cur);
    }
}
/* Split a post-reload TImode or TFmode reference into two DImode
components. This is made extra difficult by the fact that we do
not get any scratch registers to work with, because reload cannot
be prevented from giving us a scratch that overlaps the register
pair involved. So instead, when addressing memory, we tweak the
pointer register up and back down with POST_INCs. Or up and not
back down when we can get away with it.
REVERSED is true when the loads must be done in reversed order
(high word first) for correctness. DEAD is true when the pointer
dies with the second insn we generate and therefore the second
address must not carry a postmodify.
The two components are stored in OUT[0] and OUT[1], in the order in
which they are to be accessed.
May return an insn which is to be emitted after the moves. */
static rtx
ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
{
rtx fixup = 0;
switch (GET_CODE (in))
{
case REG:
/* REVERSED is used as an index: when it is set the lower-numbered
register goes to OUT[1] and the next register to OUT[0]. */
out[reversed] = gen_rtx_REG (DImode, REGNO (in));
out[!reversed] = gen_rtx_REG (DImode, REGNO (in) + 1);
break;
case CONST_INT:
case CONST_DOUBLE:
/* Cannot occur reversed. */
gcc_assert (!reversed);
if (GET_MODE (in) != TFmode)
split_double (in, &out[0], &out[1]);
else
/* split_double does not understand how to split a TFmode
quantity into a pair of DImode constants. */
{
REAL_VALUE_TYPE r;
unsigned HOST_WIDE_INT p[2];
long l[4]; /* TFmode is 128 bits */
REAL_VALUE_FROM_CONST_DOUBLE (r, in);
real_to_target (l, &r, TFmode);
/* Combine the four elements of L pairwise into two 64-bit values;
which element of a pair becomes the upper half depends on
FLOAT_WORDS_BIG_ENDIAN.
NOTE(review): the elements are signed long. If long is 32 bits
wide on the host and an element is negative, the addition would
sign-extend into the upper half -- confirm real_to_target's
output range. */
if (FLOAT_WORDS_BIG_ENDIAN)
{
p[0] = (((unsigned HOST_WIDE_INT) l[0]) << 32) + l[1];
p[1] = (((unsigned HOST_WIDE_INT) l[2]) << 32) + l[3];
}
else
{
p[0] = (((unsigned HOST_WIDE_INT) l[1]) << 32) + l[0];
p[1] = (((unsigned HOST_WIDE_INT) l[3]) << 32) + l[2];
}
out[0] = GEN_INT (p[0]);
out[1] = GEN_INT (p[1]);
}
break;
case MEM:
{
rtx base = XEXP (in, 0);
rtx offset;
switch (GET_CODE (base))
{
case REG:
if (!reversed)
{
/* First access post-increments the pointer. The second access
post-decrements it back, unless DEAD, in which case an address
of 0 is passed (presumably leaving it for
adjust_automodify_address to supply -- confirm). */
out[0] = adjust_automodify_address
(in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
out[1] = adjust_automodify_address
(in, DImode, dead ? 0 : gen_rtx_POST_DEC (Pmode, base), 8);
}
else
{
/* Reversal requires a pre-increment, which can only
be done as a separate insn. */
emit_insn (gen_adddi3 (base, base, GEN_INT (8)));
out[0] = adjust_automodify_address
(in, DImode, gen_rtx_POST_DEC (Pmode, base), 8);
out[1] = adjust_address (in, DImode, 0);
}
break;
case POST_INC:
gcc_assert (!reversed && !dead);
/* Just do the increment in two steps. */
out[0] = adjust_automodify_address (in, DImode, 0, 0);
out[1] = adjust_automodify_address (in, DImode, 0, 8);
break;
case POST_DEC:
gcc_assert (!reversed && !dead);
/* Add 8, subtract 24. The net change to the pointer is
8 - 24 = -16. */
base = XEXP (base, 0);
out[0] = adjust_automodify_address
(in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
out[1] = adjust_automodify_address
(in, DImode,
gen_rtx_POST_MODIFY (Pmode, base,
plus_constant (Pmode, base, -24)),
8);
break;
case POST_MODIFY:
gcc_assert (!reversed && !dead);
/* Extract and adjust the modification. This case is
trickier than the others, because we might have an
index register, or we might have a combined offset that
doesn't fit a signed 9-bit displacement field. We can
assume the incoming expression is already legitimate. */
offset = XEXP (base, 1);
base = XEXP (base, 0);
/* The first access always post-increments; the code below makes up
for those 8 bytes in one of three ways. */
out[0] = adjust_automodify_address
(in, DImode, gen_rtx_POST_INC (Pmode, base), 0);
if (GET_CODE (XEXP (offset, 1)) == REG)
{
/* Can't adjust the postmodify to match. Emit the
original, then a separate addition insn. */
out[1] = adjust_automodify_address (in, DImode, 0, 8);
fixup = gen_adddi3 (base, base, GEN_INT (-8));
}
else
{
gcc_assert (GET_CODE (XEXP (offset, 1)) == CONST_INT);
/* The adjusted displacement is the original one minus 8; test
whether that would drop below -256. */
if (INTVAL (XEXP (offset, 1)) < -256 + 8)
{
/* Again the postmodify cannot be made to match,
but in this case it's more efficient to get rid
of the postmodify entirely and fix up with an
add insn. */
out[1] = adjust_automodify_address (in, DImode, base, 8);
fixup = gen_adddi3
(base, base, GEN_INT (INTVAL (XEXP (offset, 1)) - 8));
}
else
{
/* Combined offset still fits in the displacement field.
(We cannot overflow it at the high end.) */
out[1] = adjust_automodify_address
(in, DImode, gen_rtx_POST_MODIFY
(Pmode, base, gen_rtx_PLUS
(Pmode, base,
GEN_INT (INTVAL (XEXP (offset, 1)) - 8))),
8);
}
}
break;
default:
gcc_unreachable ();
}
break;
}
default:
gcc_unreachable ();
}
return fixup;
}
/* Split a TImode or TFmode move instruction after reload.
   This is used by *movtf_internal and *movti_internal.

   OPERANDS[0] is the destination and OPERANDS[1] the source.  Both are
   split into DImode halves with ia64_split_tmode, the two half moves
   are emitted, and then any fixup insns that the splitting asked for.  */

void
ia64_split_tmode_move (rtx operands[])
{
  rtx in[2], out[2], insn;
  rtx fixup[2];
  bool dead = false;
  bool reversed = false;

  /* It is possible for reload to decide to overwrite a pointer with
     the value it points to.  In that case we have to do the loads in
     the appropriate order so that the pointer is not destroyed too
     early.  Also we must not generate a postmodify for that second
     load, or rws_access_regno will die.  And we must not generate a
     postmodify for the second load if the destination register
     overlaps with the base register.  */
  if (GET_CODE (operands[1]) == MEM
      && reg_overlap_mentioned_p (operands[0], operands[1]))
    {
      /* Strip any auto-modify wrappers to reach the base register.  */
      rtx base = XEXP (operands[1], 0);
      while (GET_CODE (base) != REG)
        base = XEXP (base, 0);

      if (REGNO (base) == REGNO (operands[0]))
        reversed = true;

      if (refers_to_regno_p (REGNO (operands[0]),
                             REGNO (operands[0])+2,
                             base, 0))
        dead = true;
    }

  /* Another reason to do the moves in reversed order is if the first
     element of the target register pair is also the second element of
     the source register pair.  */
  if (GET_CODE (operands[0]) == REG && GET_CODE (operands[1]) == REG
      && REGNO (operands[0]) == REGNO (operands[1]) + 1)
    reversed = true;

  fixup[0] = ia64_split_tmode (in, operands[1], reversed, dead);
  fixup[1] = ia64_split_tmode (out, operands[0], reversed, dead);

  /* Attach a REG_INC note to INSN when EXP is a MEM whose address is an
     auto-modify expression.  The macro refers to its INSN argument
     rather than to a local that happens to be called `insn', and it is
     wrapped in do/while so that it acts as a single statement.  */
#define MAYBE_ADD_REG_INC_NOTE(INSN, EXP)                               \
  do                                                                    \
    {                                                                   \
      if (GET_CODE (EXP) == MEM                                         \
          && (GET_CODE (XEXP ((EXP), 0)) == POST_MODIFY                 \
              || GET_CODE (XEXP ((EXP), 0)) == POST_INC                 \
              || GET_CODE (XEXP ((EXP), 0)) == POST_DEC))               \
        add_reg_note ((INSN), REG_INC, XEXP (XEXP ((EXP), 0), 0));      \
    }                                                                   \
  while (0)

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[0], in[0]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[0]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[0]);

  insn = emit_insn (gen_rtx_SET (VOIDmode, out[1], in[1]));
  MAYBE_ADD_REG_INC_NOTE (insn, in[1]);
  MAYBE_ADD_REG_INC_NOTE (insn, out[1]);

  /* Pointer adjustments requested by the splitting go after both
     moves.  */
  if (fixup[0])
    emit_insn (fixup[0]);
  if (fixup[1])
    emit_insn (fixup[1]);

#undef MAYBE_ADD_REG_INC_NOTE
}
/* ??? Fixing GR->FR XFmode moves during reload is hard.  You need to go
   through memory plus an extra GR scratch register.  Except that you can
   either get the first from SECONDARY_MEMORY_NEEDED or the second from
   SECONDARY_RELOAD_CLASS, but not both.

   We got into problems in the first place by allowing a construct like
   (subreg:XF (reg:TI)), which we got from a union containing a long double.
   This solution attempts to prevent this situation from occurring.  When
   we see something like the above, we spill the inner register to memory.

   IN is the operand to examine and MODE the mode in which the caller
   wants to see it.  A SUBREG of a TImode register is always spilled; a
   plain register is spilled only when FORCE is nonzero.  Returns the
   stack slot in the spilled cases and IN itself otherwise.  */

static rtx
spill_xfmode_rfmode_operand (rtx in, int force, machine_mode mode)
{
  rtx slot;

  if (GET_CODE (in) == SUBREG
      && GET_MODE (SUBREG_REG (in)) == TImode
      && GET_CODE (SUBREG_REG (in)) == REG)
    {
      /* Store the inner TImode register, then view the slot in MODE.  */
      slot = assign_stack_temp (TImode, 16);
      emit_move_insn (slot, SUBREG_REG (in));
      return adjust_address (slot, mode, 0);
    }

  if (!force || GET_CODE (in) != REG)
    return in;

  slot = assign_stack_temp (mode, 16);
  emit_move_insn (slot, in);
  return slot;
}
/* Expand the movxf or movrf pattern (MODE says which) with the given
OPERANDS, returning true if the pattern should then invoke
DONE.
OPERANDS[0] is the destination and OPERANDS[1] the source. When false
is returned, OPERANDS[1] may have been replaced by a stack slot or a
register (presumably the pattern then emits the move itself). */
bool
ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
{
rtx op0 = operands[0];
/* Look through a SUBREG on the destination. */
if (GET_CODE (op0) == SUBREG)
op0 = SUBREG_REG (op0);
/* We must support XFmode loads into general registers for stdarg/vararg,
unprototyped calls, and a rare case where a long double is passed as
an argument after a float HFA fills the FP registers. We split them into
DImode loads for convenience. We also need to support XFmode stores
for the last case. This case does not happen for stdarg/vararg routines,
because we do a block store to memory of unnamed arguments. */
if (GET_CODE (op0) == REG && GR_REGNO_P (REGNO (op0)))
{
rtx out[2];
/* We're hoping to transform everything that deals with XFmode
quantities and GR registers early in the compiler. */
gcc_assert (can_create_pseudo_p ());
/* Struct to register can just use TImode instead. */
if ((GET_CODE (operands[1]) == SUBREG
&& GET_MODE (SUBREG_REG (operands[1])) == TImode)
|| (GET_CODE (operands[1]) == REG
&& GR_REGNO_P (REGNO (operands[1]))))
{
rtx op1 = operands[1];
/* Use the TImode register under the SUBREG, or view the GR source
as TImode. */
if (GET_CODE (op1) == SUBREG)
op1 = SUBREG_REG (op1);
else
op1 = gen_rtx_REG (TImode, REGNO (op1));
emit_move_insn (gen_rtx_REG (TImode, REGNO (op0)), op1);
return true;
}
if (GET_CODE (operands[1]) == CONST_DOUBLE)
{
/* Don't word-swap when reading in the constant. */
emit_move_insn (gen_rtx_REG (DImode, REGNO (op0)),
operand_subword (operands[1], WORDS_BIG_ENDIAN,
0, mode));
emit_move_insn (gen_rtx_REG (DImode, REGNO (op0) + 1),
operand_subword (operands[1], !WORDS_BIG_ENDIAN,
0, mode));
return true;
}
/* If the quantity is in a register not known to be GR, spill it. */
if (register_operand (operands[1], mode))
operands[1] = spill_xfmode_rfmode_operand (operands[1], 1, mode);
gcc_assert (GET_CODE (operands[1]) == MEM);
/* Don't word-swap when reading in the value. */
out[0] = gen_rtx_REG (DImode, REGNO (op0));
out[1] = gen_rtx_REG (DImode, REGNO (op0) + 1);
emit_move_insn (out[0], adjust_address (operands[1], DImode, 0));
emit_move_insn (out[1], adjust_address (operands[1], DImode, 8));
return true;
}
/* The source is a general register and the destination is not. */
if (GET_CODE (operands[1]) == REG && GR_REGNO_P (REGNO (operands[1])))
{
/* We're hoping to transform everything that deals with XFmode
quantities and GR registers early in the compiler. */
gcc_assert (can_create_pseudo_p ());
/* Op0 can't be a GR_REG here, as that case is handled above.
If op0 is a register, then we spill op1, so that we now have a
MEM operand. This requires creating an XFmode subreg of a TImode reg
to force the spill. */
if (register_operand (operands[0], mode))
{
rtx op1 = gen_rtx_REG (TImode, REGNO (operands[1]));
op1 = gen_rtx_SUBREG (mode, op1, 0);
operands[1] = spill_xfmode_rfmode_operand (op1, 0, mode);
}
else
{
rtx in[2];
gcc_assert (GET_CODE (operands[0]) == MEM);
/* Don't word-swap when writing out the value. */
in[0] = gen_rtx_REG (DImode, REGNO (operands[1]));
in[1] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1);
emit_move_insn (adjust_address (operands[0], DImode, 0), in[0]);
emit_move_insn (adjust_address (operands[0], DImode, 8), in[1]);
return true;
}
}
if (!reload_in_progress && !reload_completed)
{
/* Spill a (subreg (reg:TI)) source to memory; any other source is
returned unchanged because FORCE is 0. */
operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
/* A TImode register destination is loaded from memory as a TImode
value. */
if (GET_MODE (op0) == TImode && GET_CODE (op0) == REG)
{
rtx memt, memx, in = operands[1];
if (CONSTANT_P (in))
in = validize_mem (force_const_mem (mode, in));
if (GET_CODE (in) == MEM)
memt = adjust_address (in, TImode, 0);
else
{
/* Store IN through a MODE view of a fresh TImode stack slot so
that it can be read back in TImode. */
memt = assign_stack_temp (TImode, 16);
memx = adjust_address (memt, mode, 0);
emit_move_insn (memx, in);
}
emit_move_insn (op0, memt);
return true;
}
if (!ia64_move_ok (operands[0], operands[1]))
operands[1] = force_reg (mode, operands[1]);
}
return false;
}
/* Emit comparison instruction if necessary, replacing *EXPR, *OP0, *OP1
with the expression that holds the compare result (in VOIDmode).
On return *OP0 is the rtx holding the BImode result, *OP1 is const0_rtx
and *EXPR compares the two. */
/* Libfunc used for the HP-UX TFmode comparison below; it is only read in
this function (NOTE(review): presumably set up elsewhere in the file). */
static GTY(()) rtx cmptf_libfunc;
void
ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
{
enum rtx_code code = GET_CODE (*expr);
rtx cmp;
/* If we have a BImode input, then we already have a compare result, and
do not need to emit another comparison. */
if (GET_MODE (*op0) == BImode)
{
gcc_assert ((code == NE || code == EQ) && *op1 == const0_rtx);
cmp = *op0;
}
/* HPUX TFmode compare requires a library call to _U_Qfcmp, which takes a
magic number as its third argument, that indicates what to do.
The return value is an integer to be compared against zero. */
else if (TARGET_HPUX && GET_MODE (*op0) == TFmode)
{
enum qfcmp_magic {
QCMP_INV = 1, /* Raise FP_INVALID on NaNs as a side effect. */
QCMP_UNORD = 2,
QCMP_EQ = 4,
QCMP_LT = 8,
QCMP_GT = 16
};
/* MAGIC is the flag word passed to the library routine; NCODE is the
code used to compare the routine's result against zero. */
int magic;
enum rtx_code ncode;
rtx ret, insns;
gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
switch (code)
{
/* 1 = equal, 0 = not equal. Equality operators do
not raise FP_INVALID when given a NaN operand. */
case EQ: magic = QCMP_EQ; ncode = NE; break;
case NE: magic = QCMP_EQ; ncode = EQ; break;
/* isunordered() from C99. */
case UNORDERED: magic = QCMP_UNORD; ncode = NE; break;
case ORDERED: magic = QCMP_UNORD; ncode = EQ; break;
/* Relational operators raise FP_INVALID when given
a NaN operand. */
case LT: magic = QCMP_LT |QCMP_INV; ncode = NE; break;
case LE: magic = QCMP_LT|QCMP_EQ|QCMP_INV; ncode = NE; break;
case GT: magic = QCMP_GT |QCMP_INV; ncode = NE; break;
case GE: magic = QCMP_GT|QCMP_EQ|QCMP_INV; ncode = NE; break;
/* Unordered relational operators do not raise FP_INVALID
when given a NaN operand. */
case UNLT: magic = QCMP_LT |QCMP_UNORD; ncode = NE; break;
case UNLE: magic = QCMP_LT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
case UNGT: magic = QCMP_GT |QCMP_UNORD; ncode = NE; break;
case UNGE: magic = QCMP_GT|QCMP_EQ|QCMP_UNORD; ncode = NE; break;
/* Not supported. */
case UNEQ:
case LTGT:
default: gcc_unreachable ();
}
/* Emit the library call and the test of its result into a separate
sequence, then wrap them as a libcall block whose equivalent value is
the original comparison. */
start_sequence ();
ret = emit_library_call_value (cmptf_libfunc, 0, LCT_CONST, DImode, 3,
*op0, TFmode, *op1, TFmode,
GEN_INT (magic), DImode);
cmp = gen_reg_rtx (BImode);
emit_insn (gen_rtx_SET (VOIDmode, cmp,
gen_rtx_fmt_ee (ncode, BImode,
ret, const0_rtx)));
insns = get_insns ();
end_sequence ();
emit_libcall_block (insns, cmp, cmp,
gen_rtx_fmt_ee (code, BImode, *op0, *op1));
/* CMP now holds the outcome, so the result expression tests it
with NE. */
code = NE;
}
else
{
/* Ordinary case: set a fresh BImode register to the comparison and
test that register with NE. */
cmp = gen_reg_rtx (BImode);
emit_insn (gen_rtx_SET (VOIDmode, cmp,
gen_rtx_fmt_ee (code, BImode, *op0, *op1)));
code = NE;
}
*expr = gen_rtx_fmt_ee (code, VOIDmode, cmp, const0_rtx);
*op0 = cmp;
*op1 = const0_rtx;
}
/* Generate an integral vector comparison. Return true if the condition has
been reversed, and so the sense of the comparison should be inverted.
CODE is the comparison to perform on OP0 and OP1 in vector mode MODE;
the result of the emitted comparison is stored in DEST. */
static bool
ia64_expand_vecint_compare (enum rtx_code code, machine_mode mode,
rtx dest, rtx op0, rtx op1)
{
bool negate = false;
rtx x;
/* Canonicalize the comparison to EQ, GT, GTU. */
switch (code)
{
case EQ:
case GT:
case GTU:
break;
case NE:
case LE:
case LEU:
/* Emit the reversed condition and tell the caller to invert. */
code = reverse_condition (code);
negate = true;
break;
case GE:
case GEU:
/* Reverse the condition here, then fall through so that the operands
get swapped as well. */
code = reverse_condition (code);
negate = true;
/* FALLTHRU */
case LT:
case LTU:
/* Swap the condition and exchange the operands. */
code = swap_condition (code);
x = op0, op0 = op1, op1 = x;
break;
default:
gcc_unreachable ();
}
/* Unsigned parallel compare is not supported by the hardware. Play some
tricks to turn this into a signed comparison against 0. */
if (code == GTU)
{
switch (mode)
{
case V2SImode:
{
rtx t1, t2, mask;
/* Subtract (-(INT MAX) - 1) from both operands to make
them signed. */
mask = gen_int_mode (0x80000000, SImode);
mask = gen_rtx_CONST_VECTOR (V2SImode, gen_rtvec (2, mask, mask));
mask = force_reg (mode, mask);
t1 = gen_reg_rtx (mode);
emit_insn (gen_subv2si3 (t1, op0, mask));
t2 = gen_reg_rtx (mode);
emit_insn (gen_subv2si3 (t2, op1, mask));
op0 = t1;
op1 = t2;
/* The biased operands are compared with signed GT. */
code = GT;
}
break;
case V8QImode:
case V4HImode:
/* Perform a parallel unsigned saturating subtraction. */
x = gen_reg_rtx (mode);
emit_insn (gen_rtx_SET (VOIDmode, x,
gen_rtx_US_MINUS (mode, op0, op1)));
/* The difference is compared for equality with zero and the sense
flipped: a nonzero saturating difference stands for OP0 > OP1. */
code = EQ;
op0 = x;
op1 = CONST0_RTX (mode);
negate = !negate;
break;
default:
gcc_unreachable ();
}
}
x = gen_rtx_fmt_ee (code, mode, op0, op1);
emit_insn (gen_rtx_SET (VOIDmode, dest, x));
return negate;
}
/* Emit an integral vector conditional move.  OPERANDS[0] is the
   destination, OPERANDS[1] and OPERANDS[2] are the values selected when
   the comparison holds and when it does not, OPERANDS[3] supplies the
   comparison code, and OPERANDS[4] and OPERANDS[5] are the values that
   are compared.  */

void
ia64_expand_vecint_cmov (rtx operands[])
{
  machine_mode mode = GET_MODE (operands[0]);
  enum rtx_code code = GET_CODE (operands[3]);
  rtx cmp, x, ot, of;
  bool negate;
  bool ot_is_zero, of_is_zero;

  cmp = gen_reg_rtx (mode);
  negate = ia64_expand_vecint_compare (code, mode, cmp,
                                       operands[4], operands[5]);

  /* If the comparison was emitted in inverted form, swap the roles of
     the two value operands.  */
  ot = operands[1+negate];
  of = operands[2-negate];

  ot_is_zero = (ot == CONST0_RTX (mode));
  of_is_zero = (of == CONST0_RTX (mode));

  /* Both values are zero, so the result is zero whatever CMP holds.  */
  if (ot_is_zero && of_is_zero)
    {
      emit_move_insn (operands[0], ot);
      return;
    }

  /* Only the value for a true comparison is zero: result is ~CMP & OF.  */
  if (ot_is_zero)
    {
      x = gen_rtx_NOT (mode, cmp);
      x = gen_rtx_AND (mode, x, of);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
      return;
    }

  /* Only the value for a false comparison is zero: result is CMP & OT.  */
  if (of_is_zero)
    {
      x = gen_rtx_AND (mode, cmp, ot);
      emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
      return;
    }

  /* General case: (CMP & OT) | (~CMP & OF).  */
  {
    rtx t, f;

    t = gen_reg_rtx (mode);
    x = gen_rtx_AND (mode, cmp, ot);
    emit_insn (gen_rtx_SET (VOIDmode, t, x));

    f = gen_reg_rtx (mode);
    x = gen_rtx_NOT (mode, cmp);
    x = gen_rtx_AND (mode, x, of);
    emit_insn (gen_rtx_SET (VOIDmode, f, x));

    x = gen_rtx_IOR (mode, t, f);
    emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
  }
}
/* Emit an integral vector min or max operation.  CODE is one of UMIN,
   UMAX, SMIN and SMAX, MODE is the vector mode, OPERANDS[0] is the
   destination and OPERANDS[1] and OPERANDS[2] are the inputs.  Return
   true if all done; false means that nothing was emitted because the
   combination is supported directly.  */

bool
ia64_expand_vecint_minmax (enum rtx_code code, machine_mode mode,
                           rtx operands[])
{
  rtx xops[6];
  enum rtx_code cmp_code = code;

  /* These four combinations are supported directly.  */
  if ((mode == V8QImode && (code == UMIN || code == UMAX))
      || (mode == V4HImode && (code == SMIN || code == SMAX)))
    return false;

  /* This combination can be implemented with only saturating
     subtraction.  */
  if (mode == V4HImode && code == UMAX)
    {
      rtx x, tmp = gen_reg_rtx (mode);

      x = gen_rtx_US_MINUS (mode, operands[1], operands[2]);
      emit_insn (gen_rtx_SET (VOIDmode, tmp, x));
      emit_insn (gen_addv4hi3 (operands[0], tmp, operands[2]));
      return true;
    }

  /* Everything else is implemented via vector comparisons: select
     OPERANDS[1] when the comparison holds and OPERANDS[2] when it does
     not.  */
  xops[0] = operands[0];
  xops[4] = xops[1] = operands[1];
  xops[5] = xops[2] = operands[2];

  if (code == UMIN)
    cmp_code = LTU;
  else if (code == UMAX)
    cmp_code = GTU;
  else if (code == SMIN)
    cmp_code = LT;
  else if (code == SMAX)
    cmp_code = GT;
  else
    gcc_unreachable ();

  xops[3] = gen_rtx_fmt_ee (cmp_code, VOIDmode, operands[1], operands[2]);

  ia64_expand_vecint_cmov (xops);
  return true;
}
/* The vectors LO and HI each contain N halves of a double-wide vector.
   Reassemble either the first N/2 or the second N/2 elements (the
   second when HIGHP is set) into OUT by means of a constant
   permutation that interleaves the two inputs.  */

void
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
  machine_mode vmode = GET_MODE (lo);
  unsigned int nelt = GET_MODE_NUNITS (vmode);
  unsigned int half = nelt / 2;
  unsigned int first = highp ? half : 0;
  unsigned int i;
  struct expand_vec_perm_d d;
  bool ok;

  d.target = gen_lowpart (vmode, out);
  d.op0 = (TARGET_BIG_ENDIAN ? hi : lo);
  d.op1 = (TARGET_BIG_ENDIAN ? lo : hi);
  d.vmode = vmode;
  d.nelt = nelt;
  d.one_operand_p = false;
  d.testing_p = false;

  /* Pair element K of the first input with element K of the second;
     indices for the second input are offset by NELT.  */
  for (i = 0; i < half; ++i)
    {
      unsigned int elt = i + first;

      d.perm[i * 2] = elt;
      d.perm[i * 2 + 1] = elt + nelt;
    }

  ok = ia64_expand_vec_perm_const_1 (&d);
  gcc_assert (ok);
}
/* Return a vector of the sign-extension of VEC.  With UNSIGNEDP set
   this is simply the zero vector of VEC's mode; otherwise it is a new
   register that receives the result of the comparison VEC < 0.  */

static rtx
ia64_unpack_sign (rtx vec, bool unsignedp)
{
  machine_mode mode = GET_MODE (vec);
  rtx zero = CONST0_RTX (mode);
  rtx sign;
  bool neg;

  if (unsignedp)
    return zero;

  sign = gen_reg_rtx (mode);
  neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
  /* The comparison must not have been emitted in inverted form.  */
  gcc_assert (!neg);

  return sign;
}
/* Emit an integral vector unpack operation.  OPERANDS[1] is the input
   and OPERANDS[0] the destination.  UNSIGNEDP selects zero extension
   instead of sign extension, and HIGHP selects the second half of the
   elements instead of the first.  */

void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
{
  ia64_unpack_assemble (operands[0], operands[1],
                        ia64_unpack_sign (operands[1], unsignedp),
                        highp);
}
/* Emit an integral vector widening sum operation.  OPERANDS[1] is the
   narrow input; both of its widened halves are added to OPERANDS[2]
   and the total is stored in OPERANDS[0].  UNSIGNEDP selects zero
   extension instead of sign extension.  */

void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
  rtx ext = ia64_unpack_sign (operands[1], unsignedp);
  machine_mode wide = GET_MODE (operands[0]);
  rtx low_half = gen_reg_rtx (wide);
  rtx high_half = gen_reg_rtx (wide);
  rtx sum;

  ia64_unpack_assemble (low_half, operands[1], ext, false);
  ia64_unpack_assemble (high_half, operands[1], ext, true);

  sum = expand_binop (wide, add_optab, low_half, operands[2], NULL, 0,
                      OPTAB_DIRECT);
  sum = expand_binop (wide, add_optab, high_half, sum, operands[0], 0,
                      OPTAB_DIRECT);

  /* expand_binop may have put the result somewhere other than the
     suggested target.  */
  if (sum != operands[0])
    emit_move_insn (operands[0], sum);
}
/* Emit the appropriate sequence for a call.  RETVAL, when nonnull,
   receives the value of the call.  ADDR is a MEM whose address is the
   function to call.  SIBCALL_P is nonzero for a sibling call.  */

void
ia64_expand_call (rtx retval, rtx addr, rtx nextarg ATTRIBUTE_UNUSED,
                  int sibcall_p)
{
  rtx insn, b0;
  bool nogp;

  addr = XEXP (addr, 0);
  addr = convert_memory_address (DImode, addr);
  b0 = gen_rtx_REG (DImode, R_BR (0));

  /* ??? Should do this for functions known to bind local too.  */
  nogp = TARGET_NO_PIC || TARGET_AUTO_PIC;

  /* Choose the call pattern from the kind of call and from whether the
     "nogp" or the "gp" family of patterns applies.  */
  if (sibcall_p)
    insn = nogp ? gen_sibcall_nogp (addr) : gen_sibcall_gp (addr);
  else if (! retval)
    insn = nogp ? gen_call_nogp (addr, b0) : gen_call_gp (addr, b0);
  else
    insn = (nogp
            ? gen_call_value_nogp (retval, addr, b0)
            : gen_call_value_gp (retval, addr, b0));

  insn = emit_call_insn (insn);

  /* Record the registers that the call uses.  */
  if (!nogp)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);

  if (sibcall_p)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn), b0);

  if (TARGET_ABI_OPEN_VMS)
    use_reg (&CALL_INSN_FUNCTION_USAGE (insn),
             gen_rtx_REG (DImode, GR_REG (25)));
}
/* Note that the register assigned to frame slot R is being used.  If
   no register has been recorded for R yet, record the one currently
   held in current_frame_info; otherwise assert that the recorded
   register is still the current one.  */

static void
reg_emitted (enum ia64_frame_regs r)
{
  if (emitted_frame_related_regs[r] != 0)
    {
      gcc_assert (emitted_frame_related_regs[r] == current_frame_info.r[r]);
      return;
    }

  emitted_frame_related_regs[r] = current_frame_info.r[r];
}
/* Return the register number that current_frame_info holds for frame
   slot R, after noting its use through reg_emitted.  */

static int
get_reg (enum ia64_frame_regs r)
{
  int regno;

  reg_emitted (r);
  regno = current_frame_info.r[r];
  return regno;
}
/* Return true if REGNO is recorded in emitted_frame_related_regs for
   any frame slot from reg_fp up to, but not including,
   number_of_ia64_frame_regs.  */

static bool
is_emitted (int regno)
{
  unsigned int slot = reg_fp;

  while (slot < number_of_ia64_frame_regs)
    {
      if (emitted_frame_related_regs[slot] == regno)
        return true;
      slot++;
    }

  return false;
}
/* Emit insns that restore pic_offset_table_rtx (GP).  If the frame
   info names a register for reg_save_gp, GP is copied from that
   register.  Otherwise the address of a stack location is computed
   into GP itself and GP is then loaded from that location.  */

void
ia64_reload_gp (void)
{
  HOST_WIDE_INT offset;
  rtx base, offset_r;

  if (current_frame_info.r[reg_save_gp])
    {
      emit_move_insn (pic_offset_table_rtx,
                      gen_rtx_REG (DImode, get_reg (reg_save_gp)));
      return;
    }

  /* Work out the offset of the location from whichever base register
     is in use.  */
  offset = (current_frame_info.spill_cfa_off
            + current_frame_info.spill_size);
  if (frame_pointer_needed)
    {
      base = hard_frame_pointer_rtx;
      offset = -offset;
    }
  else
    {
      base = stack_pointer_rtx;
      offset = current_frame_info.total_size - offset;
    }

  /* Form the address in GP: with a single add when the offset
     satisfies constraint I, otherwise by loading the offset first and
     adding the base register afterwards.  */
  offset_r = GEN_INT (offset);
  if (satisfies_constraint_I (offset_r))
    emit_insn (gen_adddi3 (pic_offset_table_rtx, base, offset_r));
  else
    {
      emit_move_insn (pic_offset_table_rtx, offset_r);
      emit_insn (gen_adddi3 (pic_offset_table_rtx,
                             pic_offset_table_rtx, base));
    }

  emit_move_insn (pic_offset_table_rtx,
                  gen_rtx_MEM (DImode, pic_offset_table_rtx));
}
/* Emit the final insns for a call to ADDR. RETVAL, when nonnull, receives
the value of the call, and RETADDR is passed on to the call patterns.
SCRATCH_R and SCRATCH_B are scratch registers used when ADDR is a general
register, in which case the call goes through a descriptor. NORETURN_P
and SIBCALL_P describe the call; no GP reload is emitted after the call
when either of them is set. */
void
ia64_split_call (rtx retval, rtx addr, rtx retaddr, rtx scratch_r,
rtx scratch_b, int noreturn_p, int sibcall_p)
{
rtx insn;
bool is_desc = false;
/* If we find we're calling through a register, then we're actually
calling through a descriptor, so load up the values. */
if (REG_P (addr) && GR_REGNO_P (REGNO (addr)))
{
rtx tmp;
bool addr_dead_p;
/* ??? We are currently constrained to *not* use peep2, because
we can legitimately change the global lifetime of the GP
(in the form of killing where previously live). This is
because a call through a descriptor doesn't use the previous
value of the GP, while a direct call does, and we do not
commit to either form until the split here.
That said, this means that we lack precise life info for
whether ADDR is dead after this call. This is not terribly
important, since we can fix things up essentially for free
with the POST_DEC below, but it's nice to not use it when we
can immediately tell it's not necessary. */
addr_dead_p = ((noreturn_p || sibcall_p
|| TEST_HARD_REG_BIT (regs_invalidated_by_call,
REGNO (addr)))
&& !FUNCTION_ARG_REGNO_P (REGNO (addr)));
/* Load the code address into scratch_b. */
/* The load goes through SCRATCH_R first and post-increments ADDR. */
tmp = gen_rtx_POST_INC (Pmode, addr);
tmp = gen_rtx_MEM (Pmode, tmp);
emit_move_insn (scratch_r, tmp);
emit_move_insn (scratch_b, scratch_r);
/* Load the GP address. If ADDR is not dead here, then we must
revert the change made above via the POST_INCREMENT. */
if (!addr_dead_p)
tmp = gen_rtx_POST_DEC (Pmode, addr);
else
tmp = addr;
tmp = gen_rtx_MEM (Pmode, tmp);
emit_move_insn (pic_offset_table_rtx, tmp);
/* From here on the call target is the value loaded into SCRATCH_B. */
is_desc = true;
addr = scratch_b;
}
/* Only the "nogp" call patterns are used here. */
if (sibcall_p)
insn = gen_sibcall_nogp (addr);
else if (retval)
insn = gen_call_value_nogp (retval, addr, retaddr);
else
insn = gen_call_nogp (addr, retaddr);
emit_call_insn (insn);
/* Restore GP after a call that returns here, unless TARGET_CONST_GP is
set and the call did not go through a descriptor. */
if ((!TARGET_CONST_GP || is_desc) && !noreturn_p && !sibcall_p)
ia64_reload_gp ();
}
/* Expand an atomic operation.  We want to perform MEM <CODE>= VAL atomically.
   This differs from the generic code in that we know about the zero-extending
   properties of cmpxchg, and the zero-extending requirements of ar.ccv.  We
   also know that ld.acq+cmpxchg.rel equals a full barrier.

   The loop we want to generate looks like

	cmp_reg = mem;
      label:
	old_reg = cmp_reg;
	new_reg = cmp_reg op val;
	cmp_reg = compare-and-swap(mem, old_reg, new_reg)
	if (cmp_reg != old_reg)
	  goto label;

   Note that we only do the plain load from memory once.  Subsequent
   iterations use the value loaded by the compare-and-swap pattern.

   CODE is the rtx operation to apply.  NOT is expanded as NAND, i.e. the
   new value is ~(old & VAL).
   MEM is the memory operand to update; its mode selects the access width.
   VAL is the second operand of the operation.
   OLD_DST, if nonnull, receives the value MEM held before the operation.
   NEW_DST, if nonnull, receives the value produced by the operation.
   MODEL selects between the acquire and release instruction forms.  */
void
ia64_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
rtx old_dst, rtx new_dst, enum memmodel model)
{
machine_mode mode = GET_MODE (mem);
rtx old_reg, new_reg, cmp_reg, ar_ccv, label;
enum insn_code icode;
/* Special case for using fetchadd. */
if ((mode == SImode || mode == DImode)
&& (code == PLUS || code == MINUS)
&& fetchadd_operand (val, mode))
{
/* The instruction only adds, so a subtraction becomes an addition of
   the negated constant.  */
if (code == MINUS)
val = GEN_INT (-INTVAL (val));
/* The fetchadd pattern always takes a destination for the old value.  */
if (!old_dst)
old_dst = gen_reg_rtx (mode);
switch (model)
{
/* The strongest models get an explicit barrier and then fall through
   to the acquire form.  */
case MEMMODEL_ACQ_REL:
case MEMMODEL_SEQ_CST:
case MEMMODEL_SYNC_SEQ_CST:
emit_insn (gen_memory_barrier ());
/* FALLTHRU */
case MEMMODEL_RELAXED:
case MEMMODEL_ACQUIRE:
case MEMMODEL_SYNC_ACQUIRE:
case MEMMODEL_CONSUME:
if (mode == SImode)
icode = CODE_FOR_fetchadd_acq_si;
else
icode = CODE_FOR_fetchadd_acq_di;
break;
case MEMMODEL_RELEASE:
case MEMMODEL_SYNC_RELEASE:
if (mode == SImode)
icode = CODE_FOR_fetchadd_rel_si;
else
icode = CODE_FOR_fetchadd_rel_di;
break;
default:
gcc_unreachable ();
}
emit_insn (GEN_FCN (icode) (old_dst, mem, val));
/* The pattern yields only the old value; recompute the new value as
   old + VAL when the caller wants it.  */
if (new_dst)
{
new_reg = expand_simple_binop (mode, PLUS, old_dst, val, new_dst,
true, OPTAB_WIDEN);
if (new_reg != new_dst)
emit_move_insn (new_dst, new_reg);
}
return;
}
/* Because of the volatile mem read, we get an ld.acq, which is the
front half of the full barrier. The end half is the cmpxchg.rel.
For relaxed and release memory models, we don't need this. But we
also don't bother trying to prevent it either. */
gcc_assert (is_mm_relaxed (model) || is_mm_release (model)
|| MEM_VOLATILE_P (mem));
/* The loop itself always works on DImode registers, whatever MODE is.  */
old_reg = gen_reg_rtx (DImode);
cmp_reg = gen_reg_rtx (DImode);
label = gen_label_rtx ();
if (mode != DImode)
{
/* View VAL as DImode and load MEM with an unsigned (zero) extension.  */
val = simplify_gen_subreg (DImode, val, mode, 0);
emit_insn (gen_extend_insn (cmp_reg, mem, DImode, mode, 1));
}
else
emit_move_insn (cmp_reg, mem);
/* Top of the retry loop.  */
emit_label (label);
ar_ccv = gen_rtx_REG (DImode, AR_CCV_REGNUM);
/* Remember the expected value both for the final comparison (old_reg)
   and as the compare operand handed to the cmpxchg pattern (ar_ccv).  */
emit_move_insn (old_reg, cmp_reg);
emit_move_insn (ar_ccv, cmp_reg);
if (old_dst)
emit_move_insn (old_dst, gen_lowpart (mode, cmp_reg));
new_reg = cmp_reg;
if (code == NOT)
{
/* NOT stands for NAND here: AND with VAL first, then complement.  */
new_reg = expand_simple_binop (DImode, AND, new_reg, val, NULL_RTX,
true, OPTAB_DIRECT);
new_reg = expand_simple_unop (DImode, code, new_reg, NULL_RTX, true);
}
else
new_reg = expand_simple_binop (DImode, code, new_reg, val, NULL_RTX,
true, OPTAB_DIRECT);
/* Narrow the result back to the access mode before storing it.  */
if (mode != DImode)
new_reg = gen_lowpart (mode, new_reg);
if (new_dst)
emit_move_insn (new_dst, new_reg);
/* Pick the cmpxchg pattern: acquire form for the relaxed/acquire/consume
   models, release form for everything else.  */
switch (model)
{
case MEMMODEL_RELAXED:
case MEMMODEL_ACQUIRE:
case MEMMODEL_SYNC_ACQUIRE:
case MEMMODEL_CONSUME:
switch (mode)
{
case QImode: icode = CODE_FOR_cmpxchg_acq_qi; break;
case HImode: icode = CODE_FOR_cmpxchg_acq_hi; break;
case SImode: icode = CODE_FOR_cmpxchg_acq_si; break;
case DImode: icode = CODE_FOR_cmpxchg_acq_di; break;
default:
gcc_unreachable ();
}
break;
case MEMMODEL_RELEASE:
case MEMMODEL_SYNC_RELEASE:
case MEMMODEL_ACQ_REL:
case MEMMODEL_SEQ_CST:
case MEMMODEL_SYNC_SEQ_CST:
switch (mode)
{
case QImode: icode = CODE_FOR_cmpxchg_rel_qi; break;
case HImode: icode = CODE_FOR_cmpxchg_rel_hi; break;
case SImode: icode = CODE_FOR_cmpxchg_rel_si; break;
case DImode: icode = CODE_FOR_cmpxchg_rel_di; break;
default:
gcc_unreachable ();
}
break;
default:
gcc_unreachable ();
}
emit_insn (GEN_FCN (icode) (cmp_reg, mem, ar_ccv, new_reg));
/* Retry if the value returned by the compare-and-swap differs from the
   one we expected.  */
emit_cmp_and_jump_insns (cmp_reg, old_reg, NE, NULL, DImode, true, label);
}
/* Begin the assembly file: run the default file-start processing and
   then emit the .pred.safe_across_calls directive, if one is needed.  */

static void
ia64_file_start (void)
{
  /* Generic preamble first.  */
  default_file_start ();

  /* Then the target-specific predicate-register directive.  */
  emit_safe_across_calls ();
}
/* Write a ".pred.safe_across_calls" directive to asm_out_file listing
   every predicate register p1..p63 that is not marked in call_used_regs.
   Consecutive registers are collapsed into "pN-pM" ranges and entries are
   separated by commas.  Nothing at all is written when no such register
   exists.  */

void
emit_safe_across_calls (void)
{
  unsigned int first, stop;
  int header_written = 0;

  for (first = 1; ; first = stop + 1)
    {
      /* Skip over call-used predicates to the start of the next run.  */
      while (first < 64 && call_used_regs[PR_REG (first)])
	first++;
      if (first >= 64)
	break;

      /* STOP ends up one past the last register of the run; it is either
	 64 or a call-used register, so the next scan may resume after it.  */
      stop = first + 1;
      while (stop < 64 && ! call_used_regs[PR_REG (stop)])
	stop++;

      if (header_written)
	fputc (',', asm_out_file);
      else
	{
	  fputs ("\t.pred.safe_across_calls ", asm_out_file);
	  header_written = 1;
	}

      if (stop != first + 1)
	fprintf (asm_out_file, "p%u-p%u", first, stop - 1);
      else
	fprintf (asm_out_file, "p%u", first);
    }

  if (header_written)
    fputc ('\n', asm_out_file);
}
/* Globalize a declaration.  Writes to STREAM the directives that make
   DECL's assembler name global.  If DECL carries a "version_id"
   attribute, an ".alias" directive binding the name to "name{version}"
   is written first.  Function declarations additionally get a
   "function" type directive.  */

static void
ia64_globalize_decl_name (FILE * stream, tree decl)
{
  const char *sym = XSTR (XEXP (DECL_RTL (decl), 0), 0);
  tree vattr = lookup_attribute ("version_id", DECL_ATTRIBUTES (decl));

  if (vattr)
    {
      /* The attribute's first argument is the version string.  */
      const char *version
	= TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (vattr)));

      fprintf (stream, "\t.alias %s#, \"%s{%s}\"\n", sym, sym, version);
    }

  targetm.asm_out.globalize_label (stream, sym);

  if (TREE_CODE (decl) == FUNCTION_DECL)
    ASM_OUTPUT_TYPE_DIRECTIVE (stream, sym, "function");
}
/* Helper function for ia64_compute_frame_size: find an appropriate general
   register in which to save the special register identified by R.

   If a register has already been latched for R in
   emitted_frame_related_regs, that register is returned again, after
   making sure it is accounted for in current_frame_info (n_local_regs for
   a local register, gr_used_mask for r1..r31 in a leaf function).

   Otherwise a leaf function first tries an unused call-clobbered register
   among r1..r31, recording the choice in current_frame_info.gr_used_mask.
   Failing that, and only if TRY_LOCALS is nonzero, the next free local
   register is taken and n_local_regs grows to cover it.

   Returns the chosen register number, or 0 if the value must be spilled
   to the stack.  */

static int
find_gr_spill (enum ia64_frame_regs r, int try_locals)
{
  int regno = emitted_frame_related_regs[r];

  if (regno != 0)
    {
      /* Code using this register has already been generated; we are
	 committed to it.  Just keep the bookkeeping consistent.  */
      if (regno >= LOC_REG (0) && regno < LOC_REG (80 - frame_pointer_needed)
	  && current_frame_info.n_local_regs < regno - LOC_REG (0) + 1)
	current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
      else if (crtl->is_leaf
	       && regno >= GR_REG (1) && regno <= GR_REG (31))
	current_frame_info.gr_used_mask |= 1 << regno;

      return regno;
    }

  /* If this is a leaf function, first try an otherwise unused
     call-clobbered register.  */
  if (crtl->is_leaf)
    for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
      {
	if (df_regs_ever_live_p (regno))
	  continue;
	if (! call_used_regs[regno]
	    || fixed_regs[regno]
	    || global_regs[regno])
	  continue;
	if (((current_frame_info.gr_used_mask >> regno) & 1) != 0)
	  continue;
	if (is_emitted (regno))
	  continue;

	current_frame_info.gr_used_mask |= 1 << regno;
	return regno;
      }

  if (try_locals)
    {
      /* If there is a frame pointer, then we can't use loc79, because
	 that is HARD_FRAME_POINTER_REGNUM.  In particular, see the
	 reg_name switching code in ia64_expand_prologue.  */
      for (regno = current_frame_info.n_local_regs;
	   regno < (80 - frame_pointer_needed);
	   regno++)
	if (! is_emitted (LOC_REG (regno)))
	  {
	    current_frame_info.n_local_regs = regno + 1;
	    return LOC_REG (regno);
	  }
    }

  /* Failed to find a general register to spill to.  Must use stack.  */
  return 0;
}
/* In order to make for nice schedules, we try to allocate every temporary
   to a different register.  We must of course stay away from call-saved,
   fixed, and global registers.  We must also stay away from registers
   allocated in current_frame_info.gr_used_mask, since those include regs
   used all through the prologue.

   Any register allocated here must be used immediately.  The idea is to
   aid scheduling, not to solve data flow problems.  */

/* The register most recently handed out by next_scratch_gr_reg; the next
   search starts just after it.  */
static int last_scratch_gr_reg;

/* Return the next usable scratch register among r0..r31, searching
   round-robin from the one after last_scratch_gr_reg, and remember it as
   the new starting point.  */

static int
next_scratch_gr_reg (void)
{
  int step;

  for (step = 1; step <= 32; step++)
    {
      const int candidate = (last_scratch_gr_reg + step) & 31;

      if (! call_used_regs[candidate]
	  || fixed_regs[candidate]
	  || global_regs[candidate])
	continue;
      if (((current_frame_info.gr_used_mask >> candidate) & 1) != 0)
	continue;

      last_scratch_gr_reg = candidate;
      return candidate;
    }

  /* There must be _something_ available.  */
  gcc_unreachable ();
}
/* Helper function for ia64_compute_frame_size, called through
   diddle_return_value.  If REG starts below hard register 32, set the bit
   in current_frame_info.gr_used_mask for each hard register that REG
   occupies in its mode.  DATA is unused.  */

static void
mark_reg_gr_used_mask (rtx reg, void *data ATTRIBUTE_UNUSED)
{
  unsigned int first = REGNO (reg);
  unsigned int count, cur;

  /* Only registers numbered below 32 are tracked in the mask.  */
  if (first >= 32)
    return;

  count = hard_regno_nregs[first][GET_MODE (reg)];
  for (cur = first; cur < first + count; cur++)
    current_frame_info.gr_used_mask |= 1 << cur;
}
/* Compute the frame layout of the current function and record it in
   current_frame_info: the register stack frame sizes (inputs, locals,
   outputs), the set of registers that must be saved, where the special
   registers are saved, and the memory stack frame sizes.  SIZE is the
   number of bytes of space needed for local variables.

   Nothing is returned.  The function is a no-op once
   current_frame_info.initialized is set, which only happens when it runs
   after reload has completed; until then every call recomputes the layout
   from scratch.  */
static void
ia64_compute_frame_size (HOST_WIDE_INT size)
{
HOST_WIDE_INT total_size;
/* Bytes of stack needed for FR, GR and BR saves (plus GP when it has no
   save register).  */
HOST_WIDE_INT spill_size = 0;
/* Bytes of stack needed for special registers that did not get a general
   register to live in.  */
HOST_WIDE_INT extra_spill_size = 0;
HOST_WIDE_INT pretend_args_size;
/* Registers that need to be saved; copied to current_frame_info.mask.  */
HARD_REG_SET mask;
int n_spilled = 0;
int spilled_gr_p = 0;
int spilled_fr_p = 0;
unsigned int regno;
int min_regno;
int max_regno;
int i;
if (current_frame_info.initialized)
return;
memset (&current_frame_info, 0, sizeof current_frame_info);
CLEAR_HARD_REG_SET (mask);
/* Don't allocate scratches to the return register. */
diddle_return_value (mark_reg_gr_used_mask, NULL);
/* Don't allocate scratches to the EH scratch registers. */
if (cfun->machine->ia64_eh_epilogue_sp)
mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_sp, NULL);
if (cfun->machine->ia64_eh_epilogue_bsp)
mark_reg_gr_used_mask (cfun->machine->ia64_eh_epilogue_bsp, NULL);
/* Static stack checking uses r2 and r3. */
if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
current_frame_info.gr_used_mask |= 0xc;
/* Find the size of the register stack frame. We have only 80 local
registers, because we reserve 8 for the inputs and 8 for the
outputs. */
/* Skip HARD_FRAME_POINTER_REGNUM (loc79) when frame_pointer_needed,
since we'll be adjusting that down later. */
regno = LOC_REG (78) + ! frame_pointer_needed;
/* Scan downwards for the highest local register that is live and not
   already committed as a frame-related save register.  */
for (; regno >= LOC_REG (0); regno--)
if (df_regs_ever_live_p (regno) && !is_emitted (regno))
break;
current_frame_info.n_local_regs = regno - LOC_REG (0) + 1;
/* For functions marked with the syscall_linkage attribute, we must mark
all eight input registers as in use, so that locals aren't visible to
the caller. */
if (cfun->machine->n_varargs > 0
|| lookup_attribute ("syscall_linkage",
TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
current_frame_info.n_input_regs = 8;
else
{
for (regno = IN_REG (7); regno >= IN_REG (0); regno--)
if (df_regs_ever_live_p (regno))
break;
current_frame_info.n_input_regs = regno - IN_REG (0) + 1;
}
/* Likewise, the number of output registers is set by the highest one
   that is live.  */
for (regno = OUT_REG (7); regno >= OUT_REG (0); regno--)
if (df_regs_ever_live_p (regno))
break;
i = regno - OUT_REG (0) + 1;
#ifndef PROFILE_HOOK
/* When -p profiling, we need one output register for the mcount argument.
Likewise for -a profiling for the bb_init_func argument. For -ax
profiling, we need two output registers for the two bb_init_trace_func
arguments. */
if (crtl->profile)
i = MAX (i, 1);
#endif
current_frame_info.n_output_regs = i;
/* ??? No rotating register support yet. */
current_frame_info.n_rotate_regs = 0;
/* Discover which registers need spilling, and how much room that
will take. Begin with floating point and general registers,
which will always wind up on the stack. */
/* Each saved FR register takes 16 bytes.  */
for (regno = FR_REG (2); regno <= FR_REG (127); regno++)
if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
{
SET_HARD_REG_BIT (mask, regno);
spill_size += 16;
n_spilled += 1;
spilled_fr_p = 1;
}
/* Each saved GR register takes 8 bytes.  */
for (regno = GR_REG (1); regno <= GR_REG (31); regno++)
if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
{
SET_HARD_REG_BIT (mask, regno);
spill_size += 8;
n_spilled += 1;
spilled_gr_p = 1;
}
/* Each saved BR register (b1..b7) takes 8 bytes; b0 is handled below.  */
for (regno = BR_REG (1); regno <= BR_REG (7); regno++)
if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
{
SET_HARD_REG_BIT (mask, regno);
spill_size += 8;
n_spilled += 1;
}
/* Now come all special registers that might get saved in other
general registers. */
if (frame_pointer_needed)
{
current_frame_info.r[reg_fp] = find_gr_spill (reg_fp, 1);
/* If we did not get a register, then we take LOC79. This is guaranteed
to be free, even if regs_ever_live is already set, because this is
HARD_FRAME_POINTER_REGNUM. This requires incrementing n_local_regs,
as we don't count loc79 above. */
if (current_frame_info.r[reg_fp] == 0)
{
current_frame_info.r[reg_fp] = LOC_REG (79);
current_frame_info.n_local_regs = LOC_REG (79) - LOC_REG (0) + 1;
}
}
if (! crtl->is_leaf)
{
/* Emit a save of BR0 if we call other functions. Do this even
if this function doesn't return, as EH depends on this to be
able to unwind the stack. */
SET_HARD_REG_BIT (mask, BR_REG (0));
current_frame_info.r[reg_save_b0] = find_gr_spill (reg_save_b0, 1);
if (current_frame_info.r[reg_save_b0] == 0)
{
extra_spill_size += 8;
n_spilled += 1;
}
/* Similarly for ar.pfs. */
SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
current_frame_info.r[reg_save_ar_pfs] = find_gr_spill (reg_save_ar_pfs, 1);
if (current_frame_info.r[reg_save_ar_pfs] == 0)
{
extra_spill_size += 8;
n_spilled += 1;
}
/* Similarly for gp. Note that if we're calling setjmp, the stacked
registers are clobbered, so we fall back to the stack. */
current_frame_info.r[reg_save_gp]
= (cfun->calls_setjmp ? 0 : find_gr_spill (reg_save_gp, 1));
/* Unlike b0 and ar.pfs, a gp save in memory is counted in spill_size,
   not extra_spill_size.  */
if (current_frame_info.r[reg_save_gp] == 0)
{
SET_HARD_REG_BIT (mask, GR_REG (1));
spill_size += 8;
n_spilled += 1;
}
}
else
{
/* Leaf function: b0 and ar.pfs only need saving if they are live.  */
if (df_regs_ever_live_p (BR_REG (0)) && ! call_used_regs[BR_REG (0)])
{
SET_HARD_REG_BIT (mask, BR_REG (0));
extra_spill_size += 8;
n_spilled += 1;
}
if (df_regs_ever_live_p (AR_PFS_REGNUM))
{
SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
current_frame_info.r[reg_save_ar_pfs]
= find_gr_spill (reg_save_ar_pfs, 1);
if (current_frame_info.r[reg_save_ar_pfs] == 0)
{
extra_spill_size += 8;
n_spilled += 1;
}
}
}
/* Unwind descriptor hackery: things are most efficient if we allocate
consecutive GR save registers for RP, PFS, FP in that order. However,
it is absolutely critical that FP get the only hard register that's
guaranteed to be free, so we allocated it first. If all three did
happen to be allocated hard regs, and are consecutive, rearrange them
into the preferred order now.
If we have already emitted code for any of those registers,
then it's already too late to change. */
min_regno = MIN (current_frame_info.r[reg_fp],
MIN (current_frame_info.r[reg_save_b0],
current_frame_info.r[reg_save_ar_pfs]));
max_regno = MAX (current_frame_info.r[reg_fp],
MAX (current_frame_info.r[reg_save_b0],
current_frame_info.r[reg_save_ar_pfs]));
/* min_regno > 0 means all three got a register; min + 2 == max together
   with one of them being min + 1 means they are consecutive.  The last
   three clauses require any already-emitted register to sit exactly where
   the preferred order would put it.  */
if (min_regno > 0
&& min_regno + 2 == max_regno
&& (current_frame_info.r[reg_fp] == min_regno + 1
|| current_frame_info.r[reg_save_b0] == min_regno + 1
|| current_frame_info.r[reg_save_ar_pfs] == min_regno + 1)
&& (emitted_frame_related_regs[reg_save_b0] == 0
|| emitted_frame_related_regs[reg_save_b0] == min_regno)
&& (emitted_frame_related_regs[reg_save_ar_pfs] == 0
|| emitted_frame_related_regs[reg_save_ar_pfs] == min_regno + 1)
&& (emitted_frame_related_regs[reg_fp] == 0
|| emitted_frame_related_regs[reg_fp] == min_regno + 2))
{
current_frame_info.r[reg_save_b0] = min_regno;
current_frame_info.r[reg_save_ar_pfs] = min_regno + 1;
current_frame_info.r[reg_fp] = min_regno + 2;
}
/* See if we need to store the predicate register block. */
for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
break;
if (regno <= PR_REG (63))
{
/* The whole predicate block is represented by PR_REG (0) in the mask.  */
SET_HARD_REG_BIT (mask, PR_REG (0));
current_frame_info.r[reg_save_pr] = find_gr_spill (reg_save_pr, 1);
if (current_frame_info.r[reg_save_pr] == 0)
{
extra_spill_size += 8;
n_spilled += 1;
}
/* ??? Mark them all as used so that register renaming and such
are free to use them. */
for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
df_set_regs_ever_live (regno, true);
}
/* If we're forced to use st8.spill, we're forced to save and restore
ar.unat as well. The check for existing liveness allows inline asm
to touch ar.unat. */
if (spilled_gr_p || cfun->machine->n_varargs
|| df_regs_ever_live_p (AR_UNAT_REGNUM))
{
df_set_regs_ever_live (AR_UNAT_REGNUM, true);
SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
/* Local registers are only tried when nothing has been counted in
   spill_size so far.  */
current_frame_info.r[reg_save_ar_unat]
= find_gr_spill (reg_save_ar_unat, spill_size == 0);
if (current_frame_info.r[reg_save_ar_unat] == 0)
{
extra_spill_size += 8;
n_spilled += 1;
}
}
if (df_regs_ever_live_p (AR_LC_REGNUM))
{
SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
/* Same restriction on local registers as for ar.unat above.  */
current_frame_info.r[reg_save_ar_lc]
= find_gr_spill (reg_save_ar_lc, spill_size == 0);
if (current_frame_info.r[reg_save_ar_lc] == 0)
{
extra_spill_size += 8;
n_spilled += 1;
}
}
/* If we have an odd number of words of pretend arguments written to
the stack, then the FR save area will be unaligned. We round the
size of this area up to keep things 16 byte aligned. */
if (spilled_fr_p)
pretend_args_size = IA64_STACK_ALIGN (crtl->args.pretend_args_size);
else
pretend_args_size = crtl->args.pretend_args_size;
total_size = (spill_size + extra_spill_size + size + pretend_args_size
+ crtl->outgoing_args_size);
total_size = IA64_STACK_ALIGN (total_size);
/* We always use the 16-byte scratch area provided by the caller, but
if we are a leaf function, there's no one to which we need to provide
a scratch area. However, if the function allocates dynamic stack space,
the dynamic offset is computed early and contains STACK_POINTER_OFFSET,
so we need to cope. */
if (crtl->is_leaf && !cfun->calls_alloca)
total_size = MAX (0, total_size - 16);
/* Publish the results.  */
current_frame_info.total_size = total_size;
current_frame_info.spill_cfa_off = pretend_args_size - 16;
current_frame_info.spill_size = spill_size;
current_frame_info.extra_spill_size = extra_spill_size;
COPY_HARD_REG_SET (current_frame_info.mask, mask);
current_frame_info.n_spilled = n_spilled;
/* The layout only becomes final (and this function a no-op) once reload
   has completed.  */
current_frame_info.initialized = reload_completed;
}
/* Worker function for TARGET_CAN_ELIMINATE.  Elimination into any
   register other than BR_REG (0) is always allowed; elimination into
   BR_REG (0) is allowed only in a leaf function.  FROM is unused.  */

bool
ia64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  if (to != BR_REG (0))
    return true;

  return crtl->is_leaf;
}
/* Compute the initial difference between the specified pair of registers.
   FROM must be FRAME_POINTER_REGNUM or ARG_POINTER_REGNUM and TO must be
   HARD_FRAME_POINTER_REGNUM or STACK_POINTER_REGNUM; any other
   combination is a compiler bug.  The frame layout is (re)computed first
   so that current_frame_info is valid.  */

HOST_WIDE_INT
ia64_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT result;

  ia64_compute_frame_size (get_frame_size ());

  if (from == FRAME_POINTER_REGNUM)
    {
      if (to == HARD_FRAME_POINTER_REGNUM)
	result = -current_frame_info.total_size;
      else if (to == STACK_POINTER_REGNUM)
	result = 0;
      else
	gcc_unreachable ();

      /* For both targets, step over the 16 byte scratch area and the
	 outgoing arguments unless this is a leaf function that does not
	 call alloca.  */
      if (!crtl->is_leaf || cfun->calls_alloca)
	result += 16 + crtl->outgoing_args_size;
    }
  else if (from == ARG_POINTER_REGNUM)
    {
      /* Arguments start above the 16 byte save area, unless stdarg
	 in which case we store through the 16 byte save area.  */
      if (to == HARD_FRAME_POINTER_REGNUM)
	result = 16 - crtl->args.pretend_args_size;
      else if (to == STACK_POINTER_REGNUM)
	result = (current_frame_info.total_size
		  + 16 - crtl->args.pretend_args_size);
      else
	gcc_unreachable ();
    }
  else
    gcc_unreachable ();

  return result;
}
/* If there are more than a trivial number of register spills, we use
   two interleaved iterators so that we can get two memory references
   per insn group.

   In order to simplify things in the prologue and epilogue expanders,
   we use helper functions to fix up the memory references after the
   fact with the appropriate offsets to a POST_MODIFY memory mode.

   The following data structure tracks the state of the two iterators
   while insns are being emitted.  The two-element arrays hold one entry
   per iterator.  */
struct spill_fill_data
{
rtx_insn *init_after; /* point at which to emit initializations */
rtx init_reg[2]; /* initial base register */
rtx iter_reg[2]; /* the iterator registers */
rtx *prev_addr[2]; /* address of last memory use */
rtx_insn *prev_insn[2]; /* the insn corresponding to prev_addr */
HOST_WIDE_INT prev_off[2]; /* last offset */
int n_iter; /* number of iterators in use */
int next_iter; /* next iterator to use */
/* NOTE(review): presumably a saved copy of
   current_frame_info.gr_used_mask taken while the iterators are active;
   confirm against the setup/finish helpers.  */
unsigned int save_gr_used_mask;
};
/* The single instance used while emitting spill and fill insns.  */
static struct spill_fill_data spill_fill_data;
static void
setup_spill_pointers (int n_spills,