blob: 3c698899e3bf1333a0d15b5c9467a9064d63b93c [file] [log] [blame]
/* Subroutines for insn-output.c for HPPA.
Copyright (C) 1992-2015 Free Software Foundation, Inc.
Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "stringpool.h"
#include "varasm.h"
#include "calls.h"
#include "output.h"
#include "dbxout.h"
#include "except.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "reload.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "recog.h"
#include "predict.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "target-def.h"
#include "langhooks.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "df.h"
#include "opts.h"
#include "builtins.h"
/* Return nonzero if there is a bypass for the output of
OUT_INSN and the fp store IN_INSN. */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
{
machine_mode store_mode;
machine_mode other_mode;
rtx set;
if (recog_memoized (in_insn) < 0
|| (get_attr_type (in_insn) != TYPE_FPSTORE
&& get_attr_type (in_insn) != TYPE_FPSTORE_LOAD)
|| recog_memoized (out_insn) < 0)
return 0;
store_mode = GET_MODE (SET_SRC (PATTERN (in_insn)));
set = single_set (out_insn);
if (!set)
return 0;
other_mode = GET_MODE (SET_SRC (set));
return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
#define DO_FRAME_NOTES 1
#else
#define DO_FRAME_NOTES 0
#endif
#endif
static void pa_option_override (void);
static void copy_reg_pointer (rtx, rtx);
static void fix_range (const char *);
static int hppa_register_move_cost (machine_mode mode, reg_class_t,
reg_class_t);
static int hppa_address_cost (rtx, machine_mode mode, addr_space_t, bool);
static bool hppa_rtx_costs (rtx, int, int, int, int *, bool);
static inline rtx force_mode (machine_mode, rtx);
static void pa_reorg (void);
static void pa_combine_instructions (void);
static int pa_can_combine_p (rtx_insn *, rtx_insn *, rtx_insn *, int, rtx,
rtx, rtx);
static bool forward_branch_p (rtx_insn *);
static void compute_zdepwi_operands (unsigned HOST_WIDE_INT, unsigned *);
static void compute_zdepdi_operands (unsigned HOST_WIDE_INT, unsigned *);
static int compute_movmem_length (rtx_insn *);
static int compute_clrmem_length (rtx_insn *);
static bool pa_assemble_integer (rtx, unsigned int, int);
static void remove_useless_addtr_insns (int);
static void store_reg (int, HOST_WIDE_INT, int);
static void store_reg_modify (int, int, HOST_WIDE_INT);
static void load_reg (int, HOST_WIDE_INT, int);
static void set_reg_plus_d (int, int, HOST_WIDE_INT, int);
static rtx pa_function_value (const_tree, const_tree, bool);
static rtx pa_libcall_value (machine_mode, const_rtx);
static bool pa_function_value_regno_p (const unsigned int);
static void pa_output_function_prologue (FILE *, HOST_WIDE_INT);
static void update_total_code_bytes (unsigned int);
static void pa_output_function_epilogue (FILE *, HOST_WIDE_INT);
static int pa_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static int pa_adjust_priority (rtx_insn *, int);
static int pa_issue_rate (void);
static int pa_reloc_rw_mask (void);
static void pa_som_asm_init_sections (void) ATTRIBUTE_UNUSED;
static section *pa_som_tm_clone_table_section (void) ATTRIBUTE_UNUSED;
static section *pa_select_section (tree, int, unsigned HOST_WIDE_INT)
ATTRIBUTE_UNUSED;
static void pa_encode_section_info (tree, rtx, int);
static const char *pa_strip_name_encoding (const char *);
static bool pa_function_ok_for_sibcall (tree, tree);
static void pa_globalize_label (FILE *, const char *)
ATTRIBUTE_UNUSED;
static void pa_asm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
HOST_WIDE_INT, tree);
#if !defined(USE_COLLECT2)
static void pa_asm_out_constructor (rtx, int);
static void pa_asm_out_destructor (rtx, int);
#endif
static void pa_init_builtins (void);
static rtx pa_expand_builtin (tree, rtx, rtx, machine_mode mode, int);
static rtx hppa_builtin_saveregs (void);
static void hppa_va_start (tree, rtx);
static tree hppa_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static bool pa_scalar_mode_supported_p (machine_mode);
static bool pa_commutative_p (const_rtx x, int outer_code);
static void copy_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static int length_fp_args (rtx_insn *) ATTRIBUTE_UNUSED;
static rtx hppa_legitimize_address (rtx, rtx, machine_mode);
static inline void pa_file_start_level (void) ATTRIBUTE_UNUSED;
static inline void pa_file_start_space (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_file (int) ATTRIBUTE_UNUSED;
static inline void pa_file_start_mcount (const char*) ATTRIBUTE_UNUSED;
static void pa_elf_file_start (void) ATTRIBUTE_UNUSED;
static void pa_som_file_start (void) ATTRIBUTE_UNUSED;
static void pa_linux_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_gas_file_start (void) ATTRIBUTE_UNUSED;
static void pa_hpux64_hpas_file_start (void) ATTRIBUTE_UNUSED;
static void output_deferred_plabels (void);
static void output_deferred_profile_counters (void) ATTRIBUTE_UNUSED;
#ifdef ASM_OUTPUT_EXTERNAL_REAL
static void pa_hpux_file_end (void);
#endif
static void pa_init_libfuncs (void);
static rtx pa_struct_value_rtx (tree, int);
static bool pa_pass_by_reference (cumulative_args_t, machine_mode,
const_tree, bool);
static int pa_arg_partial_bytes (cumulative_args_t, machine_mode,
tree, bool);
static void pa_function_arg_advance (cumulative_args_t, machine_mode,
const_tree, bool);
static rtx pa_function_arg (cumulative_args_t, machine_mode,
const_tree, bool);
static unsigned int pa_function_arg_boundary (machine_mode, const_tree);
static struct machine_function * pa_init_machine_status (void);
static reg_class_t pa_secondary_reload (bool, rtx, reg_class_t,
machine_mode,
secondary_reload_info *);
static void pa_extra_live_on_entry (bitmap);
static machine_mode pa_promote_function_mode (const_tree,
machine_mode, int *,
const_tree, int);
static void pa_asm_trampoline_template (FILE *);
static void pa_trampoline_init (rtx, tree, rtx);
static rtx pa_trampoline_adjust_address (rtx);
static rtx pa_delegitimize_address (rtx);
static bool pa_print_operand_punct_valid_p (unsigned char);
static rtx pa_internal_arg_pointer (void);
static bool pa_can_eliminate (const int, const int);
static void pa_conditional_register_usage (void);
static machine_mode pa_c_mode_for_suffix (char);
static section *pa_function_section (tree, enum node_frequency, bool, bool);
static bool pa_cannot_force_const_mem (machine_mode, rtx);
static bool pa_legitimate_constant_p (machine_mode, rtx);
static unsigned int pa_section_type_flags (tree, const char *, int);
static bool pa_legitimate_address_p (machine_mode, rtx, bool);
/* The following extra sections are only used for SOM. */
static GTY(()) section *som_readonly_data_section;
static GTY(()) section *som_one_only_readonly_data_section;
static GTY(()) section *som_one_only_data_section;
static GTY(()) section *som_tm_clone_table_section;
/* Counts for the number of callee-saved general and floating point
registers which were saved by the current function's prologue. */
static int gr_saved, fr_saved;
/* Boolean indicating whether the return pointer was saved by the
current function's prologue. */
static bool rp_saved;
static rtx find_addr_reg (rtx);
/* Keep track of the number of bytes we have output in the CODE subspace
during this compilation so we'll know when to emit inline long-calls. */
unsigned long total_code_bytes;
/* The last address of the previous function plus the number of bytes in
associated thunks that have been output. This is used to determine if
a thunk can use an IA-relative branch to reach its target function. */
static unsigned int last_address;
/* Variables to handle plabels that we discover are necessary at assembly
output time. They are output after the current function. */
struct GTY(()) deferred_plabel
{
rtx internal_label;
rtx symbol;
};
static GTY((length ("n_deferred_plabels"))) struct deferred_plabel *
deferred_plabels;
static size_t n_deferred_plabels = 0;
/* Initialize the GCC target structure. */
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE pa_option_override
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.dword\t"
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER pa_assemble_integer
#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE pa_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE pa_output_function_epilogue
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE pa_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE pa_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P pa_function_value_regno_p
#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS hppa_legitimize_address
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST pa_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY pa_adjust_priority
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE pa_issue_rate
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO pa_encode_section_info
#undef TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING pa_strip_name_encoding
#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL pa_function_ok_for_sibcall
#undef TARGET_COMMUTATIVE_P
#define TARGET_COMMUTATIVE_P pa_commutative_p
#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK pa_asm_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
#undef TARGET_ASM_FILE_END
#ifdef ASM_OUTPUT_EXTERNAL_REAL
#define TARGET_ASM_FILE_END pa_hpux_file_end
#else
#define TARGET_ASM_FILE_END output_deferred_plabels
#endif
#undef TARGET_ASM_RELOC_RW_MASK
#define TARGET_ASM_RELOC_RW_MASK pa_reloc_rw_mask
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P pa_print_operand_punct_valid_p
#if !defined(USE_COLLECT2)
#undef TARGET_ASM_CONSTRUCTOR
#define TARGET_ASM_CONSTRUCTOR pa_asm_out_constructor
#undef TARGET_ASM_DESTRUCTOR
#define TARGET_ASM_DESTRUCTOR pa_asm_out_destructor
#endif
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS pa_init_builtins
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN pa_expand_builtin
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST hppa_register_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS hppa_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hppa_address_cost
#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG pa_reorg
#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS pa_init_libfuncs
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE pa_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX pa_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY pa_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE pa_pass_by_reference
#undef TARGET_CALLEE_COPIES
#define TARGET_CALLEE_COPIES hook_bool_CUMULATIVE_ARGS_mode_tree_bool_true
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES pa_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG pa_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE pa_function_arg_advance
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY pa_function_arg_boundary
#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS hppa_builtin_saveregs
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START hppa_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR hppa_gimplify_va_arg_expr
#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P pa_scalar_mode_supported_p
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM pa_cannot_force_const_mem
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pa_secondary_reload
#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY pa_extra_live_on_entry
#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE pa_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT pa_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS pa_trampoline_adjust_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS pa_delegitimize_address
#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER pa_internal_arg_pointer
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE pa_can_eliminate
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE pa_conditional_register_usage
#undef TARGET_C_MODE_FOR_SUFFIX
#define TARGET_C_MODE_FOR_SUFFIX pa_c_mode_for_suffix
#undef TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION pa_function_section
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P pa_legitimate_constant_p
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS pa_section_type_flags
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string. */
static void
fix_range (const char *const_str)
{
int i, first, last;
char *str, *dash, *comma;
/* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
REG2 are either register names or register numbers. The effect
of this option is to mark the registers in the range from REG1 to
REG2 as ``fixed'' so they won't be used by the compiler. This is
used, e.g., to ensure that kernel mode code doesn't use fr4-fr31. */
i = strlen (const_str);
str = (char *) alloca (i + 1);
memcpy (str, const_str, i + 1);
while (1)
{
dash = strchr (str, '-');
if (!dash)
{
warning (0, "value of -mfixed-range must have form REG1-REG2");
return;
}
*dash = '\0';
comma = strchr (dash + 1, ',');
if (comma)
*comma = '\0';
first = decode_reg_name (str);
if (first < 0)
{
warning (0, "unknown register name: %s", str);
return;
}
last = decode_reg_name (dash + 1);
if (last < 0)
{
warning (0, "unknown register name: %s", dash + 1);
return;
}
*dash = '-';
if (first > last)
{
warning (0, "%s-%s is an empty range", str, dash + 1);
return;
}
for (i = first; i <= last; ++i)
fixed_regs[i] = call_used_regs[i] = 1;
if (!comma)
break;
*comma = ',';
str = comma + 1;
}
/* Check if all floating point registers have been fixed. */
for (i = FP_REG_FIRST; i <= FP_REG_LAST; i++)
if (!fixed_regs[i])
break;
if (i > FP_REG_LAST)
target_flags |= MASK_DISABLE_FPREGS;
}
/* Implement the TARGET_OPTION_OVERRIDE hook. */
static void
pa_option_override (void)
{
unsigned int i;
cl_deferred_option *opt;
vec<cl_deferred_option> *v
= (vec<cl_deferred_option> *) pa_deferred_options;
if (v)
FOR_EACH_VEC_ELT (*v, i, opt)
{
switch (opt->opt_index)
{
case OPT_mfixed_range_:
fix_range (opt->arg);
break;
default:
gcc_unreachable ();
}
}
if (flag_pic && TARGET_PORTABLE_RUNTIME)
{
warning (0, "PIC code generation is not supported in the portable runtime model");
}
if (flag_pic && TARGET_FAST_INDIRECT_CALLS)
{
warning (0, "PIC code generation is not compatible with fast indirect calls");
}
if (! TARGET_GAS && write_symbols != NO_DEBUG)
{
warning (0, "-g is only supported when using GAS on this processor,");
warning (0, "-g option disabled");
write_symbols = NO_DEBUG;
}
/* We only support the "big PIC" model now. And we always generate PIC
code when in 64bit mode. */
if (flag_pic == 1 || TARGET_64BIT)
flag_pic = 2;
/* Disable -freorder-blocks-and-partition as we don't support hot and
cold partitioning. */
if (flag_reorder_blocks_and_partition)
{
inform (input_location,
"-freorder-blocks-and-partition does not work "
"on this architecture");
flag_reorder_blocks_and_partition = 0;
flag_reorder_blocks = 1;
}
/* We can't guarantee that .dword is available for 32-bit targets. */
if (UNITS_PER_WORD == 4)
targetm.asm_out.aligned_op.di = NULL;
/* The unaligned ops are only available when using GAS. */
if (!TARGET_GAS)
{
targetm.asm_out.unaligned_op.hi = NULL;
targetm.asm_out.unaligned_op.si = NULL;
targetm.asm_out.unaligned_op.di = NULL;
}
init_machine_status = pa_init_machine_status;
}
enum pa_builtins
{
PA_BUILTIN_COPYSIGNQ,
PA_BUILTIN_FABSQ,
PA_BUILTIN_INFQ,
PA_BUILTIN_HUGE_VALQ,
PA_BUILTIN_max
};
static GTY(()) tree pa_builtins[(int) PA_BUILTIN_max];
static void
pa_init_builtins (void)
{
#ifdef DONT_HAVE_FPUTC_UNLOCKED
{
tree decl = builtin_decl_explicit (BUILT_IN_PUTC_UNLOCKED);
set_builtin_decl (BUILT_IN_FPUTC_UNLOCKED, decl,
builtin_decl_implicit_p (BUILT_IN_PUTC_UNLOCKED));
}
#endif
#if TARGET_HPUX_11
{
tree decl;
if ((decl = builtin_decl_explicit (BUILT_IN_FINITE)) != NULL_TREE)
set_user_assembler_name (decl, "_Isfinite");
if ((decl = builtin_decl_explicit (BUILT_IN_FINITEF)) != NULL_TREE)
set_user_assembler_name (decl, "_Isfinitef");
}
#endif
if (HPUX_LONG_DOUBLE_LIBRARY)
{
tree decl, ftype;
/* Under HPUX, the __float128 type is a synonym for "long double". */
(*lang_hooks.types.register_builtin_type) (long_double_type_node,
"__float128");
/* TFmode support builtins. */
ftype = build_function_type_list (long_double_type_node,
long_double_type_node,
NULL_TREE);
decl = add_builtin_function ("__builtin_fabsq", ftype,
PA_BUILTIN_FABSQ, BUILT_IN_MD,
"_U_Qfabs", NULL_TREE);
TREE_READONLY (decl) = 1;
pa_builtins[PA_BUILTIN_FABSQ] = decl;
ftype = build_function_type_list (long_double_type_node,
long_double_type_node,
long_double_type_node,
NULL_TREE);
decl = add_builtin_function ("__builtin_copysignq", ftype,
PA_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
"_U_Qfcopysign", NULL_TREE);
TREE_READONLY (decl) = 1;
pa_builtins[PA_BUILTIN_COPYSIGNQ] = decl;
ftype = build_function_type_list (long_double_type_node, NULL_TREE);
decl = add_builtin_function ("__builtin_infq", ftype,
PA_BUILTIN_INFQ, BUILT_IN_MD,
NULL, NULL_TREE);
pa_builtins[PA_BUILTIN_INFQ] = decl;
decl = add_builtin_function ("__builtin_huge_valq", ftype,
PA_BUILTIN_HUGE_VALQ, BUILT_IN_MD,
NULL, NULL_TREE);
pa_builtins[PA_BUILTIN_HUGE_VALQ] = decl;
}
}
static rtx
pa_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
machine_mode mode ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
switch (fcode)
{
case PA_BUILTIN_FABSQ:
case PA_BUILTIN_COPYSIGNQ:
return expand_call (exp, target, ignore);
case PA_BUILTIN_INFQ:
case PA_BUILTIN_HUGE_VALQ:
{
machine_mode target_mode = TYPE_MODE (TREE_TYPE (exp));
REAL_VALUE_TYPE inf;
rtx tmp;
real_inf (&inf);
tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, target_mode);
tmp = validize_mem (force_const_mem (target_mode, tmp));
if (target == 0)
target = gen_reg_rtx (target_mode);
emit_move_insn (target, tmp);
return target;
}
default:
gcc_unreachable ();
}
return NULL_RTX;
}
/* Function to init struct machine_function.
This will be called, via a pointer variable,
from push_function_context. */
static struct machine_function *
pa_init_machine_status (void)
{
return ggc_cleared_alloc<machine_function> ();
}
/* If FROM is a probable pointer register, mark TO as a probable
pointer register with the same pointer alignment as FROM. */
static void
copy_reg_pointer (rtx to, rtx from)
{
if (REG_POINTER (from))
mark_reg_pointer (to, REGNO_POINTER_ALIGN (REGNO (from)));
}
/* Return 1 if X contains a symbolic expression. We know these
expressions will have one of a few well defined forms, so
we need only check those forms. */
int
pa_symbolic_expression_p (rtx x)
{
/* Strip off any HIGH. */
if (GET_CODE (x) == HIGH)
x = XEXP (x, 0);
return symbolic_operand (x, VOIDmode);
}
/* Accept any constant that can be moved in one instruction into a
general register. */
int
pa_cint_ok_for_move (HOST_WIDE_INT ival)
{
/* OK if ldo, ldil, or zdepi, can be used. */
return (VAL_14_BITS_P (ival)
|| pa_ldil_cint_p (ival)
|| pa_zdepi_cint_p (ival));
}
/* True iff ldil can be used to load this CONST_INT. The least
significant 11 bits of the value must be zero and the value must
not change sign when extended from 32 to 64 bits. */
int
pa_ldil_cint_p (HOST_WIDE_INT ival)
{
HOST_WIDE_INT x = ival & (((HOST_WIDE_INT) -1 << 31) | 0x7ff);
return x == 0 || x == ((HOST_WIDE_INT) -1 << 31);
}
/* True iff zdepi can be used to generate this CONST_INT.
zdepi first sign extends a 5-bit signed number to a given field
length, then places this field anywhere in a zero. */
int
pa_zdepi_cint_p (unsigned HOST_WIDE_INT x)
{
unsigned HOST_WIDE_INT lsb_mask, t;
/* This might not be obvious, but it's at least fast.
This function is critical; we don't have the time loops would take. */
lsb_mask = x & -x;
t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
/* Return true iff t is a power of two. */
return ((t & (t - 1)) == 0);
}
/* True iff depi or extru can be used to compute (reg & mask).
Accept bit pattern like these:
0....01....1
1....10....0
1..10..01..1 */
int
pa_and_mask_p (unsigned HOST_WIDE_INT mask)
{
mask = ~mask;
mask += mask & -mask;
return (mask & (mask - 1)) == 0;
}
/* True iff depi can be used to compute (reg | MASK). */
int
pa_ior_mask_p (unsigned HOST_WIDE_INT mask)
{
mask += mask & -mask;
return (mask & (mask - 1)) == 0;
}
/* Legitimize PIC addresses. If the address is already
position-independent, we return ORIG. Newly generated
position-independent addresses go to REG. If we need more
than one register, we lose. */
static rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
{
rtx pic_ref = orig;
gcc_assert (!PA_SYMBOL_REF_TLS_P (orig));
/* Labels need special handling. */
if (pic_label_operand (orig, mode))
{
rtx_insn *insn;
/* We do not want to go through the movXX expanders here since that
would create recursion.
Nor do we really want to call a generator for a named pattern
since that requires multiple patterns if we want to support
multiple word sizes.
So instead we just emit the raw set, which avoids the movXX
expanders completely. */
mark_reg_pointer (reg, BITS_PER_UNIT);
insn = emit_insn (gen_rtx_SET (VOIDmode, reg, orig));
/* Put a REG_EQUAL note on this insn, so that it can be optimized. */
add_reg_note (insn, REG_EQUAL, orig);
/* During and after reload, we need to generate a REG_LABEL_OPERAND note
and update LABEL_NUSES because this is not done automatically. */
if (reload_in_progress || reload_completed)
{
/* Extract LABEL_REF. */
if (GET_CODE (orig) == CONST)
orig = XEXP (XEXP (orig, 0), 0);
/* Extract CODE_LABEL. */
orig = XEXP (orig, 0);
add_reg_note (insn, REG_LABEL_OPERAND, orig);
/* Make sure we have label and not a note. */
if (LABEL_P (orig))
LABEL_NUSES (orig)++;
}
crtl->uses_pic_offset_table = 1;
return reg;
}
if (GET_CODE (orig) == SYMBOL_REF)
{
rtx_insn *insn;
rtx tmp_reg;
gcc_assert (reg);
/* Before reload, allocate a temporary register for the intermediate
result. This allows the sequence to be deleted when the final
result is unused and the insns are trivially dead. */
tmp_reg = ((reload_in_progress || reload_completed)
? reg : gen_reg_rtx (Pmode));
if (function_label_operand (orig, VOIDmode))
{
/* Force function label into memory in word mode. */
orig = XEXP (force_const_mem (word_mode, orig), 0);
/* Load plabel address from DLT. */
emit_move_insn (tmp_reg,
gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
gen_rtx_HIGH (word_mode, orig)));
pic_ref
= gen_const_mem (Pmode,
gen_rtx_LO_SUM (Pmode, tmp_reg,
gen_rtx_UNSPEC (Pmode,
gen_rtvec (1, orig),
UNSPEC_DLTIND14R)));
emit_move_insn (reg, pic_ref);
/* Now load address of function descriptor. */
pic_ref = gen_rtx_MEM (Pmode, reg);
}
else
{
/* Load symbol reference from DLT. */
emit_move_insn (tmp_reg,
gen_rtx_PLUS (word_mode, pic_offset_table_rtx,
gen_rtx_HIGH (word_mode, orig)));
pic_ref
= gen_const_mem (Pmode,
gen_rtx_LO_SUM (Pmode, tmp_reg,
gen_rtx_UNSPEC (Pmode,
gen_rtvec (1, orig),
UNSPEC_DLTIND14R)));
}
crtl->uses_pic_offset_table = 1;
mark_reg_pointer (reg, BITS_PER_UNIT);
insn = emit_move_insn (reg, pic_ref);
/* Put a REG_EQUAL note on this insn, so that it can be optimized. */
set_unique_reg_note (insn, REG_EQUAL, orig);
return reg;
}
else if (GET_CODE (orig) == CONST)
{
rtx base;
if (GET_CODE (XEXP (orig, 0)) == PLUS
&& XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
return orig;
gcc_assert (reg);
gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
base == reg ? 0 : reg);
if (GET_CODE (orig) == CONST_INT)
{
if (INT_14_BITS (orig))
return plus_constant (Pmode, base, INTVAL (orig));
orig = force_reg (Pmode, orig);
}
pic_ref = gen_rtx_PLUS (Pmode, base, orig);
/* Likewise, should we set special REG_NOTEs here? */
}
return pic_ref;
}
static GTY(()) rtx gen_tls_tga;
static rtx
gen_tls_get_addr (void)
{
if (!gen_tls_tga)
gen_tls_tga = init_one_libfunc ("__tls_get_addr");
return gen_tls_tga;
}
static rtx
hppa_tls_call (rtx arg)
{
rtx ret;
ret = gen_reg_rtx (Pmode);
emit_library_call_value (gen_tls_get_addr (), ret,
LCT_CONST, Pmode, 1, arg, Pmode);
return ret;
}
static rtx
legitimize_tls_address (rtx addr)
{
rtx ret, tmp, t1, t2, tp;
rtx_insn *insn;
/* Currently, we can't handle anything but a SYMBOL_REF. */
if (GET_CODE (addr) != SYMBOL_REF)
return addr;
switch (SYMBOL_REF_TLS_MODEL (addr))
{
case TLS_MODEL_GLOBAL_DYNAMIC:
tmp = gen_reg_rtx (Pmode);
if (flag_pic)
emit_insn (gen_tgd_load_pic (tmp, addr));
else
emit_insn (gen_tgd_load (tmp, addr));
ret = hppa_tls_call (tmp);
break;
case TLS_MODEL_LOCAL_DYNAMIC:
ret = gen_reg_rtx (Pmode);
tmp = gen_reg_rtx (Pmode);
start_sequence ();
if (flag_pic)
emit_insn (gen_tld_load_pic (tmp, addr));
else
emit_insn (gen_tld_load (tmp, addr));
t1 = hppa_tls_call (tmp);
insn = get_insns ();
end_sequence ();
t2 = gen_reg_rtx (Pmode);
emit_libcall_block (insn, t2, t1,
gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
UNSPEC_TLSLDBASE));
emit_insn (gen_tld_offset_load (ret, addr, t2));
break;
case TLS_MODEL_INITIAL_EXEC:
tp = gen_reg_rtx (Pmode);
tmp = gen_reg_rtx (Pmode);
ret = gen_reg_rtx (Pmode);
emit_insn (gen_tp_load (tp));
if (flag_pic)
emit_insn (gen_tie_load_pic (tmp, addr));
else
emit_insn (gen_tie_load (tmp, addr));
emit_move_insn (ret, gen_rtx_PLUS (Pmode, tp, tmp));
break;
case TLS_MODEL_LOCAL_EXEC:
tp = gen_reg_rtx (Pmode);
ret = gen_reg_rtx (Pmode);
emit_insn (gen_tp_load (tp));
emit_insn (gen_tle_load (ret, addr, tp));
break;
default:
gcc_unreachable ();
}
return ret;
}
/* Try machine-dependent ways of modifying an illegitimate address
to be legitimate. If we find one, return the new, valid address.
This macro is used in only one place: `memory_address' in explow.c.
OLDX is the address as it was before break_out_memory_refs was called.
In some cases it is useful to look at this to decide what needs to be done.
It is always safe for this macro to do nothing. It exists to recognize
opportunities to optimize the output.
For the PA, transform:
memory(X + <large int>)
into:
if (<large int> & mask) >= 16
Y = (<large int> & ~mask) + mask + 1 Round up.
else
Y = (<large int> & ~mask) Round down.
Z = X + Y
memory (Z + (<large int> - Y));
This is for CSE to find several similar references, and only use one Z.
X can either be a SYMBOL_REF or REG, but because combine cannot
perform a 4->2 combination we do nothing for SYMBOL_REF + D where
D will not fit in 14 bits.
MODE_FLOAT references allow displacements which fit in 5 bits, so use
0x1f as the mask.
MODE_INT references allow displacements which fit in 14 bits, so use
0x3fff as the mask.
This relies on the fact that most mode MODE_FLOAT references will use FP
registers and most mode MODE_INT references will use integer registers.
(In the rare case of an FP register used in an integer MODE, we depend
on secondary reloads to clean things up.)
It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
addressing modes to be used).
Put X and Z into registers. Then put the entire expression into
a register. */
rtx
hppa_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
machine_mode mode)
{
rtx orig = x;
/* We need to canonicalize the order of operands in unscaled indexed
addresses since the code that checks if an address is valid doesn't
always try both orders. */
if (!TARGET_NO_SPACE_REGS
&& GET_CODE (x) == PLUS
&& GET_MODE (x) == Pmode
&& REG_P (XEXP (x, 0))
&& REG_P (XEXP (x, 1))
&& REG_POINTER (XEXP (x, 0))
&& !REG_POINTER (XEXP (x, 1)))
return gen_rtx_PLUS (Pmode, XEXP (x, 1), XEXP (x, 0));
if (tls_referenced_p (x))
return legitimize_tls_address (x);
else if (flag_pic)
return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
/* Strip off CONST. */
if (GET_CODE (x) == CONST)
x = XEXP (x, 0);
/* Special case. Get the SYMBOL_REF into a register and use indexing.
That should always be safe. */
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 0)) == REG
&& GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
{
rtx reg = force_reg (Pmode, XEXP (x, 1));
return force_reg (Pmode, gen_rtx_PLUS (Pmode, reg, XEXP (x, 0)));
}
/* Note we must reject symbols which represent function addresses
since the assembler/linker can't handle arithmetic on plabels. */
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
&& !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
|| GET_CODE (XEXP (x, 0)) == REG))
{
rtx int_part, ptr_reg;
int newoffset;
int offset = INTVAL (XEXP (x, 1));
int mask;
mask = (GET_MODE_CLASS (mode) == MODE_FLOAT
&& !INT14_OK_STRICT ? 0x1f : 0x3fff);
/* Choose which way to round the offset. Round up if we
are >= halfway to the next boundary. */
if ((offset & mask) >= ((mask + 1) / 2))
newoffset = (offset & ~ mask) + mask + 1;
else
newoffset = (offset & ~ mask);
/* If the newoffset will not fit in 14 bits (ldo), then
handling this would take 4 or 5 instructions (2 to load
the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
add the new offset and the SYMBOL_REF.) Combine can
not handle 4->2 or 5->2 combinations, so do not create
them. */
if (! VAL_14_BITS_P (newoffset)
&& GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
{
rtx const_part = plus_constant (Pmode, XEXP (x, 0), newoffset);
rtx tmp_reg
= force_reg (Pmode,
gen_rtx_HIGH (Pmode, const_part));
ptr_reg
= force_reg (Pmode,
gen_rtx_LO_SUM (Pmode,
tmp_reg, const_part));
}
else
{
if (! VAL_14_BITS_P (newoffset))
int_part = force_reg (Pmode, GEN_INT (newoffset));
else
int_part = GEN_INT (newoffset);
ptr_reg = force_reg (Pmode,
gen_rtx_PLUS (Pmode,
force_reg (Pmode, XEXP (x, 0)),
int_part));
}
return plus_constant (Pmode, ptr_reg, offset - newoffset);
}
/* Handle (plus (mult (a) (shadd_constant)) (b)). */
if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
&& (OBJECT_P (XEXP (x, 1))
|| GET_CODE (XEXP (x, 1)) == SUBREG)
&& GET_CODE (XEXP (x, 1)) != CONST)
{
int val = INTVAL (XEXP (XEXP (x, 0), 1));
rtx reg1, reg2;
reg1 = XEXP (x, 1);
if (GET_CODE (reg1) != REG)
reg1 = force_reg (Pmode, force_operand (reg1, 0));
reg2 = XEXP (XEXP (x, 0), 0);
if (GET_CODE (reg2) != REG)
reg2 = force_reg (Pmode, force_operand (reg2, 0));
return force_reg (Pmode, gen_rtx_PLUS (Pmode,
gen_rtx_MULT (Pmode,
reg2,
GEN_INT (val)),
reg1));
}
/* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
Only do so for floating point modes since this is more speculative
and we lose if it's an integer store. */
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
&& GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
&& pa_shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
&& (mode == SFmode || mode == DFmode))
{
/* First, try and figure out what to use as a base register. */
rtx reg1, reg2, base, idx;
reg1 = XEXP (XEXP (x, 0), 1);
reg2 = XEXP (x, 1);
base = NULL_RTX;
idx = NULL_RTX;
/* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
then pa_emit_move_sequence will turn on REG_POINTER so we'll know
it's a base register below. */
if (GET_CODE (reg1) != REG)
reg1 = force_reg (Pmode, force_operand (reg1, 0));
if (GET_CODE (reg2) != REG)
reg2 = force_reg (Pmode, force_operand (reg2, 0));
/* Figure out what the base and index are. */
if (GET_CODE (reg1) == REG
&& REG_POINTER (reg1))
{
base = reg1;
idx = gen_rtx_PLUS (Pmode,
gen_rtx_MULT (Pmode,
XEXP (XEXP (XEXP (x, 0), 0), 0),
XEXP (XEXP (XEXP (x, 0), 0), 1)),
XEXP (x, 1));
}
else if (GET_CODE (reg2) == REG
&& REG_POINTER (reg2))
{
base = reg2;
idx = XEXP (x, 0);
}
if (base == 0)
return orig;
/* If the index adds a large constant, try to scale the
constant so that it can be loaded with only one insn. */
if (GET_CODE (XEXP (idx, 1)) == CONST_INT
&& VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
/ INTVAL (XEXP (XEXP (idx, 0), 1)))
&& INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
{
/* Divide the CONST_INT by the scale factor, then add it to A. */
int val = INTVAL (XEXP (idx, 1));
val /= INTVAL (XEXP (XEXP (idx, 0), 1));
reg1 = XEXP (XEXP (idx, 0), 0);
if (GET_CODE (reg1) != REG)
reg1 = force_reg (Pmode, force_operand (reg1, 0));
reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val)));
/* We can now generate a simple scaled indexed address. */
return
force_reg
(Pmode, gen_rtx_PLUS (Pmode,
gen_rtx_MULT (Pmode, reg1,
XEXP (XEXP (idx, 0), 1)),
base));
}
/* If B + C is still a valid base register, then add them. */
if (GET_CODE (XEXP (idx, 1)) == CONST_INT
&& INTVAL (XEXP (idx, 1)) <= 4096
&& INTVAL (XEXP (idx, 1)) >= -4096)
{
int val = INTVAL (XEXP (XEXP (idx, 0), 1));
rtx reg1, reg2;
reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1)));
reg2 = XEXP (XEXP (idx, 0), 0);
if (GET_CODE (reg2) != CONST_INT)
reg2 = force_reg (Pmode, force_operand (reg2, 0));
return force_reg (Pmode, gen_rtx_PLUS (Pmode,
gen_rtx_MULT (Pmode,
reg2,
GEN_INT (val)),
reg1));
}
/* Get the index into a register, then add the base + index and
return a register holding the result. */
/* First get A into a register. */
reg1 = XEXP (XEXP (idx, 0), 0);
if (GET_CODE (reg1) != REG)
reg1 = force_reg (Pmode, force_operand (reg1, 0));
/* And get B into a register. */
reg2 = XEXP (idx, 1);
if (GET_CODE (reg2) != REG)
reg2 = force_reg (Pmode, force_operand (reg2, 0));
reg1 = force_reg (Pmode,
gen_rtx_PLUS (Pmode,
gen_rtx_MULT (Pmode, reg1,
XEXP (XEXP (idx, 0), 1)),
reg2));
/* Add the result to our base register and return. */
return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1));
}
/* Uh-oh. We might have an address for x[n-100000]. This needs
special handling to avoid creating an indexed memory address
with x-100000 as the base.
If the constant part is small enough, then it's still safe because
there is a guard page at the beginning and end of the data segment.
Scaled references are common enough that we want to try and rearrange the
terms so that we can use indexing for these addresses too. Only
do the optimization for floatint point modes. */
if (GET_CODE (x) == PLUS
&& pa_symbolic_expression_p (XEXP (x, 1)))
{
/* Ugly. We modify things here so that the address offset specified
by the index expression is computed first, then added to x to form
the entire address. */
rtx regx1, regx2, regy1, regy2, y;
/* Strip off any CONST. */
y = XEXP (x, 1);
if (GET_CODE (y) == CONST)
y = XEXP (y, 0);
if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
{
/* See if this looks like
(plus (mult (reg) (shadd_const))
(const (plus (symbol_ref) (const_int))))
Where const_int is small. In that case the const
expression is a valid pointer for indexing.
If const_int is big, but can be divided evenly by shadd_const
and added to (reg). This allows more scaled indexed addresses. */
if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
&& GET_CODE (XEXP (x, 0)) == MULT
&& GET_CODE (XEXP (y, 1)) == CONST_INT
&& INTVAL (XEXP (y, 1)) >= -4096
&& INTVAL (XEXP (y, 1)) <= 4095
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
{
int val = INTVAL (XEXP (XEXP (x, 0), 1));
rtx reg1, reg2;
reg1 = XEXP (x, 1);
if (GET_CODE (reg1) != REG)
reg1 = force_reg (Pmode, force_operand (reg1, 0));
reg2 = XEXP (XEXP (x, 0), 0);
if (GET_CODE (reg2) != REG)
reg2 = force_reg (Pmode, force_operand (reg2, 0));
return force_reg (Pmode,
gen_rtx_PLUS (Pmode,
gen_rtx_MULT (Pmode,
reg2,
GEN_INT (val)),
reg1));
}
else if ((mode == DFmode || mode == SFmode)
&& GET_CODE (XEXP (y, 0)) == SYMBOL_REF
&& GET_CODE (XEXP (x, 0)) == MULT
&& GET_CODE (XEXP (y, 1)) == CONST_INT
&& INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& pa_shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
{
regx1
= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
/ INTVAL (XEXP (XEXP (x, 0), 1))));
regx2 = XEXP (XEXP (x, 0), 0);
if (GET_CODE (regx2) != REG)
regx2 = force_reg (Pmode, force_operand (regx2, 0));
regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode,
regx2, regx1));
return
force_reg (Pmode,
gen_rtx_PLUS (Pmode,
gen_rtx_MULT (Pmode, regx2,
XEXP (XEXP (x, 0), 1)),
force_reg (Pmode, XEXP (y, 0))));
}
else if (GET_CODE (XEXP (y, 1)) == CONST_INT
&& INTVAL (XEXP (y, 1)) >= -4096
&& INTVAL (XEXP (y, 1)) <= 4095)
{
/* This is safe because of the guard page at the
beginning and end of the data space. Just
return the original address. */
return orig;
}
else
{
/* Doesn't look like one we can optimize. */
regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
regx1 = force_reg (Pmode,
gen_rtx_fmt_ee (GET_CODE (y), Pmode,
regx1, regy2));
return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1));
}
}
}
return orig;
}
/* Implement the TARGET_REGISTER_MOVE_COST hook.
Compute extra cost of moving data between one register class
and another.
Make moves from SAR so expensive they should never happen. We used to
have 0xffff here, but that generates overflow in rare cases.
Copies involving a FP register and a non-FP register are relatively
expensive because they must go through memory.
Other copies are reasonably cheap. */
static int
hppa_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
reg_class_t from, reg_class_t to)
{
if (from == SHIFT_REGS)
return 0x100;
else if (to == SHIFT_REGS && FP_REG_CLASS_P (from))
return 18;
else if ((FP_REG_CLASS_P (from) && ! FP_REG_CLASS_P (to))
|| (FP_REG_CLASS_P (to) && ! FP_REG_CLASS_P (from)))
return 16;
else
return 2;
}
/* For the HPPA, REG and REG+CONST is cost 0
and addresses involving symbolic constants are cost 2.
PIC addresses are very expensive.
It is no coincidence that this has the same structure
as pa_legitimate_address_p. */
static int
hppa_address_cost (rtx X, machine_mode mode ATTRIBUTE_UNUSED,
addr_space_t as ATTRIBUTE_UNUSED,
bool speed ATTRIBUTE_UNUSED)
{
switch (GET_CODE (X))
{
case REG:
case PLUS:
case LO_SUM:
return 1;
case HIGH:
return 2;
default:
return 4;
}
}
/* Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */
static bool
hppa_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
int *total, bool speed ATTRIBUTE_UNUSED)
{
int factor;
switch (code)
{
case CONST_INT:
if (INTVAL (x) == 0)
*total = 0;
else if (INT_14_BITS (x))
*total = 1;
else
*total = 2;
return true;
case HIGH:
*total = 2;
return true;
case CONST:
case LABEL_REF:
case SYMBOL_REF:
*total = 4;
return true;
case CONST_DOUBLE:
if ((x == CONST0_RTX (DFmode) || x == CONST0_RTX (SFmode))
&& outer_code != SET)
*total = 0;
else
*total = 8;
return true;
case MULT:
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
{
*total = COSTS_N_INSNS (3);
return true;
}
/* A mode size N times larger than SImode needs O(N*N) more insns. */
factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
if (factor == 0)
factor = 1;
if (TARGET_PA_11 && !TARGET_DISABLE_FPREGS && !TARGET_SOFT_FLOAT)
*total = factor * factor * COSTS_N_INSNS (8);
else
*total = factor * factor * COSTS_N_INSNS (20);
return true;
case DIV:
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
{
*total = COSTS_N_INSNS (14);
return true;
}
/* FALLTHRU */
case UDIV:
case MOD:
case UMOD:
/* A mode size N times larger than SImode needs O(N*N) more insns. */
factor = GET_MODE_SIZE (GET_MODE (x)) / 4;
if (factor == 0)
factor = 1;
*total = factor * factor * COSTS_N_INSNS (60);
return true;
case PLUS: /* this includes shNadd insns */
case MINUS:
if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
{
*total = COSTS_N_INSNS (3);
return true;
}
/* A size N times larger than UNITS_PER_WORD needs N times as
many insns, taking N times as long. */
factor = GET_MODE_SIZE (GET_MODE (x)) / UNITS_PER_WORD;
if (factor == 0)
factor = 1;
*total = factor * COSTS_N_INSNS (1);
return true;
case ASHIFT:
case ASHIFTRT:
case LSHIFTRT:
*total = COSTS_N_INSNS (1);
return true;
default:
return false;
}
}
/* Ensure mode of ORIG, a REG rtx, is MODE. Returns either ORIG or a
new rtx with the correct mode. */
static inline rtx
force_mode (machine_mode mode, rtx orig)
{
if (mode == GET_MODE (orig))
return orig;
gcc_assert (REGNO (orig) < FIRST_PSEUDO_REGISTER);
return gen_rtx_REG (mode, REGNO (orig));
}
/* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
static bool
pa_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
return tls_referenced_p (x);
}
/* Emit insns to move operands[1] into operands[0].
Return 1 if we have written out everything that needs to be done to
do the move. Otherwise, return 0 and the caller will emit the move
normally.
Note SCRATCH_REG may not be in the proper mode depending on how it
will be used. This routine is responsible for creating a new copy
of SCRATCH_REG in the proper mode. */
int
pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
register rtx operand0 = operands[0];
register rtx operand1 = operands[1];
register rtx tem;
/* We can only handle indexed addresses in the destination operand
of floating point stores. Thus, we need to break out indexed
addresses from the destination operand. */
if (GET_CODE (operand0) == MEM && IS_INDEX_ADDR_P (XEXP (operand0, 0)))
{
gcc_assert (can_create_pseudo_p ());
tem = copy_to_mode_reg (Pmode, XEXP (operand0, 0));
operand0 = replace_equiv_address (operand0, tem);
}
/* On targets with non-equivalent space registers, break out unscaled
indexed addresses from the source operand before the final CSE.
We have to do this because the REG_POINTER flag is not correctly
carried through various optimization passes and CSE may substitute
a pseudo without the pointer set for one with the pointer set. As
a result, we loose various opportunities to create insns with
unscaled indexed addresses. */
if (!TARGET_NO_SPACE_REGS
&& !cse_not_expected
&& GET_CODE (operand1) == MEM
&& GET_CODE (XEXP (operand1, 0)) == PLUS
&& REG_P (XEXP (XEXP (operand1, 0), 0))
&& REG_P (XEXP (XEXP (operand1, 0), 1)))
operand1
= replace_equiv_address (operand1,
copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
if (scratch_reg
&& reload_in_progress && GET_CODE (operand0) == REG
&& REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
operand0 = reg_equiv_mem (REGNO (operand0));
else if (scratch_reg
&& reload_in_progress && GET_CODE (operand0) == SUBREG
&& GET_CODE (SUBREG_REG (operand0)) == REG
&& REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
{
/* We must not alter SUBREG_BYTE (operand0) since that would confuse
the code which tracks sets/uses for delete_output_reload. */
rtx temp = gen_rtx_SUBREG (GET_MODE (operand0),
reg_equiv_mem (REGNO (SUBREG_REG (operand0))),
SUBREG_BYTE (operand0));
operand0 = alter_subreg (&temp, true);
}
if (scratch_reg
&& reload_in_progress && GET_CODE (operand1) == REG
&& REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
operand1 = reg_equiv_mem (REGNO (operand1));
else if (scratch_reg
&& reload_in_progress && GET_CODE (operand1) == SUBREG
&& GET_CODE (SUBREG_REG (operand1)) == REG
&& REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
{
/* We must not alter SUBREG_BYTE (operand0) since that would confuse
the code which tracks sets/uses for delete_output_reload. */
rtx temp = gen_rtx_SUBREG (GET_MODE (operand1),
reg_equiv_mem (REGNO (SUBREG_REG (operand1))),
SUBREG_BYTE (operand1));
operand1 = alter_subreg (&temp, true);
}
if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
&& ((tem = find_replacement (&XEXP (operand0, 0)))
!= XEXP (operand0, 0)))
operand0 = replace_equiv_address (operand0, tem);
if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
&& ((tem = find_replacement (&XEXP (operand1, 0)))
!= XEXP (operand1, 0)))
operand1 = replace_equiv_address (operand1, tem);
/* Handle secondary reloads for loads/stores of FP registers from
REG+D addresses where D does not fit in 5 or 14 bits, including
(subreg (mem (addr))) cases, and reloads for other unsupported
memory operands. */
if (scratch_reg
&& FP_REG_P (operand0)
&& (MEM_P (operand1)
|| (GET_CODE (operand1) == SUBREG
&& MEM_P (XEXP (operand1, 0)))))
{
rtx op1 = operand1;
if (GET_CODE (op1) == SUBREG)
op1 = XEXP (op1, 0);
if (reg_plus_base_memory_operand (op1, GET_MODE (op1)))
{
if (!(TARGET_PA_20
&& !TARGET_ELF32
&& INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
&& !INT_5_BITS (XEXP (XEXP (op1, 0), 1)))
{
/* SCRATCH_REG will hold an address and maybe the actual data.
We want it in WORD_MODE regardless of what mode it was
originally given to us. */
scratch_reg = force_mode (word_mode, scratch_reg);
/* D might not fit in 14 bits either; for such cases load D
into scratch reg. */
if (!INT_14_BITS (XEXP (XEXP (op1, 0), 1)))
{
emit_move_insn (scratch_reg, XEXP (XEXP (op1, 0), 1));
emit_move_insn (scratch_reg,
gen_rtx_fmt_ee (GET_CODE (XEXP (op1, 0)),
Pmode,
XEXP (XEXP (op1, 0), 0),
scratch_reg));
}
else
emit_move_insn (scratch_reg, XEXP (op1, 0));
emit_insn (gen_rtx_SET (VOIDmode, operand0,
replace_equiv_address (op1, scratch_reg)));
return 1;
}
}
else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
|| IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
|| IS_INDEX_ADDR_P (XEXP (op1, 0)))
{
/* Load memory address into SCRATCH_REG. */
scratch_reg = force_mode (word_mode, scratch_reg);
emit_move_insn (scratch_reg, XEXP (op1, 0));
emit_insn (gen_rtx_SET (VOIDmode, operand0,
replace_equiv_address (op1, scratch_reg)));
return 1;
}
}
else if (scratch_reg
&& FP_REG_P (operand1)
&& (MEM_P (operand0)
|| (GET_CODE (operand0) == SUBREG
&& MEM_P (XEXP (operand0, 0)))))
{
rtx op0 = operand0;
if (GET_CODE (op0) == SUBREG)
op0 = XEXP (op0, 0);
if (reg_plus_base_memory_operand (op0, GET_MODE (op0)))
{
if (!(TARGET_PA_20
&& !TARGET_ELF32
&& INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
&& !INT_5_BITS (XEXP (XEXP (op0, 0), 1)))
{
/* SCRATCH_REG will hold an address and maybe the actual data.
We want it in WORD_MODE regardless of what mode it was
originally given to us. */
scratch_reg = force_mode (word_mode, scratch_reg);
/* D might not fit in 14 bits either; for such cases load D
into scratch reg. */
if (!INT_14_BITS (XEXP (XEXP (op0, 0), 1)))
{
emit_move_insn (scratch_reg, XEXP (XEXP (op0, 0), 1));
emit_move_insn (scratch_reg,
gen_rtx_fmt_ee (GET_CODE (XEXP (op0, 0)),
Pmode,
XEXP (XEXP (op0, 0), 0),
scratch_reg));
}
else
emit_move_insn (scratch_reg, XEXP (op0, 0));
emit_insn (gen_rtx_SET (VOIDmode,
replace_equiv_address (op0, scratch_reg),
operand1));
return 1;
}
}
else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
|| IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
|| IS_INDEX_ADDR_P (XEXP (op0, 0)))
{
/* Load memory address into SCRATCH_REG. */
emit_move_insn (scratch_reg, XEXP (op0, 0));
emit_insn (gen_rtx_SET (VOIDmode,
replace_equiv_address (op0, scratch_reg),
operand1));
return 1;
}
}
/* Handle secondary reloads for loads of FP registers from constant
expressions by forcing the constant into memory. For the most part,
this is only necessary for SImode and DImode.
Use scratch_reg to hold the address of the memory location. */
else if (scratch_reg
&& CONSTANT_P (operand1)
&& FP_REG_P (operand0))
{
rtx const_mem, xoperands[2];
if (operand1 == CONST0_RTX (mode))
{
emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
return 1;
}
/* SCRATCH_REG will hold an address and maybe the actual data. We want
it in WORD_MODE regardless of what mode it was originally given
to us. */
scratch_reg = force_mode (word_mode, scratch_reg);
/* Force the constant into memory and put the address of the
memory location into scratch_reg. */
const_mem = force_const_mem (mode, operand1);
xoperands[0] = scratch_reg;
xoperands[1] = XEXP (const_mem, 0);
pa_emit_move_sequence (xoperands, Pmode, 0);
/* Now load the destination register. */
emit_insn (gen_rtx_SET (mode, operand0,
replace_equiv_address (const_mem, scratch_reg)));
return 1;
}
/* Handle secondary reloads for SAR. These occur when trying to load
the SAR from memory or a constant. */
else if (scratch_reg
&& GET_CODE (operand0) == REG
&& REGNO (operand0) < FIRST_PSEUDO_REGISTER
&& REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
&& (GET_CODE (operand1) == MEM || GET_CODE (operand1) == CONST_INT))
{
/* D might not fit in 14 bits either; for such cases load D into
scratch reg. */
if (GET_CODE (operand1) == MEM
&& !memory_address_p (GET_MODE (operand0), XEXP (operand1, 0)))
{
/* We are reloading the address into the scratch register, so we
want to make sure the scratch register is a full register. */
scratch_reg = force_mode (word_mode, scratch_reg);
emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
emit_move_insn (scratch_reg,
gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)),
Pmode,
XEXP (XEXP (operand1, 0), 0),
scratch_reg));
/* Now we are going to load the scratch register from memory,
we want to load it in the same width as the original MEM,
which must be the same as the width of the ultimate destination,
OPERAND0. */
scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
emit_move_insn (scratch_reg,
replace_equiv_address (operand1, scratch_reg));
}
else
{
/* We want to load the scratch register using the same mode as
the ultimate destination. */
scratch_reg = force_mode (GET_MODE (operand0), scratch_reg);
emit_move_insn (scratch_reg, operand1);
}
/* And emit the insn to set the ultimate destination. We know that
the scratch register has the same mode as the destination at this
point. */
emit_move_insn (operand0, scratch_reg);
return 1;
}
/* Handle the most common case: storing into a register. */
if (register_operand (operand0, mode))
{
/* Legitimize TLS symbol references. This happens for references
that aren't a legitimate constant. */
if (PA_SYMBOL_REF_TLS_P (operand1))
operand1 = legitimize_tls_address (operand1);
if (register_operand (operand1, mode)
|| (GET_CODE (operand1) == CONST_INT
&& pa_cint_ok_for_move (INTVAL (operand1)))
|| (operand1 == CONST0_RTX (mode))
|| (GET_CODE (operand1) == HIGH
&& !symbolic_operand (XEXP (operand1, 0), VOIDmode))
/* Only `general_operands' can come here, so MEM is ok. */
|| GET_CODE (operand1) == MEM)
{
/* Various sets are created during RTL generation which don't
have the REG_POINTER flag correctly set. After the CSE pass,
instruction recognition can fail if we don't consistently
set this flag when performing register copies. This should
also improve the opportunities for creating insns that use
unscaled indexing. */
if (REG_P (operand0) && REG_P (operand1))
{
if (REG_POINTER (operand1)
&& !REG_POINTER (operand0)
&& !HARD_REGISTER_P (operand0))
copy_reg_pointer (operand0, operand1);
}
/* When MEMs are broken out, the REG_POINTER flag doesn't
get set. In some cases, we can set the REG_POINTER flag
from the declaration for the MEM. */
if (REG_P (operand0)
&& GET_CODE (operand1) == MEM
&& !REG_POINTER (operand0))
{
tree decl = MEM_EXPR (operand1);
/* Set the register pointer flag and register alignment
if the declaration for this memory reference is a
pointer type. */
if (decl)
{
tree type;
/* If this is a COMPONENT_REF, use the FIELD_DECL from
tree operand 1. */
if (TREE_CODE (decl) == COMPONENT_REF)
decl = TREE_OPERAND (decl, 1);
type = TREE_TYPE (decl);
type = strip_array_types (type);
if (POINTER_TYPE_P (type))
{
int align;
type = TREE_TYPE (type);
/* Using TYPE_ALIGN_OK is rather conservative as
only the ada frontend actually sets it. */
align = (TYPE_ALIGN_OK (type) ? TYPE_ALIGN (type)
: BITS_PER_UNIT);
mark_reg_pointer (operand0, align);
}
}
}
emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
return 1;
}
}
else if (GET_CODE (operand0) == MEM)
{
if (mode == DFmode && operand1 == CONST0_RTX (mode)
&& !(reload_in_progress || reload_completed))
{
rtx temp = gen_reg_rtx (DFmode);
emit_insn (gen_rtx_SET (VOIDmode, temp, operand1));
emit_insn (gen_rtx_SET (VOIDmode, operand0, temp));
return 1;
}
if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
{
/* Run this case quickly. */
emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1));
return 1;
}
if (! (reload_in_progress || reload_completed))
{
operands[0] = validize_mem (operand0);
operands[1] = operand1 = force_reg (mode, operand1);
}
}
/* Simplify the source if we need to.
Note we do have to handle function labels here, even though we do
not consider them legitimate constants. Loop optimizations can
call the emit_move_xxx with one as a source. */
if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
|| (GET_CODE (operand1) == HIGH
&& symbolic_operand (XEXP (operand1, 0), mode))
|| function_label_operand (operand1, VOIDmode)
|| tls_referenced_p (operand1))
{
int ishighonly = 0;
if (GET_CODE (operand1) == HIGH)
{
ishighonly = 1;
operand1 = XEXP (operand1, 0);
}
if (symbolic_operand (operand1, mode))
{
/* Argh. The assembler and linker can't handle arithmetic
involving plabels.
So we force the plabel into memory, load operand0 from
the memory location, then add in the constant part. */
if ((GET_CODE (operand1) == CONST
&& GET_CODE (XEXP (operand1, 0)) == PLUS
&& function_label_operand (XEXP (XEXP (operand1, 0), 0),
VOIDmode))
|| function_label_operand (operand1, VOIDmode))
{
rtx temp, const_part;
/* Figure out what (if any) scratch register to use. */
if (reload_in_progress || reload_completed)
{
scratch_reg = scratch_reg ? scratch_reg : operand0;
/* SCRATCH_REG will hold an address and maybe the actual
data. We want it in WORD_MODE regardless of what mode it
was originally given to us. */
scratch_reg = force_mode (word_mode, scratch_reg);
}
else if (flag_pic)
scratch_reg = gen_reg_rtx (Pmode);
if (GET_CODE (operand1) == CONST)
{
/* Save away the constant part of the expression. */
const_part = XEXP (XEXP (operand1, 0), 1);
gcc_assert (GET_CODE (const_part) == CONST_INT);
/* Force the function label into memory. */
temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
}
else
{
/* No constant part. */
const_part = NULL_RTX;
/* Force the function label into memory. */
temp = force_const_mem (mode, operand1);
}
/* Get the address of the memory location. PIC-ify it if
necessary. */
temp = XEXP (temp, 0);
if (flag_pic)
temp = legitimize_pic_address (temp, mode, scratch_reg);
/* Put the address of the memory location into our destination
register. */
operands[1] = temp;
pa_emit_move_sequence (operands, mode, scratch_reg);
/* Now load from the memory location into our destination
register. */
operands[1] = gen_rtx_MEM (Pmode, operands[0]);
pa_emit_move_sequence (operands, mode, scratch_reg);
/* And add back in the constant part. */
if (const_part != NULL_RTX)
expand_inc (operand0, const_part);
return 1;
}
if (flag_pic)
{
rtx_insn *insn;
rtx temp;
if (reload_in_progress || reload_completed)
{
temp = scratch_reg ? scratch_reg : operand0;
/* TEMP will hold an address and maybe the actual
data. We want it in WORD_MODE regardless of what mode it
was originally given to us. */
temp = force_mode (word_mode, temp);
}
else
temp = gen_reg_rtx (Pmode);
/* Force (const (plus (symbol) (const_int))) to memory
if the const_int will not fit in 14 bits. Although
this requires a relocation, the instruction sequence
needed to load the value is shorter. */
if (GET_CODE (operand1) == CONST
&& GET_CODE (XEXP (operand1, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
&& !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)))
{
rtx x, m = force_const_mem (mode, operand1);
x = legitimize_pic_address (XEXP (m, 0), mode, temp);
x = replace_equiv_address (m, x);
insn = emit_move_insn (operand0, x);
}
else
{
operands[1] = legitimize_pic_address (operand1, mode, temp);
if (REG_P (operand0) && REG_P (operands[1]))
copy_reg_pointer (operand0, operands[1]);
insn = emit_move_insn (operand0, operands[1]);
}
/* Put a REG_EQUAL note on this insn. */
set_unique_reg_note (insn, REG_EQUAL, operand1);
}
/* On the HPPA, references to data space are supposed to use dp,
register 27, but showing it in the RTL inhibits various cse
and loop optimizations. */
else
{
rtx temp, set;
if (reload_in_progress || reload_completed)
{
temp = scratch_reg ? scratch_reg : operand0;
/* TEMP will hold an address and maybe the actual
data. We want it in WORD_MODE regardless of what mode it
was originally given to us. */
temp = force_mode (word_mode, temp);
}
else
temp = gen_reg_rtx (mode);
/* Loading a SYMBOL_REF into a register makes that register
safe to be used as the base in an indexed address.
Don't mark hard registers though. That loses. */
if (GET_CODE (operand0) == REG
&& REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
mark_reg_pointer (operand0, BITS_PER_UNIT);
if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
mark_reg_pointer (temp, BITS_PER_UNIT);
if (ishighonly)
set = gen_rtx_SET (mode, operand0, temp);
else
set = gen_rtx_SET (VOIDmode,
operand0,
gen_rtx_LO_SUM (mode, temp, operand1));
emit_insn (gen_rtx_SET (VOIDmode,
temp,
gen_rtx_HIGH (mode, operand1)));
emit_insn (set);
}
return 1;
}
else if (tls_referenced_p (operand1))
{
rtx tmp = operand1;
rtx addend = NULL;
if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
{
addend = XEXP (XEXP (tmp, 0), 1);
tmp = XEXP (XEXP (tmp, 0), 0);
}
gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
tmp = legitimize_tls_address (tmp);
if (addend)
{
tmp = gen_rtx_PLUS (mode, tmp, addend);
tmp = force_operand (tmp, operands[0]);
}
operands[1] = tmp;
}
else if (GET_CODE (operand1) != CONST_INT
|| !pa_cint_ok_for_move (INTVAL (operand1)))
{
rtx temp;
rtx_insn *insn;
rtx op1 = operand1;
HOST_WIDE_INT value = 0;
HOST_WIDE_INT insv = 0;
int insert = 0;
if (GET_CODE (operand1) == CONST_INT)
value = INTVAL (operand1);
if (TARGET_64BIT
&& GET_CODE (operand1) == CONST_INT
&& HOST_BITS_PER_WIDE_INT > 32
&& GET_MODE_BITSIZE (GET_MODE (operand0)) > 32)
{
HOST_WIDE_INT nval;
/* Extract the low order 32 bits of the value and sign extend.
If the new value is the same as the original value, we can
can use the original value as-is. If the new value is
different, we use it and insert the most-significant 32-bits
of the original value into the final result. */
nval = ((value & (((HOST_WIDE_INT) 2 << 31) - 1))
^ ((HOST_WIDE_INT) 1 << 31)) - ((HOST_WIDE_INT) 1 << 31);
if (value != nval)
{
#if HOST_BITS_PER_WIDE_INT > 32
insv = value >= 0 ? value >> 32 : ~(~value >> 32);
#endif
insert = 1;
value = nval;
operand1 = GEN_INT (nval);
}
}
if (reload_in_progress || reload_completed)
temp = scratch_reg ? scratch_reg : operand0;
else
temp = gen_reg_rtx (mode);
/* We don't directly split DImode constants on 32-bit targets
because PLUS uses an 11-bit immediate and the insn sequence
generated is not as efficient as the one using HIGH/LO_SUM. */
if (GET_CODE (operand1) == CONST_INT
&& GET_MODE_BITSIZE (mode) <= BITS_PER_WORD
&& GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
&& !insert)
{
/* Directly break constant into high and low parts. This
provides better optimization opportunities because various
passes recognize constants split with PLUS but not LO_SUM.
We use a 14-bit signed low part except when the addition
of 0x4000 to the high part might change the sign of the
high part. */
HOST_WIDE_INT low = value & 0x3fff;
HOST_WIDE_INT high = value & ~ 0x3fff;
if (low >= 0x2000)
{
if (high == 0x7fffc000 || (mode == HImode && high == 0x4000))
high += 0x2000;
else
high += 0x4000;
}
low = value - high;
emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (high)));
operands[1] = gen_rtx_PLUS (mode, temp, GEN_INT (low));
}
else
{
emit_insn (gen_rtx_SET (VOIDmode, temp,
gen_rtx_HIGH (mode, operand1)));
operands[1] = gen_rtx_LO_SUM (mode, temp, operand1);
}
insn = emit_move_insn (operands[0], operands[1]);
/* Now insert the most significant 32 bits of the value
into the register. When we don't have a second register
available, it could take up to nine instructions to load
a 64-bit integer constant. Prior to reload, we force
constants that would take more than three instructions
to load to the constant pool. During and after reload,
we have to handle all possible values. */
if (insert)
{
/* Use a HIGH/LO_SUM/INSV sequence if we have a second
register and the value to be inserted is outside the
range that can be loaded with three depdi instructions. */
if (temp != operand0 && (insv >= 16384 || insv < -16384))
{
operand1 = GEN_INT (insv);
emit_insn (gen_rtx_SET (VOIDmode, temp,
gen_rtx_HIGH (mode, operand1)));
emit_move_insn (temp, gen_rtx_LO_SUM (mode, temp, operand1));
if (mode == DImode)
insn = emit_insn (gen_insvdi (operand0, GEN_INT (32),
const0_rtx, temp));
else
insn = emit_insn (gen_insvsi (operand0, GEN_INT (32),
const0_rtx, temp));
}
else
{
int len = 5, pos = 27;
/* Insert the bits using the depdi instruction. */
while (pos >= 0)
{
HOST_WIDE_INT v5 = ((insv & 31) ^ 16) - 16;
HOST_WIDE_INT sign = v5 < 0;
/* Left extend the insertion. */
insv = (insv >= 0 ? insv >> len : ~(~insv >> len));
while (pos > 0 && (insv & 1) == sign)
{
insv = (insv >= 0 ? insv >> 1 : ~(~insv >> 1));
len += 1;
pos -= 1;
}
if (mode == DImode)
insn = emit_insn (gen_insvdi (operand0,
GEN_INT (len),
GEN_INT (pos),
GEN_INT (v5)));
else
insn = emit_insn (gen_insvsi (operand0,
GEN_INT (len),
GEN_INT (pos),
GEN_INT (v5)));
len = pos > 0 && pos < 5 ? pos : 5;
pos -= len;
}
}
}
set_unique_reg_note (insn, REG_EQUAL, op1);
return 1;
}
}
/* Now have insn-emit do whatever it normally does. */
return 0;
}
/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
it will need a link/runtime reloc). */
int
pa_reloc_needed (tree exp)
{
int reloc = 0;
switch (TREE_CODE (exp))
{
case ADDR_EXPR:
return 1;
case POINTER_PLUS_EXPR:
case PLUS_EXPR:
case MINUS_EXPR:
reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
reloc |= pa_reloc_needed (TREE_OPERAND (exp, 1));
break;
CASE_CONVERT:
case NON_LVALUE_EXPR:
reloc = pa_reloc_needed (TREE_OPERAND (exp, 0));
break;
case CONSTRUCTOR:
{
tree value;
unsigned HOST_WIDE_INT ix;
FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (exp), ix, value)
if (value)
reloc |= pa_reloc_needed (value);
}
break;
case ERROR_MARK:
break;
default:
break;
}
return reloc;
}
/* Return the best assembler insn template
for moving operands[1] into operands[0] as a fullword. */
const char *
pa_singlemove_string (rtx *operands)
{
HOST_WIDE_INT intval;
if (GET_CODE (operands[0]) == MEM)
return "stw %r1,%0";
if (GET_CODE (operands[1]) == MEM)
return "ldw %1,%0";
if (GET_CODE (operands[1]) == CONST_DOUBLE)
{
long i;
REAL_VALUE_TYPE d;
gcc_assert (GET_MODE (operands[1]) == SFmode);
/* Translate the CONST_DOUBLE to a CONST_INT with the same target
bit pattern. */
REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
REAL_VALUE_TO_TARGET_SINGLE (d, i);
operands[1] = GEN_INT (i);
/* Fall through to CONST_INT case. */
}
if (GET_CODE (operands[1]) == CONST_INT)
{
intval = INTVAL (operands[1]);
if (VAL_14_BITS_P (intval))
return "ldi %1,%0";
else if ((intval & 0x7ff) == 0)
return "ldil L'%1,%0";
else if (pa_zdepi_cint_p (intval))
return "{zdepi %Z1,%0|depwi,z %Z1,%0}";
else
return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
}
return "copy %1,%0";
}
/* Compute position (in OP[1]) and width (in OP[2])
useful for copying IMM to a register using the zdepi
instructions. Store the immediate value to insert in OP[0]. */
static void
compute_zdepwi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
int lsb, len;
/* Find the least significant set bit in IMM. */
for (lsb = 0; lsb < 32; lsb++)
{
if ((imm & 1) != 0)
break;
imm >>= 1;
}
/* Choose variants based on *sign* of the 5-bit field. */
if ((imm & 0x10) == 0)
len = (lsb <= 28) ? 4 : 32 - lsb;
else
{
/* Find the width of the bitstring in IMM. */
for (len = 5; len < 32 - lsb; len++)
{
if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
break;
}
/* Sign extend IMM as a 5-bit value. */
imm = (imm & 0xf) - 0x10;
}
op[0] = imm;
op[1] = 31 - lsb;
op[2] = len;
}
/* Compute position (in OP[1]) and width (in OP[2])
useful for copying IMM to a register using the depdi,z
instructions. Store the immediate value to insert in OP[0]. */
static void
compute_zdepdi_operands (unsigned HOST_WIDE_INT imm, unsigned *op)
{
int lsb, len, maxlen;
maxlen = MIN (HOST_BITS_PER_WIDE_INT, 64);
/* Find the least significant set bit in IMM. */
for (lsb = 0; lsb < maxlen; lsb++)
{
if ((imm & 1) != 0)
break;
imm >>= 1;
}
/* Choose variants based on *sign* of the 5-bit field. */
if ((imm & 0x10) == 0)
len = (lsb <= maxlen - 4) ? 4 : maxlen - lsb;
else
{
/* Find the width of the bitstring in IMM. */
for (len = 5; len < maxlen - lsb; len++)
{
if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0)
break;
}
/* Extend length if host is narrow and IMM is negative. */
if (HOST_BITS_PER_WIDE_INT == 32 && len == maxlen - lsb)
len += 32;
/* Sign extend IMM as a 5-bit value. */
imm = (imm & 0xf) - 0x10;
}
op[0] = imm;
op[1] = 63 - lsb;
op[2] = len;
}
/* Output assembler code to perform a doubleword move insn
with operands OPERANDS. */
const char *
pa_output_move_double (rtx *operands)
{
enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
rtx latehalf[2];
rtx addreg0 = 0, addreg1 = 0;
int highonly = 0;
/* First classify both operands. */
if (REG_P (operands[0]))
optype0 = REGOP;
else if (offsettable_memref_p (operands[0]))
optype0 = OFFSOP;
else if (GET_CODE (operands[0]) == MEM)
optype0 = MEMOP;
else
optype0 = RNDOP;
if (REG_P (operands[1]))
optype1 = REGOP;
else if (CONSTANT_P (operands[1]))
optype1 = CNSTOP;
else if (offsettable_memref_p (operands[1]))
optype1 = OFFSOP;
else if (GET_CODE (operands[1]) == MEM)
optype1 = MEMOP;
else
optype1 = RNDOP;
/* Check for the cases that the operand constraints are not
supposed to allow to happen. */
gcc_assert (optype0 == REGOP || optype1 == REGOP);
/* Handle copies between general and floating registers. */
if (optype0 == REGOP && optype1 == REGOP
&& FP_REG_P (operands[0]) ^ FP_REG_P (operands[1]))
{
if (FP_REG_P (operands[0]))
{
output_asm_insn ("{stws|stw} %1,-16(%%sp)", operands);
output_asm_insn ("{stws|stw} %R1,-12(%%sp)", operands);
return "{fldds|fldd} -16(%%sp),%0";
}
else
{
output_asm_insn ("{fstds|fstd} %1,-16(%%sp)", operands);
output_asm_insn ("{ldws|ldw} -16(%%sp),%0", operands);
return "{ldws|ldw} -12(%%sp),%R0";
}
}
/* Handle auto decrementing and incrementing loads and stores
specifically, since the structure of the function doesn't work
for them without major modification. Do it better when we learn
this port about the general inc/dec addressing of PA.
(This was written by tege. Chide him if it doesn't work.) */
if (optype0 == MEMOP)
{
/* We have to output the address syntax ourselves, since print_operand
doesn't deal with the addresses we want to use. Fix this later. */
rtx addr = XEXP (operands[0], 0);
if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
{
rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
operands[0] = XEXP (addr, 0);
gcc_assert (GET_CODE (operands[1]) == REG
&& GET_CODE (operands[0]) == REG);
gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
/* No overlap between high target register and address
register. (We do this in a non-obvious way to
save a register file writeback) */
if (GET_CODE (addr) == POST_INC)
return "{stws|stw},ma %1,8(%0)\n\tstw %R1,-4(%0)";
return "{stws|stw},ma %1,-8(%0)\n\tstw %R1,12(%0)";
}
else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
{
rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0);
operands[0] = XEXP (addr, 0);
gcc_assert (GET_CODE (operands[1]) == REG
&& GET_CODE (operands[0]) == REG);
gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
/* No overlap between high target register and address
register. (We do this in a non-obvious way to save a
register file writeback) */
if (GET_CODE (addr) == PRE_INC)
return "{stws|stw},mb %1,8(%0)\n\tstw %R1,4(%0)";
return "{stws|stw},mb %1,-8(%0)\n\tstw %R1,4(%0)";
}
}
if (optype1 == MEMOP)
{
/* We have to output the address syntax ourselves, since print_operand
doesn't deal with the addresses we want to use. Fix this later. */
rtx addr = XEXP (operands[1], 0);
if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
{
rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
operands[1] = XEXP (addr, 0);
gcc_assert (GET_CODE (operands[0]) == REG
&& GET_CODE (operands[1]) == REG);
if (!reg_overlap_mentioned_p (high_reg, addr))
{
/* No overlap between high target register and address
register. (We do this in a non-obvious way to
save a register file writeback) */
if (GET_CODE (addr) == POST_INC)
return "{ldws|ldw},ma 8(%1),%0\n\tldw -4(%1),%R0";
return "{ldws|ldw},ma -8(%1),%0\n\tldw 12(%1),%R0";
}
else
{
/* This is an undefined situation. We should load into the
address register *and* update that register. Probably
we don't need to handle this at all. */
if (GET_CODE (addr) == POST_INC)
return "ldw 4(%1),%R0\n\t{ldws|ldw},ma 8(%1),%0";
return "ldw 4(%1),%R0\n\t{ldws|ldw},ma -8(%1),%0";
}
}
else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
{
rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0);
operands[1] = XEXP (addr, 0);
gcc_assert (GET_CODE (operands[0]) == REG
&& GET_CODE (operands[1]) == REG);
if (!reg_overlap_mentioned_p (high_reg, addr))
{
/* No overlap between high target register and address
register. (We do this in a non-obvious way to
save a register file writeback) */
if (GET_CODE (addr) == PRE_INC)
return "{ldws|ldw},mb 8(%1),%0\n\tldw 4(%1),%R0";
return "{ldws|ldw},mb -8(%1),%0\n\tldw 4(%1),%R0";
}
else
{
/* This is an undefined situation. We should load into the
address register *and* update that register. Probably
we don't need to handle this at all. */
if (GET_CODE (addr) == PRE_INC)
return "ldw 12(%1),%R0\n\t{ldws|ldw},mb 8(%1),%0";
return "ldw -4(%1),%R0\n\t{ldws|ldw},mb -8(%1),%0";
}
}
else if (GET_CODE (addr) == PLUS
&& GET_CODE (XEXP (addr, 0)) == MULT)
{
rtx xoperands[4];
/* Load address into left half of destination register. */
xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
xoperands[1] = XEXP (addr, 1);
xoperands[2] = XEXP (XEXP (addr, 0), 0);
xoperands[3] = XEXP (XEXP (addr, 0), 1);
output_asm_insn ("{sh%O3addl %2,%1,%0|shladd,l %2,%O3,%1,%0}",
xoperands);
return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
}
else if (GET_CODE (addr) == PLUS
&& REG_P (XEXP (addr, 0))
&& REG_P (XEXP (addr, 1)))
{
rtx xoperands[3];
/* Load address into left half of destination register. */
xoperands[0] = gen_rtx_SUBREG (SImode, operands[0], 0);
xoperands[1] = XEXP (addr, 0);
xoperands[2] = XEXP (addr, 1);
output_asm_insn ("{addl|add,l} %1,%2,%0",
xoperands);
return "ldw 4(%0),%R0\n\tldw 0(%0),%0";
}
}
/* If an operand is an unoffsettable memory ref, find a register
we can increment temporarily to make it refer to the second word. */
if (optype0 == MEMOP)
addreg0 = find_addr_reg (XEXP (operands[0], 0));
if (optype1 == MEMOP)
addreg1 = find_addr_reg (XEXP (operands[1], 0));
/* Ok, we can do one word at a time.
Normally we do the low-numbered word first.
In either case, set up in LATEHALF the operands to use
for the high-numbered word and in some cases alter the
operands in OPERANDS to be suitable for the low-numbered word. */
if (optype0 == REGOP)
latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
else if (optype0 == OFFSOP)
latehalf[0] = adjust_address_nv (operands[0], SImode, 4);
else
latehalf[0] = operands[0];
if (optype1 == REGOP)
latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
else if (optype1 == OFFSOP)
latehalf[1] = adjust_address_nv (operands[1], SImode, 4);
else if (optype1 == CNSTOP)
{
if (GET_CODE (operands[1]) == HIGH)
{
operands[1] = XEXP (operands[1], 0);
highonly = 1;
}
split_double (operands[1], &operands[1], &latehalf[1]);
}
else
latehalf[1] = operands[1];
/* If the first move would clobber the source of the second one,
do them in the other order.
This can happen in two cases:
mem -> register where the first half of the destination register
is the same register used in the memory's address. Reload
can create such insns.
mem in this case will be either register indirect or register
indirect plus a valid offset.
register -> register move where REGNO(dst) == REGNO(src + 1)
someone (Tim/Tege?) claimed this can happen for parameter loads.
Handle mem -> register case first. */
if (optype0 == REGOP
&& (optype1 == MEMOP || optype1 == OFFSOP)
&& refers_to_regno_p (REGNO (operands[0]), operands[1]))
{
/* Do the late half first. */
if (addreg1)
output_asm_insn ("ldo 4(%0),%0", &addreg1);
output_asm_insn (pa_singlemove_string (latehalf), latehalf);
/* Then clobber. */
if (addreg1)
output_asm_insn ("ldo -4(%0),%0", &addreg1);
return pa_singlemove_string (operands);
}
/* Now handle register -> register case. */
if (optype0 == REGOP && optype1 == REGOP
&& REGNO (operands[0]) == REGNO (operands[1]) + 1)
{
output_asm_insn (pa_singlemove_string (latehalf), latehalf);
return pa_singlemove_string (operands);
}
/* Normal case: do the two words, low-numbered first. */
output_asm_insn (pa_singlemove_string (operands), operands);
/* Make any unoffsettable addresses point at high-numbered word. */
if (addreg0)
output_asm_insn ("ldo 4(%0),%0", &addreg0);
if (addreg1)
output_asm_insn ("ldo 4(%0),%0", &addreg1);
/* Do high-numbered word. */
if (highonly)
output_asm_insn ("ldil L'%1,%0", latehalf);
else
output_asm_insn (pa_singlemove_string (latehalf), latehalf);
/* Undo the adds we just did. */
if (addreg0)
output_asm_insn ("ldo -4(%0),%0", &addreg0);
if (addreg1)
output_asm_insn ("ldo -4(%0),%0", &addreg1);
return "";
}
const char *
pa_output_fp_move_double (rtx *operands)
{
if (FP_REG_P (operands[0]))
{
if (FP_REG_P (operands[1])
|| operands[1] == CONST0_RTX (GET_MODE (operands[0])))
output_asm_insn ("fcpy,dbl %f1,%0", operands);
else
output_asm_insn ("fldd%F1 %1,%0", operands);
}
else if (FP_REG_P (operands[1]))
{
output_asm_insn ("fstd%F0 %1,%0", operands);
}
else
{
rtx xoperands[2];
gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
/* This is a pain. You have to be prepared to deal with an
arbitrary address here including pre/post increment/decrement.
so avoid this in the MD. */
gcc_assert (GET_CODE (operands[0]) == REG);
xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
xoperands[0] = operands[0];
output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
}
return "";
}
/* Return a REG that occurs in ADDR with coefficient 1.
ADDR can be effectively incremented by incrementing REG. */
static rtx
find_addr_reg (rtx addr)
{
while (GET_CODE (addr) == PLUS)
{
if (GET_CODE (XEXP (addr, 0)) == REG)
addr = XEXP (addr, 0);
else if (GET_CODE (XEXP (addr, 1)) == REG)
addr = XEXP (addr, 1);
else if (CONSTANT_P (XEXP (addr, 0)))
addr = XEXP (addr, 1);
else if (CONSTANT_P (XEXP (addr, 1)))
addr = XEXP (addr, 0);
else
gcc_unreachable ();
}
gcc_assert (GET_CODE (addr) == REG);
return addr;
}
/* Emit code to perform a block move.
OPERANDS[0] is the destination pointer as a REG, clobbered.
OPERANDS[1] is the source pointer as a REG, clobbered.
OPERANDS[2] is a register for temporary storage.
OPERANDS[3] is a register for temporary storage.
OPERANDS[4] is the size as a CONST_INT
OPERANDS[5] is the alignment safe to use, as a CONST_INT.
OPERANDS[6] is another temporary register. */
const char *
pa_output_block_move (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
int align = INTVAL (operands[5]);
unsigned long n_bytes = INTVAL (operands[4]);
/* We can't move more than a word at a time because the PA
has no longer integer move insns. (Could use fp mem ops?) */
if (align > (TARGET_64BIT ? 8 : 4))
align = (TARGET_64BIT ? 8 : 4);
/* Note that we know each loop below will execute at least twice
(else we would have open-coded the copy). */
switch (align)
{
case 8:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 16);
output_asm_insn ("ldi %4,%2", operands);
/* Copying loop. */
output_asm_insn ("ldd,ma 8(%1),%3", operands);
output_asm_insn ("ldd,ma 8(%1),%6", operands);
output_asm_insn ("std,ma %3,8(%0)", operands);
output_asm_insn ("addib,>= -16,%2,.-12", operands);
output_asm_insn ("std,ma %6,8(%0)", operands);
/* Handle the residual. There could be up to 7 bytes of
residual to copy! */
if (n_bytes % 16 != 0)
{
operands[4] = GEN_INT (n_bytes % 8);
if (n_bytes % 16 >= 8)
output_asm_insn ("ldd,ma 8(%1),%3", operands);
if (n_bytes % 8 != 0)
output_asm_insn ("ldd 0(%1),%6", operands);
if (n_bytes % 16 >= 8)
output_asm_insn ("std,ma %3,8(%0)", operands);
if (n_bytes % 8 != 0)
output_asm_insn ("stdby,e %6,%4(%0)", operands);
}
return "";
case 4:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 8);
output_asm_insn ("ldi %4,%2", operands);
/* Copying loop. */
output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
output_asm_insn ("{ldws|ldw},ma 4(%1),%6", operands);
output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
output_asm_insn ("addib,>= -8,%2,.-12", operands);
output_asm_insn ("{stws|stw},ma %6,4(%0)", operands);
/* Handle the residual. There could be up to 7 bytes of
residual to copy! */
if (n_bytes % 8 != 0)
{
operands[4] = GEN_INT (n_bytes % 4);
if (n_bytes % 8 >= 4)
output_asm_insn ("{ldws|ldw},ma 4(%1),%3", operands);
if (n_bytes % 4 != 0)
output_asm_insn ("ldw 0(%1),%6", operands);
if (n_bytes % 8 >= 4)
output_asm_insn ("{stws|stw},ma %3,4(%0)", operands);
if (n_bytes % 4 != 0)
output_asm_insn ("{stbys|stby},e %6,%4(%0)", operands);
}
return "";
case 2:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 4);
output_asm_insn ("ldi %4,%2", operands);
/* Copying loop. */
output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
output_asm_insn ("{ldhs|ldh},ma 2(%1),%6", operands);
output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
output_asm_insn ("addib,>= -4,%2,.-12", operands);
output_asm_insn ("{sths|sth},ma %6,2(%0)", operands);
/* Handle the residual. */
if (n_bytes % 4 != 0)
{
if (n_bytes % 4 >= 2)
output_asm_insn ("{ldhs|ldh},ma 2(%1),%3", operands);
if (n_bytes % 2 != 0)
output_asm_insn ("ldb 0(%1),%6", operands);
if (n_bytes % 4 >= 2)
output_asm_insn ("{sths|sth},ma %3,2(%0)", operands);
if (n_bytes % 2 != 0)
output_asm_insn ("stb %6,0(%0)", operands);
}
return "";
case 1:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 2);
output_asm_insn ("ldi %4,%2", operands);
/* Copying loop. */
output_asm_insn ("{ldbs|ldb},ma 1(%1),%3", operands);
output_asm_insn ("{ldbs|ldb},ma 1(%1),%6", operands);
output_asm_insn ("{stbs|stb},ma %3,1(%0)", operands);
output_asm_insn ("addib,>= -2,%2,.-12", operands);
output_asm_insn ("{stbs|stb},ma %6,1(%0)", operands);
/* Handle the residual. */
if (n_bytes % 2 != 0)
{
output_asm_insn ("ldb 0(%1),%3", operands);
output_asm_insn ("stb %3,0(%0)", operands);
}
return "";
default:
gcc_unreachable ();
}
}
/* Count the number of insns necessary to handle this block move.
Basic structure is the same as emit_block_move, except that we
count insns rather than emit them. */
static int
compute_movmem_length (rtx_insn *insn)
{
rtx pat = PATTERN (insn);
unsigned int align = INTVAL (XEXP (XVECEXP (pat, 0, 7), 0));
unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
unsigned int n_insns = 0;
/* We can't move more than four bytes at a time because the PA
has no longer integer move insns. (Could use fp mem ops?) */
if (align > (TARGET_64BIT ? 8 : 4))
align = (TARGET_64BIT ? 8 : 4);
/* The basic copying loop. */
n_insns = 6;
/* Residuals. */
if (n_bytes % (2 * align) != 0)
{
if ((n_bytes % (2 * align)) >= align)
n_insns += 2;
if ((n_bytes % align) != 0)
n_insns += 2;
}
/* Lengths are expressed in bytes now; each insn is 4 bytes. */
return n_insns * 4;
}
/* Emit code to perform a block clear.
OPERANDS[0] is the destination pointer as a REG, clobbered.
OPERANDS[1] is a register for temporary storage.
OPERANDS[2] is the size as a CONST_INT
OPERANDS[3] is the alignment safe to use, as a CONST_INT. */
const char *
pa_output_block_clear (rtx *operands, int size_is_constant ATTRIBUTE_UNUSED)
{
int align = INTVAL (operands[3]);
unsigned long n_bytes = INTVAL (operands[2]);
/* We can't clear more than a word at a time because the PA
has no longer integer move insns. */
if (align > (TARGET_64BIT ? 8 : 4))
align = (TARGET_64BIT ? 8 : 4);
/* Note that we know each loop below will execute at least twice
(else we would have open-coded the copy). */
switch (align)
{
case 8:
/* Pre-adjust the loop counter. */
operands[2] = GEN_INT (n_bytes - 16);
output_asm_insn ("ldi %2,%1", operands);
/* Loop. */
output_asm_insn ("std,ma %%r0,8(%0)", operands);
output_asm_insn ("addib,>= -16,%1,.-4", operands);
output_asm_insn ("std,ma %%r0,8(%0)", operands);
/* Handle the residual. There could be up to 7 bytes of
residual to copy! */
if (n_bytes % 16 != 0)
{
operands[2] = GEN_INT (n_bytes % 8);
if (n_bytes % 16 >= 8)
output_asm_insn ("std,ma %%r0,8(%0)", operands);
if (n_bytes % 8 != 0)
output_asm_insn ("stdby,e %%r0,%2(%0)", operands);
}
return "";
case 4:
/* Pre-adjust the loop counter. */
operands[2] = GEN_INT (n_bytes - 8);
output_asm_insn ("ldi %2,%1", operands);
/* Loop. */
output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
output_asm_insn ("addib,>= -8,%1,.-4", operands);
output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
/* Handle the residual. There could be up to 7 bytes of
residual to copy! */
if (n_bytes % 8 != 0)
{
operands[2] = GEN_INT (n_bytes % 4);
if (n_bytes % 8 >= 4)
output_asm_insn ("{stws|stw},ma %%r0,4(%0)", operands);
if (n_bytes % 4 != 0)
output_asm_insn ("{stbys|stby},e %%r0,%2(%0)", operands);
}
return "";
case 2:
/* Pre-adjust the loop counter. */
operands[2] = GEN_INT (n_bytes - 4);
output_asm_insn ("ldi %2,%1", operands);
/* Loop. */
output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
output_asm_insn ("addib,>= -4,%1,.-4", operands);
output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
/* Handle the residual. */
if (n_bytes % 4 != 0)
{
if (n_bytes % 4 >= 2)
output_asm_insn ("{sths|sth},ma %%r0,2(%0)", operands);
if (n_bytes % 2 != 0)
output_asm_insn ("stb %%r0,0(%0)", operands);
}
return "";