/* Subroutines for insn-output.c for HPPA.
Copyright (C) 1992, 1993, 1994, 95, 96, 1997 Free Software Foundation, Inc.
Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c
This file is part of GNU CC.
GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING. If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#include <stdio.h>
#include "config.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "tree.h"
#include "reload.h"
#include "c-tree.h"
#include "expr.h"
#include "obstack.h"
/* Save the operands last given to a compare for use when we
generate a scc or bcc insn. */
rtx hppa_compare_op0, hppa_compare_op1;
enum cmp_type hppa_branch_type;
/* Which cpu we are scheduling for. */
enum processor_type pa_cpu;
/* String to hold which cpu we are scheduling for. */
char *pa_cpu_string;
/* Set by the FUNCTION_PROFILER macro. */
int hp_profile_labelno;
/* Counts for the number of callee-saved general and floating point
registers which were saved by the current function's prologue. */
static int gr_saved, fr_saved;
/* Whether or not the current function uses an out-of-line prologue
and epilogue. */
static int out_of_line_prologue_epilogue;
static rtx find_addr_reg ();
/* Keep track of the number of bytes we have output in the CODE subspaces
during this compilation so we'll know when to emit inline long-calls. */
unsigned int total_code_bytes;
/* Variables to handle plabels that we discover are necessary at assembly
output time. They are output after the current function. */
struct deferred_plabel
{
rtx internal_label;
char *name;
} *deferred_plabels = 0;
int n_deferred_plabels = 0;
void
override_options ()
{
/* Default to 7100 scheduling. If the 7100LC scheduling ever
gets reasonably tuned, it should be the default since that's
what most PAs sold now are. */
if (pa_cpu_string == NULL
|| ! strcmp (pa_cpu_string, "7100"))
{
pa_cpu_string = "7100";
pa_cpu = PROCESSOR_7100;
}
else if (! strcmp (pa_cpu_string, "700"))
{
pa_cpu_string = "700";
pa_cpu = PROCESSOR_700;
}
else if (! strcmp (pa_cpu_string, "7100LC"))
{
pa_cpu_string = "7100LC";
pa_cpu = PROCESSOR_7100LC;
}
else
{
warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100 and 7100LC\n", pa_cpu_string);
}
if (flag_pic && TARGET_PORTABLE_RUNTIME)
{
warning ("PIC code generation is not supported in the portable runtime model\n");
}
if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS))
{
warning ("PIC code generation is not compatable with fast indirect calls\n");
}
if (flag_pic && profile_flag)
{
warning ("PIC code generation is not compatable with profiling\n");
}
if (TARGET_SPACE && (flag_pic || profile_flag))
{
warning ("Out of line entry/exit sequences are not compatable\n");
warning ("with PIC or profiling\n");
}
if (! TARGET_GAS && write_symbols != NO_DEBUG)
{
warning ("-g is only supported when using GAS on this processor,");
warning ("-g option disabled.");
write_symbols = NO_DEBUG;
}
}
/* Return non-zero only if OP is a register of mode MODE,
or CONST0_RTX. */
int
reg_or_0_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (op == CONST0_RTX (mode) || register_operand (op, mode));
}
/* Return non-zero if OP is suitable for use in a call to a named
function.
(???) For 2.5 try to eliminate either call_operand_address or
function_label_operand; they perform very similar functions. */
int
call_operand_address (op, mode)
rtx op;
enum machine_mode mode;
{
return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME);
}
/* Return 1 if X contains a symbolic expression. We know these
expressions will have one of a few well defined forms, so
we need only check those forms. */
int
symbolic_expression_p (x)
register rtx x;
{
/* Strip off any HIGH. */
if (GET_CODE (x) == HIGH)
x = XEXP (x, 0);
return (symbolic_operand (x, VOIDmode));
}
int
symbolic_operand (op, mode)
register rtx op;
enum machine_mode mode;
{
switch (GET_CODE (op))
{
case SYMBOL_REF:
case LABEL_REF:
return 1;
case CONST:
op = XEXP (op, 0);
return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF
|| GET_CODE (XEXP (op, 0)) == LABEL_REF)
&& GET_CODE (XEXP (op, 1)) == CONST_INT);
default:
return 0;
}
}
/* Return truth value of statement that OP is a symbolic memory
operand of mode MODE. */
int
symbolic_memory_operand (op, mode)
rtx op;
enum machine_mode mode;
{
if (GET_CODE (op) == SUBREG)
op = SUBREG_REG (op);
if (GET_CODE (op) != MEM)
return 0;
op = XEXP (op, 0);
return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST
|| GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF);
}
/* Return 1 if the operand is either a register or a memory operand that is
not symbolic. */
int
reg_or_nonsymb_mem_operand (op, mode)
register rtx op;
enum machine_mode mode;
{
if (register_operand (op, mode))
return 1;
if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
return 1;
return 0;
}
/* Return 1 if the operand is either a register, zero, or a memory operand
that is not symbolic. */
int
reg_or_0_or_nonsymb_mem_operand (op, mode)
register rtx op;
enum machine_mode mode;
{
if (register_operand (op, mode))
return 1;
if (op == CONST0_RTX (mode))
return 1;
if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode))
return 1;
return 0;
}
/* Accept any constant that can be moved in one instruction into a
general register. */
int
cint_ok_for_move (intval)
HOST_WIDE_INT intval;
{
/* OK if ldo, ldil, or zdepi, can be used. */
return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0
|| zdepi_cint_p (intval));
}
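/* For illustration, assuming the usual definitions of VAL_14_BITS_P and
   zdepi_cint_p: 3000 fits in 14 signed bits (a single ldi/ldo);
   0x12345000 has its low 11 bits clear (a single ldil); 0x0003e000 is a
   shifted run of ones that zdepi can deposit.  Something like 0x12345678
   passes none of the tests and needs an ldil/ldo pair instead.  */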
/* Accept anything that can be moved in one instruction into a general
register. */
int
move_operand (op, mode)
rtx op;
enum machine_mode mode;
{
if (register_operand (op, mode))
return 1;
if (GET_CODE (op) == CONST_INT)
return cint_ok_for_move (INTVAL (op));
if (GET_CODE (op) == SUBREG)
op = SUBREG_REG (op);
if (GET_CODE (op) != MEM)
return 0;
op = XEXP (op, 0);
if (GET_CODE (op) == LO_SUM)
return (register_operand (XEXP (op, 0), Pmode)
&& CONSTANT_P (XEXP (op, 1)));
/* Since move_operand is only used for source operands, we can always
allow scaled indexing! */
if (GET_CODE (op) == PLUS
&& ((GET_CODE (XEXP (op, 0)) == MULT
&& GET_CODE (XEXP (XEXP (op, 0), 0)) == REG
&& GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT
&& INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode)
&& GET_CODE (XEXP (op, 1)) == REG)
|| (GET_CODE (XEXP (op, 1)) == MULT
&& GET_CODE (XEXP (XEXP (op, 1), 0)) == REG
&& GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT
&& INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode)
&& GET_CODE (XEXP (op, 0)) == REG)))
return 1;
return memory_address_p (mode, op);
}
/* Accept REG and any CONST_INT that can be moved in one instruction into a
general register. */
int
reg_or_cint_move_operand (op, mode)
rtx op;
enum machine_mode mode;
{
if (register_operand (op, mode))
return 1;
if (GET_CODE (op) == CONST_INT)
return cint_ok_for_move (INTVAL (op));
return 0;
}
int
pic_label_operand (op, mode)
rtx op;
enum machine_mode mode;
{
if (!flag_pic)
return 0;
switch (GET_CODE (op))
{
case LABEL_REF:
return 1;
case CONST:
op = XEXP (op, 0);
return (GET_CODE (XEXP (op, 0)) == LABEL_REF
&& GET_CODE (XEXP (op, 1)) == CONST_INT);
default:
return 0;
}
}
int
fp_reg_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return reg_renumber && FP_REG_P (op);
}
/* Return truth value of whether OP can be used as an operand in a
three operand arithmetic insn that accepts registers of mode MODE
or 14-bit signed integers. */
int
arith_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (register_operand (op, mode)
|| (GET_CODE (op) == CONST_INT && INT_14_BITS (op)));
}
/* Return truth value of whether OP can be used as an operand in a
three operand arithmetic insn that accepts registers of mode MODE
or 11-bit signed integers. */
int
arith11_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (register_operand (op, mode)
|| (GET_CODE (op) == CONST_INT && INT_11_BITS (op)));
}
/* A constant integer suitable for use in a PRE_MODIFY memory
reference. */
int
pre_cint_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (GET_CODE (op) == CONST_INT
&& INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10);
}
/* A constant integer suitable for use in a POST_MODIFY memory
reference. */
int
post_cint_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (GET_CODE (op) == CONST_INT
&& INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10);
}
int
arith_double_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (register_operand (op, mode)
|| (GET_CODE (op) == CONST_DOUBLE
&& GET_MODE (op) == mode
&& VAL_14_BITS_P (CONST_DOUBLE_LOW (op))
&& (CONST_DOUBLE_HIGH (op) >= 0
== ((CONST_DOUBLE_LOW (op) & 0x1000) == 0))));
}
/* Return truth value of whether OP is an integer which fits the
range constraining immediate operands in three-address insns, or
is an integer register. */
int
ireg_or_int5_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op))
|| (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32));
}
/* Return truth value of whether OP is an integer which fits the
range constraining immediate operands in three-address insns. */
int
int5_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (GET_CODE (op) == CONST_INT && INT_5_BITS (op));
}
int
uint5_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op));
}
int
int11_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (GET_CODE (op) == CONST_INT && INT_11_BITS (op));
}
int
uint32_operand (op, mode)
rtx op;
enum machine_mode mode;
{
#if HOST_BITS_PER_WIDE_INT > 32
/* All allowed constants will fit a CONST_INT. */
return (GET_CODE (op) == CONST_INT
&& (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L));
#else
return (GET_CODE (op) == CONST_INT
|| (GET_CODE (op) == CONST_DOUBLE
&& CONST_DOUBLE_HIGH (op) == 0));
#endif
}
int
arith5_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return register_operand (op, mode) || int5_operand (op, mode);
}
/* True iff zdepi can be used to generate this CONST_INT. */
int
zdepi_cint_p (x)
unsigned HOST_WIDE_INT x;
{
unsigned HOST_WIDE_INT lsb_mask, t;
/* This might not be obvious, but it's at least fast.
This function is critical; we don't have the time loops would take. */
lsb_mask = x & -x;
t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1);
/* Return true iff t is a power of two. */
return ((t & (t - 1)) == 0);
}
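/* A rough trace of the test above, assuming zdepi constants are a 5-bit
   signed immediate, sign extended and shifted into place:
   x = 5:       lsb_mask = 1, t = ((5 >> 4) + 1) & ~0 = 1, a power of two
                -> accepted (plain 5-bit immediate).
   x = 0x3e000: lsb_mask = 0x2000, t = (0x3e00 + 0x2000) & ~0x1fff = 0x4000
                -> accepted (contiguous run of ones).
   x = 0x21:    lsb_mask = 1, t = (2 + 1) & ~0 = 3, not a power of two
                -> rejected; bit 5 is not a sign copy of the 5-bit field.  */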
/* True iff depi or extru can be used to compute (reg & mask).
Accept bit pattern like these:
0....01....1
1....10....0
1..10..01..1 */
int
and_mask_p (mask)
unsigned HOST_WIDE_INT mask;
{
mask = ~mask;
mask += mask & -mask;
return (mask & (mask - 1)) == 0;
}
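/* For example, mask = 0xfff00fff (the 1..10..01..1 case): ~mask is
   0x000ff000, a single run of ones; adding its low bit (0x1000) gives
   0x00100000, a power of two, so the mask is accepted.  In contrast,
   mask = 0xf0f0f0f0 gives ~mask = 0x0f0f0f0f, more than one run of ones,
   so it is rejected.  */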
/* True iff depi or extru can be used to compute (reg & OP). */
int
and_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (register_operand (op, mode)
|| (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op))));
}
/* True iff depi can be used to compute (reg | MASK). */
int
ior_mask_p (mask)
unsigned HOST_WIDE_INT mask;
{
mask += mask & -mask;
return (mask & (mask - 1)) == 0;
}
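/* E.g. mask = 0x0001fe00 is a run of eight ones; adding its low bit
   (0x200) gives 0x00020000, a power of two, so a single depi -1 can set
   the field (see output_ior below for the position/length arithmetic).  */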
/* True iff depi can be used to compute (reg | OP). */
int
ior_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op)));
}
int
lhs_lshift_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode);
}
/* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx.
Such values can be the left hand side x in (x << r), using the zvdepi
instruction. */
int
lhs_lshift_cint_operand (op, mode)
rtx op;
enum machine_mode mode;
{
unsigned HOST_WIDE_INT x;
if (GET_CODE (op) != CONST_INT)
return 0;
x = INTVAL (op) >> 4;
return (x & (x + 1)) == 0;
}
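/* For instance, 0x7ffff >> 4 = 0x7fff and 0x7fff & 0x8000 == 0, so it is
   accepted (form 0...01...1xxxx); 0x50 >> 4 = 5 and 5 & 6 != 0, so it is
   rejected.  */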
int
arith32_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return register_operand (op, mode) || GET_CODE (op) == CONST_INT;
}
int
pc_or_label_operand (op, mode)
rtx op;
enum machine_mode mode;
{
return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF);
}
/* Legitimize PIC addresses. If the address is already
position-independent, we return ORIG. Newly generated
position-independent addresses go to REG. If we need more
than one register, we lose. */
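/* As a rough sketch of the RTL produced below (not a specification):
   a SYMBOL_REF is loaded from (mem (plus pic_offset_table_rtx symbol))
   when flag_pic == 1, or from (mem (lo_sum reg (unspec [symbol] 0)))
   after a pic2_highpart when flag_pic == 2; a label is materialized
   with gen_pic_load_label.  */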
rtx
legitimize_pic_address (orig, mode, reg)
rtx orig, reg;
enum machine_mode mode;
{
rtx pic_ref = orig;
/* Labels need special handling. */
if (pic_label_operand (orig, mode))
{
emit_insn (gen_pic_load_label (reg, orig));
current_function_uses_pic_offset_table = 1;
return reg;
}
if (GET_CODE (orig) == SYMBOL_REF)
{
if (reg == 0)
abort ();
if (flag_pic == 2)
{
emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig));
pic_ref = gen_rtx (MEM, Pmode,
gen_rtx (LO_SUM, Pmode, reg,
gen_rtx (UNSPEC, SImode, gen_rtvec (1, orig), 0)));
}
else
pic_ref = gen_rtx (MEM, Pmode,
gen_rtx (PLUS, Pmode, pic_offset_table_rtx, orig));
current_function_uses_pic_offset_table = 1;
RTX_UNCHANGING_P (pic_ref) = 1;
emit_move_insn (reg, pic_ref);
return reg;
}
else if (GET_CODE (orig) == CONST)
{
rtx base;
if (GET_CODE (XEXP (orig, 0)) == PLUS
&& XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
return orig;
if (reg == 0)
abort ();
if (GET_CODE (XEXP (orig, 0)) == PLUS)
{
base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
base == reg ? 0 : reg);
}
else abort ();
if (GET_CODE (orig) == CONST_INT)
{
if (INT_14_BITS (orig))
return plus_constant_for_output (base, INTVAL (orig));
orig = force_reg (Pmode, orig);
}
pic_ref = gen_rtx (PLUS, Pmode, base, orig);
/* Likewise, should we set special REG_NOTEs here? */
}
return pic_ref;
}
/* Try machine-dependent ways of modifying an illegitimate address
to be legitimate. If we find one, return the new, valid address.
This macro is used in only one place: `memory_address' in explow.c.
OLDX is the address as it was before break_out_memory_refs was called.
In some cases it is useful to look at this to decide what needs to be done.
MODE and WIN are passed so that this macro can use
GO_IF_LEGITIMATE_ADDRESS.
It is always safe for this macro to do nothing. It exists to recognize
opportunities to optimize the output.
For the PA, transform:
memory(X + <large int>)
into:
if (<large int> & mask) >= 16
Y = (<large int> & ~mask) + mask + 1 Round up.
else
Y = (<large int> & ~mask) Round down.
Z = X + Y
memory (Z + (<large int> - Y));
This is for CSE to find several similar references, and only use one Z.
X can either be a SYMBOL_REF or REG, but because combine can not
perform a 4->2 combination we do nothing for SYMBOL_REF + D where
D will not fit in 14 bits.
MODE_FLOAT references allow displacements which fit in 5 bits, so use
0x1f as the mask.
MODE_INT references allow displacements which fit in 14 bits, so use
0x3fff as the mask.
This relies on the fact that most mode MODE_FLOAT references will use FP
registers and most mode MODE_INT references will use integer registers.
(In the rare case of an FP register used in an integer MODE, we depend
on secondary reloads to clean things up.)
It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special
manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed
addressing modes to be used).
Put X and Z into registers. Then put the entire expression into
a register. */
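/* Worked example for a MODE_INT reference (mask == 0x3fff):
   for memory (X + 0x5432) the low bits 0x1432 are below the halfway
   point 0x2000, so Y = 0x4000, Z = X + Y, and the reference becomes
   memory (Z + 0x1432), whose displacement fits the 14 bits of ldw/stw.  */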
rtx
hppa_legitimize_address (x, oldx, mode)
rtx x, oldx;
enum machine_mode mode;
{
rtx orig = x;
if (flag_pic)
return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode));
/* Strip off CONST. */
if (GET_CODE (x) == CONST)
x = XEXP (x, 0);
/* Special case. Get the SYMBOL_REF into a register and use indexing.
That should always be safe. */
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 0)) == REG
&& GET_CODE (XEXP (x, 1)) == SYMBOL_REF)
{
rtx reg = force_reg (SImode, XEXP (x, 1));
return force_reg (SImode, gen_rtx (PLUS, SImode, reg, XEXP (x, 0)));
}
/* Note we must reject symbols which represent function addresses
since the assembler/linker can't handle arithmetic on plabels. */
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
&& !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0)))
|| GET_CODE (XEXP (x, 0)) == REG))
{
rtx int_part, ptr_reg;
int newoffset;
int offset = INTVAL (XEXP (x, 1));
int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff;
/* Choose which way to round the offset. Round up if we
are >= halfway to the next boundary. */
if ((offset & mask) >= ((mask + 1) / 2))
newoffset = (offset & ~ mask) + mask + 1;
else
newoffset = (offset & ~ mask);
/* If the newoffset will not fit in 14 bits (ldo), then
handling this would take 4 or 5 instructions (2 to load
the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to
add the new offset and the SYMBOL_REF.) Combine can
not handle 4->2 or 5->2 combinations, so do not create
them. */
if (! VAL_14_BITS_P (newoffset)
&& GET_CODE (XEXP (x, 0)) == SYMBOL_REF)
{
rtx const_part = gen_rtx (CONST, VOIDmode,
gen_rtx (PLUS, Pmode,
XEXP (x, 0),
GEN_INT (newoffset)));
rtx tmp_reg
= force_reg (Pmode,
gen_rtx (HIGH, Pmode, const_part));
ptr_reg
= force_reg (Pmode,
gen_rtx (LO_SUM, Pmode,
tmp_reg, const_part));
}
else
{
if (! VAL_14_BITS_P (newoffset))
int_part = force_reg (Pmode, GEN_INT (newoffset));
else
int_part = GEN_INT (newoffset);
ptr_reg = force_reg (Pmode,
gen_rtx (PLUS, Pmode,
force_reg (Pmode, XEXP (x, 0)),
int_part));
}
return plus_constant (ptr_reg, offset - newoffset);
}
/* Handle (plus (mult (a) (shadd_constant)) (b)). */
if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))
&& (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o'
|| GET_CODE (XEXP (x, 1)) == SUBREG)
&& GET_CODE (XEXP (x, 1)) != CONST)
{
int val = INTVAL (XEXP (XEXP (x, 0), 1));
rtx reg1, reg2;
reg1 = XEXP (x, 1);
if (GET_CODE (reg1) != REG)
reg1 = force_reg (Pmode, force_operand (reg1, 0));
reg2 = XEXP (XEXP (x, 0), 0);
if (GET_CODE (reg2) != REG)
reg2 = force_reg (Pmode, force_operand (reg2, 0));
return force_reg (Pmode, gen_rtx (PLUS, Pmode,
gen_rtx (MULT, Pmode,
reg2, GEN_INT (val)),
reg1));
}
/* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)).
Only do so for floating point modes since this is more speculative
and we lose if it's an integer store. */
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
&& GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
&& shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)))
&& (mode == SFmode || mode == DFmode))
{
/* First, try and figure out what to use as a base register. */
rtx reg1, reg2, base, idx, orig_base;
reg1 = XEXP (XEXP (x, 0), 1);
reg2 = XEXP (x, 1);
base = NULL_RTX;
idx = NULL_RTX;
/* Make sure they're both regs. If one was a SYMBOL_REF [+ const],
then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll
know it's a base register below. */
if (GET_CODE (reg1) != REG)
reg1 = force_reg (Pmode, force_operand (reg1, 0));
if (GET_CODE (reg2) != REG)
reg2 = force_reg (Pmode, force_operand (reg2, 0));
/* Figure out what the base and index are. */
if (GET_CODE (reg1) == REG
&& REGNO_POINTER_FLAG (REGNO (reg1)))
{
base = reg1;
orig_base = XEXP (XEXP (x, 0), 1);
idx = gen_rtx (PLUS, Pmode,
gen_rtx (MULT, Pmode,
XEXP (XEXP (XEXP (x, 0), 0), 0),
XEXP (XEXP (XEXP (x, 0), 0), 1)),
XEXP (x, 1));
}
else if (GET_CODE (reg2) == REG
&& REGNO_POINTER_FLAG (REGNO (reg2)))
{
base = reg2;
orig_base = XEXP (x, 1);
idx = XEXP (x, 0);
}
if (base == 0)
return orig;
/* If the index adds a large constant, try to scale the
constant so that it can be loaded with only one insn. */
if (GET_CODE (XEXP (idx, 1)) == CONST_INT
&& VAL_14_BITS_P (INTVAL (XEXP (idx, 1))
/ INTVAL (XEXP (XEXP (idx, 0), 1)))
&& INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0)
{
/* Divide the CONST_INT by the scale factor, then add it to A. */
int val = INTVAL (XEXP (idx, 1));
val /= INTVAL (XEXP (XEXP (idx, 0), 1));
reg1 = XEXP (XEXP (idx, 0), 0);
if (GET_CODE (reg1) != REG)
reg1 = force_reg (Pmode, force_operand (reg1, 0));
reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, reg1, GEN_INT (val)));
/* We can now generate a simple scaled indexed address. */
return force_reg (Pmode, gen_rtx (PLUS, Pmode,
gen_rtx (MULT, Pmode, reg1,
XEXP (XEXP (idx, 0), 1)),
base));
}
/* If B + C is still a valid base register, then add them. */
if (GET_CODE (XEXP (idx, 1)) == CONST_INT
&& INTVAL (XEXP (idx, 1)) <= 4096
&& INTVAL (XEXP (idx, 1)) >= -4096)
{
int val = INTVAL (XEXP (XEXP (idx, 0), 1));
rtx reg1, reg2;
reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode, base, XEXP (idx, 1)));
reg2 = XEXP (XEXP (idx, 0), 0);
if (GET_CODE (reg2) != CONST_INT)
reg2 = force_reg (Pmode, force_operand (reg2, 0));
return force_reg (Pmode, gen_rtx (PLUS, Pmode,
gen_rtx (MULT, Pmode,
reg2, GEN_INT (val)),
reg1));
}
/* Get the index into a register, then add the base + index and
return a register holding the result. */
/* First get A into a register. */
reg1 = XEXP (XEXP (idx, 0), 0);
if (GET_CODE (reg1) != REG)
reg1 = force_reg (Pmode, force_operand (reg1, 0));
/* And get B into a register. */
reg2 = XEXP (idx, 1);
if (GET_CODE (reg2) != REG)
reg2 = force_reg (Pmode, force_operand (reg2, 0));
reg1 = force_reg (Pmode, gen_rtx (PLUS, Pmode,
gen_rtx (MULT, Pmode, reg1,
XEXP (XEXP (idx, 0), 1)),
reg2));
/* Add the result to our base register and return. */
return force_reg (Pmode, gen_rtx (PLUS, Pmode, base, reg1));
}
/* Uh-oh. We might have an address for x[n-100000]. This needs
special handling to avoid creating an indexed memory address
with x-100000 as the base.
If the constant part is small enough, then it's still safe because
there is a guard page at the beginning and end of the data segment.
Scaled references are common enough that we want to try and rearrange the
terms so that we can use indexing for these addresses too. Only
do the optimization for floating point modes. */
if (GET_CODE (x) == PLUS
&& symbolic_expression_p (XEXP (x, 1)))
{
/* Ugly. We modify things here so that the address offset specified
by the index expression is computed first, then added to x to form
the entire address. */
rtx regx1, regx2, regy1, regy2, y;
/* Strip off any CONST. */
y = XEXP (x, 1);
if (GET_CODE (y) == CONST)
y = XEXP (y, 0);
if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS)
{
/* See if this looks like
(plus (mult (reg) (shadd_const))
(const (plus (symbol_ref) (const_int))))
Where const_int is small. In that case the const
expression is a valid pointer for indexing.
If const_int is big but can be divided evenly by shadd_const,
divide it and add the quotient to (reg); this allows more scaled indexed addresses. */
if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF
&& GET_CODE (XEXP (x, 0)) == MULT
&& GET_CODE (XEXP (y, 1)) == CONST_INT
&& INTVAL (XEXP (y, 1)) >= -4096
&& INTVAL (XEXP (y, 1)) <= 4095
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
{
int val = INTVAL (XEXP (XEXP (x, 0), 1));
rtx reg1, reg2;
reg1 = XEXP (x, 1);
if (GET_CODE (reg1) != REG)
reg1 = force_reg (Pmode, force_operand (reg1, 0));
reg2 = XEXP (XEXP (x, 0), 0);
if (GET_CODE (reg2) != REG)
reg2 = force_reg (Pmode, force_operand (reg2, 0));
return force_reg (Pmode, gen_rtx (PLUS, Pmode,
gen_rtx (MULT, Pmode,
reg2, GEN_INT (val)),
reg1));
}
else if ((mode == DFmode || mode == SFmode)
&& GET_CODE (XEXP (y, 0)) == SYMBOL_REF
&& GET_CODE (XEXP (x, 0)) == MULT
&& GET_CODE (XEXP (y, 1)) == CONST_INT
&& INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0
&& GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
&& shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))))
{
regx1
= force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1))
/ INTVAL (XEXP (XEXP (x, 0), 1))));
regx2 = XEXP (XEXP (x, 0), 0);
if (GET_CODE (regx2) != REG)
regx2 = force_reg (Pmode, force_operand (regx2, 0));
regx2 = force_reg (Pmode, gen_rtx (GET_CODE (y), Pmode,
regx2, regx1));
return force_reg (Pmode,
gen_rtx (PLUS, Pmode,
gen_rtx (MULT, Pmode, regx2,
XEXP (XEXP (x, 0), 1)),
force_reg (Pmode, XEXP (y, 0))));
}
else if (GET_CODE (XEXP (y, 1)) == CONST_INT
&& INTVAL (XEXP (y, 1)) >= -4096
&& INTVAL (XEXP (y, 1)) <= 4095)
{
/* This is safe because of the guard page at the
beginning and end of the data space. Just
return the original address. */
return orig;
}
else
{
/* Doesn't look like one we can optimize. */
regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0));
regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0));
regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0));
regx1 = force_reg (Pmode,
gen_rtx (GET_CODE (y), Pmode, regx1, regy2));
return force_reg (Pmode, gen_rtx (PLUS, Pmode, regx1, regy1));
}
}
}
return orig;
}
/* For the HPPA, REG and REG+CONST is cost 0
and addresses involving symbolic constants are cost 2.
PIC addresses are very expensive.
It is no coincidence that this has the same structure
as GO_IF_LEGITIMATE_ADDRESS. */
int
hppa_address_cost (X)
rtx X;
{
if (GET_CODE (X) == PLUS)
return 1;
else if (GET_CODE (X) == LO_SUM)
return 1;
else if (GET_CODE (X) == HIGH)
return 2;
return 4;
}
/* Emit insns to move operands[1] into operands[0].
Return 1 if we have written out everything that needs to be done to
do the move. Otherwise, return 0 and the caller will emit the move
normally. */
int
emit_move_sequence (operands, mode, scratch_reg)
rtx *operands;
enum machine_mode mode;
rtx scratch_reg;
{
register rtx operand0 = operands[0];
register rtx operand1 = operands[1];
if (reload_in_progress && GET_CODE (operand0) == REG
&& REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
operand0 = reg_equiv_mem[REGNO (operand0)];
else if (reload_in_progress && GET_CODE (operand0) == SUBREG
&& GET_CODE (SUBREG_REG (operand0)) == REG
&& REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
{
SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))];
operand0 = alter_subreg (operand0);
}
if (reload_in_progress && GET_CODE (operand1) == REG
&& REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
operand1 = reg_equiv_mem[REGNO (operand1)];
else if (reload_in_progress && GET_CODE (operand1) == SUBREG
&& GET_CODE (SUBREG_REG (operand1)) == REG
&& REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
{
SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))];
operand1 = alter_subreg (operand1);
}
/* Handle secondary reloads for loads/stores of FP registers from
REG+D addresses where D does not fit in 5 bits, including
(subreg (mem (addr))) cases. */
if (fp_reg_operand (operand0, mode)
&& ((GET_CODE (operand1) == MEM
&& ! memory_address_p (DFmode, XEXP (operand1, 0)))
|| ((GET_CODE (operand1) == SUBREG
&& GET_CODE (XEXP (operand1, 0)) == MEM
&& !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0)))))
&& scratch_reg)
{
if (GET_CODE (operand1) == SUBREG)
operand1 = XEXP (operand1, 0);
scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
/* D might not fit in 14 bits either; for such cases load D into
scratch reg. */
if (!memory_address_p (SImode, XEXP (operand1, 0)))
{
emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
SImode,
XEXP (XEXP (operand1, 0), 0),
scratch_reg));
}
else
emit_move_insn (scratch_reg, XEXP (operand1, 0));
emit_insn (gen_rtx (SET, VOIDmode, operand0, gen_rtx (MEM, mode,
scratch_reg)));
return 1;
}
else if (fp_reg_operand (operand1, mode)
&& ((GET_CODE (operand0) == MEM
&& ! memory_address_p (DFmode, XEXP (operand0, 0)))
|| ((GET_CODE (operand0) == SUBREG)
&& GET_CODE (XEXP (operand0, 0)) == MEM
&& !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0))))
&& scratch_reg)
{
if (GET_CODE (operand0) == SUBREG)
operand0 = XEXP (operand0, 0);
scratch_reg = gen_rtx (REG, SImode, REGNO (scratch_reg));
/* D might not fit in 14 bits either; for such cases load D into
scratch reg. */
if (!memory_address_p (SImode, XEXP (operand0, 0)))
{
emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1));
emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand0, 0)),
SImode,
XEXP (XEXP (operand0, 0), 0),
scratch_reg));
}
else
emit_move_insn (scratch_reg, XEXP (operand0, 0));
emit_insn (gen_rtx (SET, VOIDmode, gen_rtx (MEM, mode, scratch_reg),
operand1));
return 1;
}
/* Handle secondary reloads for loads of FP registers from constant
expressions by forcing the constant into memory.
Use scratch_reg to hold the address of the memory location.
??? The proper fix is to change PREFERRED_RELOAD_CLASS to return
NO_REGS when presented with a const_int and a register class
containing only FP registers. Doing so unfortunately creates
more problems than it solves. Fix this for 2.5. */
else if (fp_reg_operand (operand0, mode)
&& CONSTANT_P (operand1)
&& scratch_reg)
{
rtx xoperands[2];
/* Force the constant into memory and put the address of the
memory location into scratch_reg. */
xoperands[0] = scratch_reg;
xoperands[1] = XEXP (force_const_mem (mode, operand1), 0);
emit_move_sequence (xoperands, Pmode, 0);
/* Now load the destination register. */
emit_insn (gen_rtx (SET, mode, operand0,
gen_rtx (MEM, mode, scratch_reg)));
return 1;
}
/* Handle secondary reloads for SAR. These occur when trying to load
the SAR from memory, from a FP register, or with a constant. */
else if (GET_CODE (operand0) == REG
&& REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS
&& (GET_CODE (operand1) == MEM
|| GET_CODE (operand1) == CONST_INT
|| (GET_CODE (operand1) == REG
&& FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1)))))
&& scratch_reg)
{
/* D might not fit in 14 bits either; for such cases load D into
scratch reg. */
if (GET_CODE (operand1) == MEM
&& !memory_address_p (SImode, XEXP (operand1, 0)))
{
emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1));
emit_move_insn (scratch_reg, gen_rtx (GET_CODE (XEXP (operand1, 0)),
SImode,
XEXP (XEXP (operand1, 0), 0),
scratch_reg));
emit_move_insn (scratch_reg, gen_rtx (MEM, GET_MODE (operand1),
scratch_reg));
}
else
emit_move_insn (scratch_reg, operand1);
emit_move_insn (operand0, scratch_reg);
return 1;
}
/* Handle most common case: storing into a register. */
else if (register_operand (operand0, mode))
{
if (register_operand (operand1, mode)
|| (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1))
|| (operand1 == CONST0_RTX (mode))
|| (GET_CODE (operand1) == HIGH
&& !symbolic_operand (XEXP (operand1, 0), VOIDmode))
/* Only `general_operands' can come here, so MEM is ok. */
|| GET_CODE (operand1) == MEM)
{
/* Run this case quickly. */
emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
return 1;
}
}
else if (GET_CODE (operand0) == MEM)
{
if (mode == DFmode && operand1 == CONST0_RTX (mode)
&& !(reload_in_progress || reload_completed))
{
rtx temp = gen_reg_rtx (DFmode);
emit_insn (gen_rtx (SET, VOIDmode, temp, operand1));
emit_insn (gen_rtx (SET, VOIDmode, operand0, temp));
return 1;
}
if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode))
{
/* Run this case quickly. */
emit_insn (gen_rtx (SET, VOIDmode, operand0, operand1));
return 1;
}
if (! (reload_in_progress || reload_completed))
{
operands[0] = validize_mem (operand0);
operands[1] = operand1 = force_reg (mode, operand1);
}
}
/* Simplify the source if we need to. */
if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode))
|| (GET_CODE (operand1) == HIGH
&& symbolic_operand (XEXP (operand1, 0), mode)))
{
int ishighonly = 0;
if (GET_CODE (operand1) == HIGH)
{
ishighonly = 1;
operand1 = XEXP (operand1, 0);
}
if (symbolic_operand (operand1, mode))
{
/* Argh. The assembler and linker can't handle arithmetic
involving plabels.
So we force the plabel into memory, load operand0 from
the memory location, then add in the constant part. */
if (GET_CODE (operand1) == CONST
&& GET_CODE (XEXP (operand1, 0)) == PLUS
&& function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode))
{
rtx temp, const_part;
/* Figure out what (if any) scratch register to use. */
if (reload_in_progress || reload_completed)
scratch_reg = scratch_reg ? scratch_reg : operand0;
else if (flag_pic)
scratch_reg = gen_reg_rtx (Pmode);
/* Save away the constant part of the expression. */
const_part = XEXP (XEXP (operand1, 0), 1);
if (GET_CODE (const_part) != CONST_INT)
abort ();
/* Force the function label into memory. */
temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0));
/* Get the address of the memory location. PIC-ify it if
necessary. */
temp = XEXP (temp, 0);
if (flag_pic)
temp = legitimize_pic_address (temp, mode, scratch_reg);
/* Put the address of the memory location into our destination
register. */
operands[1] = temp;
emit_move_sequence (operands, mode, scratch_reg);
/* Now load from the memory location into our destination
register. */
operands[1] = gen_rtx (MEM, Pmode, operands[0]);
emit_move_sequence (operands, mode, scratch_reg);
/* And add back in the constant part. */
expand_inc (operand0, const_part);
return 1;
}
if (flag_pic)
{
rtx temp;
if (reload_in_progress || reload_completed)
temp = scratch_reg ? scratch_reg : operand0;
else
temp = gen_reg_rtx (Pmode);
/* (const (plus (symbol) (const_int))) must be forced to
memory during/after reload if the const_int will not fit
in 14 bits. */
if (GET_CODE (operand1) == CONST
&& GET_CODE (XEXP (operand1, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT
&& !INT_14_BITS (XEXP (XEXP (operand1, 0), 1))
&& (reload_completed || reload_in_progress)
&& flag_pic)
{
operands[1] = force_const_mem (mode, operand1);
operands[1] = legitimize_pic_address (XEXP (operands[1], 0),
mode, temp);
emit_move_sequence (operands, mode, temp);
}
else
{
operands[1] = legitimize_pic_address (operand1, mode, temp);
emit_insn (gen_rtx (SET, VOIDmode, operand0, operands[1]));
}
}
/* On the HPPA, references to data space are supposed to use dp,
register 27, but showing it in the RTL inhibits various cse
and loop optimizations. */
else
{
rtx temp, set;
if (reload_in_progress || reload_completed)
temp = scratch_reg ? scratch_reg : operand0;
else
temp = gen_reg_rtx (mode);
/* Loading a SYMBOL_REF into a register makes that register
safe to be used as the base in an indexed address.
Don't mark hard registers though. That loses. */
if (GET_CODE (operand0) == REG
&& REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
REGNO_POINTER_FLAG (REGNO (operand0)) = 1;
if (REGNO (temp) >= FIRST_PSEUDO_REGISTER)
REGNO_POINTER_FLAG (REGNO (temp)) = 1;
if (ishighonly)
set = gen_rtx (SET, mode, operand0, temp);
else
set = gen_rtx (SET, VOIDmode,
operand0,
gen_rtx (LO_SUM, mode, temp, operand1));
emit_insn (gen_rtx (SET, VOIDmode,
temp,
gen_rtx (HIGH, mode, operand1)));
emit_insn (set);
}
return 1;
}
else if (GET_CODE (operand1) != CONST_INT
|| ! cint_ok_for_move (INTVAL (operand1)))
{
rtx temp;
if (reload_in_progress || reload_completed)
temp = operand0;
else
temp = gen_reg_rtx (mode);
emit_insn (gen_rtx (SET, VOIDmode, temp,
gen_rtx (HIGH, mode, operand1)));
operands[1] = gen_rtx (LO_SUM, mode, temp, operand1);
}
}
/* Now have insn-emit do whatever it normally does. */
return 0;
}
/* Examine EXP and return nonzero if it contains an ADDR_EXPR (meaning
it will need a link/runtime reloc). */
int
reloc_needed (exp)
tree exp;
{
int reloc = 0;
switch (TREE_CODE (exp))
{
case ADDR_EXPR:
return 1;
case PLUS_EXPR:
case MINUS_EXPR:
reloc = reloc_needed (TREE_OPERAND (exp, 0));
reloc |= reloc_needed (TREE_OPERAND (exp, 1));
break;
case NOP_EXPR:
case CONVERT_EXPR:
case NON_LVALUE_EXPR:
reloc = reloc_needed (TREE_OPERAND (exp, 0));
break;
case CONSTRUCTOR:
{
register tree link;
for (link = CONSTRUCTOR_ELTS (exp); link; link = TREE_CHAIN (link))
if (TREE_VALUE (link) != 0)
reloc |= reloc_needed (TREE_VALUE (link));
}
break;
case ERROR_MARK:
break;
}
return reloc;
}
/* Does operand (which is a symbolic_operand) live in text space? If
so, SYMBOL_REF_FLAG, which is set by ENCODE_SECTION_INFO, will be true. */
int
read_only_operand (operand)
rtx operand;
{
if (GET_CODE (operand) == CONST)
operand = XEXP (XEXP (operand, 0), 0);
if (flag_pic)
{
if (GET_CODE (operand) == SYMBOL_REF)
return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand);
}
else
{
if (GET_CODE (operand) == SYMBOL_REF)
return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand);
}
return 1;
}
/* Return the best assembler insn template
for moving operands[1] into operands[0] as a fullword. */
char *
singlemove_string (operands)
rtx *operands;
{
HOST_WIDE_INT intval;
if (GET_CODE (operands[0]) == MEM)
return "stw %r1,%0";
if (GET_CODE (operands[1]) == MEM)
return "ldw %1,%0";
if (GET_CODE (operands[1]) == CONST_DOUBLE)
{
long i;
REAL_VALUE_TYPE d;
if (GET_MODE (operands[1]) != SFmode)
abort ();
/* Translate the CONST_DOUBLE to a CONST_INT with the same target
bit pattern. */
REAL_VALUE_FROM_CONST_DOUBLE (d, operands[1]);
REAL_VALUE_TO_TARGET_SINGLE (d, i);
operands[1] = GEN_INT (i);
/* Fall through to CONST_INT case. */
}
if (GET_CODE (operands[1]) == CONST_INT)
{
intval = INTVAL (operands[1]);
if (VAL_14_BITS_P (intval))
return "ldi %1,%0";
else if ((intval & 0x7ff) == 0)
return "ldil L'%1,%0";
else if (zdepi_cint_p (intval))
return "zdepi %Z1,%0";
else
return "ldil L'%1,%0\n\tldo R'%1(%0),%0";
}
return "copy %1,%0";
}
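/* Roughly: "ldi" for 14-bit constants such as 3000, a single "ldil" when
   the low 11 bits are zero (e.g. 0x12345000), "zdepi" for constants such
   as 0x0003e000 that are a shifted 5-bit signed field, and the two-insn
   ldil/ldo sequence for everything else (e.g. 0x12345678).  */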
/* Compute position (in OP[1]) and width (in OP[2])
useful for copying IMM to a register using the zdepi
instructions. Store the immediate value to insert in OP[0]. */
void
compute_zdepi_operands (imm, op)
unsigned HOST_WIDE_INT imm;
unsigned *op;
{
int lsb, len;
/* Find the least significant set bit in IMM. */
for (lsb = 0; lsb < 32; lsb++)
{
if ((imm & 1) != 0)
break;
imm >>= 1;
}
/* Choose variants based on *sign* of the 5-bit field. */
if ((imm & 0x10) == 0)
len = (lsb <= 28) ? 4 : 32 - lsb;
else
{
/* Find the width of the bitstring in IMM. */
for (len = 5; len < 32; len++)
{
if ((imm & (1 << len)) == 0)
break;
}
/* Sign extend IMM as a 5-bit value. */
imm = (imm & 0xf) - 0x10;
}
op[0] = imm;
op[1] = 31 - lsb;
op[2] = len;
}
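/* Worked example, assuming the usual PA bit numbering (bit 0 is the most
   significant): imm = 0x3e000 has its least significant set bit at
   position 13 and a 5-bit field of all ones, which sign extends to -1;
   so op[] becomes { -1, 31 - 13 = 18, 5 }, i.e. "zdepi -1,18,5" deposits
   the run of ones at bits 13..17 of the result.  */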
/* Output assembler code to perform a doubleword move insn
with operands OPERANDS. */
char *
output_move_double (operands)
rtx *operands;
{
enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1;
rtx latehalf[2];
rtx addreg0 = 0, addreg1 = 0;
/* First classify both operands. */
if (REG_P (operands[0]))
optype0 = REGOP;
else if (offsettable_memref_p (operands[0]))
optype0 = OFFSOP;
else if (GET_CODE (operands[0]) == MEM)
optype0 = MEMOP;
else
optype0 = RNDOP;
if (REG_P (operands[1]))
optype1 = REGOP;
else if (CONSTANT_P (operands[1]))
optype1 = CNSTOP;
else if (offsettable_memref_p (operands[1]))
optype1 = OFFSOP;
else if (GET_CODE (operands[1]) == MEM)
optype1 = MEMOP;
else
optype1 = RNDOP;
/* Check for the cases that the operand constraints are not
supposed to allow to happen. Abort if we get one,
because generating code for these cases is painful. */
if (optype0 != REGOP && optype1 != REGOP)
abort ();
/* Handle auto decrementing and incrementing loads and stores
specifically, since the structure of the function doesn't work
for them without major modification. Do it better when we learn
this port about the general inc/dec addressing of PA.
(This was written by tege. Chide him if it doesn't work.) */
if (optype0 == MEMOP)
{
/* We have to output the address syntax ourselves, since print_operand
doesn't deal with the addresses we want to use. Fix this later. */
rtx addr = XEXP (operands[0], 0);
if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
{
rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
operands[0] = XEXP (addr, 0);
if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
abort ();
if (!reg_overlap_mentioned_p (high_reg, addr))
{
/* No overlap between high target register and address
register. (We do this in a non-obvious way to
save a register file writeback) */
if (GET_CODE (addr) == POST_INC)
return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)";
return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)";
}
else
abort();
}
else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
{
rtx high_reg = gen_rtx (SUBREG, SImode, operands[1], 0);
operands[0] = XEXP (addr, 0);
if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG)
abort ();
if (!reg_overlap_mentioned_p (high_reg, addr))
{
/* No overlap between high target register and address
register. (We do this in a non-obvious way to
save a register file writeback) */
if (GET_CODE (addr) == PRE_INC)
return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)";
return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)";
}
else
abort();
}
}
if (optype1 == MEMOP)
{
/* We have to output the address syntax ourselves, since print_operand
doesn't deal with the addresses we want to use. Fix this later. */
rtx addr = XEXP (operands[1], 0);
if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC)
{
rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
operands[1] = XEXP (addr, 0);
if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
abort ();
if (!reg_overlap_mentioned_p (high_reg, addr))
{
/* No overlap between high target register and address
register. (We do this in a non-obvious way to
save a register file writeback) */
if (GET_CODE (addr) == POST_INC)
return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0";
return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0";
}
else
{
/* This is an undefined situation. We should load into the
address register *and* update that register. Probably
we don't need to handle this at all. */
if (GET_CODE (addr) == POST_INC)
return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0";
return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0";
}
}
else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
{
rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
operands[1] = XEXP (addr, 0);
if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG)
abort ();
if (!reg_overlap_mentioned_p (high_reg, addr))
{
/* No overlap between high target register and address
register. (We do this in a non-obvious way to
save a register file writeback) */
if (GET_CODE (addr) == PRE_INC)
return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0";
return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0";
}
else
{
/* This is an undefined situation. We should load into the
address register *and* update that register. Probably
we don't need to handle this at all. */
if (GET_CODE (addr) == PRE_INC)
return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0";
return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0";
}
}
else if (GET_CODE (addr) == PLUS
&& GET_CODE (XEXP (addr, 0)) == MULT)
{
rtx high_reg = gen_rtx (SUBREG, SImode, operands[0], 0);
if (!reg_overlap_mentioned_p (high_reg, addr))
{
rtx xoperands[4];
xoperands[0] = high_reg;
xoperands[1] = XEXP (addr, 1);
xoperands[2] = XEXP (XEXP (addr, 0), 0);
xoperands[3] = XEXP (XEXP (addr, 0), 1);
output_asm_insn ("sh%O3addl %2,%1,%0", xoperands);
return "ldw 4(0,%0),%R0\n\tldw 0(0,%0),%0";
}
else
{
rtx xoperands[4];
xoperands[0] = high_reg;
xoperands[1] = XEXP (addr, 1);
xoperands[2] = XEXP (XEXP (addr, 0), 0);
xoperands[3] = XEXP (XEXP (addr, 0), 1);
output_asm_insn ("sh%O3addl %2,%1,%R0", xoperands);
return "ldw 0(0,%R0),%0\n\tldw 4(0,%R0),%R0";
}
}
}
/* If an operand is an unoffsettable memory ref, find a register
we can increment temporarily to make it refer to the second word. */
if (optype0 == MEMOP)
addreg0 = find_addr_reg (XEXP (operands[0], 0));
if (optype1 == MEMOP)
addreg1 = find_addr_reg (XEXP (operands[1], 0));
/* Ok, we can do one word at a time.
Normally we do the low-numbered word first.
In either case, set up in LATEHALF the operands to use
for the high-numbered word and in some cases alter the
operands in OPERANDS to be suitable for the low-numbered word. */
if (optype0 == REGOP)
latehalf[0] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
else if (optype0 == OFFSOP)
latehalf[0] = adj_offsettable_operand (operands[0], 4);
else
latehalf[0] = operands[0];
if (optype1 == REGOP)
latehalf[1] = gen_rtx (REG, SImode, REGNO (operands[1]) + 1);
else if (optype1 == OFFSOP)
latehalf[1] = adj_offsettable_operand (operands[1], 4);
else if (optype1 == CNSTOP)
split_double (operands[1], &operands[1], &latehalf[1]);
else
latehalf[1] = operands[1];
/* If the first move would clobber the source of the second one,
do them in the other order.
This can happen in two cases:
mem -> register where the first half of the destination register
is the same register used in the memory's address. Reload
can create such insns.
mem in this case will be either register indirect or register
indirect plus a valid offset.
register -> register move where REGNO(dst) == REGNO(src + 1)
someone (Tim/Tege?) claimed this can happen for parameter loads.
Handle mem -> register case first. */
if (optype0 == REGOP
&& (optype1 == MEMOP || optype1 == OFFSOP)
&& refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1,
operands[1], 0))
{
/* Do the late half first. */
if (addreg1)
output_asm_insn ("ldo 4(%0),%0", &addreg1);
output_asm_insn (singlemove_string (latehalf), latehalf);
/* Then clobber. */
if (addreg1)
output_asm_insn ("ldo -4(%0),%0", &addreg1);
return singlemove_string (operands);
}
/* Now handle register -> register case. */
if (optype0 == REGOP && optype1 == REGOP
&& REGNO (operands[0]) == REGNO (operands[1]) + 1)
{
output_asm_insn (singlemove_string (latehalf), latehalf);
return singlemove_string (operands);
}
/* Normal case: do the two words, low-numbered first. */
output_asm_insn (singlemove_string (operands), operands);
/* Make any unoffsettable addresses point at high-numbered word. */
if (addreg0)
output_asm_insn ("ldo 4(%0),%0", &addreg0);
if (addreg1)
output_asm_insn ("ldo 4(%0),%0", &addreg1);
/* Do that word. */
output_asm_insn (singlemove_string (latehalf), latehalf);
/* Undo the adds we just did. */
if (addreg0)
output_asm_insn ("ldo -4(%0),%0", &addreg0);
if (addreg1)
output_asm_insn ("ldo -4(%0),%0", &addreg1);
return "";
}
char *
output_fp_move_double (operands)
rtx *operands;
{
if (FP_REG_P (operands[0]))
{
if (FP_REG_P (operands[1])
|| operands[1] == CONST0_RTX (GET_MODE (operands[0])))
output_asm_insn ("fcpy,dbl %r1,%0", operands);
else
output_asm_insn ("fldd%F1 %1,%0", operands);
}
else if (FP_REG_P (operands[1]))
{
output_asm_insn ("fstd%F0 %1,%0", operands);
}
else if (operands[1] == CONST0_RTX (GET_MODE (operands[0])))
{
if (GET_CODE (operands[0]) == REG)
{
rtx xoperands[2];
xoperands[1] = gen_rtx (REG, SImode, REGNO (operands[0]) + 1);
xoperands[0] = operands[0];
output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
}
/* This is a pain. You have to be prepared to deal with an
arbitrary address here including pre/post increment/decrement,
so avoid this in the MD. */
else
abort ();
}
else abort ();
return "";
}
/* Return a REG that occurs in ADDR with coefficient 1.
ADDR can be effectively incremented by incrementing REG. */
static rtx
find_addr_reg (addr)
rtx addr;
{
while (GET_CODE (addr) == PLUS)
{
if (GET_CODE (XEXP (addr, 0)) == REG)
addr = XEXP (addr, 0);
else if (GET_CODE (XEXP (addr, 1)) == REG)
addr = XEXP (addr, 1);
else if (CONSTANT_P (XEXP (addr, 0)))
addr = XEXP (addr, 1);
else if (CONSTANT_P (XEXP (addr, 1)))
addr = XEXP (addr, 0);
else
abort ();
}
if (GET_CODE (addr) == REG)
return addr;
abort ();
}
/* Emit code to perform a block move.
OPERANDS[0] is the destination pointer as a REG, clobbered.
OPERANDS[1] is the source pointer as a REG, clobbered.
OPERANDS[2] is a register for temporary storage.
OPERANDS[4] is the size as a CONST_INT
OPERANDS[3] is a register for temporary storage.
OPERANDS[5] is the alignment safe to use, as a CONST_INT.
OPERANDS[6] is another temporary register. */
char *
output_block_move (operands, size_is_constant)
rtx *operands;
int size_is_constant;
{
int align = INTVAL (operands[5]);
unsigned long n_bytes = INTVAL (operands[4]);
/* We can't move more than four bytes at a time because the PA
has no integer move insns longer than a word. (Could use fp mem ops?) */
if (align > 4)
align = 4;
/* Note that we know each loop below will execute at least twice
(else we would have open-coded the copy). */
switch (align)
{
case 4:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 8);
output_asm_insn ("ldi %4,%2", operands);
/* Copying loop. */
output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
output_asm_insn ("ldws,ma 4(0,%1),%6", operands);
output_asm_insn ("stws,ma %3,4(0,%0)", operands);
output_asm_insn ("addib,>= -8,%2,.-12", operands);
output_asm_insn ("stws,ma %6,4(0,%0)", operands);
/* Handle the residual. There could be up to 7 bytes of
residual to copy! */
if (n_bytes % 8 != 0)
{
operands[4] = GEN_INT (n_bytes % 4);
if (n_bytes % 8 >= 4)
output_asm_insn ("ldws,ma 4(0,%1),%3", operands);
if (n_bytes % 4 != 0)
output_asm_insn ("ldw 0(0,%1),%6", operands);
if (n_bytes % 8 >= 4)
output_asm_insn ("stws,ma %3,4(0,%0)", operands);
if (n_bytes % 4 != 0)
output_asm_insn ("stbys,e %6,%4(0,%0)", operands);
}
return "";
case 2:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 4);
output_asm_insn ("ldi %4,%2", operands);
/* Copying loop. */
output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
output_asm_insn ("ldhs,ma 2(0,%1),%6", operands);
output_asm_insn ("sths,ma %3,2(0,%0)", operands);
output_asm_insn ("addib,>= -4,%2,.-12", operands);
output_asm_insn ("sths,ma %6,2(0,%0)", operands);
/* Handle the residual. */
if (n_bytes % 4 != 0)
{
if (n_bytes % 4 >= 2)
output_asm_insn ("ldhs,ma 2(0,%1),%3", operands);
if (n_bytes % 2 != 0)
output_asm_insn ("ldb 0(0,%1),%6", operands);
if (n_bytes % 4 >= 2)
output_asm_insn ("sths,ma %3,2(0,%0)", operands);
if (n_bytes % 2 != 0)
output_asm_insn ("stb %6,0(0,%0)", operands);
}
return "";
case 1:
/* Pre-adjust the loop counter. */
operands[4] = GEN_INT (n_bytes - 2);
output_asm_insn ("ldi %4,%2", operands);
/* Copying loop. */
output_asm_insn ("ldbs,ma 1(0,%1),%3", operands);
output_asm_insn ("ldbs,ma 1(0,%1),%6", operands);
output_asm_insn ("stbs,ma %3,1(0,%0)", operands);
output_asm_insn ("addib,>= -2,%2,.-12", operands);
output_asm_insn ("stbs,ma %6,1(0,%0)", operands);
/* Handle the residual. */
if (n_bytes % 2 != 0)
{
output_asm_insn ("ldb 0(0,%1),%3", operands);
output_asm_insn ("stb %3,0(0,%0)", operands);
}
return "";
default:
abort ();
}
}
/* Count the number of insns necessary to handle this block move.
Basic structure is the same as emit_block_move, except that we
count insns rather than emit them. */
int
compute_movstrsi_length (insn)
rtx insn;
{
rtx pat = PATTERN (insn);
int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0));
unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0));
unsigned int n_insns = 0;
/* We can't move more than four bytes at a time because the PA
has no integer move insns longer than a word. (Could use fp mem ops?) */
if (align > 4)
align = 4;
/* The basic copying loop. */
n_insns = 6;
/* Residuals. */
if (n_bytes % (2 * align) != 0)
{
if ((n_bytes % (2 * align)) >= align)
n_insns += 2;
if ((n_bytes % align) != 0)
n_insns += 2;
}
/* Lengths are expressed in bytes now; each insn is 4 bytes. */
return n_insns * 4;
}
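/* For example, with align == 4 and n_bytes == 23: the basic loop costs 6
   insns, the word-sized residual (23 % 8 == 7 >= 4) adds 2, the trailing
   3 bytes (23 % 4 != 0) add 2 more, giving 10 insns or 40 bytes.  */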
char *
output_and (operands)
rtx *operands;
{
if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0)
{
unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
int ls0, ls1, ms0, p, len;
for (ls0 = 0; ls0 < 32; ls0++)
if ((mask & (1 << ls0)) == 0)
break;
for (ls1 = ls0; ls1 < 32; ls1++)
if ((mask & (1 << ls1)) != 0)
break;
for (ms0 = ls1; ms0 < 32; ms0++)
if ((mask & (1 << ms0)) == 0)
break;
if (ms0 != 32)
abort();
if (ls1 == 32)
{
len = ls0;
if (len == 0)
abort ();
operands[2] = GEN_INT (len);
return "extru %1,31,%2,%0";
}
else
{
/* We could use this `depi' for the case above as well, but `depi'
requires one more register file access than an `extru'. */
p = 31 - ls0;
len = ls1 - ls0;
operands[2] = GEN_INT (p);
operands[3] = GEN_INT (len);
return "depi 0,%2,%3,%0";
}
}
else
return "and %1,%2,%0";
}
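/* For instance, operands[2] == 0x0000ffff selects the extru form
   ("extru %1,31,16,%0"), while 0xfff00fff has an interior run of zeros
   at bits 12..19 and selects "depi 0,19,8,%0".  */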
char *
output_ior (operands)
rtx *operands;
{
unsigned HOST_WIDE_INT mask = INTVAL (operands[2]);
int bs0, bs1, p, len;
if (INTVAL (operands[2]) == 0)
return "copy %1,%0";
for (bs0 = 0; bs0 < 32; bs0++)
if ((mask & (1 << bs0)) != 0)
break;
for (bs1 = bs0; bs1 < 32; bs1++)
if ((mask & (1 << bs1)) == 0)
break;
if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask)
abort();
p = 31 - bs0;
len = bs1 - bs0;
operands[2] = GEN_INT (p);
operands[3] = GEN_INT (len);
return "depi -1,%2,%3,%0";
}
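/* E.g. operands[2] == 0x0001fe00 gives bs0 == 9 and bs1 == 17, so the
   insn emitted is "depi -1,22,8,%0".  */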
/* Output an ascii string. */
void
output_ascii (file, p, size)
FILE *file;
unsigned char *p;
int size;
{
int i;
int chars_output;
unsigned char partial_output[16]; /* Max space 4 chars can occupy. */
/* The HP assembler can only take strings of 256 characters at one
time. This is a limitation on input line length, *not* the
length of the string. Sigh. Even worse, it seems that the
restriction is in number of input characters (see \xnn &
\whatever). So we have to do this very carefully. */
fputs ("\t.STRING \"", file);
chars_output = 0;
for (i = 0; i < size; i += 4)
{
int co = 0;
int io = 0;
for (io = 0, co = 0; io < MIN (4, size - i); io++)
{
register unsigned int c = p[i + io];
if (c == '\"' || c == '\\')
partial_output[co++] = '\\';
if (c >= ' ' && c < 0177)
partial_output[co++] = c;
else
{
unsigned int hexd;
partial_output[co++] = '\\';
partial_output[co++] = 'x';
hexd = c / 16 - 0 + '0';
if (hexd > '9')
hexd -= '9' - 'a' + 1;
partial_output[co++] = hexd;
hexd = c % 16 - 0 + '0';
if (hexd > '9')
hexd -= '9' - 'a' + 1;
partial_output[co++] = hexd;
}
}
if (chars_output + co > 243)
{
fputs ("\"\n\t.STRING \"", file);
chars_output = 0;
}
fwrite (partial_output, 1, co, file);
chars_output += co;
co = 0;
}
fputs ("\"\n", file);
}
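/* For example, the three bytes 'a', '"', '\n' come out as
   .STRING "a\"\x0a" -- quotes and backslashes get a backslash escape and
   other non-printing characters become lower-case \xNN escapes.  */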
/* Try to rewrite floating point comparisons & branches to avoid
useless add,tr insns.
CHECK_NOTES is nonzero if we should examine REG_DEAD notes
to see if FPCC is dead. CHECK_NOTES is nonzero for the
first attempt to remove useless add,tr insns. It is zero
for the second pass as reorg sometimes leaves bogus REG_DEAD
notes lying around.
When CHECK_NOTES is zero we can only eliminate add,tr insns
when there's a 1:1 correspondence between fcmp and ftest/fbranch
instructions. */
void
remove_useless_addtr_insns (insns, check_notes)
rtx insns;
int check_notes;
{
rtx insn;
int all;
static int pass = 0;
/* This is fairly cheap, so always run it when optimizing. */
if (optimize > 0)
{
int fcmp_count = 0;
int fbranch_count = 0;
/* Walk all the insns in this function looking for fcmp & fbranch
instructions. Keep track of how many of each we find. */
insns = get_insns ();
for (insn = insns; insn; insn = next_insn (insn))
{
rtx tmp;
/* Ignore anything that isn't an INSN or a JUMP_INSN. */
if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN)
continue;
tmp = PATTERN (insn);
/* It must be a set. */
if (GET_CODE (tmp) != SET)
continue;
/* If the destination is CCFP, then we've found an fcmp insn. */
tmp = SET_DEST (tmp);
if (GET_CODE (tmp) == REG && REGNO (tmp) == 0)
{
fcmp_count++;
continue;
}
tmp = PATTERN (insn);
/* If this is an fbranch instruction, bump the fbranch counter. */
if (GET_CODE (tmp) == SET
&& SET_DEST (tmp) == pc_rtx
&& GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE
&& GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE
&& GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG
&& REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0)
{
fbranch_count++;
continue;
}
}
/* Find all floating point compare + branch insns. If possible,
reverse the comparison & the branch to avoid add,tr insns. */
for (insn = insns; insn; insn = next_insn (insn))
{
rtx tmp, next;
/* Ignore anything that isn't an INSN. */
if (GET_CODE (insn) != INSN)
continue;
tmp = PATTERN (insn);
/* It must be a set. */
if (GET_CODE (tmp) != SET)
continue;
/* The destination must be CCFP, which is register zero. */
tmp = SET_DEST (tmp);
if (GET_CODE (tmp) != REG || REGNO (tmp) != 0)
continue;
/* INSN should be a set of CCFP.
See if the result of this insn is used in a reversed FP
conditional branch. If so, reverse our condition and
the branch. Doing so avoids useless add,tr insns. */
next = next_insn (insn);
while (next)
{
/* Jumps, calls and labels stop our search. */
if (GET_CODE (next) == JUMP_INSN
|| GET_CODE (next) == CALL_INSN
|| GET_CODE (next) == CODE_LABEL)
break;
/* As does another fcmp insn. */
if (GET_CODE (next) == INSN
&& GET_CODE (PATTERN (next)) == SET
&& GET_CODE (SET_DEST (PATTERN (next))) == REG
&& REGNO (SET_DEST (PATTERN (next))) == 0)
break;
next = next_insn (next);
}
/* Is NEXT_INSN a branch? */
if (next
&& GET_CODE (next) == JUMP_INSN)
{
rtx pattern = PATTERN (next);
/* If it is a reversed fp conditional branch (e.g. it uses add,tr)
and CCFP dies, then reverse our condition and the branch
to avoid the add,tr.  */
if (GET_CODE (pattern) == SET
&& SET_DEST (pattern) == pc_rtx
&& GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE
&& GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE
&& GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG
&& REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0
&& GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC
&& (fcmp_count == fbranch_count
|| (check_notes
&& find_regno_note (next, REG_DEAD, 0))))
{
/* Reverse the branch. */
tmp = XEXP (SET_SRC (pattern), 1);
XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2);
XEXP (SET_SRC (pattern), 2) = tmp;
INSN_CODE (next) = -1;
/* Reverse our condition. */
tmp = PATTERN (insn);
PUT_CODE (XEXP (tmp, 1),
reverse_condition (GET_CODE (XEXP (tmp, 1))));
}
}
}
}
pass = !pass;
}
/* You may have trouble believing this, but this is the HP-PA stack
layout. Wow.
Offset               Contents
                     Variable arguments (optional; any number may be allocated)
SP-(4*(N+9))         arg word N
:                    :
SP-56                arg word 5
SP-52                arg word 4
                     Fixed arguments (must be allocated; may remain unused)
SP-48                arg word 3
SP-44                arg word 2
SP-40                arg word 1
SP-36                arg word 0
                     Frame Marker
SP-32                External Data Pointer (DP)
SP-28                External sr4
SP-24                External/stub RP (RP')
SP-20                Current RP
SP-16                Static Link
SP-12                Clean up
SP-8                 Calling Stub RP (RP'')
SP-4                 Previous SP
                     Top of Frame
SP-0                 Stack Pointer (points to next available address)
*/
/* This function saves registers as follows. Registers marked with ' are
this function's registers (as opposed to the previous function's).
If a frame_pointer isn't needed, r4 is saved as a general register;
the space for the frame pointer is still allocated, though, to keep
things simple.
Top of Frame
SP (FP')             Previous FP
SP + 4               Alignment filler (sigh)
SP + 8               Space for locals reserved here.
.
.
.
SP + n               All call saved registers used.
.
.
.
SP + o               All call saved fp registers used.
.
.
.
SP + p (SP')         points to next available address.
*/
/* Emit RTL to store REG at the memory location specified by BASE+DISP.
Handle case where DISP > 8k by using the add_high_const pattern.
Note that in the DISP > 8k case, we will leave the high part of the
address in %r1.  There is code in hppa_expand_{prologue,epilogue}
that knows this.  */
static void
store_reg (reg, disp, base)
int reg, disp, base;
{
if (VAL_14_BITS_P (disp))
{
emit_move_insn (gen_rtx (MEM, SImode,
gen_rtx (PLUS, SImode,
gen_rtx (REG, SImode, base),
GEN_INT (disp))),
gen_rtx (REG, SImode, reg));
}
else
{
emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
gen_rtx (REG, SImode, base),
GEN_INT (disp)));
emit_move_insn (gen_rtx (MEM, SImode,
gen_rtx (LO_SUM, SImode,
gen_rtx (REG, SImode, 1),
GEN_INT (disp))),
gen_rtx (REG, SImode, reg));
}
}
/* Emit RTL to load REG from the memory location specified by BASE+DISP.
Handle case where DISP > 8k by using the add_high_const pattern.
Note that in the DISP > 8k case, we will leave the high part of the
address in %r1.  There is code in hppa_expand_{prologue,epilogue}
that knows this.  */
static void
load_reg (reg, disp, base)
int reg, disp, base;
{
if (VAL_14_BITS_P (disp))
{
emit_move_insn (gen_rtx (REG, SImode, reg),
gen_rtx (MEM, SImode,
gen_rtx (PLUS, SImode,
gen_rtx (REG, SImode, base),
GEN_INT (disp))));
}
else
{
emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
gen_rtx (REG, SImode, base),
GEN_INT (disp)));
emit_move_insn (gen_rtx (REG, SImode, reg),
gen_rtx (MEM, SImode,
gen_rtx (LO_SUM, SImode,
gen_rtx (REG, SImode, 1),
GEN_INT (disp))));
}
}
/* Emit RTL to set REG to the value specified by BASE+DISP.
Handle case where DISP > 8k by using the add_high_const pattern.
Note that in the DISP > 8k case, we will leave the high part of the
address in %r1.  There is code in hppa_expand_{prologue,epilogue}
that knows this.  */
static void
set_reg_plus_d (reg, base, disp)
int reg, base, disp;
{
if (VAL_14_BITS_P (disp))
{
emit_move_insn (gen_rtx (REG, SImode, reg),
gen_rtx (PLUS, SImode,
gen_rtx (REG, SImode, base),
GEN_INT (disp)));
}
else
{
emit_insn (gen_add_high_const (gen_rtx (REG, SImode, 1),
gen_rtx (REG, SImode, base),
GEN_INT (disp)));
emit_move_insn (gen_rtx (REG, SImode, reg),
gen_rtx (LO_SUM, SImode,
gen_rtx (REG, SImode, 1),
GEN_INT (disp)));
}
}
/* Global variables set by FUNCTION_PROLOGUE. */
/* Size of frame. Need to know this to emit return insns from
leaf procedures. */
static int actual_fsize;
static int local_fsize, save_fregs;
int
compute_frame_size (size, fregs_live)
int size;
int *fregs_live;
{
extern int current_function_outgoing_args_size;
int i, fsize;
/* 8 is space for frame pointer + filler. If any frame is allocated
we need to add this in because of STARTING_FRAME_OFFSET. */
fsize = size + (size || frame_pointer_needed ? 8 : 0);
/* We must leave enough space for all the callee saved registers
from 3 .. highest used callee save register since we don't
know if we're going to have an inline or out of line prologue
and epilogue. */
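/* Registers %r3 .. %ri take 4 * (i - 2) bytes, where i is the
highest such register which is live.  */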
for (i = 18; i >= 3; i--)
if (regs_ever_live[i])
{
fsize += 4 * (i - 2);
break;
}
/* Round the stack up to a doubleword (8 byte) boundary.  */
fsize = (fsize + 7) & ~7;
/* Likewise we must leave enough space for all the live callee saved
floating point registers, again because we don't know if we're going
to have an inline or out of line prologue and epilogue.  */
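/* Each live register pair takes 8 bytes, so 4 * (i - 46) reserves
room for every pair from hard register 48 up through the highest
live pair.  */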
for (i = 66; i >= 48; i -= 2)
if (regs_ever_live[i] || regs_ever_live[i + 1])
{
if (fregs_live)
*fregs_live = 1;
fsize += 4 * (i - 46);
break;
}
fsize += current_function_outgoing_args_size;
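/* If this function makes calls or needs any frame at all, allocate
the 32 byte frame marker, then round the total frame size up to
the multiple of 64 bytes required by the PA calling conventions.  */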
if (! leaf_function_p () || fsize)
fsize += 32;
return (fsize + 63) & ~63;
}
rtx hp_profile_label_rtx;
static char hp_profile_label_name[8];
void
output_function_prologue (file, size)
FILE *file;
int size;
{
/* The function's label and associated .PROC must never be
separated and must be output *after* any profiling declarations
to avoid changing spaces/subspaces within a procedure. */
ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0));
fputs ("\t.PROC\n", file);
/* hppa_expand_prologue does the dirty work now. We just need
to output the assembler directives which denote the start
of a function. */
fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize);
if (regs_ever_live[2] || profile_flag)
fputs (",CALLS,SAVE_RP", file);
else
fputs (",NO_CALLS", file);
if (frame_pointer_needed)
fputs (",SAVE_SP", file);
/* Pass on information about the number of callee register saves
performed in the prologue.
The compiler is supposed to pass the highest register number
saved, the assembler then has to adjust that number before
entering it into the unwind descriptor (to account for any
caller saved registers with lower register numbers than the
first callee saved register). */
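/* Callee saves are assumed to be contiguous: general register saves
start at %r3 and floating point saves at %fr12, so the highest
saved registers are gr_saved + 2 and fr_saved + 11 respectively.  */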
if (gr_saved)
fprintf (file, ",ENTRY_GR=%d", gr_saved + 2);
if (fr_saved)
fprintf (file, ",ENTRY_FR=%d", fr_saved + 11);
fputs ("\n\t.ENTRY\n", file);
/* Horrid hack. emit_function_prologue will modify this RTL in
place to get the expected results. */
if (profile_flag)
ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP",
hp_profile_labelno);
/* If we're using GAS and not using the portable runtime model, then
we don't need to accumulate the total number of code bytes. */
if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME)
total_code_bytes = 0;
else if (insn_addresses)
{
unsigned int old_total = total_code_bytes;
total_code_bytes += insn_addresses[INSN_UID (get_last_insn())];
total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT;
/* Be prepared to handle overflows. */
total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes;
}
else
total_code_bytes = -1;
remove_useless_addtr_insns (get_insns (), 0);
}
void
hppa_expand_prologue ()
{
extern char call_used_regs[];
int size = get_frame_size ();
int merge_sp_adjust_with_store = 0;
int i, offset;
rtx tmpreg, size_rtx;
gr_saved = 0;
fr_saved = 0;
save_fregs = 0;
local_fsize = size + (size || frame_pointer_needed ? 8 : 0);
actual_fsize = compute_frame_size (size, &save_fregs);
/* Compute a few things we will use often. */
tmpreg = gen_rtx (REG, SImode, 1);
size_rtx = GEN_INT (actual_fsize);
/* Handle out of line prologues and epilogues. */
if (TARGET_SPACE)
{
rtx operands[2];
int saves = 0;
int outline_insn_count = 0;
int inline_insn_count = 0;
/* Count the number of insns for the inline and out of line
variants so we can choose one appropriately.
No need to screw with counting actual_fsize operations -- they're
done for both inline and out of line prologues. */
if (regs_ever_live[2])
inline_insn_count += 1;
if (! cint_ok_for_move (local_fsize))
outline_insn_count += 2;
else
outline_insn_count += 1;
/* Compute the register save information which is later passed to
the out-of-line prologue in %r22.  */
for (i = 18; i >= 3; i--)
if (regs_ever_live[i] && ! call_used_regs[i])
{
/* -1 because the stack adjustment is normally done in
the same insn as a register save. */
inline_insn_count += (i - 2) - 1;
saves = i;
break;
}
for (i = 66; i >= 48; i -= 2)
if (regs_ever_live[i] || regs_ever_live[i + 1])
{
/* +1 needed as we load %r1 with the start of the freg
save area. */
inline_insn_count += (i/2 - 23) + 1;
saves |= ((i/2 - 12 ) << 16);
break;
}
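/* SAVES now packs the information passed to the out-of-line routine
in %r22: the highest callee saved general register in the low half
and the floating point save information in the high half.  */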
if (frame_pointer_needed)
inline_insn_count += 3;
if (! cint_ok_for_move (saves))
outline_insn_count += 2;
else
outline_insn_count += 1;
if (TARGET_PORTABLE_RUNTIME)
outline_insn_count += 2;
else
outline_insn_count += 1;
/* If there's a lot of insns in the prologue, then do it as
an out-of-line sequence. */
if (inline_insn_count > outline_insn_count)
{
/* Put the local_fsize into %r19.  */
operands[0] = gen_rtx (REG, SImode, 19);
operands[1] = GEN_INT (local_fsize);
emit_move_insn (operands[0], operands[1]);
/* Put the stack size into %r21. */
operands[0] = gen_rtx (REG, SImode, 21);
operands[1] = size_rtx;
emit_move_insn (operands[0], operands[1]);
operands[0] = gen_rtx (REG, SImode, 22);
operands[1] = GEN_INT (saves);
emit_move_insn (operands[0], operands[1]);
/* Now call the out-of-line prologue. */
emit_insn (gen_outline_prologue_call ());
emit_insn (gen_blockage ());
/* Note that we're using an out-of-line prologue. */
out_of_line_prologue_epilogue = 1;
return;
}
}
out_of_line_prologue_epilogue = 0;
/* Save RP first. The calling conventions manual states RP will
always be stored into the caller's frame at sp-20. */
if (regs_ever_live[2] || profile_flag)
store_reg (2, -20, STACK_POINTER_REGNUM);
/* Allocate the local frame and set up the frame pointer if needed. */
if (actual_fsize)
if (frame_pointer_needed)
{
/* Copy the old frame pointer temporarily into %r1. Set up the
new stack pointer, then store away the saved old frame pointer
into the stack at sp+actual_fsize and at the same time update
the stack pointer by actual_fsize bytes. Two versions, first
handles small (<8k) frames. The second handles large (>8k)
frames. */
emit_move_insn (tmpreg, frame_pointer_rtx);
emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
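/* For a frame that fits in 14 bits, a single stwm both stores the
saved frame pointer at *sp and post-increments the stack pointer
by the full frame size.  */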
if (VAL_14_BITS_P (actual_fsize))
emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx));
else
{
/* It is incorrect to store the saved frame pointer at *sp,
then increment sp (writes beyond the current stack boundary).
So instead use stwm to store at *sp and post-increment the
stack pointer as an atomic operation. Then increment sp to
finish allocating the new frame. */
emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64)));
set_reg_plus_d (STACK_POINTER_REGNUM,
STACK_POINTER_REGNUM,
actual_fsize - 64);
}
}
/* no frame pointer needed. */
else
{
/* In some cases we can perform the first callee register save
and allocating the stack frame at the same time. If so, just
make a note of it and defer allocating the frame until saving
the callee registers. */
if (VAL_14_BITS_P (-actual_fsize)
&& local_fsize == 0
&& ! profile_flag
&& ! flag_pic)
merge_sp_adjust_with_store = 1;
/* Can not optimize. Adjust the stack frame by actual_fsize bytes. */
else if (actual_fsize != 0)
set_reg_plus_d (STACK_POINTER_REGNUM,
STACK_POINTER_REGNUM,
actual_fsize);
}
/* The hppa calling conventions say that %r19, the pic offset
register, is saved at sp - 32 (in this function's frame) when
generating PIC code. FIXME: What is the correct thing to do
for functions which make no calls and allocate no frame? Do
we need to allocate a frame, or can we just omit the save? For
now we'll just omit the save. */
if (actual_fsize != 0 && flag_pic)
store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM);
/* Profiling code.
Instead of taking one argument, the counter label, as most normal
mcounts do, _mcount appears to behave differently on the HPPA. It
takes the return address of the caller, the address of this routine,
and the address of the label. Also, it isn't magic, so
argument registers have to be preserved. */
if (profile_flag)
{
int pc_offset, i, arg_offset, basereg, offsetadj;
pc_offset = 4 + (frame_pointer_needed
? (VAL_14_BITS_P (actual_fsize) ? 12 : 20)
: (VAL_14_BITS_P (actual_fsize) ? 4 : 8));
/* When the function has a frame pointer, use it as the base
register for saving/restore registers. Else use the stack
pointer. Adjust the offset according to the frame size if
this function does not have a frame pointer. */
basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM
: STACK_POINTER_REGNUM;
offsetadj = frame_pointer_needed ? 0 : actual_fsize;
/* Horrid hack. emit_function_prologue will modify this RTL in
place to get the expected results. sprintf here is just to
put something in the name. */
sprintf (hp_profile_label_name, "LP$%04d", -1);
hp_profile_label_rtx = gen_rtx (SYMBOL_REF, SImode,
hp_profile_label_name);
if (current_function_returns_struct)
store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg);
for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
if (regs_ever_live [i])
{
store_reg (i, arg_offset, basereg);
/* Deal with arg_offset not fitting in 14 bits. */
pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8;
}
emit_move_insn (gen_rtx (REG, SImode, 26), gen_rtx (REG, SImode, 2));
emit_move_insn (tmpreg, gen_rtx (HIGH, SImode, hp_profile_label_rtx));
emit_move_insn (gen_rtx (REG, SImode, 24),
gen_rtx (LO_SUM, SImode, tmpreg, hp_profile_label_rtx));
/* %r25 is set from within the output pattern. */
emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20)));
/* Restore argument registers. */
for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4)
if (regs_ever_live [i])
load_reg (i, arg_offset, basereg);
if (current_function_returns_struct)
load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg);
}
/* Normal register save.
Do not save the frame pointer in the frame_pointer_needed case. It
was done earlier. */
if (frame_pointer_needed)
{
for (i = 18, offset = local_fsize; i >= 4; i--)
if (regs_ever_live[i] && ! call_used_regs[i])
{
store_reg (i, offset, FRAME_POINTER_REGNUM);
offset += 4;
gr_saved++;
}
/* Account for %r3 which is saved in a special place. */
gr_saved++;
}
/* No frame pointer needed. */
else
{
for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
if (regs_ever_live[i] && ! call_used_regs[i])
{
/* If merge_sp_adjust_with_store is nonzero, then we can
optimize the first GR save. */
if (merge_sp_adjust_with_store)
{
merge_sp_adjust_with_store = 0;
emit_insn (gen_post_stwm (stack_pointer_rtx,
gen_rtx (REG, SImode, i),
GEN_INT (-offset)));
}
else
store_reg (i, offset, STACK_POINTER_REGNUM);
offset += 4;
gr_saved++;
}
/* If we wanted to merge the SP adjustment with a GR save, but we never
did any GR saves, then just emit the adjustment here. */
if (merge_sp_adjust_with_store)
set_reg_plus_d (STACK_POINTER_REGNUM,
STACK_POINTER_REGNUM,
actual_fsize);
}
/* Align pointer properly (doubleword boundary). */
offset = (offset + 7) & ~7;
/* Floating point register store. */
if (save_fregs)
{
/* First get the frame or stack pointer to the start of the FP register
save area. */
if (frame_pointer_needed)
set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
else
set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
/* Now actually save the FP registers. */
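/* %r1 (tmpreg) points at the start of the save area; the POST_INC
addresses below step through it one double (8 bytes) at a time.  */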
for (i = 66; i >= 48; i -= 2)
{
if (regs_ever_live[i] || regs_ever_live[i + 1])
{
emit_move_insn (gen_rtx (MEM, DFmode,
gen_rtx (POST_INC, DFmode, tmpreg)),
gen_rtx (REG, DFmode, i));
fr_saved++;
}
}
}
/* When generating PIC code it is necessary to save/restore the
PIC register around each function call. We used to do this
in the call patterns themselves, but that implementation
made incorrect assumptions about using global variables to hold
per-function rtl code generated in the backend.
So instead, we copy the PIC register into a reserved callee saved
register in the prologue. Then after each call we reload the PIC
register from the callee saved register. We also reload the PIC
register from the callee saved register in the epilogue to ensure the
PIC register is valid at function exit.
This may (depending on the exact characteristics of the function)
even be more efficient.
Avoid this if the callee saved register wasn't used (these are
leaf functions). */
if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED])
emit_move_insn (gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM_SAVED),
gen_rtx (REG, SImode, PIC_OFFSET_TABLE_REGNUM));
}
void
output_function_epilogue (file, size)
FILE *file;
int size;
{
rtx insn = get_last_insn ();
int i;
/* hppa_expand_epilogue does the dirty work now. We just need
to output the assembler directives which denote the end
of a function.
To make debuggers happy, emit a nop if the epilogue was completely
eliminated due to a volatile call as the last insn in the
current function. That way the return address (in %r2) will
always point to a valid instruction in the current function. */
/* Get the last real insn. */
if (GET_CODE (insn) == NOTE)
insn = prev_real_insn (insn);
/* If it is a sequence, then look inside. */
if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
insn = XVECEXP (PATTERN (insn), 0, 0);
/* If insn is a CALL_INSN, then it must be a call to a volatile
function (otherwise there would be epilogue insns). */
if (insn && GET_CODE (insn) == CALL_INSN)
fputs ("\tnop\n", file);
fputs ("\t.EXIT\n\t.PROCEND\n", file);
}
void
hppa_expand_epilogue ()
{
rtx tmpreg;
int offset, i;
int merge_sp_adjust_with_load = 0;
/* Handle out of line prologues and epilogues. */
if (TARGET_SPACE && out_of_line_prologue_epilogue)
{
int saves = 0;
rtx operands[2];
/* Compute the register save information which is later passed to
the out-of-line epilogue in %r22.  */
for (i = 18; i >= 3; i--)
if (regs_ever_live[i] && ! call_used_regs[i])
{
saves = i;
break;
}
for (i = 66; i >= 48; i -= 2)
if (regs_ever_live[i] || regs_ever_live[i + 1])
{
saves |= ((i/2 - 12 ) << 16);
break;
}
emit_insn (gen_blockage ());
/* Put the local_fsize into %r19.  */
operands[0] = gen_rtx (REG, SImode, 19);
operands[1] = GEN_INT (local_fsize);
emit_move_insn (operands[0], operands[1]);
/* Put the stack size into %r21. */
operands[0] = gen_rtx (REG, SImode, 21);
operands[1] = GEN_INT (actual_fsize);
emit_move_insn (operands[0], operands[1]);
operands[0] = gen_rtx (REG, SImode, 22);
operands[1] = GEN_INT (saves);
emit_move_insn (operands[0], operands[1]);
/* Now call the out-of-line epilogue. */
emit_insn (gen_outline_epilogue_call ());
return;
}
/* We will use this often. */
tmpreg = gen_rtx (REG, SImode, 1);
/* Try to restore RP early to avoid load/use interlocks when
RP gets used in the return (bv) instruction. This appears to still
be necessary even when we schedule the prologue and epilogue. */
if (frame_pointer_needed
&& (regs_ever_live [2] || profile_flag))
load_reg (2, -20, FRAME_POINTER_REGNUM);
/* No frame pointer, and stack is smaller than 8k. */
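/* RP was stored 20 bytes below the caller's stack pointer; the frame
has not been cut back yet, so that slot is at sp - (actual_fsize + 20)
from here.  */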
else if (! frame_pointer_needed
&& VAL_14_BITS_P (actual_fsize + 20)
&& (regs_ever_live[2] || profile_flag))
load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM);
/* General register restores. */
if (frame_pointer_needed)
{
for (i = 18, offset = local_fsize; i >= 4; i--)
if (regs_ever_live[i] && ! call_used_regs[i])
{
load_reg (i, offset, FRAME_POINTER_REGNUM);
offset += 4;
}
}
else
{
for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--)
{
if (regs_ever_live[i] && ! call_used_regs[i])
{
/* Only for the first load.
merge_sp_adjust_with_load holds the register load
with which we will merge the sp adjustment. */
if (VAL_14_BITS_P (actual_fsize + 20)
&& local_fsize == 0
&& ! merge_sp_adjust_with_load)
merge_sp_adjust_with_load = i;
else
load_reg (i, offset, STACK_POINTER_REGNUM);
offset += 4;
}
}
}
/* Align pointer properly (doubleword boundary). */
offset = (offset + 7) & ~7;
/* FP register restores. */
if (save_fregs)
{
/* Adjust the register to index off of. */
if (frame_pointer_needed)
set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset);
else
set_reg_plus_d (1, STACK_POINTER_REGNUM, offset);
/* Actually do the restores now. */
for (i = 66; i >= 48; i -= 2)
{
if (regs_ever_live[i] || regs_ever_live[i + 1])
{
emit_move_insn (gen_rtx (REG, DFmode, i),
gen_rtx (MEM, DFmode,
gen_rtx (POST_INC, DFmode, tmpreg)));
}
}
}
/* Emit a blockage insn here to keep these insns from being moved to
an earlier spot in the epilogue, or into the main instruction stream.
This is necessary as we must not cut the stack back before all the
restores are finished. */
emit_insn (gen_blockage ());
/* No frame pointer, but we have a stack greater than 8k. We restore
%r2 very late in this case. (All other cases are restored as early
as possible.) */
if (! frame_pointer_needed
&& ! VAL_14_BITS_P (actual_fsize + 20)
&& (regs_ever_live[2] || profile_flag))
{
set_reg_plus_d (STACK_POINTER_REGNUM,
STACK_POINTER_REGNUM,
- actual_fsize);
/* This used to try and be clever by not depending on the value in