| /* Subroutines for insn-output.c for HPPA. |
| Copyright (C) 1992, 93-98, 1999 Free Software Foundation, Inc. |
| Contributed by Tim Moore (moore@cs.utah.edu), based on sparc.c |
| |
| This file is part of GNU CC. |
| |
| GNU CC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 2, or (at your option) |
| any later version. |
| |
| GNU CC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GNU CC; see the file COPYING. If not, write to |
| the Free Software Foundation, 59 Temple Place - Suite 330, |
| Boston, MA 02111-1307, USA. */ |
| |
| #include "config.h" |
| #include "system.h" |
| |
| #include "rtl.h" |
| #include "regs.h" |
| #include "hard-reg-set.h" |
| #include "real.h" |
| #include "insn-config.h" |
| #include "conditions.h" |
| #include "insn-flags.h" |
| #include "output.h" |
| #include "insn-attr.h" |
| #include "flags.h" |
| #include "tree.h" |
| #include "reload.h" |
| #include "c-tree.h" |
| #include "expr.h" |
| #include "obstack.h" |
| #include "toplev.h" |
| |
| static void restore_unscaled_index_insn_codes PROTO((rtx)); /* Forward declarations of helpers that are static to this file. */ |
| static void record_unscaled_index_insn_codes PROTO((rtx)); |
| static void pa_combine_instructions PROTO((rtx)); |
| static int pa_can_combine_p PROTO((rtx, rtx, rtx, int, rtx, rtx, rtx)); |
| static int forward_branch_p PROTO((rtx)); |
| static int shadd_constant_p PROTO((int)); |
| |
| /* Save the operands last given to a compare for use when we |
| generate a scc or bcc insn. |
| NOTE(review): hppa_branch_type is declared alongside them and |
| presumably records the kind of that compare -- confirm against |
| its users. */ |
| |
| rtx hppa_compare_op0, hppa_compare_op1; |
| enum cmp_type hppa_branch_type; |
| |
| /* Which cpu we are scheduling for. Assigned by override_options. */ |
| enum processor_type pa_cpu; |
| |
| /* String to hold which cpu we are scheduling for. override_options |
| reads it and replaces it with the canonical name of the model it |
| selected. */ |
| char *pa_cpu_string; |
| |
| /* Set by the FUNCTION_PROFILER macro. */ |
| int hp_profile_labelno; |
| |
| /* Counts for the number of callee-saved general and floating point |
| registers which were saved by the current function's prologue. */ |
| static int gr_saved, fr_saved; |
| |
| /* Whether or not the current function uses an out-of-line prologue |
| and epilogue. */ |
| static int out_of_line_prologue_epilogue; |
| |
| static rtx find_addr_reg (); /* Old-style declaration: no parameter types are given here. */ |
| |
| /* Keep track of the number of bytes we have output in the CODE subspaces |
| during this compilation so we'll know when to emit inline long-calls. */ |
| |
| unsigned int total_code_bytes; |
| |
| /* Variables to handle plabels that we discover are necessary at assembly |
| output time. They are output after the current function. |
| Both start out empty (null pointer, zero count). */ |
| |
| struct deferred_plabel |
| { |
| rtx internal_label; |
| char *name; |
| } *deferred_plabels = 0; |
| int n_deferred_plabels = 0; /* NOTE(review): presumably the number of entries in deferred_plabels -- confirm. */ |
| |
| /* Array indexed by INSN_UIDs holding the INSN_CODE of an insn which |
| uses an unscaled indexed address before delay slot scheduling. */ |
| static int *unscaled_index_insn_codes; |
| |
| /* Upper bound for the array. */ |
| static int max_unscaled_index_insn_codes_uid; |
| |
| void |
| override_options () |
| { |
| /* Default to 7100LC scheduling. */ |
| if (pa_cpu_string && ! strcmp (pa_cpu_string, "7100")) |
| { |
| pa_cpu_string = "7100"; |
| pa_cpu = PROCESSOR_7100; |
| } |
| else if (pa_cpu_string && ! strcmp (pa_cpu_string, "700")) |
| { |
| pa_cpu_string = "700"; |
| pa_cpu = PROCESSOR_700; |
| } |
| else if (pa_cpu_string == NULL |
| || ! strcmp (pa_cpu_string, "7100LC")) |
| { |
| pa_cpu_string = "7100LC"; |
| pa_cpu = PROCESSOR_7100LC; |
| } |
| else if (pa_cpu_string && ! strcmp (pa_cpu_string, "7200")) |
| { |
| pa_cpu_string = "7200"; |
| pa_cpu = PROCESSOR_7200; |
| } |
| else if (pa_cpu_string && ! strcmp (pa_cpu_string, "8000")) |
| { |
| pa_cpu_string = "8000"; |
| pa_cpu = PROCESSOR_8000; |
| } |
| else |
| { |
| warning ("Unknown -mschedule= option (%s).\nValid options are 700, 7100, 7100LC, 7200, and 8000\n", pa_cpu_string); |
| } |
| |
| if (flag_pic && TARGET_PORTABLE_RUNTIME) |
| { |
| warning ("PIC code generation is not supported in the portable runtime model\n"); |
| } |
| |
| if (flag_pic && (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS)) |
| { |
| warning ("PIC code generation is not compatible with fast indirect calls\n"); |
| } |
| |
| if (flag_pic && profile_flag) |
| { |
| warning ("PIC code generation is not compatible with profiling\n"); |
| } |
| |
| if (TARGET_SPACE && (flag_pic || profile_flag)) |
| { |
| warning ("Out of line entry/exit sequences are not compatible\n"); |
| warning ("with PIC or profiling\n"); |
| } |
| |
| if (! TARGET_GAS && write_symbols != NO_DEBUG) |
| { |
| warning ("-g is only supported when using GAS on this processor,"); |
| warning ("-g option disabled."); |
| write_symbols = NO_DEBUG; |
| } |
| } |
| |
| |
| /* Return non-zero only if OP is a register of mode MODE, |
| or CONST0_RTX. */ |
| int |
| reg_or_0_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| return (op == CONST0_RTX (mode) || register_operand (op, mode)); |
| } |
| |
| /* Return non-zero if OP is suitable for use in a call to a named |
| function. |
| |
| (???) For 2.5 try to eliminate either call_operand_address or |
| function_label_operand, they perform very similar functions. */ |
| int |
| call_operand_address (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (CONSTANT_P (op) && ! TARGET_PORTABLE_RUNTIME); |
| } |
| |
| /* Return 1 if X contains a symbolic expression. We know these |
| expressions will have one of a few well defined forms, so |
| we need only check those forms. */ |
| int |
| symbolic_expression_p (x) |
| register rtx x; |
| { |
| |
| /* Strip off any HIGH. */ |
| if (GET_CODE (x) == HIGH) |
| x = XEXP (x, 0); |
| |
| return (symbolic_operand (x, VOIDmode)); |
| } |
| |
| int |
| symbolic_operand (op, mode) |
| register rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| switch (GET_CODE (op)) |
| { |
| case SYMBOL_REF: |
| case LABEL_REF: |
| return 1; |
| case CONST: |
| op = XEXP (op, 0); |
| return ((GET_CODE (XEXP (op, 0)) == SYMBOL_REF |
| || GET_CODE (XEXP (op, 0)) == LABEL_REF) |
| && GET_CODE (XEXP (op, 1)) == CONST_INT); |
| default: |
| return 0; |
| } |
| } |
| |
| /* Return truth value of statement that OP is a symbolic memory |
| operand of mode MODE. */ |
| |
| int |
| symbolic_memory_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| if (GET_CODE (op) == SUBREG) |
| op = SUBREG_REG (op); |
| if (GET_CODE (op) != MEM) |
| return 0; |
| op = XEXP (op, 0); |
| return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == CONST |
| || GET_CODE (op) == HIGH || GET_CODE (op) == LABEL_REF); |
| } |
| |
| /* Return 1 if the operand is either a register or a memory operand that is |
| not symbolic. */ |
| |
| int |
| reg_or_nonsymb_mem_operand (op, mode) |
| register rtx op; |
| enum machine_mode mode; |
| { |
| if (register_operand (op, mode)) |
| return 1; |
| |
| if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode)) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* Return 1 if the operand is either a register, zero, or a memory operand |
| that is not symbolic. */ |
| |
| int |
| reg_or_0_or_nonsymb_mem_operand (op, mode) |
| register rtx op; |
| enum machine_mode mode; |
| { |
| if (register_operand (op, mode)) |
| return 1; |
| |
| if (op == CONST0_RTX (mode)) |
| return 1; |
| |
| if (memory_operand (op, mode) && ! symbolic_memory_operand (op, mode)) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* Accept any constant that can be moved in one instructions into a |
| general register. */ |
| int |
| cint_ok_for_move (intval) |
| HOST_WIDE_INT intval; |
| { |
| /* OK if ldo, ldil, or zdepi, can be used. */ |
| return (VAL_14_BITS_P (intval) || (intval & 0x7ff) == 0 |
| || zdepi_cint_p (intval)); |
| } |
| |
| /* Accept anything that can be moved in one instruction into a general |
| register. */ |
| int |
| move_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| if (register_operand (op, mode)) |
| return 1; |
| |
| if (GET_CODE (op) == CONSTANT_P_RTX) |
| return 1; |
| |
| if (GET_CODE (op) == CONST_INT) |
| return cint_ok_for_move (INTVAL (op)); |
| |
| if (GET_CODE (op) == SUBREG) |
| op = SUBREG_REG (op); |
| if (GET_CODE (op) != MEM) |
| return 0; |
| |
| op = XEXP (op, 0); |
| if (GET_CODE (op) == LO_SUM) |
| return (register_operand (XEXP (op, 0), Pmode) |
| && CONSTANT_P (XEXP (op, 1))); |
| |
| /* Since move_operand is only used for source operands, we can always |
| allow scaled indexing! */ |
| if (! TARGET_DISABLE_INDEXING |
| && GET_CODE (op) == PLUS |
| && ((GET_CODE (XEXP (op, 0)) == MULT |
| && GET_CODE (XEXP (XEXP (op, 0), 0)) == REG |
| && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST_INT |
| && INTVAL (XEXP (XEXP (op, 0), 1)) == GET_MODE_SIZE (mode) |
| && GET_CODE (XEXP (op, 1)) == REG) |
| || (GET_CODE (XEXP (op, 1)) == MULT |
| &&GET_CODE (XEXP (XEXP (op, 1), 0)) == REG |
| && GET_CODE (XEXP (XEXP (op, 1), 1)) == CONST_INT |
| && INTVAL (XEXP (XEXP (op, 1), 1)) == GET_MODE_SIZE (mode) |
| && GET_CODE (XEXP (op, 0)) == REG))) |
| return 1; |
| |
| return memory_address_p (mode, op); |
| } |
| |
| /* Accept REG and any CONST_INT that can be moved in one instruction into a |
| general register. */ |
| int |
| reg_or_cint_move_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| if (register_operand (op, mode)) |
| return 1; |
| |
| if (GET_CODE (op) == CONST_INT) |
| return cint_ok_for_move (INTVAL (op)); |
| |
| return 0; |
| } |
| |
| int |
| pic_label_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| if (!flag_pic) |
| return 0; |
| |
| switch (GET_CODE (op)) |
| { |
| case LABEL_REF: |
| return 1; |
| case CONST: |
| op = XEXP (op, 0); |
| return (GET_CODE (XEXP (op, 0)) == LABEL_REF |
| && GET_CODE (XEXP (op, 1)) == CONST_INT); |
| default: |
| return 0; |
| } |
| } |
| |
| int |
| fp_reg_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return reg_renumber && FP_REG_P (op); |
| } |
| |
| |
| |
| /* Return truth value of whether OP can be used as an operand in a |
| three operand arithmetic insn that accepts registers of mode MODE |
| or 14-bit signed integers. */ |
| int |
| arith_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| return (register_operand (op, mode) |
| || (GET_CODE (op) == CONST_INT && INT_14_BITS (op))); |
| } |
| |
| /* Return truth value of whether OP can be used as an operand in a |
| three operand arithmetic insn that accepts registers of mode MODE |
| or 11-bit signed integers. */ |
| int |
| arith11_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| return (register_operand (op, mode) |
| || (GET_CODE (op) == CONST_INT && INT_11_BITS (op))); |
| } |
| |
| /* A constant integer suitable for use in a PRE_MODIFY memory |
| reference. */ |
| int |
| pre_cint_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == CONST_INT |
| && INTVAL (op) >= -0x2000 && INTVAL (op) < 0x10); |
| } |
| |
| /* A constant integer suitable for use in a POST_MODIFY memory |
| reference. */ |
| int |
| post_cint_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == CONST_INT |
| && INTVAL (op) < 0x2000 && INTVAL (op) >= -0x10); |
| } |
| |
| int |
| arith_double_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| return (register_operand (op, mode) |
| || (GET_CODE (op) == CONST_DOUBLE |
| && GET_MODE (op) == mode |
| && VAL_14_BITS_P (CONST_DOUBLE_LOW (op)) |
| && ((CONST_DOUBLE_HIGH (op) >= 0) |
| == ((CONST_DOUBLE_LOW (op) & 0x1000) == 0)))); |
| } |
| |
| /* Return truth value of whether OP is a integer which fits the |
| range constraining immediate operands in three-address insns, or |
| is an integer register. */ |
| |
| int |
| ireg_or_int5_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return ((GET_CODE (op) == CONST_INT && INT_5_BITS (op)) |
| || (GET_CODE (op) == REG && REGNO (op) > 0 && REGNO (op) < 32)); |
| } |
| |
| /* Return truth value of whether OP is a integer which fits the |
| range constraining immediate operands in three-address insns. */ |
| |
| int |
| int5_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == CONST_INT && INT_5_BITS (op)); |
| } |
| |
| int |
| uint5_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == CONST_INT && INT_U5_BITS (op)); |
| } |
| |
| int |
| int11_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == CONST_INT && INT_11_BITS (op)); |
| } |
| |
| int |
| uint32_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| #if HOST_BITS_PER_WIDE_INT > 32 |
| /* All allowed constants will fit a CONST_INT. */ |
| return (GET_CODE (op) == CONST_INT |
| && (INTVAL (op) >= 0 && INTVAL (op) < 0x100000000L)); |
| #else |
| return (GET_CODE (op) == CONST_INT |
| || (GET_CODE (op) == CONST_DOUBLE |
| && CONST_DOUBLE_HIGH (op) == 0)); |
| #endif |
| } |
| |
| int |
| arith5_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| return register_operand (op, mode) || int5_operand (op, mode); |
| } |
| |
| /* True iff zdepi can be used to generate this CONST_INT. */ |
| int |
| zdepi_cint_p (x) |
| unsigned HOST_WIDE_INT x; |
| { |
| unsigned HOST_WIDE_INT lsb_mask, t; |
| |
| /* This might not be obvious, but it's at least fast. |
| This function is critical; we don't have the time loops would take. */ |
| lsb_mask = x & -x; |
| t = ((x >> 4) + lsb_mask) & ~(lsb_mask - 1); |
| /* Return true iff t is a power of two. */ |
| return ((t & (t - 1)) == 0); |
| } |
| |
| /* True iff depi or extru can be used to compute (reg & mask). |
| Accept bit pattern like these: |
| 0....01....1 |
| 1....10....0 |
| 1..10..01..1 */ |
| int |
| and_mask_p (mask) |
| unsigned HOST_WIDE_INT mask; |
| { |
| mask = ~mask; |
| mask += mask & -mask; |
| return (mask & (mask - 1)) == 0; |
| } |
| |
| /* True iff depi or extru can be used to compute (reg & OP). */ |
| int |
| and_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| return (register_operand (op, mode) |
| || (GET_CODE (op) == CONST_INT && and_mask_p (INTVAL (op)))); |
| } |
| |
| /* True iff depi can be used to compute (reg | MASK). */ |
| int |
| ior_mask_p (mask) |
| unsigned HOST_WIDE_INT mask; |
| { |
| mask += mask & -mask; |
| return (mask & (mask - 1)) == 0; |
| } |
| |
| /* True iff depi can be used to compute (reg | OP). */ |
| int |
| ior_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == CONST_INT && ior_mask_p (INTVAL (op))); |
| } |
| |
| int |
| lhs_lshift_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| return register_operand (op, mode) || lhs_lshift_cint_operand (op, mode); |
| } |
| |
| /* True iff OP is a CONST_INT of the forms 0...0xxxx or 0...01...1xxxx. |
| Such values can be the left hand side x in (x << r), using the zvdepi |
| instruction. */ |
| int |
| lhs_lshift_cint_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| unsigned HOST_WIDE_INT x; |
| if (GET_CODE (op) != CONST_INT) |
| return 0; |
| x = INTVAL (op) >> 4; |
| return (x & (x + 1)) == 0; |
| } |
| |
| int |
| arith32_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| return register_operand (op, mode) || GET_CODE (op) == CONST_INT; |
| } |
| |
| int |
| pc_or_label_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == PC || GET_CODE (op) == LABEL_REF); |
| } |
| |
| /* Legitimize PIC addresses. If the address is already |
| position-independent, we return ORIG. Newly generated |
| position-independent addresses go to REG. If we need more |
| than one register, we lose. */ |
| |
| rtx |
| legitimize_pic_address (orig, mode, reg) |
| rtx orig, reg; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| rtx pic_ref = orig; |
| |
| /* Labels need special handling. */ |
| if (pic_label_operand (orig)) |
| { |
| emit_insn (gen_pic_load_label (reg, orig)); |
| current_function_uses_pic_offset_table = 1; |
| return reg; |
| } |
| if (GET_CODE (orig) == SYMBOL_REF) |
| { |
| if (reg == 0) |
| abort (); |
| |
| if (flag_pic == 2) |
| { |
| emit_insn (gen_pic2_highpart (reg, pic_offset_table_rtx, orig)); |
| pic_ref |
| = gen_rtx_MEM (Pmode, |
| gen_rtx_LO_SUM (Pmode, reg, |
| gen_rtx_UNSPEC (SImode, |
| gen_rtvec (1, orig), |
| 0))); |
| } |
| else |
| pic_ref = gen_rtx_MEM (Pmode, |
| gen_rtx_PLUS (Pmode, |
| pic_offset_table_rtx, orig)); |
| current_function_uses_pic_offset_table = 1; |
| RTX_UNCHANGING_P (pic_ref) = 1; |
| emit_move_insn (reg, pic_ref); |
| return reg; |
| } |
| else if (GET_CODE (orig) == CONST) |
| { |
| rtx base; |
| |
| if (GET_CODE (XEXP (orig, 0)) == PLUS |
| && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx) |
| return orig; |
| |
| if (reg == 0) |
| abort (); |
| |
| if (GET_CODE (XEXP (orig, 0)) == PLUS) |
| { |
| base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg); |
| orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode, |
| base == reg ? 0 : reg); |
| } |
| else abort (); |
| if (GET_CODE (orig) == CONST_INT) |
| { |
| if (INT_14_BITS (orig)) |
| return plus_constant_for_output (base, INTVAL (orig)); |
| orig = force_reg (Pmode, orig); |
| } |
| pic_ref = gen_rtx_PLUS (Pmode, base, orig); |
| /* Likewise, should we set special REG_NOTEs here? */ |
| } |
| return pic_ref; |
| } |
| |
| /* Try machine-dependent ways of modifying an illegitimate address |
| to be legitimate. If we find one, return the new, valid address. |
| This macro is used in only one place: `memory_address' in explow.c. |
| |
| OLDX is the address as it was before break_out_memory_refs was called. |
| In some cases it is useful to look at this to decide what needs to be done. |
| |
| MODE and WIN are passed so that this macro can use |
| GO_IF_LEGITIMATE_ADDRESS. |
| |
| It is always safe for this macro to do nothing. It exists to recognize |
| opportunities to optimize the output. |
| |
| For the PA, transform: |
| |
| memory(X + <large int>) |
| |
| into: |
| |
| if (<large int> & mask) >= 16 |
| Y = (<large int> & ~mask) + mask + 1 Round up. |
| else |
| Y = (<large int> & ~mask) Round down. |
| Z = X + Y |
| memory (Z + (<large int> - Y)); |
| |
| This is for CSE to find several similar references, and only use one Z. |
| |
| X can either be a SYMBOL_REF or REG, but because combine can not |
| perform a 4->2 combination we do nothing for SYMBOL_REF + D where |
| D will not fit in 14 bits. |
| |
| MODE_FLOAT references allow displacements which fit in 5 bits, so use |
| 0x1f as the mask. |
| |
| MODE_INT references allow displacements which fit in 14 bits, so use |
| 0x3fff as the mask. |
| |
| This relies on the fact that most mode MODE_FLOAT references will use FP |
| registers and most mode MODE_INT references will use integer registers. |
| (In the rare case of an FP register used in an integer MODE, we depend |
| on secondary reloads to clean things up.) |
| |
| |
| It is also beneficial to handle (plus (mult (X) (Y)) (Z)) in a special |
| manner if Y is 2, 4, or 8. (allows more shadd insns and shifted indexed |
| addressing modes to be used). |
| |
| Put X and Z into registers. Then put the entire expression into |
| a register. */ |
| |
| rtx |
| hppa_legitimize_address (x, oldx, mode) |
| rtx x, oldx ATTRIBUTE_UNUSED; |
| enum machine_mode mode; |
| { |
| rtx orig = x; |
| |
| if (flag_pic) |
| return legitimize_pic_address (x, mode, gen_reg_rtx (Pmode)); |
| |
| /* Strip off CONST. */ |
| if (GET_CODE (x) == CONST) |
| x = XEXP (x, 0); |
| |
| /* Special case. Get the SYMBOL_REF into a register and use indexing. |
| That should always be safe. */ |
| if (GET_CODE (x) == PLUS |
| && GET_CODE (XEXP (x, 0)) == REG |
| && GET_CODE (XEXP (x, 1)) == SYMBOL_REF) |
| { |
| rtx reg = force_reg (SImode, XEXP (x, 1)); |
| return force_reg (SImode, gen_rtx_PLUS (SImode, reg, XEXP (x, 0))); |
| } |
| |
| /* Note we must reject symbols which represent function addresses |
| since the assembler/linker can't handle arithmetic on plabels. */ |
| if (GET_CODE (x) == PLUS |
| && GET_CODE (XEXP (x, 1)) == CONST_INT |
| && ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF |
| && !FUNCTION_NAME_P (XSTR (XEXP (x, 0), 0))) |
| || GET_CODE (XEXP (x, 0)) == REG)) |
| { |
| rtx int_part, ptr_reg; |
| int newoffset; |
| int offset = INTVAL (XEXP (x, 1)); |
| int mask = GET_MODE_CLASS (mode) == MODE_FLOAT ? 0x1f : 0x3fff; |
| |
| /* Choose which way to round the offset. Round up if we |
| are >= halfway to the next boundary. */ |
| if ((offset & mask) >= ((mask + 1) / 2)) |
| newoffset = (offset & ~ mask) + mask + 1; |
| else |
| newoffset = (offset & ~ mask); |
| |
| /* If the newoffset will not fit in 14 bits (ldo), then |
| handling this would take 4 or 5 instructions (2 to load |
| the SYMBOL_REF + 1 or 2 to load the newoffset + 1 to |
| add the new offset and the SYMBOL_REF.) Combine can |
| not handle 4->2 or 5->2 combinations, so do not create |
| them. */ |
| if (! VAL_14_BITS_P (newoffset) |
| && GET_CODE (XEXP (x, 0)) == SYMBOL_REF) |
| { |
| rtx const_part |
| = gen_rtx_CONST (VOIDmode, gen_rtx_PLUS (Pmode, |
| XEXP (x, 0), |
| GEN_INT (newoffset))); |
| rtx tmp_reg |
| = force_reg (Pmode, |
| gen_rtx_HIGH (Pmode, const_part)); |
| ptr_reg |
| = force_reg (Pmode, |
| gen_rtx_LO_SUM (Pmode, tmp_reg, const_part)); |
| } |
| else |
| { |
| if (! VAL_14_BITS_P (newoffset)) |
| int_part = force_reg (Pmode, GEN_INT (newoffset)); |
| else |
| int_part = GEN_INT (newoffset); |
| |
| ptr_reg = force_reg (Pmode, |
| gen_rtx_PLUS (Pmode, |
| force_reg (Pmode, XEXP (x, 0)), |
| int_part)); |
| } |
| return plus_constant (ptr_reg, offset - newoffset); |
| } |
| |
| /* Handle (plus (mult (a) (shadd_constant)) (b)). */ |
| |
| if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT |
| && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT |
| && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1))) |
| && (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == 'o' |
| || GET_CODE (XEXP (x, 1)) == SUBREG) |
| && GET_CODE (XEXP (x, 1)) != CONST) |
| { |
| int val = INTVAL (XEXP (XEXP (x, 0), 1)); |
| rtx reg1, reg2; |
| |
| reg1 = XEXP (x, 1); |
| if (GET_CODE (reg1) != REG) |
| reg1 = force_reg (Pmode, force_operand (reg1, 0)); |
| |
| reg2 = XEXP (XEXP (x, 0), 0); |
| if (GET_CODE (reg2) != REG) |
| reg2 = force_reg (Pmode, force_operand (reg2, 0)); |
| |
| return force_reg (Pmode, gen_rtx_PLUS (Pmode, |
| gen_rtx_MULT (Pmode, reg2, |
| GEN_INT (val)), |
| reg1)); |
| } |
| |
| /* Similarly for (plus (plus (mult (a) (shadd_constant)) (b)) (c)). |
| |
| Only do so for floating point modes since this is more speculative |
| and we lose if it's an integer store. */ |
| if (GET_CODE (x) == PLUS |
| && GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
| && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT |
| && shadd_constant_p (INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1))) |
| && (mode == SFmode || mode == DFmode)) |
| { |
| |
| /* First, try and figure out what to use as a base register. */ |
| rtx reg1, reg2, base, idx, orig_base; |
| |
| reg1 = XEXP (XEXP (x, 0), 1); |
| reg2 = XEXP (x, 1); |
| base = NULL_RTX; |
| idx = NULL_RTX; |
| |
| /* Make sure they're both regs. If one was a SYMBOL_REF [+ const], |
| then emit_move_sequence will turn on REGNO_POINTER_FLAG so we'll |
| know it's a base register below. */ |
| if (GET_CODE (reg1) != REG) |
| reg1 = force_reg (Pmode, force_operand (reg1, 0)); |
| |
| if (GET_CODE (reg2) != REG) |
| reg2 = force_reg (Pmode, force_operand (reg2, 0)); |
| |
| /* Figure out what the base and index are. */ |
| |
| if (GET_CODE (reg1) == REG |
| && REGNO_POINTER_FLAG (REGNO (reg1))) |
| { |
| base = reg1; |
| orig_base = XEXP (XEXP (x, 0), 1); |
| idx = gen_rtx_PLUS (Pmode, |
| gen_rtx_MULT (Pmode, |
| XEXP (XEXP (XEXP (x, 0), 0), 0), |
| XEXP (XEXP (XEXP (x, 0), 0), 1)), |
| XEXP (x, 1)); |
| } |
| else if (GET_CODE (reg2) == REG |
| && REGNO_POINTER_FLAG (REGNO (reg2))) |
| { |
| base = reg2; |
| orig_base = XEXP (x, 1); |
| idx = XEXP (x, 0); |
| } |
| |
| if (base == 0) |
| return orig; |
| |
| /* If the index adds a large constant, try to scale the |
| constant so that it can be loaded with only one insn. */ |
| if (GET_CODE (XEXP (idx, 1)) == CONST_INT |
| && VAL_14_BITS_P (INTVAL (XEXP (idx, 1)) |
| / INTVAL (XEXP (XEXP (idx, 0), 1))) |
| && INTVAL (XEXP (idx, 1)) % INTVAL (XEXP (XEXP (idx, 0), 1)) == 0) |
| { |
| /* Divide the CONST_INT by the scale factor, then add it to A. */ |
| int val = INTVAL (XEXP (idx, 1)); |
| |
| val /= INTVAL (XEXP (XEXP (idx, 0), 1)); |
| reg1 = XEXP (XEXP (idx, 0), 0); |
| if (GET_CODE (reg1) != REG) |
| reg1 = force_reg (Pmode, force_operand (reg1, 0)); |
| |
| reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, reg1, GEN_INT (val))); |
| |
| /* We can now generate a simple scaled indexed address. */ |
| return force_reg (Pmode, |
| gen_rtx_PLUS (Pmode, |
| gen_rtx_MULT (Pmode, reg1, |
| XEXP (XEXP (idx, 0), 1)), |
| base)); |
| } |
| |
| /* If B + C is still a valid base register, then add them. */ |
| if (GET_CODE (XEXP (idx, 1)) == CONST_INT |
| && INTVAL (XEXP (idx, 1)) <= 4096 |
| && INTVAL (XEXP (idx, 1)) >= -4096) |
| { |
| int val = INTVAL (XEXP (XEXP (idx, 0), 1)); |
| rtx reg1, reg2; |
| |
| reg1 = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, XEXP (idx, 1))); |
| |
| reg2 = XEXP (XEXP (idx, 0), 0); |
| if (GET_CODE (reg2) != CONST_INT) |
| reg2 = force_reg (Pmode, force_operand (reg2, 0)); |
| |
| return force_reg (Pmode, gen_rtx_PLUS (Pmode, |
| gen_rtx_MULT (Pmode, reg2, |
| GEN_INT (val)), |
| reg1)); |
| } |
| |
| /* Get the index into a register, then add the base + index and |
| return a register holding the result. */ |
| |
| /* First get A into a register. */ |
| reg1 = XEXP (XEXP (idx, 0), 0); |
| if (GET_CODE (reg1) != REG) |
| reg1 = force_reg (Pmode, force_operand (reg1, 0)); |
| |
| /* And get B into a register. */ |
| reg2 = XEXP (idx, 1); |
| if (GET_CODE (reg2) != REG) |
| reg2 = force_reg (Pmode, force_operand (reg2, 0)); |
| |
| reg1 = force_reg (Pmode, |
| gen_rtx_PLUS (Pmode, |
| gen_rtx_MULT (Pmode, reg1, |
| XEXP (XEXP (idx, 0), 1)), |
| reg2)); |
| |
| /* Add the result to our base register and return. */ |
| return force_reg (Pmode, gen_rtx_PLUS (Pmode, base, reg1)); |
| |
| } |
| |
| /* Uh-oh. We might have an address for x[n-100000]. This needs |
| special handling to avoid creating an indexed memory address |
| with x-100000 as the base. |
| |
| If the constant part is small enough, then it's still safe because |
| there is a guard page at the beginning and end of the data segment. |
| |
| Scaled references are common enough that we want to try and rearrange the |
| terms so that we can use indexing for these addresses too. Only |
| do the optimization for floatint point modes. */ |
| |
| if (GET_CODE (x) == PLUS |
| && symbolic_expression_p (XEXP (x, 1))) |
| { |
| /* Ugly. We modify things here so that the address offset specified |
| by the index expression is computed first, then added to x to form |
| the entire address. */ |
| |
| rtx regx1, regx2, regy1, regy2, y; |
| |
| /* Strip off any CONST. */ |
| y = XEXP (x, 1); |
| if (GET_CODE (y) == CONST) |
| y = XEXP (y, 0); |
| |
| if (GET_CODE (y) == PLUS || GET_CODE (y) == MINUS) |
| { |
| /* See if this looks like |
| (plus (mult (reg) (shadd_const)) |
| (const (plus (symbol_ref) (const_int)))) |
| |
| Where const_int is small. In that case the const |
| expression is a valid pointer for indexing. |
| |
| If const_int is big, but can be divided evenly by shadd_const |
| and added to (reg). This allows more scaled indexed addresses. */ |
| if (GET_CODE (XEXP (y, 0)) == SYMBOL_REF |
| && GET_CODE (XEXP (x, 0)) == MULT |
| && GET_CODE (XEXP (y, 1)) == CONST_INT |
| && INTVAL (XEXP (y, 1)) >= -4096 |
| && INTVAL (XEXP (y, 1)) <= 4095 |
| && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT |
| && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))) |
| { |
| int val = INTVAL (XEXP (XEXP (x, 0), 1)); |
| rtx reg1, reg2; |
| |
| reg1 = XEXP (x, 1); |
| if (GET_CODE (reg1) != REG) |
| reg1 = force_reg (Pmode, force_operand (reg1, 0)); |
| |
| reg2 = XEXP (XEXP (x, 0), 0); |
| if (GET_CODE (reg2) != REG) |
| reg2 = force_reg (Pmode, force_operand (reg2, 0)); |
| |
| return force_reg (Pmode, |
| gen_rtx_PLUS (Pmode, |
| gen_rtx_MULT (Pmode, reg2, |
| GEN_INT (val)), |
| reg1)); |
| } |
| else if ((mode == DFmode || mode == SFmode) |
| && GET_CODE (XEXP (y, 0)) == SYMBOL_REF |
| && GET_CODE (XEXP (x, 0)) == MULT |
| && GET_CODE (XEXP (y, 1)) == CONST_INT |
| && INTVAL (XEXP (y, 1)) % INTVAL (XEXP (XEXP (x, 0), 1)) == 0 |
| && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT |
| && shadd_constant_p (INTVAL (XEXP (XEXP (x, 0), 1)))) |
| { |
| regx1 |
| = force_reg (Pmode, GEN_INT (INTVAL (XEXP (y, 1)) |
| / INTVAL (XEXP (XEXP (x, 0), 1)))); |
| regx2 = XEXP (XEXP (x, 0), 0); |
| if (GET_CODE (regx2) != REG) |
| regx2 = force_reg (Pmode, force_operand (regx2, 0)); |
| regx2 = force_reg (Pmode, gen_rtx_fmt_ee (GET_CODE (y), Pmode, |
| regx2, regx1)); |
| return force_reg (Pmode, |
| gen_rtx_PLUS (Pmode, |
| gen_rtx_MULT (Pmode, regx2, |
| XEXP (XEXP (x, 0), |
| 1)), |
| force_reg (Pmode, XEXP (y, 0)))); |
| } |
| else if (GET_CODE (XEXP (y, 1)) == CONST_INT |
| && INTVAL (XEXP (y, 1)) >= -4096 |
| && INTVAL (XEXP (y, 1)) <= 4095) |
| { |
| /* This is safe because of the guard page at the |
| beginning and end of the data space. Just |
| return the original address. */ |
| return orig; |
| } |
| else |
| { |
| /* Doesn't look like one we can optimize. */ |
| regx1 = force_reg (Pmode, force_operand (XEXP (x, 0), 0)); |
| regy1 = force_reg (Pmode, force_operand (XEXP (y, 0), 0)); |
| regy2 = force_reg (Pmode, force_operand (XEXP (y, 1), 0)); |
| regx1 = force_reg (Pmode, |
| gen_rtx_fmt_ee (GET_CODE (y), Pmode, |
| regx1, regy2)); |
| return force_reg (Pmode, gen_rtx_PLUS (Pmode, regx1, regy1)); |
| } |
| } |
| } |
| |
| return orig; |
| } |
| |
| /* For the HPPA, REG and REG+CONST is cost 0 |
| and addresses involving symbolic constants are cost 2. |
| |
| PIC addresses are very expensive. |
| |
| It is no coincidence that this has the same structure |
| as GO_IF_LEGITIMATE_ADDRESS. */ |
| int |
| hppa_address_cost (X) |
| rtx X; |
| { |
| if (GET_CODE (X) == PLUS) |
| return 1; |
| else if (GET_CODE (X) == LO_SUM) |
| return 1; |
| else if (GET_CODE (X) == HIGH) |
| return 2; |
| return 4; |
| } |
| |
| /* Emit insns to move operands[1] into operands[0]. |
| |
| OPERANDS[0] is the destination and OPERANDS[1] the source; both |
| entries may be overwritten here, including on the paths that |
| return 0. MODE is the machine mode of the move. SCRATCH_REG, |
| when nonzero, is a register this function may use as a temporary; |
| the reload-time operand substitutions and the secondary-reload |
| cases below are only attempted when it is nonzero. |
| |
| Return 1 if we have written out everything that needs to be done to |
| do the move. Otherwise, return 0 and the caller will emit the move |
| normally. */ |
| |
| int |
| emit_move_sequence (operands, mode, scratch_reg) |
| rtx *operands; |
| enum machine_mode mode; |
| rtx scratch_reg; |
| { |
| register rtx operand0 = operands[0]; |
| register rtx operand1 = operands[1]; |
| register rtx tem; |
| /* During reload, and only when a scratch register was supplied, |
| substitute each pseudo register operand (or SUBREG of a pseudo) |
| with its reg_equiv_mem entry, then give find_replacement a chance |
| to rewrite the address of any MEM operand. */ |
| if (scratch_reg |
| && reload_in_progress && GET_CODE (operand0) == REG |
| && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) |
| operand0 = reg_equiv_mem[REGNO (operand0)]; |
| else if (scratch_reg |
| && reload_in_progress && GET_CODE (operand0) == SUBREG |
| && GET_CODE (SUBREG_REG (operand0)) == REG |
| && REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER) |
| { |
| SUBREG_REG (operand0) = reg_equiv_mem[REGNO (SUBREG_REG (operand0))]; |
| operand0 = alter_subreg (operand0); |
| } |
| |
| if (scratch_reg |
| && reload_in_progress && GET_CODE (operand1) == REG |
| && REGNO (operand1) >= FIRST_PSEUDO_REGISTER) |
| operand1 = reg_equiv_mem[REGNO (operand1)]; |
| else if (scratch_reg |
| && reload_in_progress && GET_CODE (operand1) == SUBREG |
| && GET_CODE (SUBREG_REG (operand1)) == REG |
| && REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER) |
| { |
| SUBREG_REG (operand1) = reg_equiv_mem[REGNO (SUBREG_REG (operand1))]; |
| operand1 = alter_subreg (operand1); |
| } |
| |
| if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM |
| && ((tem = find_replacement (&XEXP (operand0, 0))) |
| != XEXP (operand0, 0))) |
| operand0 = gen_rtx_MEM (GET_MODE (operand0), tem); |
| if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM |
| && ((tem = find_replacement (&XEXP (operand1, 0))) |
| != XEXP (operand1, 0))) |
| operand1 = gen_rtx_MEM (GET_MODE (operand1), tem); |
| |
| /* Handle secondary reloads for loads/stores of FP registers from |
| REG+D addresses where D does not fit in 5 bits, including |
| (subreg (mem (addr))) cases. |
| |
| This first arm is the load: the address is computed into the |
| scratch register and the FP register is then loaded through it. */ |
| if (fp_reg_operand (operand0, mode) |
| && ((GET_CODE (operand1) == MEM |
| && ! memory_address_p (DFmode, XEXP (operand1, 0))) |
| || ((GET_CODE (operand1) == SUBREG |
| && GET_CODE (XEXP (operand1, 0)) == MEM |
| && !memory_address_p (DFmode, XEXP (XEXP (operand1, 0), 0))))) |
| && scratch_reg) |
| { |
| if (GET_CODE (operand1) == SUBREG) |
| operand1 = XEXP (operand1, 0); |
| |
| scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg)); /* Same hard register, viewed in SImode. */ |
| |
| /* D might not fit in 14 bits either; for such cases load D into |
| scratch reg, then combine it with the base using the address's |
| own rtx code. */ |
| if (!memory_address_p (SImode, XEXP (operand1, 0))) |
| { |
| emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); |
| emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, 0)), |
| SImode, |
| XEXP (XEXP (operand1, 0), 0), |
| scratch_reg)); |
| } |
| else |
| emit_move_insn (scratch_reg, XEXP (operand1, 0)); |
| emit_insn (gen_rtx_SET (VOIDmode, operand0, gen_rtx_MEM (mode, |
| scratch_reg))); |
| return 1; |
| } |
| else if (fp_reg_operand (operand1, mode) /* Store arm of the same secondary reload. */ |
| && ((GET_CODE (operand0) == MEM |
| && ! memory_address_p (DFmode, XEXP (operand0, 0))) |
| || ((GET_CODE (operand0) == SUBREG) |
| && GET_CODE (XEXP (operand0, 0)) == MEM |
| && !memory_address_p (DFmode, XEXP (XEXP (operand0, 0), 0)))) |
| && scratch_reg) |
| { |
| if (GET_CODE (operand0) == SUBREG) |
| operand0 = XEXP (operand0, 0); |
| |
| scratch_reg = gen_rtx_REG (SImode, REGNO (scratch_reg)); |
| /* D might not fit in 14 bits either; for such cases load D into |
| scratch reg. */ |
| if (!memory_address_p (SImode, XEXP (operand0, 0))) |
| { |
| emit_move_insn (scratch_reg, XEXP (XEXP (operand0, 0), 1)); |
| emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand0, |
| 0)), |
| SImode, |
| XEXP (XEXP (operand0, 0), |
| 0), |
| scratch_reg)); |
| } |
| else |
| emit_move_insn (scratch_reg, XEXP (operand0, 0)); |
| emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_MEM (mode, scratch_reg), |
| operand1)); |
| return 1; |
| } |
| /* Handle secondary reloads for loads of FP registers from constant |
| expressions by forcing the constant into memory. |
| |
| Use scratch_reg to hold the address of the memory location. |
| |
| ??? The proper fix is to change PREFERRED_RELOAD_CLASS to return |
| NO_REGS when presented with a const_int and a register class |
| containing only FP registers. Doing so unfortunately creates |
| more problems than it solves. Fix this for 2.5. */ |
| else if (fp_reg_operand (operand0, mode) |
| && CONSTANT_P (operand1) |
| && scratch_reg) |
| { |
| rtx xoperands[2]; |
| |
| /* Force the constant into memory and put the address of the |
| memory location into scratch_reg. This recurses with no |
| scratch register of its own. */ |
| xoperands[0] = scratch_reg; |
| xoperands[1] = XEXP (force_const_mem (mode, operand1), 0); |
| emit_move_sequence (xoperands, Pmode, 0); |
| |
| /* Now load the destination register. |
| NOTE(review): this SET is created with MODE rather than the |
| VOIDmode used by most SETs in this function -- confirm. */ |
| emit_insn (gen_rtx_SET (mode, operand0, gen_rtx_MEM (mode, scratch_reg))); |
| return 1; |
| } |
| /* Handle secondary reloads for SAR. These occur when trying to load |
| the SAR from memory, a FP register, or with a constant. The value |
| is first brought into scratch_reg and then copied to operand0. */ |
| else if (GET_CODE (operand0) == REG |
| && REGNO_REG_CLASS (REGNO (operand0)) == SHIFT_REGS |
| && (GET_CODE (operand1) == MEM |
| || GET_CODE (operand1) == CONST_INT |
| || (GET_CODE (operand1) == REG |
| && FP_REG_CLASS_P (REGNO_REG_CLASS (REGNO (operand1))))) |
| && scratch_reg) |
| { |
| /* D might not fit in 14 bits either; for such cases load D into |
| scratch reg. */ |
| if (GET_CODE (operand1) == MEM |
| && !memory_address_p (SImode, XEXP (operand1, 0))) |
| { |
| emit_move_insn (scratch_reg, XEXP (XEXP (operand1, 0), 1)); |
| emit_move_insn (scratch_reg, gen_rtx_fmt_ee (GET_CODE (XEXP (operand1, |
| 0)), |
| SImode, |
| XEXP (XEXP (operand1, 0), |
| 0), |
| scratch_reg)); |
| emit_move_insn (scratch_reg, gen_rtx_MEM (GET_MODE (operand1), |
| scratch_reg)); |
| } |
| else |
| emit_move_insn (scratch_reg, operand1); |
| emit_move_insn (operand0, scratch_reg); |
| return 1; |
| } |
| /* Handle most common case: storing into a register. Sources that |
| do not match any test below fall out of this arm and are |
| simplified further down. */ |
| else if (register_operand (operand0, mode)) |
| { |
| if (register_operand (operand1, mode) |
| || (GET_CODE (operand1) == CONST_INT && INT_14_BITS (operand1)) |
| || (operand1 == CONST0_RTX (mode)) |
| || (GET_CODE (operand1) == HIGH |
| && !symbolic_operand (XEXP (operand1, 0), VOIDmode)) |
| /* Only `general_operands' can come here, so MEM is ok. */ |
| || GET_CODE (operand1) == MEM) |
| { |
| /* Run this case quickly. */ |
| emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); |
| return 1; |
| } |
| } |
| else if (GET_CODE (operand0) == MEM) |
| { |
| if (mode == DFmode && operand1 == CONST0_RTX (mode) /* Before reload, store a DFmode zero via a fresh pseudo. */ |
| && !(reload_in_progress || reload_completed)) |
| { |
| rtx temp = gen_reg_rtx (DFmode); |
| |
| emit_insn (gen_rtx_SET (VOIDmode, temp, operand1)); |
| emit_insn (gen_rtx_SET (VOIDmode, operand0, temp)); |
| return 1; |
| } |
| if (register_operand (operand1, mode) || operand1 == CONST0_RTX (mode)) |
| { |
| /* Run this case quickly. */ |
| emit_insn (gen_rtx_SET (VOIDmode, operand0, operand1)); |
| return 1; |
| } |
| if (! (reload_in_progress || reload_completed)) /* Otherwise, before reload, validize the MEM and force the source into a register. */ |
| { |
| operands[0] = validize_mem (operand0); |
| operands[1] = operand1 = force_reg (mode, operand1); |
| } |
| } |
| |
| /* Simplify the source if we need to. |
| Note we do have to handle function labels here, even though we do |
| not consider them legitimate constants. Loop optimizations can |
| call the emit_move_xxx with one as a source. */ |
| if ((GET_CODE (operand1) != HIGH && immediate_operand (operand1, mode)) |
| || function_label_operand (operand1, mode) |
| || (GET_CODE (operand1) == HIGH |
| && symbolic_operand (XEXP (operand1, 0), mode))) |
| { |
| int ishighonly = 0; /* Set when the source was (high X); X is then processed in its place. */ |
| |
| if (GET_CODE (operand1) == HIGH) |
| { |
| ishighonly = 1; |
| operand1 = XEXP (operand1, 0); |
| } |
| if (symbolic_operand (operand1, mode)) |
| { |
| /* Argh. The assembler and linker can't handle arithmetic |
| involving plabels. |
| |
| So we force the plabel into memory, load operand0 from |
| the memory location, then add in the constant part. */ |
| if ((GET_CODE (operand1) == CONST |
| && GET_CODE (XEXP (operand1, 0)) == PLUS |
| && function_label_operand (XEXP (XEXP (operand1, 0), 0), Pmode)) |
| || function_label_operand (operand1, mode)) |
| { |
| rtx temp, const_part; |
| |
| /* Figure out what (if any) scratch register to use. During |
| or after reload no new pseudo is created: fall back to |
| operand0 when no scratch register was supplied. */ |
| if (reload_in_progress || reload_completed) |
| scratch_reg = scratch_reg ? scratch_reg : operand0; |
| else if (flag_pic) |
| scratch_reg = gen_reg_rtx (Pmode); |
| |
| if (GET_CODE (operand1) == CONST) |
| { |
| /* Save away the constant part of the expression. */ |
| const_part = XEXP (XEXP (operand1, 0), 1); |
| if (GET_CODE (const_part) != CONST_INT) |
| abort (); |
| |
| /* Force the function label into memory. */ |
| temp = force_const_mem (mode, XEXP (XEXP (operand1, 0), 0)); |
| } |
| else |
| { |
| /* No constant part. */ |
| const_part = NULL_RTX; |
| |
| /* Force the function label into memory. */ |
| temp = force_const_mem (mode, operand1); |
| } |
| |
| |
| /* Get the address of the memory location. PIC-ify it if |
| necessary. */ |
| temp = XEXP (temp, 0); |
| if (flag_pic) |
| temp = legitimize_pic_address (temp, mode, scratch_reg); |
| |
| /* Put the address of the memory location into our destination |
| register. */ |
| operands[1] = temp; |
| emit_move_sequence (operands, mode, scratch_reg); |
| |
| /* Now load from the memory location into our destination |
| register. */ |
| operands[1] = gen_rtx_MEM (Pmode, operands[0]); |
| emit_move_sequence (operands, mode, scratch_reg); |
| |
| /* And add back in the constant part. */ |
| if (const_part != NULL_RTX) |
| expand_inc (operand0, const_part); |
| |
| return 1; |
| } |
| |
| if (flag_pic) |
| { |
| rtx temp; |
| |
| if (reload_in_progress || reload_completed) |
| temp = scratch_reg ? scratch_reg : operand0; |
| else |
| temp = gen_reg_rtx (Pmode); |
| |
| /* (const (plus (symbol) (const_int))) must be forced to |
| memory during/after reload if the const_int will not fit |
| in 14 bits. */ |
| if (GET_CODE (operand1) == CONST |
| && GET_CODE (XEXP (operand1, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (operand1, 0), 1)) == CONST_INT |
| && !INT_14_BITS (XEXP (XEXP (operand1, 0), 1)) |
| && (reload_completed || reload_in_progress) |
| && flag_pic) /* Redundant: we are already inside `if (flag_pic)'. */ |
| { |
| operands[1] = force_const_mem (mode, operand1); |
| operands[1] = legitimize_pic_address (XEXP (operands[1], 0), |
| mode, temp); |
| emit_move_sequence (operands, mode, temp); |
| } |
| else |
| { |
| operands[1] = legitimize_pic_address (operand1, mode, temp); |
| emit_insn (gen_rtx_SET (VOIDmode, operand0, operands[1])); |
| } |
| } |
| /* On the HPPA, references to data space are supposed to use dp, |
| register 27, but showing it in the RTL inhibits various cse |
| and loop optimizations. |
| |
| The non-PIC symbolic case is emitted as a HIGH into TEMP |
| followed by either a copy of TEMP (ishighonly) or a LO_SUM. */ |
| else |
| { |
| rtx temp, set; |
| |
| if (reload_in_progress || reload_completed) |
| temp = scratch_reg ? scratch_reg : operand0; |
| else |
| temp = gen_reg_rtx (mode); |
| |
| /* Loading a SYMBOL_REF into a register makes that register |
| safe to be used as the base in an indexed address. |
| |
| Don't mark hard registers though. That loses. */ |
| if (GET_CODE (operand0) == REG |
| && REGNO (operand0) >= FIRST_PSEUDO_REGISTER) |
| REGNO_POINTER_FLAG (REGNO (operand0)) = 1; |
| if (REGNO (temp) >= FIRST_PSEUDO_REGISTER) /* NOTE(review): assumes TEMP is a REG; not checked here. */ |
| REGNO_POINTER_FLAG (REGNO (temp)) = 1; |
| if (ishighonly) |
| set = gen_rtx_SET (mode, operand0, temp); /* NOTE(review): MODE, not VOIDmode, as the SET mode -- confirm. */ |
| else |
| set = gen_rtx_SET (VOIDmode, operand0, |
| gen_rtx_LO_SUM (mode, temp, operand1)); |
| |
| emit_insn (gen_rtx_SET (VOIDmode, |
| temp, |
| gen_rtx_HIGH (mode, operand1))); |
| emit_insn (set); /* SET was built above but is emitted after the HIGH. */ |
| |
| } |
| return 1; |
| } |
| else if (GET_CODE (operand1) != CONST_INT |
| || ! cint_ok_for_move (INTVAL (operand1))) |
| { |
| rtx temp; /* Receives the HIGH part; operands[1] becomes the LO_SUM for the caller to emit. */ |
| |
| if (reload_in_progress || reload_completed) |
| temp = operand0; |
| else |
| temp = gen_reg_rtx (mode); |
| |
| emit_insn (gen_rtx_SET (VOIDmode, temp, |
| gen_rtx_HIGH (mode, operand1))); |
| operands[1] = gen_rtx_LO_SUM (mode, temp, operand1); |
| } |
| } |
| /* Now have insn-emit do whatever it normally does. */ |
| return 0; |
| } |
| |
| /* Walk the tree EXP and report whether it contains an ADDR_EXPR |
|    (meaning it will need a link/runtime reloc). |
| |
|    Returns nonzero if an ADDR_EXPR is found, zero otherwise.  Only |
|    PLUS_EXPR, MINUS_EXPR, NOP_EXPR, CONVERT_EXPR, NON_LVALUE_EXPR and |
|    CONSTRUCTOR nodes are searched recursively. */ |
| |
| int |
| reloc_needed (exp) |
|      tree exp; |
| { |
|   int code = TREE_CODE (exp); |
|   int found = 0; |
| |
|   if (code == ADDR_EXPR) |
|     return 1; |
| |
|   if (code == PLUS_EXPR || code == MINUS_EXPR) |
|     { |
|       /* Binary arithmetic: both operands are always examined. */ |
|       found = reloc_needed (TREE_OPERAND (exp, 0)); |
|       found |= reloc_needed (TREE_OPERAND (exp, 1)); |
|     } |
|   else if (code == NOP_EXPR |
|            || code == CONVERT_EXPR |
|            || code == NON_LVALUE_EXPR) |
|     { |
|       /* Wrappers: look through to the single operand. */ |
|       found = reloc_needed (TREE_OPERAND (exp, 0)); |
|     } |
|   else if (code == CONSTRUCTOR) |
|     { |
|       tree elt; |
| |
|       /* Accumulate over every element that carries a value. */ |
|       for (elt = CONSTRUCTOR_ELTS (exp); elt; elt = TREE_CHAIN (elt)) |
|         { |
|           if (TREE_VALUE (elt) != 0) |
|             found |= reloc_needed (TREE_VALUE (elt)); |
|         } |
|     } |
| |
|   /* Every other code, ERROR_MARK included, contributes nothing. */ |
|   return found; |
| } |
| |
| /* Return nonzero if OPERAND (a symbolic_operand) is treated as living |
|    in text space. |
| |
|    A CONST wrapper is stripped first.  For a SYMBOL_REF the answer is |
|    built from SYMBOL_REF_FLAG (set by ENCODE_SECTION_INFO) and |
|    CONSTANT_POOL_ADDRESS_P: when generating PIC the flag must be set |
|    and the symbol must not be a constant pool address; otherwise either |
|    one suffices.  Anything that is not a SYMBOL_REF yields 1. */ |
| |
| int |
| read_only_operand (operand) |
|      rtx operand; |
| { |
|   /* Look through (const (op X ...)) to X. */ |
|   if (GET_CODE (operand) == CONST) |
|     operand = XEXP (XEXP (operand, 0), 0); |
| |
|   if (GET_CODE (operand) != SYMBOL_REF) |
|     return 1; |
| |
|   if (flag_pic) |
|     return SYMBOL_REF_FLAG (operand) && !CONSTANT_POOL_ADDRESS_P (operand); |
| |
|   return SYMBOL_REF_FLAG (operand) || CONSTANT_POOL_ADDRESS_P (operand); |
| } |
| |
| |
| /* Return the best assembler insn template |
|    for moving operands[1] into operands[0] as a fullword. |
| |
|    A memory destination gives a store and a memory source a load. |
|    An SFmode CONST_DOUBLE source is first replaced in OPERANDS[1] by |
|    the CONST_INT holding its target bit pattern (any other mode |
|    aborts); integer constants then pick the shortest of ldi, ldil, |
|    zdepi, or an ldil/ldo pair.  Everything else is a plain copy. */ |
| char * |
| singlemove_string (operands) |
|      rtx *operands; |
| { |
|   HOST_WIDE_INT intval; |
| |
|   if (GET_CODE (operands[0]) == MEM) |
|     return "stw %r1,%0"; |
| |
|   if (GET_CODE (operands[1]) == MEM) |
|     return "ldw %1,%0"; |
| |
|   if (GET_CODE (operands[1]) == CONST_DOUBLE) |
|     { |
|       REAL_VALUE_TYPE value; |
|       long bits; |
| |
|       if (GET_MODE (operands[1]) != SFmode) |
|         abort (); |
| |
|       /* Re-express the float as an integer constant with the same |
|          target bit pattern, then let the CONST_INT code below pick |
|          the template. */ |
|       REAL_VALUE_FROM_CONST_DOUBLE (value, operands[1]); |
|       REAL_VALUE_TO_TARGET_SINGLE (value, bits); |
|       operands[1] = GEN_INT (bits); |
|     } |
| |
|   if (GET_CODE (operands[1]) != CONST_INT) |
|     return "copy %1,%0"; |
| |
|   intval = INTVAL (operands[1]); |
| |
|   if (VAL_14_BITS_P (intval)) |
|     return "ldi %1,%0"; |
| |
|   if ((intval & 0x7ff) == 0) |
|     return "ldil L'%1,%0"; |
| |
|   if (zdepi_cint_p (intval)) |
|     return "zdepi %Z1,%0"; |
| |
|   return "ldil L'%1,%0\n\tldo R'%1(%0),%0"; |
| } |
| |
| |
| /* Compute position (in OP[1]) and width (in OP[2]) |
|    useful for copying IMM to a register using the zdepi |
|    instructions.  Store the immediate value to insert in OP[0]. |
| |
|    IMM is shifted right until its least significant set bit reaches |
|    bit 0 (at most 32 shifts).  OP[1] is 31 minus that shift count. |
|    If bit 4 of the shifted value is clear, the width is 4 (or |
|    32 - lsb when lsb is above 28) and the shifted value itself is |
|    stored.  Otherwise the width is the index of the first clear bit |
|    at or above bit 5 (32 if there is none) and the low four bits are |
|    stored sign extended as a 5-bit value. */ |
| void |
| compute_zdepi_operands (imm, op) |
|      unsigned HOST_WIDE_INT imm; |
|      unsigned *op; |
| { |
|   int lsb, len; |
| |
|   /* Find the least significant set bit in IMM. */ |
|   for (lsb = 0; lsb < 32; lsb++) |
|     { |
|       if ((imm & 1) != 0) |
|         break; |
|       imm >>= 1; |
|     } |
| |
|   /* Choose variants based on *sign* of the 5-bit field. */ |
|   if ((imm & 0x10) == 0) |
|     len = (lsb <= 28) ? 4 : 32 - lsb; |
|   else |
|     { |
|       /* Find the width of the bitstring in IMM.  The probe bit is |
|          built in IMM's own unsigned type: LEN reaches 31 here and |
|          shifting a plain int 1 that far is undefined. */ |
|       for (len = 5; len < 32; len++) |
|         { |
|           if ((imm & ((unsigned HOST_WIDE_INT) 1 << len)) == 0) |
|             break; |
|         } |
| |
|       /* Sign extend IMM as a 5-bit value. */ |
|       imm = (imm & 0xf) - 0x10; |
|     } |
| |
|   op[0] = imm; |
|   op[1] = 31 - lsb; |
|   op[2] = len; |
| } |
| |
| /* Output assembler code to perform a doubleword move insn |
|    with operands OPERANDS. |
| |
|    OPERANDS[0] is the destination and OPERANDS[1] the source.  At |
|    least one of the two must be a register, otherwise we abort. |
|    Entries of OPERANDS may be rewritten along the way.  The return |
|    value is an output template for whatever is still to be emitted; |
|    it is "" when everything has already been written with |
|    output_asm_insn. */ |
| |
| char * |
| output_move_double (operands) |
|      rtx *operands; |
| { |
|   enum { REGOP, OFFSOP, MEMOP, CNSTOP, RNDOP } optype0, optype1; |
|   rtx latehalf[2]; |
|   rtx addreg0 = 0, addreg1 = 0; |
| |
|   /* First classify both operands. */ |
| |
|   if (REG_P (operands[0])) |
|     optype0 = REGOP; |
|   else if (offsettable_memref_p (operands[0])) |
|     optype0 = OFFSOP; |
|   else if (GET_CODE (operands[0]) == MEM) |
|     optype0 = MEMOP; |
|   else |
|     optype0 = RNDOP; |
| |
|   if (REG_P (operands[1])) |
|     optype1 = REGOP; |
|   else if (CONSTANT_P (operands[1])) |
|     optype1 = CNSTOP; |
|   else if (offsettable_memref_p (operands[1])) |
|     optype1 = OFFSOP; |
|   else if (GET_CODE (operands[1]) == MEM) |
|     optype1 = MEMOP; |
|   else |
|     optype1 = RNDOP; |
| |
|   /* Check for the cases that the operand constraints are not |
|      supposed to allow to happen.  Abort if we get one, |
|      because generating code for these cases is painful. */ |
| |
|   if (optype0 != REGOP && optype1 != REGOP) |
|     abort (); |
| |
|   /* Handle auto decrementing and incrementing loads and stores |
|      specifically, since the structure of the function doesn't work |
|      for them without major modification.  Do it better when we learn |
|      this port about the general inc/dec addressing of PA. |
|      (This was written by tege.  Chide him if it doesn't work.) */ |
| |
|   if (optype0 == MEMOP) |
|     { |
|       /* We have to output the address syntax ourselves, since print_operand |
|          doesn't deal with the addresses we want to use.  Fix this later. */ |
| |
|       rtx addr = XEXP (operands[0], 0); |
|       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) |
|         { |
|           rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); |
| |
|           operands[0] = XEXP (addr, 0); |
|           if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG) |
|             abort (); |
| |
|           if (!reg_overlap_mentioned_p (high_reg, addr)) |
|             { |
|               /* No overlap between high target register and address |
|                  register.  (We do this in a non-obvious way to |
|                  save a register file writeback) */ |
|               if (GET_CODE (addr) == POST_INC) |
|                 return "stws,ma %1,8(0,%0)\n\tstw %R1,-4(0,%0)"; |
|               return "stws,ma %1,-8(0,%0)\n\tstw %R1,12(0,%0)"; |
|             } |
|           else |
|             abort(); |
|         } |
|       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) |
|         { |
|           rtx high_reg = gen_rtx_SUBREG (SImode, operands[1], 0); |
| |
|           operands[0] = XEXP (addr, 0); |
|           if (GET_CODE (operands[1]) != REG || GET_CODE (operands[0]) != REG) |
|             abort (); |
| |
|           if (!reg_overlap_mentioned_p (high_reg, addr)) |
|             { |
|               /* No overlap between high target register and address |
|                  register.  (We do this in a non-obvious way to |
|                  save a register file writeback) */ |
|               if (GET_CODE (addr) == PRE_INC) |
|                 return "stws,mb %1,8(0,%0)\n\tstw %R1,4(0,%0)"; |
|               return "stws,mb %1,-8(0,%0)\n\tstw %R1,4(0,%0)"; |
|             } |
|           else |
|             abort(); |
|         } |
|     } |
|   if (optype1 == MEMOP) |
|     { |
|       /* We have to output the address syntax ourselves, since print_operand |
|          doesn't deal with the addresses we want to use.  Fix this later. */ |
| |
|       rtx addr = XEXP (operands[1], 0); |
|       if (GET_CODE (addr) == POST_INC || GET_CODE (addr) == POST_DEC) |
|         { |
|           rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); |
| |
|           operands[1] = XEXP (addr, 0); |
|           if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG) |
|             abort (); |
| |
|           if (!reg_overlap_mentioned_p (high_reg, addr)) |
|             { |
|               /* No overlap between high target register and address |
|                  register.  (We do this in a non-obvious way to |
|                  save a register file writeback) */ |
|               if (GET_CODE (addr) == POST_INC) |
|                 return "ldws,ma 8(0,%1),%0\n\tldw -4(0,%1),%R0"; |
|               return "ldws,ma -8(0,%1),%0\n\tldw 12(0,%1),%R0"; |
|             } |
|           else |
|             { |
|               /* This is an undefined situation.  We should load into the |
|                  address register *and* update that register.  Probably |
|                  we don't need to handle this at all. */ |
|               if (GET_CODE (addr) == POST_INC) |
|                 return "ldw 4(0,%1),%R0\n\tldws,ma 8(0,%1),%0"; |
|               return "ldw 4(0,%1),%R0\n\tldws,ma -8(0,%1),%0"; |
|             } |
|         } |
|       else if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC) |
|         { |
|           rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); |
| |
|           operands[1] = XEXP (addr, 0); |
|           if (GET_CODE (operands[0]) != REG || GET_CODE (operands[1]) != REG) |
|             abort (); |
| |
|           if (!reg_overlap_mentioned_p (high_reg, addr)) |
|             { |
|               /* No overlap between high target register and address |
|                  register.  (We do this in a non-obvious way to |
|                  save a register file writeback) */ |
|               if (GET_CODE (addr) == PRE_INC) |
|                 return "ldws,mb 8(0,%1),%0\n\tldw 4(0,%1),%R0"; |
|               return "ldws,mb -8(0,%1),%0\n\tldw 4(0,%1),%R0"; |
|             } |
|           else |
|             { |
|               /* This is an undefined situation.  We should load into the |
|                  address register *and* update that register.  Probably |
|                  we don't need to handle this at all. */ |
|               if (GET_CODE (addr) == PRE_INC) |
|                 return "ldw 12(0,%1),%R0\n\tldws,mb 8(0,%1),%0"; |
|               return "ldw -4(0,%1),%R0\n\tldws,mb -8(0,%1),%0"; |
|             } |
|         } |
|       else if (GET_CODE (addr) == PLUS |
|                && GET_CODE (XEXP (addr, 0)) == MULT) |
|         { |
|           /* Scaled index: form the address with a shift-and-add into |
|              one half of the destination, then load both words through |
|              it.  The half chosen to hold the address is loaded last. */ |
|           rtx high_reg = gen_rtx_SUBREG (SImode, operands[0], 0); |
| |
|           if (!reg_overlap_mentioned_p (high_reg, addr)) |
|             { |
|               /* Four slots: the template below uses %0 through %3. */ |
|               rtx xoperands[4]; |
| |
|               xoperands[0] = high_reg; |
|               xoperands[1] = XEXP (addr, 1); |
|               xoperands[2] = XEXP (XEXP (addr, 0), 0); |
|               xoperands[3] = XEXP (XEXP (addr, 0), 1); |
|               output_asm_insn ("sh%O3addl %2,%1,%0", xoperands); |
|               return "ldw 4(0,%0),%R0\n\tldw 0(0,%0),%0"; |
|             } |
|           else |
|             { |
|               /* Four slots: the template below uses %0 through %3. */ |
|               rtx xoperands[4]; |
| |
|               xoperands[0] = high_reg; |
|               xoperands[1] = XEXP (addr, 1); |
|               xoperands[2] = XEXP (XEXP (addr, 0), 0); |
|               xoperands[3] = XEXP (XEXP (addr, 0), 1); |
|               output_asm_insn ("sh%O3addl %2,%1,%R0", xoperands); |
|               return "ldw 0(0,%R0),%0\n\tldw 4(0,%R0),%R0"; |
|             } |
| |
|         } |
|     } |
| |
|   /* If an operand is an unoffsettable memory ref, find a register |
|      we can increment temporarily to make it refer to the second word. */ |
| |
|   if (optype0 == MEMOP) |
|     addreg0 = find_addr_reg (XEXP (operands[0], 0)); |
| |
|   if (optype1 == MEMOP) |
|     addreg1 = find_addr_reg (XEXP (operands[1], 0)); |
| |
|   /* Ok, we can do one word at a time. |
|      Normally we do the low-numbered word first. |
| |
|      In either case, set up in LATEHALF the operands to use |
|      for the high-numbered word and in some cases alter the |
|      operands in OPERANDS to be suitable for the low-numbered word. */ |
| |
|   if (optype0 == REGOP) |
|     latehalf[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); |
|   else if (optype0 == OFFSOP) |
|     latehalf[0] = adj_offsettable_operand (operands[0], 4); |
|   else |
|     latehalf[0] = operands[0]; |
| |
|   if (optype1 == REGOP) |
|     latehalf[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1); |
|   else if (optype1 == OFFSOP) |
|     latehalf[1] = adj_offsettable_operand (operands[1], 4); |
|   else if (optype1 == CNSTOP) |
|     split_double (operands[1], &operands[1], &latehalf[1]); |
|   else |
|     latehalf[1] = operands[1]; |
| |
|   /* If the first move would clobber the source of the second one, |
|      do them in the other order. |
| |
|      This can happen in two cases: |
| |
|      mem -> register where the first half of the destination register |
|      is the same register used in the memory's address.  Reload |
|      can create such insns. |
| |
|      mem in this case will be either register indirect or register |
|      indirect plus a valid offset. |
| |
|      register -> register move where REGNO(dst) == REGNO(src + 1) |
|      someone (Tim/Tege?) claimed this can happen for parameter loads. |
| |
|      Handle mem -> register case first. */ |
|   if (optype0 == REGOP |
|       && (optype1 == MEMOP || optype1 == OFFSOP) |
|       && refers_to_regno_p (REGNO (operands[0]), REGNO (operands[0]) + 1, |
|                             operands[1], 0)) |
|     { |
|       /* Do the late half first. */ |
|       if (addreg1) |
|         output_asm_insn ("ldo 4(%0),%0", &addreg1); |
|       output_asm_insn (singlemove_string (latehalf), latehalf); |
| |
|       /* Then clobber. */ |
|       if (addreg1) |
|         output_asm_insn ("ldo -4(%0),%0", &addreg1); |
|       return singlemove_string (operands); |
|     } |
| |
|   /* Now handle register -> register case. */ |
|   if (optype0 == REGOP && optype1 == REGOP |
|       && REGNO (operands[0]) == REGNO (operands[1]) + 1) |
|     { |
|       output_asm_insn (singlemove_string (latehalf), latehalf); |
|       return singlemove_string (operands); |
|     } |
| |
|   /* Normal case: do the two words, low-numbered first. */ |
| |
|   output_asm_insn (singlemove_string (operands), operands); |
| |
|   /* Make any unoffsettable addresses point at high-numbered word. */ |
|   if (addreg0) |
|     output_asm_insn ("ldo 4(%0),%0", &addreg0); |
|   if (addreg1) |
|     output_asm_insn ("ldo 4(%0),%0", &addreg1); |
| |
|   /* Do that word. */ |
|   output_asm_insn (singlemove_string (latehalf), latehalf); |
| |
|   /* Undo the adds we just did. */ |
|   if (addreg0) |
|     output_asm_insn ("ldo -4(%0),%0", &addreg0); |
|   if (addreg1) |
|     output_asm_insn ("ldo -4(%0),%0", &addreg1); |
| |
|   return ""; |
| } |
| |
| /* Output assembler code for a doubleword move in which an FP |
|    register may be involved, with operands OPERANDS. |
| |
|    Handles FP register <- FP register or zero (fcpy,dbl), FP register |
|    <- anything else (fldd), anything <- FP register (fstd), and a zero |
|    constant into a REG, which is written as two copies of %r0.  Every |
|    other combination aborts.  Always returns "". */ |
| char * |
| output_fp_move_double (operands) |
|      rtx *operands; |
| { |
|   rtx zero = CONST0_RTX (GET_MODE (operands[0])); |
|   rtx pair[2]; |
| |
|   if (FP_REG_P (operands[0])) |
|     { |
|       if (FP_REG_P (operands[1]) || operands[1] == zero) |
|         output_asm_insn ("fcpy,dbl %r1,%0", operands); |
|       else |
|         output_asm_insn ("fldd%F1 %1,%0", operands); |
|       return ""; |
|     } |
| |
|   if (FP_REG_P (operands[1])) |
|     { |
|       output_asm_insn ("fstd%F0 %1,%0", operands); |
|       return ""; |
|     } |
| |
|   /* Only a zero source is left that we know how to handle. */ |
|   if (operands[1] != zero) |
|     abort (); |
| |
|   /* This is a pain.  A non-register destination would mean coping |
|      with an arbitrary address here, including pre/post |
|      increment/decrement, so the MD is expected to avoid it. */ |
|   if (GET_CODE (operands[0]) != REG) |
|     abort (); |
| |
|   /* Clear the register and the one numbered right after it. */ |
|   pair[0] = operands[0]; |
|   pair[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1); |
|   output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", pair); |
|   return ""; |
| } |
| |
| /* Return a REG that occurs in ADDR with coefficient 1. |
|    ADDR can be effectively incremented by incrementing REG. |
| |
|    PLUS nodes are peeled one at a time: a REG operand is preferred |
|    (the first operand before the second); failing that, the operand |
|    opposite a constant is followed.  We abort if a PLUS offers |
|    neither, or if what remains in the end is not a REG. */ |
| |
| static rtx |
| find_addr_reg (addr) |
|      rtx addr; |
| { |
|   while (GET_CODE (addr) == PLUS) |
|     { |
|       rtx lhs = XEXP (addr, 0); |
|       rtx rhs = XEXP (addr, 1); |
| |
|       if (GET_CODE (lhs) == REG) |
|         addr = lhs; |
|       else if (GET_CODE (rhs) == REG) |
|         addr = rhs; |
|       else if (CONSTANT_P (lhs)) |
|         addr = rhs; |
|       else if (CONSTANT_P (rhs)) |
|         addr = lhs; |
|       else |
|         abort (); |
|     } |
| |
|   if (GET_CODE (addr) != REG) |
|     abort (); |
| |
|   return addr; |
| } |
| |
| /* Emit code to perform a block move. |
| |
|    OPERANDS[0] is the destination pointer as a REG, clobbered. |
|    OPERANDS[1] is the source pointer as a REG, clobbered. |
|    OPERANDS[2] is a register for temporary storage. |
|    OPERANDS[3] is a register for temporary storage. |
|    OPERANDS[4] is the size as a CONST_INT; it is overwritten here. |
|    OPERANDS[5] is the alignment safe to use, as a CONST_INT. |
|    OPERANDS[6] is another temporary register. |
| |
|    SIZE_IS_CONSTANT is unused.  Always returns "" (an alignment other |
|    than 1, 2 or 4 after clamping aborts). */ |
| |
| char * |
| output_block_move (operands, size_is_constant) |
|      rtx *operands; |
|      int size_is_constant ATTRIBUTE_UNUSED; |
| { |
|   unsigned long n_bytes = INTVAL (operands[4]); |
|   int align = INTVAL (operands[5]); |
| |
|   /* We can't move more than four bytes at a time because the PA |
|      has no longer integer move insns.  (Could use fp mem ops?) */ |
|   if (align > 4) |
|     align = 4; |
| |
|   /* Note that we know each loop below will execute at least twice |
|      (else we would have open-coded the copy). */ |
| |
|   if (align == 4) |
|     { |
|       /* Pre-adjust the loop counter. */ |
|       operands[4] = GEN_INT (n_bytes - 8); |
|       output_asm_insn ("ldi %4,%2", operands); |
| |
|       /* Copying loop: two words per iteration. */ |
|       output_asm_insn ("ldws,ma 4(0,%1),%3", operands); |
|       output_asm_insn ("ldws,ma 4(0,%1),%6", operands); |
|       output_asm_insn ("stws,ma %3,4(0,%0)", operands); |
|       output_asm_insn ("addib,>= -8,%2,.-12", operands); |
|       output_asm_insn ("stws,ma %6,4(0,%0)", operands); |
| |
|       /* Handle the residual.  There could be up to 7 bytes of |
|          residual to copy! */ |
|       if (n_bytes % 8 != 0) |
|         { |
|           int whole_word = (n_bytes % 8 >= 4); |
|           int partial_word = (n_bytes % 4 != 0); |
| |
|           operands[4] = GEN_INT (n_bytes % 4); |
|           if (whole_word) |
|             output_asm_insn ("ldws,ma 4(0,%1),%3", operands); |
|           if (partial_word) |
|             output_asm_insn ("ldw 0(0,%1),%6", operands); |
|           if (whole_word) |
|             output_asm_insn ("stws,ma %3,4(0,%0)", operands); |
|           if (partial_word) |
|             output_asm_insn ("stbys,e %6,%4(0,%0)", operands); |
|         } |
|       return ""; |
|     } |
| |
|   if (align == 2) |
|     { |
|       /* Pre-adjust the loop counter. */ |
|       operands[4] = GEN_INT (n_bytes - 4); |
|       output_asm_insn ("ldi %4,%2", operands); |
| |
|       /* Copying loop: two halfwords per iteration. */ |
|       output_asm_insn ("ldhs,ma 2(0,%1),%3", operands); |
|       output_asm_insn ("ldhs,ma 2(0,%1),%6", operands); |
|       output_asm_insn ("sths,ma %3,2(0,%0)", operands); |
|       output_asm_insn ("addib,>= -4,%2,.-12", operands); |
|       output_asm_insn ("sths,ma %6,2(0,%0)", operands); |
| |
|       /* Handle the residual. */ |
|       if (n_bytes % 4 != 0) |
|         { |
|           int whole_half = (n_bytes % 4 >= 2); |
|           int odd_byte = (n_bytes % 2 != 0); |
| |
|           if (whole_half) |
|             output_asm_insn ("ldhs,ma 2(0,%1),%3", operands); |
|           if (odd_byte) |
|             output_asm_insn ("ldb 0(0,%1),%6", operands); |
|           if (whole_half) |
|             output_asm_insn ("sths,ma %3,2(0,%0)", operands); |
|           if (odd_byte) |
|             output_asm_insn ("stb %6,0(0,%0)", operands); |
|         } |
|       return ""; |
|     } |
| |
|   if (align == 1) |
|     { |
|       /* Pre-adjust the loop counter. */ |
|       operands[4] = GEN_INT (n_bytes - 2); |
|       output_asm_insn ("ldi %4,%2", operands); |
| |
|       /* Copying loop: two bytes per iteration. */ |
|       output_asm_insn ("ldbs,ma 1(0,%1),%3", operands); |
|       output_asm_insn ("ldbs,ma 1(0,%1),%6", operands); |
|       output_asm_insn ("stbs,ma %3,1(0,%0)", operands); |
|       output_asm_insn ("addib,>= -2,%2,.-12", operands); |
|       output_asm_insn ("stbs,ma %6,1(0,%0)", operands); |
| |
|       /* Handle the residual. */ |
|       if (n_bytes % 2 != 0) |
|         { |
|           output_asm_insn ("ldb 0(0,%1),%3", operands); |
|           output_asm_insn ("stb %3,0(0,%0)", operands); |
|         } |
|       return ""; |
|     } |
| |
|   /* Any other alignment is not handled. */ |
|   abort (); |
| } |
| |
| /* Return the length, in bytes, of the code needed for the block |
|    move INSN. |
| |
|    The counting mirrors output_block_move: six insns for the counter |
|    setup plus the copying loop, then two more for a full-width |
|    residual item and two more for a partial one.  The alignment and |
|    the byte count are read from elements 6 and 5 of INSN's pattern |
|    vector. */ |
| |
| int |
| compute_movstrsi_length (insn) |
|      rtx insn; |
| { |
|   rtx pat = PATTERN (insn); |
|   int align = INTVAL (XEXP (XVECEXP (pat, 0, 6), 0)); |
|   unsigned long n_bytes = INTVAL (XEXP (XVECEXP (pat, 0, 5), 0)); |
|   unsigned long leftover; |
|   unsigned int n_insns; |
| |
|   /* We can't move more than four bytes at a time because the PA |
|      has no longer integer move insns.  (Could use fp mem ops?) */ |
|   if (align > 4) |
|     align = 4; |
| |
|   /* The basic copying loop. */ |
|   n_insns = 6; |
| |
|   /* Residuals: whatever one loop iteration (two items) leaves over. */ |
|   leftover = n_bytes % (2 * align); |
|   if (leftover != 0) |
|     { |
|       /* One full item: a load and a store. */ |
|       if (leftover >= align) |
|         n_insns += 2; |
| |
|       /* A partial item: another load and store. */ |
|       if ((n_bytes % align) != 0) |
|         n_insns += 2; |
|     } |
| |
|   /* Lengths are expressed in bytes now; each insn is 4 bytes. */ |
|   return n_insns * 4; |
| } |
| |
| |
| /* Return the output template for a bitwise AND of OPERANDS[1] with |
|    OPERANDS[2] into OPERANDS[0]. |
| |
|    When OPERANDS[2] is a nonzero CONST_INT, its low 32 bits must be a |
|    run of ones, then a run of zeros, then ones up to bit 31 (we abort |
|    otherwise).  If the zeros reach bit 31 the mask keeps only the low |
|    bits and an `extru' is used, with OPERANDS[2] replaced by the field |
|    length.  Otherwise a `depi 0' clears the field, with OPERANDS[2] |
|    and OPERANDS[3] replaced by its position and length.  In every |
|    other case a plain `and' is returned. */ |
| char * |
| output_and (operands) |
|      rtx *operands; |
| { |
|   if (GET_CODE (operands[2]) == CONST_INT && INTVAL (operands[2]) != 0) |
|     { |
|       unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); |
|       int ls0, ls1, ms0, p, len; |
| |
|       /* The probe bits below are built in MASK's own unsigned type: |
|          the bit index reaches 31, and shifting a plain int 1 that |
|          far is undefined. */ |
| |
|       /* Lowest clear bit. */ |
|       for (ls0 = 0; ls0 < 32; ls0++) |
|         if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls0)) == 0) |
|           break; |
| |
|       /* Lowest set bit above it. */ |
|       for (ls1 = ls0; ls1 < 32; ls1++) |
|         if ((mask & ((unsigned HOST_WIDE_INT) 1 << ls1)) != 0) |
|           break; |
| |
|       /* Any clear bit above that means the mask has two holes. */ |
|       for (ms0 = ls1; ms0 < 32; ms0++) |
|         if ((mask & ((unsigned HOST_WIDE_INT) 1 << ms0)) == 0) |
|           break; |
| |
|       if (ms0 != 32) |
|         abort(); |
| |
|       if (ls1 == 32) |
|         { |
|           len = ls0; |
| |
|           if (len == 0) |
|             abort (); |
| |
|           operands[2] = GEN_INT (len); |
|           return "extru %1,31,%2,%0"; |
|         } |
|       else |
|         { |
|           /* We could use this `depi' for the case above as well, but `depi' |
|              requires one more register file access than an `extru'. */ |
| |
|           p = 31 - ls0; |
|           len = ls1 - ls0; |
| |
|           operands[2] = GEN_INT (p); |
|           operands[3] = GEN_INT (len); |
|           return "depi 0,%2,%3,%0"; |
|         } |
|     } |
|   else |
|     return "and %1,%2,%0"; |
| } |
| |
| /* Return the output template for a bitwise inclusive OR of |
|    OPERANDS[1] with the constant OPERANDS[2] into OPERANDS[0]. |
| |
|    INTVAL is applied to OPERANDS[2] unconditionally, so it must be a |
|    CONST_INT.  Zero gives a plain copy.  Otherwise the set bits must |
|    form one contiguous run within the low 32 bits (we abort if a bit |
|    is set above the run); OPERANDS[2] and OPERANDS[3] are replaced by |
|    the position and length of that run for a `depi -1'. */ |
| char * |
| output_ior (operands) |
|      rtx *operands; |
| { |
|   unsigned HOST_WIDE_INT mask = INTVAL (operands[2]); |
|   int bs0, bs1, p, len; |
| |
|   if (INTVAL (operands[2]) == 0) |
|     return "copy %1,%0"; |
| |
|   /* The probe bits below are built in MASK's own unsigned type, as |
|      the range check further down already does: the bit index reaches |
|      31, and shifting a plain int 1 that far is undefined. */ |
| |
|   /* Lowest set bit. */ |
|   for (bs0 = 0; bs0 < 32; bs0++) |
|     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs0)) != 0) |
|       break; |
| |
|   /* Lowest clear bit above it, i.e. one past the end of the run. */ |
|   for (bs1 = bs0; bs1 < 32; bs1++) |
|     if ((mask & ((unsigned HOST_WIDE_INT) 1 << bs1)) == 0) |
|       break; |
| |
|   /* Nothing may be set above the run. */ |
|   if (bs1 != 32 && ((unsigned HOST_WIDE_INT) 1 << bs1) <= mask) |
|     abort(); |
| |
|   p = 31 - bs0; |
|   len = bs1 - bs0; |
| |
|   operands[2] = GEN_INT (p); |
|   operands[3] = GEN_INT (len); |
|   return "depi -1,%2,%3,%0"; |
| } |
| |
| /* Write the SIZE bytes at P to FILE as one or more .STRING |
|    directives. |
| |
|    Double quotes and backslashes are preceded by a backslash, and |
|    bytes outside the printable range ' ' .. 0176 are written as \xNN |
|    with lower-case hex digits.  Input is consumed four bytes at a |
|    time, and a new .STRING directive is started whenever adding the |
|    next group would take the current one past 243 output characters. */ |
| void |
| output_ascii (file, p, size) |
|      FILE *file; |
|      unsigned char *p; |
|      int size; |
| { |
|   char *hex = "0123456789abcdef"; |
|   unsigned char chunk[16]; /* Max space 4 chars can occupy. */ |
|   int line_len = 0; |
|   int start; |
| |
|   /* The HP assembler can only take strings of 256 characters at one |
|      time.  This is a limitation on input line length, *not* the |
|      length of the string.  Sigh.  Even worse, it seems that the |
|      restriction is in number of input characters (see \xnn & |
|      \whatever).  So we have to do this very carefully. */ |
| |
|   fputs ("\t.STRING \"", file); |
| |
|   for (start = 0; start < size; start += 4) |
|     { |
|       int count = MIN (4, size - start); |
|       int used = 0; |
|       int k; |
| |
|       /* Build the escaped text for this group of up to four bytes. */ |
|       for (k = 0; k < count; k++) |
|         { |
|           unsigned int c = p[start + k]; |
| |
|           if (c == '\"' || c == '\\') |
|             chunk[used++] = '\\'; |
| |
|           if (c < ' ' || c >= 0177) |
|             { |
|               chunk[used++] = '\\'; |
|               chunk[used++] = 'x'; |
|               chunk[used++] = hex[c / 16]; |
|               chunk[used++] = hex[c % 16]; |
|             } |
|           else |
|             chunk[used++] = c; |
|         } |
| |
|       /* Start a fresh directive if this group would not fit. */ |
|       if (line_len + used > 243) |
|         { |
|           fputs ("\"\n\t.STRING \"", file); |
|           line_len = 0; |
|         } |
| |
|       fwrite (chunk, 1, used, file); |
|       line_len += used; |
|     } |
| |
|   fputs ("\"\n", file); |
| } |
| |
| /* Try to rewrite floating point comparisons & branches to avoid |
| useless add,tr insns. |
| |
| CHECK_NOTES is nonzero if we should examine REG_DEAD notes |
| to see if FPCC is dead. CHECK_NOTES is nonzero for the |
| first attempt to remove useless add,tr insns. It is zero |
| for the second pass as reorg sometimes leaves bogus REG_DEAD |
| notes lying around. |
| |
| When CHECK_NOTES is zero we can only eliminate add,tr insns |
| when there's a 1:1 correspondence between fcmp and ftest/fbranch |
| instructions. */ |
| void |
| remove_useless_addtr_insns (insns, check_notes) |
| rtx insns; |
| int check_notes; |
| { |
| rtx insn; |
| static int pass = 0; |
| |
| /* This is fairly cheap, so always run it when optimizing. */ |
| if (optimize > 0) |
| { |
| int fcmp_count = 0; |
| int fbranch_count = 0; |
| |
| /* Walk all the insns in this function looking for fcmp & fbranch |
| instructions. Keep track of how many of each we find. */ |
| insns = get_insns (); |
| for (insn = insns; insn; insn = next_insn (insn)) |
| { |
| rtx tmp; |
| |
| /* Ignore anything that isn't an INSN or a JUMP_INSN. */ |
| if (GET_CODE (insn) != INSN && GET_CODE (insn) != JUMP_INSN) |
| continue; |
| |
| tmp = PATTERN (insn); |
| |
| /* It must be a set. */ |
| if (GET_CODE (tmp) != SET) |
| continue; |
| |
| /* If the destination is CCFP, then we've found an fcmp insn. */ |
| tmp = SET_DEST (tmp); |
| if (GET_CODE (tmp) == REG && REGNO (tmp) == 0) |
| { |
| fcmp_count++; |
| continue; |
| } |
| |
| tmp = PATTERN (insn); |
| /* If this is an fbranch instruction, bump the fbranch counter. */ |
| if (GET_CODE (tmp) == SET |
| && SET_DEST (tmp) == pc_rtx |
| && GET_CODE (SET_SRC (tmp)) == IF_THEN_ELSE |
| && GET_CODE (XEXP (SET_SRC (tmp), 0)) == NE |
| && GET_CODE (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == REG |
| && REGNO (XEXP (XEXP (SET_SRC (tmp), 0), 0)) == 0) |
| { |
| fbranch_count++; |
| continue; |
| } |
| } |
| |
| |
| /* Find all floating point compare + branch insns. If possible, |
| reverse the comparison & the branch to avoid add,tr insns. */ |
| for (insn = insns; insn; insn = next_insn (insn)) |
| { |
| rtx tmp, next; |
| |
| /* Ignore anything that isn't an INSN. */ |
| if (GET_CODE (insn) != INSN) |
| continue; |
| |
| tmp = PATTERN (insn); |
| |
| /* It must be a set. */ |
| if (GET_CODE (tmp) != SET) |
| continue; |
| |
| /* The destination must be CCFP, which is register zero. */ |
| tmp = SET_DEST (tmp); |
| if (GET_CODE (tmp) != REG || REGNO (tmp) != 0) |
| continue; |
| |
| /* INSN should be a set of CCFP. |
| |
| See if the result of this insn is used in a reversed FP |
| conditional branch. If so, reverse our condition and |
| the branch. Doing so avoids useless add,tr insns. */ |
| next = next_insn (insn); |
| while (next) |
| { |
| /* Jumps, calls and labels stop our search. */ |
| if (GET_CODE (next) == JUMP_INSN |
| || GET_CODE (next) == CALL_INSN |
| || GET_CODE (next) == CODE_LABEL) |
| break; |
| |
| /* As does another fcmp insn. */ |
| if (GET_CODE (next) == INSN |
| && GET_CODE (PATTERN (next)) == SET |
| && GET_CODE (SET_DEST (PATTERN (next))) == REG |
| && REGNO (SET_DEST (PATTERN (next))) == 0) |
| break; |
| |
| next = next_insn (next); |
| } |
| |
| /* Is NEXT_INSN a branch? */ |
| if (next |
| && GET_CODE (next) == JUMP_INSN) |
| { |
| rtx pattern = PATTERN (next); |
| |
| /* If it a reversed fp conditional branch (eg uses add,tr) |
| and CCFP dies, then reverse our conditional and the branch |
| to avoid the add,tr. */ |
| if (GET_CODE (pattern) == SET |
| && SET_DEST (pattern) == pc_rtx |
| && GET_CODE (SET_SRC (pattern)) == IF_THEN_ELSE |
| && GET_CODE (XEXP (SET_SRC (pattern), 0)) == NE |
| && GET_CODE (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == REG |
| && REGNO (XEXP (XEXP (SET_SRC (pattern), 0), 0)) == 0 |
| && GET_CODE (XEXP (SET_SRC (pattern), 1)) == PC |
| && (fcmp_count == fbranch_count |
| || (check_notes |
| && find_regno_note (next, REG_DEAD, 0)))) |
| { |
| /* Reverse the branch. */ |
| tmp = XEXP (SET_SRC (pattern), 1); |
| XEXP (SET_SRC (pattern), 1) = XEXP (SET_SRC (pattern), 2); |
| XEXP (SET_SRC (pattern), 2) = tmp; |
| INSN_CODE (next) = -1; |
| |
| /* Reverse our condition. */ |
| tmp = PATTERN (insn); |
| PUT_CODE (XEXP (tmp, 1), |
| reverse_condition (GET_CODE (XEXP (tmp, 1)))); |
| } |
| } |
| } |
| } |
| |
| pass = !pass; |
| |
| } |
| |
| /* You may have trouble believing this, but this is the HP-PA stack |
| layout. Wow. |
| |
| Offset Contents |
| |
| Variable arguments (optional; any number may be allocated) |
| |
| SP-(4*(N+9)) arg word N |
| : : |
| SP-56 arg word 5 |
| SP-52 arg word 4 |
| |
| Fixed arguments (must be allocated; may remain unused) |
| |
| SP-48 arg word 3 |
| SP-44 arg word 2 |
| SP-40 arg word 1 |
| SP-36 arg word 0 |
| |
| Frame Marker |
| |
| SP-32 External Data Pointer (DP) |
| SP-28 External sr4 |
| SP-24 External/stub RP (RP') |
| SP-20 Current RP |
| SP-16 Static Link |
| SP-12 Clean up |
| SP-8 Calling Stub RP (RP'') |
| SP-4 Previous SP |
| |
| Top of Frame |
| |
| SP-0 Stack Pointer (points to next available address) |
| |
| */ |
| |
| /* This function saves registers as follows. Registers marked with ' are |
| this function's registers (as opposed to the previous function's). |
| If a frame_pointer isn't needed, r4 is saved as a general register; |
| the space for the frame pointer is still allocated, though, to keep |
| things simple. |
| |
| |
| Top of Frame |
| |
| SP (FP') Previous FP |
| SP + 4 Alignment filler (sigh) |
| SP + 8 Space for locals reserved here. |
| . |
| . |
| . |
| SP + n All call saved register used. |
| . |
| . |
| . |
| SP + o All call saved fp registers used. |
| . |
| . |
| . |
| SP + p (SP') points to next available address. |
| |
| */ |
| |
| /* Emit RTL to store REG at the memory location specified by BASE+DISP. |
| Handle case where DISP > 8k by using the add_high_const pattern. |
| |
| Note in DISP > 8k case, we will leave the high part of the address |
| in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ |
| static void |
| store_reg (reg, disp, base) |
| int reg, disp, base; |
| { |
| if (VAL_14_BITS_P (disp)) |
| { |
| emit_move_insn (gen_rtx_MEM (SImode, |
| gen_rtx_PLUS (SImode, |
| gen_rtx_REG (SImode, base), |
| GEN_INT (disp))), |
| gen_rtx_REG (SImode, reg)); |
| } |
| else |
| { |
| emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1), |
| gen_rtx_REG (SImode, base), |
| GEN_INT (disp))); |
| emit_move_insn (gen_rtx_MEM (SImode, |
| gen_rtx_LO_SUM (SImode, |
| gen_rtx_REG (SImode, 1), |
| GEN_INT (disp))), |
| gen_rtx_REG (SImode, reg)); |
| } |
| } |
| |
| /* Emit RTL to load REG from the memory location specified by BASE+DISP. |
| Handle case where DISP > 8k by using the add_high_const pattern. |
| |
| Note in DISP > 8k case, we will leave the high part of the address |
| in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ |
| static void |
| load_reg (reg, disp, base) |
| int reg, disp, base; |
| { |
| if (VAL_14_BITS_P (disp)) |
| { |
| emit_move_insn (gen_rtx_REG (SImode, reg), |
| gen_rtx_MEM (SImode, |
| gen_rtx_PLUS (SImode, |
| gen_rtx_REG (SImode, base), |
| GEN_INT (disp)))); |
| } |
| else |
| { |
| emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1), |
| gen_rtx_REG (SImode, base), |
| GEN_INT (disp))); |
| emit_move_insn (gen_rtx_REG (SImode, reg), |
| gen_rtx_MEM (SImode, |
| gen_rtx_LO_SUM (SImode, |
| gen_rtx_REG (SImode, 1), |
| GEN_INT (disp)))); |
| } |
| } |
| |
| /* Emit RTL to set REG to the value specified by BASE+DISP. |
| Handle case where DISP > 8k by using the add_high_const pattern. |
| |
| Note in DISP > 8k case, we will leave the high part of the address |
| in %r1. There is code in expand_hppa_{prologue,epilogue} that knows this.*/ |
| static void |
| set_reg_plus_d(reg, base, disp) |
| int reg, base, disp; |
| { |
| if (VAL_14_BITS_P (disp)) |
| { |
| emit_move_insn (gen_rtx_REG (SImode, reg), |
| gen_rtx_PLUS (SImode, |
| gen_rtx_REG (SImode, base), |
| GEN_INT (disp))); |
| } |
| else |
| { |
| emit_insn (gen_add_high_const (gen_rtx_REG (SImode, 1), |
| gen_rtx_REG (SImode, base), |
| GEN_INT (disp))); |
| emit_move_insn (gen_rtx_REG (SImode, reg), |
| gen_rtx_LO_SUM (SImode, |
| gen_rtx_REG (SImode, 1), |
| GEN_INT (disp))); |
| } |
| } |
| |
/* Frame layout of the current function.  These are computed when the
   prologue is expanded and are read again when the function's
   assembler directives and its epilogue are generated.  */

/* Total size of the frame.  We need to know this to emit return insns
   from leaf procedures.  */
static int actual_fsize;

/* Size of the locals, including the 8 bytes for the frame pointer and
   filler whenever any frame is allocated.  */
static int local_fsize;

/* Nonzero if some floating point register has to be saved.  */
static int save_fregs;
| |
| int |
| compute_frame_size (size, fregs_live) |
| int size; |
| int *fregs_live; |
| { |
| extern int current_function_outgoing_args_size; |
| int i, fsize; |
| |
| /* 8 is space for frame pointer + filler. If any frame is allocated |
| we need to add this in because of STARTING_FRAME_OFFSET. */ |
| fsize = size + (size || frame_pointer_needed ? 8 : 0); |
| |
| /* We must leave enough space for all the callee saved registers |
| from 3 .. highest used callee save register since we don't |
| know if we're going to have an inline or out of line prologue |
| and epilogue. */ |
| for (i = 18; i >= 3; i--) |
| if (regs_ever_live[i]) |
| { |
| fsize += 4 * (i - 2); |
| break; |
| } |
| |
| /* Round the stack. */ |
| fsize = (fsize + 7) & ~7; |
| |
| /* We must leave enough space for all the callee saved registers |
| from 3 .. highest used callee save register since we don't |
| know if we're going to have an inline or out of line prologue |
| and epilogue. */ |
| for (i = 66; i >= 48; i -= 2) |
| if (regs_ever_live[i] || regs_ever_live[i + 1]) |
| { |
| if (fregs_live) |
| *fregs_live = 1; |
| |
| fsize += 4 * (i - 46); |
| break; |
| } |
| |
| fsize += current_function_outgoing_args_size; |
| if (! leaf_function_p () || fsize) |
| fsize += 32; |
| return (fsize + 63) & ~63; |
| } |
| |
| rtx hp_profile_label_rtx; |
| static char hp_profile_label_name[8]; |
| void |
| output_function_prologue (file, size) |
| FILE *file; |
| int size ATTRIBUTE_UNUSED; |
| { |
| /* The function's label and associated .PROC must never be |
| separated and must be output *after* any profiling declarations |
| to avoid changing spaces/subspaces within a procedure. */ |
| ASM_OUTPUT_LABEL (file, XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0)); |
| fputs ("\t.PROC\n", file); |
| |
| /* hppa_expand_prologue does the dirty work now. We just need |
| to output the assembler directives which denote the start |
| of a function. */ |
| fprintf (file, "\t.CALLINFO FRAME=%d", actual_fsize); |
| if (regs_ever_live[2] || profile_flag) |
| fputs (",CALLS,SAVE_RP", file); |
| else |
| fputs (",NO_CALLS", file); |
| |
| if (frame_pointer_needed) |
| fputs (",SAVE_SP", file); |
| |
| /* Pass on information about the number of callee register saves |
| performed in the prologue. |
| |
| The compiler is supposed to pass the highest register number |
| saved, the assembler then has to adjust that number before |
| entering it into the unwind descriptor (to account for any |
| caller saved registers with lower register numbers than the |
| first callee saved register). */ |
| if (gr_saved) |
| fprintf (file, ",ENTRY_GR=%d", gr_saved + 2); |
| |
| if (fr_saved) |
| fprintf (file, ",ENTRY_FR=%d", fr_saved + 11); |
| |
| fputs ("\n\t.ENTRY\n", file); |
| |
| /* Horrid hack. emit_function_prologue will modify this RTL in |
| place to get the expected results. */ |
| if (profile_flag) |
| ASM_GENERATE_INTERNAL_LABEL (hp_profile_label_name, "LP", |
| hp_profile_labelno); |
| |
| /* If we're using GAS and not using the portable runtime model, then |
| we don't need to accumulate the total number of code bytes. */ |
| if (TARGET_GAS && ! TARGET_PORTABLE_RUNTIME) |
| total_code_bytes = 0; |
| else if (insn_addresses) |
| { |
| unsigned int old_total = total_code_bytes; |
| |
| total_code_bytes += insn_addresses[INSN_UID (get_last_insn())]; |
| total_code_bytes += FUNCTION_BOUNDARY / BITS_PER_UNIT; |
| |
| /* Be prepared to handle overflows. */ |
| total_code_bytes = old_total > total_code_bytes ? -1 : total_code_bytes; |
| } |
| else |
| total_code_bytes = -1; |
| |
| remove_useless_addtr_insns (get_insns (), 0); |
| |
| /* Restore INSN_CODEs for insn which use unscaled indexed addresses. */ |
| restore_unscaled_index_insn_codes (get_insns ()); |
| } |
| |
| void |
| hppa_expand_prologue() |
| { |
| extern char call_used_regs[]; |
| int size = get_frame_size (); |
| int merge_sp_adjust_with_store = 0; |
| int i, offset; |
| rtx tmpreg, size_rtx; |
| |
| gr_saved = 0; |
| fr_saved = 0; |
| save_fregs = 0; |
| local_fsize = size + (size || frame_pointer_needed ? 8 : 0); |
| actual_fsize = compute_frame_size (size, &save_fregs); |
| |
| /* Compute a few things we will use often. */ |
| tmpreg = gen_rtx_REG (SImode, 1); |
| size_rtx = GEN_INT (actual_fsize); |
| |
| /* Handle out of line prologues and epilogues. */ |
| if (TARGET_SPACE) |
| { |
| rtx operands[2]; |
| int saves = 0; |
| int outline_insn_count = 0; |
| int inline_insn_count = 0; |
| |
| /* Count the number of insns for the inline and out of line |
| variants so we can choose one appropriately. |
| |
| No need to screw with counting actual_fsize operations -- they're |
| done for both inline and out of line prologues. */ |
| if (regs_ever_live[2]) |
| inline_insn_count += 1; |
| |
| if (! cint_ok_for_move (local_fsize)) |
| outline_insn_count += 2; |
| else |
| outline_insn_count += 1; |
| |
| /* Put the register save info into %r22. */ |
| for (i = 18; i >= 3; i--) |
| if (regs_ever_live[i] && ! call_used_regs[i]) |
| { |
| /* -1 because the stack adjustment is normally done in |
| the same insn as a register save. */ |
| inline_insn_count += (i - 2) - 1; |
| saves = i; |
| break; |
| } |
| |
| for (i = 66; i >= 48; i -= 2) |
| if (regs_ever_live[i] || regs_ever_live[i + 1]) |
| { |
| /* +1 needed as we load %r1 with the start of the freg |
| save area. */ |
| inline_insn_count += (i/2 - 23) + 1; |
| saves |= ((i/2 - 12 ) << 16); |
| break; |
| } |
| |
| if (frame_pointer_needed) |
| inline_insn_count += 3; |
| |
| if (! cint_ok_for_move (saves)) |
| outline_insn_count += 2; |
| else |
| outline_insn_count += 1; |
| |
| if (TARGET_PORTABLE_RUNTIME) |
| outline_insn_count += 2; |
| else |
| outline_insn_count += 1; |
| |
| /* If there's a lot of insns in the prologue, then do it as |
| an out-of-line sequence. */ |
| if (inline_insn_count > outline_insn_count) |
| { |
| /* Put the local_fisze into %r19. */ |
| operands[0] = gen_rtx_REG (SImode, 19); |
| operands[1] = GEN_INT (local_fsize); |
| emit_move_insn (operands[0], operands[1]); |
| |
| /* Put the stack size into %r21. */ |
| operands[0] = gen_rtx_REG (SImode, 21); |
| operands[1] = size_rtx; |
| emit_move_insn (operands[0], operands[1]); |
| |
| operands[0] = gen_rtx_REG (SImode, 22); |
| operands[1] = GEN_INT (saves); |
| emit_move_insn (operands[0], operands[1]); |
| |
| /* Now call the out-of-line prologue. */ |
| emit_insn (gen_outline_prologue_call ()); |
| emit_insn (gen_blockage ()); |
| |
| /* Note that we're using an out-of-line prologue. */ |
| out_of_line_prologue_epilogue = 1; |
| return; |
| } |
| } |
| |
| out_of_line_prologue_epilogue = 0; |
| |
| /* Save RP first. The calling conventions manual states RP will |
| always be stored into the caller's frame at sp-20. */ |
| if (regs_ever_live[2] || profile_flag) |
| store_reg (2, -20, STACK_POINTER_REGNUM); |
| |
| /* Allocate the local frame and set up the frame pointer if needed. */ |
| if (actual_fsize) |
| { |
| if (frame_pointer_needed) |
| { |
| /* Copy the old frame pointer temporarily into %r1. Set up the |
| new stack pointer, then store away the saved old frame pointer |
| into the stack at sp+actual_fsize and at the same time update |
| the stack pointer by actual_fsize bytes. Two versions, first |
| handles small (<8k) frames. The second handles large (>8k) |
| frames. */ |
| emit_move_insn (tmpreg, frame_pointer_rtx); |
| emit_move_insn (frame_pointer_rtx, stack_pointer_rtx); |
| if (VAL_14_BITS_P (actual_fsize)) |
| emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, size_rtx)); |
| else |
| { |
| /* It is incorrect to store the saved frame pointer at *sp, |
| then increment sp (writes beyond the current stack boundary). |
| |
| So instead use stwm to store at *sp and post-increment the |
| stack pointer as an atomic operation. Then increment sp to |
| finish allocating the new frame. */ |
| emit_insn (gen_post_stwm (stack_pointer_rtx, tmpreg, GEN_INT (64))); |
| set_reg_plus_d (STACK_POINTER_REGNUM, |
| STACK_POINTER_REGNUM, |
| actual_fsize - 64); |
| } |
| } |
| /* no frame pointer needed. */ |
| else |
| { |
| /* In some cases we can perform the first callee register save |
| and allocating the stack frame at the same time. If so, just |
| make a note of it and defer allocating the frame until saving |
| the callee registers. */ |
| if (VAL_14_BITS_P (-actual_fsize) |
| && local_fsize == 0 |
| && ! profile_flag |
| && ! flag_pic) |
| merge_sp_adjust_with_store = 1; |
| /* Can not optimize. Adjust the stack frame by actual_fsize bytes. */ |
| else if (actual_fsize != 0) |
| set_reg_plus_d (STACK_POINTER_REGNUM, |
| STACK_POINTER_REGNUM, |
| actual_fsize); |
| } |
| } |
| |
| /* The hppa calling conventions say that %r19, the pic offset |
| register, is saved at sp - 32 (in this function's frame) when |
| generating PIC code. FIXME: What is the correct thing to do |
| for functions which make no calls and allocate no frame? Do |
| we need to allocate a frame, or can we just omit the save? For |
| now we'll just omit the save. */ |
| if (actual_fsize != 0 && flag_pic) |
| store_reg (PIC_OFFSET_TABLE_REGNUM, -32, STACK_POINTER_REGNUM); |
| |
| /* Profiling code. |
| |
| Instead of taking one argument, the counter label, as most normal |
| mcounts do, _mcount appears to behave differently on the HPPA. It |
| takes the return address of the caller, the address of this routine, |
| and the address of the label. Also, it isn't magic, so |
| argument registers have to be preserved. */ |
| if (profile_flag) |
| { |
| int pc_offset, i, arg_offset, basereg, offsetadj; |
| |
| pc_offset = 4 + (frame_pointer_needed |
| ? (VAL_14_BITS_P (actual_fsize) ? 12 : 20) |
| : (VAL_14_BITS_P (actual_fsize) ? 4 : 8)); |
| |
| /* When the function has a frame pointer, use it as the base |
| register for saving/restore registers. Else use the stack |
| pointer. Adjust the offset according to the frame size if |
| this function does not have a frame pointer. */ |
| |
| basereg = frame_pointer_needed ? FRAME_POINTER_REGNUM |
| : STACK_POINTER_REGNUM; |
| offsetadj = frame_pointer_needed ? 0 : actual_fsize; |
| |
| /* Horrid hack. emit_function_prologue will modify this RTL in |
| place to get the expected results. sprintf here is just to |
| put something in the name. */ |
| sprintf(hp_profile_label_name, "LP$%04d", -1); |
| hp_profile_label_rtx = gen_rtx_SYMBOL_REF (SImode, hp_profile_label_name); |
| if (current_function_returns_struct) |
| store_reg (STRUCT_VALUE_REGNUM, - 12 - offsetadj, basereg); |
| if (current_function_needs_context) |
| store_reg (STATIC_CHAIN_REGNUM, - 16 - offsetadj, basereg); |
| |
| for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4) |
| if (regs_ever_live [i]) |
| { |
| store_reg (i, arg_offset, basereg); |
| /* Deal with arg_offset not fitting in 14 bits. */ |
| pc_offset += VAL_14_BITS_P (arg_offset) ? 4 : 8; |
| } |
| |
| emit_move_insn (gen_rtx_REG (SImode, 26), gen_rtx_REG (SImode, 2)); |
| emit_move_insn (tmpreg, gen_rtx_HIGH (SImode, hp_profile_label_rtx)); |
| emit_move_insn (gen_rtx_REG (SImode, 24), |
| gen_rtx_LO_SUM (SImode, tmpreg, hp_profile_label_rtx)); |
| /* %r25 is set from within the output pattern. */ |
| emit_insn (gen_call_profiler (GEN_INT (- pc_offset - 20))); |
| |
| /* Restore argument registers. */ |
| for (i = 26, arg_offset = -36 - offsetadj; i >= 23; i--, arg_offset -= 4) |
| if (regs_ever_live [i]) |
| load_reg (i, arg_offset, basereg); |
| |
| if (current_function_returns_struct) |
| load_reg (STRUCT_VALUE_REGNUM, -12 - offsetadj, basereg); |
| |
| if (current_function_needs_context) |
| load_reg (STATIC_CHAIN_REGNUM, -16 - offsetadj, basereg); |
| } |
| |
| /* Normal register save. |
| |
| Do not save the frame pointer in the frame_pointer_needed case. It |
| was done earlier. */ |
| if (frame_pointer_needed) |
| { |
| for (i = 18, offset = local_fsize; i >= 4; i--) |
| if (regs_ever_live[i] && ! call_used_regs[i]) |
| { |
| store_reg (i, offset, FRAME_POINTER_REGNUM); |
| offset += 4; |
| gr_saved++; |
| } |
| /* Account for %r3 which is saved in a special place. */ |
| gr_saved++; |
| } |
| /* No frame pointer needed. */ |
| else |
| { |
| for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--) |
| if (regs_ever_live[i] && ! call_used_regs[i]) |
| { |
| /* If merge_sp_adjust_with_store is nonzero, then we can |
| optimize the first GR save. */ |
| if (merge_sp_adjust_with_store) |
| { |
| merge_sp_adjust_with_store = 0; |
| emit_insn (gen_post_stwm (stack_pointer_rtx, |
| gen_rtx_REG (SImode, i), |
| GEN_INT (-offset))); |
| } |
| else |
| store_reg (i, offset, STACK_POINTER_REGNUM); |
| offset += 4; |
| gr_saved++; |
| } |
| |
| /* If we wanted to merge the SP adjustment with a GR save, but we never |
| did any GR saves, then just emit the adjustment here. */ |
| if (merge_sp_adjust_with_store) |
| set_reg_plus_d (STACK_POINTER_REGNUM, |
| STACK_POINTER_REGNUM, |
| actual_fsize); |
| } |
| |
| /* Align pointer properly (doubleword boundary). */ |
| offset = (offset + 7) & ~7; |
| |
| /* Floating point register store. */ |
| if (save_fregs) |
| { |
| /* First get the frame or stack pointer to the start of the FP register |
| save area. */ |
| if (frame_pointer_needed) |
| set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset); |
| else |
| set_reg_plus_d (1, STACK_POINTER_REGNUM, offset); |
| |
| /* Now actually save the FP registers. */ |
| for (i = 66; i >= 48; i -= 2) |
| { |
| if (regs_ever_live[i] || regs_ever_live[i + 1]) |
| { |
| emit_move_insn (gen_rtx_MEM (DFmode, |
| gen_rtx_POST_INC (DFmode, tmpreg)), |
| gen_rtx_REG (DFmode, i)); |
| fr_saved++; |
| } |
| } |
| } |
| |
| /* When generating PIC code it is necessary to save/restore the |
| PIC register around each function call. We used to do this |
| in the call patterns themselves, but that implementation |
| made incorrect assumptions about using global variables to hold |
| per-function rtl code generated in the backend. |
| |
| So instead, we copy the PIC register into a reserved callee saved |
| register in the prologue. Then after each call we reload the PIC |
| register from the callee saved register. We also reload the PIC |
| register from the callee saved register in the epilogue ensure the |
| PIC register is valid at function exit. |
| |
| This may (depending on the exact characteristics of the function) |
| even be more efficient. |
| |
| Avoid this if the callee saved register wasn't used (these are |
| leaf functions). */ |
| if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM_SAVED]) |
| emit_move_insn (gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM_SAVED), |
| gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM)); |
| } |
| |
| |
| void |
| output_function_epilogue (file, size) |
| FILE *file; |
| int size ATTRIBUTE_UNUSED; |
| { |
| rtx insn = get_last_insn (); |
| |
| /* hppa_expand_epilogue does the dirty work now. We just need |
| to output the assembler directives which denote the end |
| of a function. |
| |
| To make debuggers happy, emit a nop if the epilogue was completely |
| eliminated due to a volatile call as the last insn in the |
| current function. That way the return address (in %r2) will |
| always point to a valid instruction in the current function. */ |
| |
| /* Get the last real insn. */ |
| if (GET_CODE (insn) == NOTE) |
| insn = prev_real_insn (insn); |
| |
| /* If it is a sequence, then look inside. */ |
| if (insn && GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE) |
| insn = XVECEXP (PATTERN (insn), 0, 0); |
| |
| /* If insn is a CALL_INSN, then it must be a call to a volatile |
| function (otherwise there would be epilogue insns). */ |
| if (insn && GET_CODE (insn) == CALL_INSN) |
| fputs ("\tnop\n", file); |
| |
| fputs ("\t.EXIT\n\t.PROCEND\n", file); |
| |
| /* Free up stuff we don't need anymore. */ |
| if (unscaled_index_insn_codes) |
| free (unscaled_index_insn_codes); |
| max_unscaled_index_insn_codes_uid = 0; |
| } |
| |
| void |
| hppa_expand_epilogue () |
| { |
| rtx tmpreg; |
| int offset,i; |
| int merge_sp_adjust_with_load = 0; |
| |
| /* Handle out of line prologues and epilogues. */ |
| if (TARGET_SPACE && out_of_line_prologue_epilogue) |
| { |
| int saves = 0; |
| rtx operands[2]; |
| |
| /* Put the register save info into %r22. */ |
| for (i = 18; i >= 3; i--) |
| if (regs_ever_live[i] && ! call_used_regs[i]) |
| { |
| saves = i; |
| break; |
| } |
| |
| for (i = 66; i >= 48; i -= 2) |
| if (regs_ever_live[i] || regs_ever_live[i + 1]) |
| { |
| saves |= ((i/2 - 12 ) << 16); |
| break; |
| } |
| |
| emit_insn (gen_blockage ()); |
| |
| /* Put the local_fisze into %r19. */ |
| operands[0] = gen_rtx_REG (SImode, 19); |
| operands[1] = GEN_INT (local_fsize); |
| emit_move_insn (operands[0], operands[1]); |
| |
| /* Put the stack size into %r21. */ |
| operands[0] = gen_rtx_REG (SImode, 21); |
| operands[1] = GEN_INT (actual_fsize); |
| emit_move_insn (operands[0], operands[1]); |
| |
| operands[0] = gen_rtx_REG (SImode, 22); |
| operands[1] = GEN_INT (saves); |
| emit_move_insn (operands[0], operands[1]); |
| |
| /* Now call the out-of-line epilogue. */ |
| emit_insn (gen_outline_epilogue_call ()); |
| return; |
| } |
| |
| /* We will use this often. */ |
| tmpreg = gen_rtx_REG (SImode, 1); |
| |
| /* Try to restore RP early to avoid load/use interlocks when |
| RP gets used in the return (bv) instruction. This appears to still |
| be necessary even when we schedule the prologue and epilogue. */ |
| if (frame_pointer_needed |
| && (regs_ever_live [2] || profile_flag)) |
| load_reg (2, -20, FRAME_POINTER_REGNUM); |
| |
| /* No frame pointer, and stack is smaller than 8k. */ |
| else if (! frame_pointer_needed |
| && VAL_14_BITS_P (actual_fsize + 20) |
| && (regs_ever_live[2] || profile_flag)) |
| load_reg (2, - (actual_fsize + 20), STACK_POINTER_REGNUM); |
| |
| /* General register restores. */ |
| if (frame_pointer_needed) |
| { |
| for (i = 18, offset = local_fsize; i >= 4; i--) |
| if (regs_ever_live[i] && ! call_used_regs[i]) |
| { |
| load_reg (i, offset, FRAME_POINTER_REGNUM); |
| offset += 4; |
| } |
| } |
| else |
| { |
| for (i = 18, offset = local_fsize - actual_fsize; i >= 3; i--) |
| { |
| if (regs_ever_live[i] && ! call_used_regs[i]) |
| { |
| /* Only for the first load. |
| merge_sp_adjust_with_load holds the register load |
| with which we will merge the sp adjustment. */ |
| if (VAL_14_BITS_P (actual_fsize + 20) |
| && local_fsize == 0 |
| && ! merge_sp_adjust_with_load) |
| merge_sp_adjust_with_load = i; |
| else |
| load_reg (i, offset, STACK_POINTER_REGNUM); |
| offset += 4; |
| } |
| } |
| } |
| |
| /* Align pointer properly (doubleword boundary). */ |
| offset = (offset + 7) & ~7; |
| |
| /* FP register restores. */ |
| if (save_fregs) |
| { |
| /* Adjust the register to index off of. */ |
| if (frame_pointer_needed) |
| set_reg_plus_d (1, FRAME_POINTER_REGNUM, offset); |
| else |
| set_reg_plus_d (1, STACK_POINTER_REGNUM, offset); |
| |
| /* Actually do the restores now. */ |
| for (i = 66; i >= 48; i -= 2) |
| { |
| if (regs_ever_live[i] || regs_ever_live[i + 1]) |
| { |
| emit_move_insn (gen_rtx_REG (DFmode, i), |
| gen_rtx_MEM (DFmode, |
| gen_rtx_POST_INC (DFmode, tmpreg))); |
| } |
| } |
| } |
| |
| /* Emit a blockage insn here to keep these insns from being moved to |
| an earlier spot in the epilogue, or into the main instruction stream. |
| |
| This is necessary as we must not cut the stack back before all the |
| restores are finished. */ |
| emit_insn (gen_blockage ()); |
| /* No frame pointer, but we have a stack greater than 8k. We restore |
| %r2 very late in this case. (All other cases are restored as early |
| as possible.) */ |
| if (! frame_pointer_needed |
| && ! VAL_14_BITS_P (actual_fsize + 20) |
| && (regs_ever_live[2] || profile_flag)) |
| { |
| set_reg_plus_d (STACK_POINTER_REGNUM, |
| STACK_POINTER_REGNUM, |
| - actual_fsize); |
| |
| /* This used to try and be clever by not depending on the value in |
| %r30 and instead use the value held in %r1 (so that the 2nd insn |
| which sets %r30 could be put in the delay slot of the return insn). |
| |
| That won't work since if the stack is exactly 8k set_reg_plus_d |
| doesn't set %r1, just %r30. */ |
| load_reg (2, - 20, STACK_POINTER_REGNUM); |
| } |
| |
| /* Reset stack pointer (and possibly frame pointer). The stack |
| pointer is initially set to fp + 64 to avoid a race condition. */ |
| else if (frame_pointer_needed) |
| { |
| set_reg_plus_d (STACK_POINTER_REGNUM, FRAME_POINTER_REGNUM, 64); |
| emit_insn (gen_pre_ldwm (frame_pointer_rtx, |
| stack_pointer_rtx, |
| GEN_INT (-64))); |
| } |
| /* If we were deferring a callee register restore, do it now. */ |
| else if (! frame_pointer_needed && merge_sp_adjust_with_load) |
| emit_insn (gen_pre_ldwm (gen_rtx_REG (SImode, merge_sp_adjust_with_load), |
| stack_pointer_rtx, |
| GEN_INT (- actual_fsize))); |
| else if (actual_fsize != 0) |
| set_reg_plus_d (STACK_POINTER_REGNUM, |
| STACK_POINTER_REGNUM, |
| - actual_fsize); |
| } |
| |
| /* Fetch the return address for the frame COUNT steps up from |
| the current frame, after the prologue. FRAMEADDR is the |
| frame pointer of the COUNT frame. |
| |
| We want to ignore any export stub remnants here. |
| |
| The value returned is used in two different ways: |
| |
| 1. To find a function's caller. |
| |
| 2. To change the return address for a function. |
| |
| This function handles most instances of case 1; however, it will |
| fail if there are two levels of stubs to execute on the return |
| path. The only way I believe that can happen is if the return value |
| needs a parameter relocation, which never happens for C code. |
| |
| This function handles most instances of case 2; however, it will |
| fail if we did not originally have stub code on the return path |
| but will need code on the new return path. This can happen if |
| the caller & callee are both in the main program, but the new |
| return location is in a shared library. |
| |
| To handle this correctly we need to set the return pointer at |
| frame-20 to point to a return stub frame-24 to point to the |
| location we wish to return to. */ |
| |
| rtx |
| return_addr_rtx (count, frameaddr) |
| int count ATTRIBUTE_UNUSED; |
| rtx frameaddr; |
| { |
| rtx label; |
| rtx saved_rp; |
| rtx ins; |
| |
| saved_rp = gen_reg_rtx (Pmode); |
| |
| /* First, we start off with the normal return address pointer from |
| -20[frameaddr]. */ |
| |
| emit_move_insn (saved_rp, plus_constant (frameaddr, -5 * UNITS_PER_WORD)); |
| |
| /* Get pointer to the instruction stream. We have to mask out the |
| privilege level from the two low order bits of the return address |
| pointer here so that ins will point to the start of the first |
| instruction that would have been executed if we returned. */ |
| ins = copy_to_reg (gen_rtx_AND (Pmode, |
| copy_to_reg (gen_rtx_MEM (Pmode, saved_rp)), |
| MASK_RETURN_ADDR)); |
| label = gen_label_rtx (); |
| |
| /* Check the instruction stream at the normal return address for the |
| export stub: |
| |
| 0x4bc23fd1 | stub+8: ldw -18(sr0,sp),rp |
| 0x004010a1 | stub+12: ldsid (sr0,rp),r1 |
| 0x00011820 | stub+16: mtsp r1,sr0 |
| 0xe0400002 | stub+20: be,n 0(sr0,rp) |
| |
| If it is an export stub, than our return address is really in |
| -24[frameaddr]. */ |
| |
| emit_cmp_insn (gen_rtx_MEM (SImode, ins), |
| GEN_INT (0x4bc23fd1), |
| NE, NULL_RTX, SImode, 1, 0); |
| emit_jump_insn (gen_bne (label)); |
| |
| emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 4)), |
| GEN_INT (0x004010a1), |
| NE, NULL_RTX, SImode, 1, 0); |
| emit_jump_insn (gen_bne (label)); |
| |
| emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 8)), |
| GEN_INT (0x00011820), |
| NE, NULL_RTX, SImode, 1, 0); |
| emit_jump_insn (gen_bne (label)); |
| |
| emit_cmp_insn (gen_rtx_MEM (SImode, plus_constant (ins, 12)), |
| GEN_INT (0xe0400002), |
| NE, NULL_RTX, SImode, 1, 0); |
| |
| /* If there is no export stub then just use our initial guess of |
| -20[frameaddr]. */ |
| |
| emit_jump_insn (gen_bne (label)); |
| |
| /* Here we know that our return address pointer points to an export |
| stub. We don't want to return the address of the export stub, |
| but rather the return address that leads back into user code. |
| That return address is stored at -24[frameaddr]. */ |
| |
| emit_move_insn (saved_rp, plus_constant (frameaddr, -6 * UNITS_PER_WORD)); |
| |
| emit_label (label); |
| return gen_rtx_MEM (Pmode, memory_address (Pmode, saved_rp)); |
| } |
| |
| /* This is only valid once reload has completed because it depends on |
| knowing exactly how much (if any) frame there is and... |
| |
| It's only valid if there is no frame marker to de-allocate and... |
| |
| It's only valid if %r2 hasn't been saved into the caller's frame |
| (we're not profiling and %r2 isn't live anywhere). */ |
| int |
| hppa_can_use_return_insn_p () |
| { |
| return (reload_completed |
| && (compute_frame_size (get_frame_size (), 0) ? 0 : 1) |
| && ! profile_flag |
| && ! regs_ever_live[2] |
| && ! frame_pointer_needed); |
| } |
| |
| void |
| emit_bcond_fp (code, operand0) |
| enum rtx_code code; |
| rtx operand0; |
| { |
| emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, |
| gen_rtx_IF_THEN_ELSE (VOIDmode, |
| gen_rtx_fmt_ee (code, |
| VOIDmode, |
| gen_rtx_REG (CCFPmode, 0), |
| const0_rtx), |
| gen_rtx_LABEL_REF (VOIDmode, operand0), |
| pc_rtx))); |
| |
| } |
| |
| rtx |
| gen_cmp_fp (code, operand0, operand1) |
| enum rtx_code code; |
| rtx operand0, operand1; |
| { |
| return gen_rtx_SET (VOIDmode, gen_rtx_REG (CCFPmode, 0), |
| gen_rtx_fmt_ee (code, CCFPmode, operand0, operand1)); |
| } |
| |
| /* Adjust the cost of a scheduling dependency. Return the new cost of |
| a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ |
| |
| int |
| pa_adjust_cost (insn, link, dep_insn, cost) |
| rtx insn; |
| rtx link; |
| rtx dep_insn; |
| int cost; |
| { |
| enum attr_type attr_type; |
| |
| /* Don't adjust costs for a pa8000 chip. */ |
| if (pa_cpu >= PROCESSOR_8000) |
| return cost; |
| |
| if (! recog_memoized (insn)) |
| return 0; |
| |
| attr_type = get_attr_type (insn); |
| |
| if (REG_NOTE_KIND (link) == 0) |
| { |
| /* Data dependency; DEP_INSN writes a register that INSN reads some |
| cycles later. */ |
| |
| if (attr_type == TYPE_FPSTORE) |
| { |
| rtx pat = PATTERN (insn); |
| rtx dep_pat = PATTERN (dep_insn); |
| if (GET_CODE (pat) == PARALLEL) |
| { |
| /* This happens for the fstXs,mb patterns. */ |
| pat = XVECEXP (pat, 0, 0); |
| } |
| if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) |
| /* If this happens, we have to extend this to schedule |
| optimally. Return 0 for now. */ |
| return 0; |
| |
| if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat))) |
| { |
| if (! recog_memoized (dep_insn)) |
| return 0; |
| /* DEP_INSN is writing its result to the register |
| being stored in the fpstore INSN. */ |
| switch (get_attr_type (dep_insn)) |
| { |
| case TYPE_FPLOAD: |
| /* This cost 3 cycles, not 2 as the md says for the |
| 700 and 7100. */ |
| return cost + 1; |
| |
| case TYPE_FPALU: |
| case TYPE_FPMULSGL: |
| case TYPE_FPMULDBL: |
| case TYPE_FPDIVSGL: |
| case TYPE_FPDIVDBL: |
| case TYPE_FPSQRTSGL: |
| case TYPE_FPSQRTDBL: |
| /* In these important cases, we save one cycle compared to |
| when flop instruction feed each other. */ |
| return cost - 1; |
| |
| default: |
| return cost; |
| } |
| } |
| } |
| |
| /* For other data dependencies, the default cost specified in the |
| md is correct. */ |
| return cost; |
| } |
| else if (REG_NOTE_KIND (link) == REG_DEP_ANTI) |
| { |
| /* Anti dependency; DEP_INSN reads a register that INSN writes some |
| cycles later. */ |
| |
| if (attr_type == TYPE_FPLOAD) |
| { |
| rtx pat = PATTERN (insn); |
| rtx dep_pat = PATTERN (dep_insn); |
| if (GET_CODE (pat) == PARALLEL) |
| { |
| /* This happens for the fldXs,mb patterns. */ |
| pat = XVECEXP (pat, 0, 0); |
| } |
| if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) |
| /* If this happens, we have to extend this to schedule |
| optimally. Return 0 for now. */ |
| return 0; |
| |
| if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) |
| { |
| if (! recog_memoized (dep_insn)) |
| return 0; |
| switch (get_attr_type (dep_insn)) |
| { |
| case TYPE_FPALU: |
| case TYPE_FPMULSGL: |
| case TYPE_FPMULDBL: |
| case TYPE_FPDIVSGL: |
| case TYPE_FPDIVDBL: |
| case TYPE_FPSQRTSGL: |
| case TYPE_FPSQRTDBL: |
| /* A fpload can't be issued until one cycle before a |
| preceding arithmetic operation has finished if |
| the target of the fpload is any of the sources |
| (or destination) of the arithmetic operation. */ |
| return cost - 1; |
| |
| default: |
| return 0; |
| } |
| } |
| } |
| else if (attr_type == TYPE_FPALU) |
| { |
| rtx pat = PATTERN (insn); |
| rtx dep_pat = PATTERN (dep_insn); |
| if (GET_CODE (pat) == PARALLEL) |
| { |
| /* This happens for the fldXs,mb patterns. */ |
| pat = XVECEXP (pat, 0, 0); |
| } |
| if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) |
| /* If this happens, we have to extend this to schedule |
| optimally. Return 0 for now. */ |
| return 0; |
| |
| if (reg_mentioned_p (SET_DEST (pat), SET_SRC (dep_pat))) |
| { |
| if (! recog_memoized (dep_insn)) |
| return 0; |
| switch (get_attr_type (dep_insn)) |
| { |
| case TYPE_FPDIVSGL: |
| case TYPE_FPDIVDBL: |
| case TYPE_FPSQRTSGL: |
| case TYPE_FPSQRTDBL: |
| /* An ALU flop can't be issued until two cycles before a |
| preceding divide or sqrt operation has finished if |
| the target of the ALU flop is any of the sources |
| (or destination) of the divide or sqrt operation. */ |
| return cost - 2; |
| |
| default: |
| return 0; |
| } |
| } |
| } |
| |
| /* For other anti dependencies, the cost is 0. */ |
| return 0; |
| } |
| else if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) |
| { |
| /* Output dependency; DEP_INSN writes a register that INSN writes some |
| cycles later. */ |
| if (attr_type == TYPE_FPLOAD) |
| { |
| rtx pat = PATTERN (insn); |
| rtx dep_pat = PATTERN (dep_insn); |
| if (GET_CODE (pat) == PARALLEL) |
| { |
| /* This happens for the fldXs,mb patterns. */ |
| pat = XVECEXP (pat, 0, 0); |
| } |
| if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) |
| /* If this happens, we have to extend this to schedule |
| optimally. Return 0 for now. */ |
| return 0; |
| |
| if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) |
| { |
| if (! recog_memoized (dep_insn)) |
| return 0; |
| switch (get_attr_type (dep_insn)) |
| { |
| case TYPE_FPALU: |
| case TYPE_FPMULSGL: |
| case TYPE_FPMULDBL: |
| case TYPE_FPDIVSGL: |
| case TYPE_FPDIVDBL: |
| case TYPE_FPSQRTSGL: |
| case TYPE_FPSQRTDBL: |
| /* A fpload can't be issued until one cycle before a |
| preceding arithmetic operation has finished if |
| the target of the fpload is the destination of the |
| arithmetic operation. */ |
| return cost - 1; |
| |
| default: |
| return 0; |
| } |
| } |
| } |
| else if (attr_type == TYPE_FPALU) |
| { |
| rtx pat = PATTERN (insn); |
| rtx dep_pat = PATTERN (dep_insn); |
| if (GET_CODE (pat) == PARALLEL) |
| { |
| /* This happens for the fldXs,mb patterns. */ |
| pat = XVECEXP (pat, 0, 0); |
| } |
| if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET) |
| /* If this happens, we have to extend this to schedule |
| optimally. Return 0 for now. */ |
| return 0; |
| |
| if (reg_mentioned_p (SET_DEST (pat), SET_DEST (dep_pat))) |
| { |
| if (! recog_memoized (dep_insn)) |
| return 0; |
| switch (get_attr_type (dep_insn)) |
| { |
| case TYPE_FPDIVSGL: |
| case TYPE_FPDIVDBL: |
| case TYPE_FPSQRTSGL: |
| case TYPE_FPSQRTDBL: |
| /* An ALU flop can't be issued until two cycles before a |
| preceding divide or sqrt operation has finished if |
| the target of the ALU flop is also the target of |
| the divide or sqrt operation. */ |
| return cost - 2; |
| |
| default: |
| return 0; |
| } |
| } |
| } |
| |
| /* For other output dependencies, the cost is 0. */ |
| return 0; |
| } |
| else |
| abort (); |
| } |
| |
| /* Return any length adjustment needed by INSN which already has its length |
| computed as LENGTH. Return zero if no adjustment is necessary. |
| |
| For the PA: function calls, millicode calls, and backwards short |
| conditional branches with unfilled delay slots need an adjustment by +1 |
| (to account for the NOP which will be inserted into the instruction stream). |
| |
| Also compute the length of an inline block move here as it is too |
| complicated to express as a length attribute in pa.md. */ |
| int |
| pa_adjust_insn_length (insn, length) |
| rtx insn; |
| int length; |
| { |
| rtx pat = PATTERN (insn); |
| |
| /* Call insns which are *not* indirect and have unfilled delay slots. */ |
| if (GET_CODE (insn) == CALL_INSN) |
| { |
| |
| if (GET_CODE (XVECEXP (pat, 0, 0)) == CALL |
| && GET_CODE (XEXP (XEXP (XVECEXP (pat, 0, 0), 0), 0)) == SYMBOL_REF) |
| return 4; |
| else if (GET_CODE (XVECEXP (pat, 0, 0)) == SET |
| && GET_CODE (XEXP (XEXP (XEXP (XVECEXP (pat, 0, 0), 1), 0), 0)) |
| == SYMBOL_REF) |
| return 4; |
| else |
| return 0; |
| } |
| /* Jumps inside switch tables which have unfilled delay slots |
| also need adjustment. */ |
| else if (GET_CODE (insn) == JUMP_INSN |
| && simplejump_p (insn) |
| && GET_MODE (insn) == SImode) |
| return 4; |
| /* Millicode insn with an unfilled delay slot. */ |
| else if (GET_CODE (insn) == INSN |
| && GET_CODE (pat) != SEQUENCE |
| && GET_CODE (pat) != USE |
| && GET_CODE (pat) != CLOBBER |
| && get_attr_type (insn) == TYPE_MILLI) |
| return 4; |
| /* Block move pattern. */ |
| else if (GET_CODE (insn) == INSN |
| && GET_CODE (pat) == PARALLEL |
| && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 0)) == MEM |
| && GET_CODE (XEXP (XVECEXP (pat, 0, 0), 1)) == MEM |
| && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 0)) == BLKmode |
| && GET_MODE (XEXP (XVECEXP (pat, 0, 0), 1)) == BLKmode) |
| return compute_movstrsi_length (insn) - 4; |
| /* Conditional branch with an unfilled delay slot. */ |
| else if (GET_CODE (insn) == JUMP_INSN && ! simplejump_p (insn)) |
| { |
| /* Adjust a short backwards conditional with an unfilled delay slot. */ |
| if (GET_CODE (pat) == SET |
| && length == 4 |
| && ! forward_branch_p (insn)) |
| return 4; |
| else if (GET_CODE (pat) == PARALLEL |
| && get_attr_type (insn) == TYPE_PARALLEL_BRANCH |
| && length == 4) |
| return 4; |
| /* Adjust dbra insn with short backwards conditional branch with |
| unfilled delay slot -- only for case where counter is in a |
| general register register. */ |
| else if (GET_CODE (pat) == PARALLEL |
| && GET_CODE (XVECEXP (pat, 0, 1)) == SET |
| && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == REG |
| && ! FP_REG_P (XEXP (XVECEXP (pat, 0, 1), 0)) |
| && length == 4 |
| && ! forward_branch_p (insn)) |
| return 4; |
| else |
| return 0; |
| } |
| return 0; |
| } |
| |
| /* Print operand X (an rtx) in assembler syntax to file FILE. |
| CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified. |
| For `%' followed by punctuation, CODE is the punctuation and X is null. */ |
| |
| void |
| print_operand (file, x, code) |
| FILE *file; |
| rtx x; |
| int code; |
| { |
| switch (code) |
| { |
| case '#': |
| /* Output a 'nop' if there's nothing for the delay slot. */ |
| if (dbr_sequence_length () == 0) |
| fputs ("\n\tnop", file); |
| return; |
| case '*': |
| /* Output an nullification completer if there's nothing for the */ |
| /* delay slot or nullification is requested. */ |
| if (dbr_sequence_length () == 0 || |
| (final_sequence && |
| INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))) |
| fputs (",n", file); |
| return; |
| case 'R': |
| /* Print out the second register name of a register pair. |
| I.e., R (6) => 7. */ |
| fputs (reg_names[REGNO (x)+1], file); |
| return; |
| case 'r': |
| /* A register or zero. */ |
| if (x == const0_rtx |
| || (x == CONST0_RTX (DFmode)) |
| || (x == CONST0_RTX (SFmode))) |
| { |
| fputs ("0", file); |
| return; |
| } |
| else |
| break; |
| case 'C': /* Plain (C)ondition */ |
| case 'X': |
| switch (GET_CODE (x)) |
| { |
| case EQ: |
| fputs ("=", file); break; |
| case NE: |
| fputs ("<>", file); break; |
| case GT: |
| fputs (">", file); break; |
| case GE: |
| fputs (">=", file); break; |
| case GEU: |
| fputs (">>=", file); break; |
| case GTU: |
| fputs (">>", file); break; |
| case LT: |
| fputs ("<", file); break; |
| case LE: |
| fputs ("<=", file); break; |
| case LEU: |
| fputs ("<<=", file); break; |
| case LTU: |
| fputs ("<<", file); break; |
| default: |
| abort (); |
| } |
| return; |
| case 'N': /* Condition, (N)egated */ |
| switch (GET_CODE (x)) |
| { |
| case EQ: |
| fputs ("<>", file); break; |
| case NE: |
| fputs ("=", file); break; |
| case GT: |
| fputs ("<=", file); break; |
| case GE: |
| fputs ("<", file); break; |
| case GEU: |
| fputs ("<<", file); break; |
| case GTU: |
| fputs ("<<=", file); break; |
| case LT: |
| fputs (">=", file); break; |
| case LE: |
| fputs (">", file); break; |
| case LEU: |
| fputs (">>", file); break; |
| case LTU: |
| fputs (">>=", file); break; |
| default: |
| abort (); |
| } |
| return; |
| /* For floating point comparisons. Need special conditions to deal |
| with NaNs properly. */ |
| case 'Y': |
| switch (GET_CODE (x)) |
| { |
| case EQ: |
| fputs ("!=", file); break; |
| case NE: |
| fputs ("=", file); break; |
| case GT: |
| fputs ("<=", file); break; |
| case GE: |
| fputs ("<", file); break; |
| case LT: |
| fputs (">=", file); break; |
| case LE: |
| fputs (">", file); break; |
| default: |
| abort (); |
| } |
| return; |
| case 'S': /* Condition, operands are (S)wapped. */ |
| switch (GET_CODE (x)) |
| { |
| case EQ: |
| fputs ("=", file); break; |
| case NE: |
| fputs ("<>", file); break; |
| case GT: |
| fputs ("<", file); break; |
| case GE: |
| fputs ("<=", file); break; |
| case GEU: |
| fputs ("<<=", file); break; |
| case GTU: |
| fputs ("<<", file); break; |
| case LT: |
| fputs (">", file); break; |
| case LE: |
| fputs (">=", file); break; |
| case LEU: |
| fputs (">>=", file); break; |
| case LTU: |
| fputs (">>", file); break; |
| default: |
| abort (); |
| } |
| return; |
| case 'B': /* Condition, (B)oth swapped and negate. */ |
| switch (GET_CODE (x)) |
| { |
| case EQ: |
| fputs ("<>", file); break; |
| case NE: |
| fputs ("=", file); break; |
| case GT: |
| fputs (">=", file); break; |
| case GE: |
| fputs (">", file); break; |
| case GEU: |
| fputs (">>", file); break; |
| case GTU: |
| fputs (">>=", file); break; |
| case LT: |
| fputs ("<=", file); break; |
| case LE: |
| fputs ("<", file); break; |
| case LEU: |
| fputs ("<<", file); break; |
| case LTU: |
| fputs ("<<=", file); break; |
| default: |
| abort (); |
| } |
| return; |
| case 'k': |
| if (GET_CODE (x) == CONST_INT) |
| { |
| fprintf (file, "%d", ~INTVAL (x)); |
| return; |
| } |
| abort(); |
| case 'L': |
| if (GET_CODE (x) == CONST_INT) |
| { |
| fprintf (file, "%d", 32 - (INTVAL (x) & 31)); |
| return; |
| } |
| abort(); |
| case 'O': |
| if (GET_CODE (x) == CONST_INT && exact_log2 (INTVAL (x)) >= 0) |
| { |
| fprintf (file, "%d", exact_log2 (INTVAL (x))); |
| return; |
| } |
| abort(); |
| case 'P': |
| if (GET_CODE (x) == CONST_INT) |
| { |
| fprintf (file, "%d", 31 - (INTVAL (x) & 31)); |
| return; |
| } |
| abort(); |
| case 'I': |
| if (GET_CODE (x) == CONST_INT) |
| fputs ("i", file); |
| return; |
| case 'M': |
| case 'F': |
| switch (GET_CODE (XEXP (x, 0))) |
| { |
| case PRE_DEC: |
| case PRE_INC: |
| fputs ("s,mb", file); |
| break; |
| case POST_DEC: |
| case POST_INC: |
| fputs ("s,ma", file); |
| break; |
| case PLUS: |
| if (GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT |
| || GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) |
| fputs ("x,s", file); |
| else if (code == 'F') |
| fputs ("s", file); |
| break; |
| default: |
| if (code == 'F') |
| fputs ("s", file); |
| break; |
| } |
| return; |
| case 'G': |
| output_global_address (file, x, 0); |
| return; |
| case 'H': |
| output_global_address (file, x, 1); |
| return; |
| case 0: /* Don't do anything special */ |
| break; |
| case 'Z': |
| { |
| unsigned op[3]; |
| compute_zdepi_operands (INTVAL (x), op); |
| fprintf (file, "%d,%d,%d", op[0], op[1], op[2]); |
| return; |
| } |
| default: |
| abort (); |
| } |
| if (GET_CODE (x) == REG) |
| { |
| fputs (reg_names [REGNO (x)], file); |
| if (FP_REG_P (x) && GET_MODE_SIZE (GET_MODE (x)) <= 4 && (REGNO (x) & 1) == 0) |
| fputs ("L", file); |
| } |
| else if (GET_CODE (x) == MEM) |
| { |
| int size = GET_MODE_SIZE (GET_MODE (x)); |
| rtx base = XEXP (XEXP (x, 0), 0); |
| switch (GET_CODE (XEXP (x, 0))) |
| { |
| case PRE_DEC: |
| case POST_DEC: |
| fprintf (file, "-%d(0,%s)", size, reg_names [REGNO (base)]); |
| break; |
| case PRE_INC: |
| case POST_INC: |
| fprintf (file, "%d(0,%s)", size, reg_names [REGNO (base)]); |
| break; |
| default: |
| if (GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT) |
| fprintf (file, "%s(0,%s)", |
| reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 0), 0))], |
| reg_names [REGNO (XEXP (XEXP (x, 0), 1))]); |
| else if (GET_CODE (XEXP (x, 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (x, 0), 1)) == MULT) |
| fprintf (file, "%s(0,%s)", |
| reg_names [REGNO (XEXP (XEXP (XEXP (x, 0), 1), 0))], |
| reg_names [REGNO (XEXP (XEXP (x, 0), 0))]); |
| else |
| output_address (XEXP (x, 0)); |
| break; |
| } |
| } |
| else |
| output_addr_const (file, x); |
| } |
| |
| /* output a SYMBOL_REF or a CONST expression involving a SYMBOL_REF. */ |
| |
| void |
| output_global_address (file, x, round_constant) |
| FILE *file; |
| rtx x; |
| int round_constant; |
| { |
| |
| /* Imagine (high (const (plus ...))). */ |
| if (GET_CODE (x) == HIGH) |
| x = XEXP (x, 0); |
| |
| if (GET_CODE (x) == SYMBOL_REF && read_only_operand (x)) |
| assemble_name (file, XSTR (x, 0)); |
| else if (GET_CODE (x) == SYMBOL_REF && !flag_pic) |
| { |
| assemble_name (file, XSTR (x, 0)); |
| fputs ("-$global$", file); |
| } |
| else if (GET_CODE (x) == CONST) |
| { |
| char *sep = ""; |
| int offset = 0; /* assembler wants -$global$ at end */ |
| rtx base = NULL_RTX; |
| |
| if (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF) |
| { |
| base = XEXP (XEXP (x, 0), 0); |
| output_addr_const (file, base); |
| } |
| else if (GET_CODE (XEXP (XEXP (x, 0), 0)) == CONST_INT) |
| offset = INTVAL (XEXP (XEXP (x, 0), 0)); |
| else abort (); |
| |
| if (GET_CODE (XEXP (XEXP (x, 0), 1)) == SYMBOL_REF) |
| { |
| base = XEXP (XEXP (x, 0), 1); |
| output_addr_const (file, base); |
| } |
| else if (GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT) |
| offset = INTVAL (XEXP (XEXP (x, 0),1)); |
| else abort (); |
| |
| /* How bogus. The compiler is apparently responsible for |
| rounding the constant if it uses an LR field selector. |
| |
| The linker and/or assembler seem a better place since |
| they have to do this kind of thing already. |
| |
| If we fail to do this, HP's optimizing linker may eliminate |
| an addil, but not update the ldw/stw/ldo instruction that |
| uses the result of the addil. */ |
| if (round_constant) |
| offset = ((offset + 0x1000) & ~0x1fff); |
| |
| if (GET_CODE (XEXP (x, 0)) == PLUS) |
| { |
| if (offset < 0) |
| { |
| offset = -offset; |
| sep = "-"; |
| } |
| else |
| sep = "+"; |
| } |
| else if (GET_CODE (XEXP (x, 0)) == MINUS |
| && (GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) |
| sep = "-"; |
| else abort (); |
| |
| if (!read_only_operand (base) && !flag_pic) |
| fputs ("-$global$", file); |
| if (offset) |
| fprintf (file,"%s%d", sep, offset); |
| } |
| else |
| output_addr_const (file, x); |
| } |
| |
| void |
| output_deferred_plabels (file) |
| FILE *file; |
| { |
| int i; |
| /* If we have deferred plabels, then we need to switch into the data |
| section and align it to a 4 byte boundary before we output the |
| deferred plabels. */ |
| if (n_deferred_plabels) |
| { |
| data_section (); |
| ASM_OUTPUT_ALIGN (file, 2); |
| } |
| |
| /* Now output the deferred plabels. */ |
| for (i = 0; i < n_deferred_plabels; i++) |
| { |
| ASM_OUTPUT_INTERNAL_LABEL (file, "L", CODE_LABEL_NUMBER (deferred_plabels[i].internal_label)); |
| assemble_integer (gen_rtx_SYMBOL_REF (VOIDmode, |
| deferred_plabels[i].name), 4, 1); |
| } |
| } |
| |
/* HP's millicode routines mean something special to the assembler.
   Keep track of which ones we have used.

   IMPORTED is indexed by enum millicodes and MILLI_NAMES holds the
   matching four character names in the same order.  IMPORT_STRING is
   the directive template; MILLI_START is the index of the first of its
   four '.' placeholder characters.  */

enum millicodes
{
  remI,
  remU,
  divI,
  divU,
  mulI,
  mulU,
  end1000
};

static char imported[(int)end1000];

static char *milli_names[] =
{
  "remI", "remU",
  "divI", "divU",
  "mulI", "mulU"
};

static char import_string[] = ".IMPORT $$....,MILLICODE";
#define MILLI_START 10
| |
| static void |
| import_milli (code) |
| enum millicodes code; |
| { |
| char str[sizeof (import_string)]; |
| |
| if (!imported[(int)code]) |
| { |
| imported[(int)code] = 1; |
| strcpy (str, import_string); |
| strncpy (str + MILLI_START, milli_names[(int)code], 4); |
| output_asm_insn (str, 0); |
| } |
| } |
| |
| /* The register constraints have put the operands and return value in |
| the proper registers. */ |
| |
| char * |
| output_mul_insn (unsignedp, insn) |
| int unsignedp ATTRIBUTE_UNUSED; |
| rtx insn; |
| { |
| import_milli (mulI); |
| return output_millicode_call (insn, gen_rtx_SYMBOL_REF (SImode, "$$mulI")); |
| } |
| |
| /* Emit the rtl for doing a division by a constant. */ |
| |
/* Do magic division millicodes exist for this value?  The table is
   indexed by the divisor; a nonzero entry means yes.  */
static int magic_milli[]=
{
  0, 0, 0, 1,		/*  0 ..  3 */
  0, 1, 1, 1,		/*  4 ..  7 */
  0, 1, 1, 0,		/*  8 .. 11 */
  1, 0, 1, 1		/* 12 .. 15 */
};

/* We'll use an array to keep track of the magic millicodes and
   whether or not we've used them already.  [n][0] is signed, [n][1] is
   unsigned.  */

static int div_milli[16][2];
| |
| int |
| div_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| return (mode == SImode |
| && ((GET_CODE (op) == REG && REGNO (op) == 25) |
| || (GET_CODE (op) == CONST_INT && INTVAL (op) > 0 |
| && INTVAL (op) < 16 && magic_milli[INTVAL (op)]))); |
| } |
| |
| int |
| emit_hpdiv_const (operands, unsignedp) |
| rtx *operands; |
| int unsignedp; |
| { |
| if (GET_CODE (operands[2]) == CONST_INT |
| && INTVAL (operands[2]) > 0 |
| && INTVAL (operands[2]) < 16 |
| && magic_milli[INTVAL (operands[2])]) |
| { |
| emit_move_insn (gen_rtx_REG (SImode, 26), operands[1]); |
| emit |
| (gen_rtx |
| (PARALLEL, VOIDmode, |
| gen_rtvec (5, gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, 29), |
| gen_rtx_fmt_ee (unsignedp ? UDIV : DIV, |
| SImode, |
| gen_rtx_REG (SImode, 26), |
| operands[2])), |
| gen_rtx_CLOBBER (VOIDmode, operands[3]), |
| gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 26)), |
| gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 25)), |
| gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 31))))); |
| emit_move_insn (operands[0], gen_rtx_REG (SImode, 29)); |
| return 1; |
| } |
| return 0; |
| } |
| |
| char * |
| output_div_insn (operands, unsignedp, insn) |
| rtx *operands; |
| int unsignedp; |
| rtx insn; |
| { |
| int divisor; |
| |
| /* If the divisor is a constant, try to use one of the special |
| opcodes .*/ |
| if (GET_CODE (operands[0]) == CONST_INT) |
| { |
| static char buf[100]; |
| divisor = INTVAL (operands[0]); |
| if (!div_milli[divisor][unsignedp]) |
| { |
| div_milli[divisor][unsignedp] = 1; |
| if (unsignedp) |
| output_asm_insn (".IMPORT $$divU_%0,MILLICODE", operands); |
| else |
| output_asm_insn (".IMPORT $$divI_%0,MILLICODE", operands); |
| } |
| if (unsignedp) |
| { |
| sprintf (buf, "$$divU_%d", INTVAL (operands[0])); |
| return output_millicode_call (insn, |
| gen_rtx_SYMBOL_REF (SImode, buf)); |
| } |
| else |
| { |
| sprintf (buf, "$$divI_%d", INTVAL (operands[0])); |
| return output_millicode_call (insn, |
| gen_rtx_SYMBOL_REF (SImode, buf)); |
| } |
| } |
| /* Divisor isn't a special constant. */ |
| else |
| { |
| if (unsignedp) |
| { |
| import_milli (divU); |
| return output_millicode_call (insn, |
| gen_rtx_SYMBOL_REF (SImode, "$$divU")); |
| } |
| else |
| { |
| import_milli (divI); |
| return output_millicode_call (insn, |
| gen_rtx_SYMBOL_REF (SImode, "$$divI")); |
| } |
| } |
| } |
| |
| /* Output a $$rem millicode to do mod. */ |
| |
| char * |
| output_mod_insn (unsignedp, insn) |
| int unsignedp; |
| rtx insn; |
| { |
| if (unsignedp) |
| { |
| import_milli (remU); |
| return output_millicode_call (insn, |
| gen_rtx_SYMBOL_REF (SImode, "$$remU")); |
| } |
| else |
| { |
| import_milli (remI); |
| return output_millicode_call (insn, |
| gen_rtx_SYMBOL_REF (SImode, "$$remI")); |
| } |
| } |
| |
| void |
| output_arg_descriptor (call_insn) |
| rtx call_insn; |
| { |
| char *arg_regs[4]; |
| enum machine_mode arg_mode; |
| rtx link; |
| int i, output_flag = 0; |
| int regno; |
| |
| for (i = 0; i < 4; i++) |
| arg_regs[i] = 0; |
| |
| /* Specify explicitly that no argument relocations should take place |
| if using the portable runtime calling conventions. */ |
| if (TARGET_PORTABLE_RUNTIME) |
| { |
| fputs ("\t.CALL ARGW0=NO,ARGW1=NO,ARGW2=NO,ARGW3=NO,RETVAL=NO\n", |
| asm_out_file); |
| return; |
| } |
| |
| if (GET_CODE (call_insn) != CALL_INSN) |
| abort (); |
| for (link = CALL_INSN_FUNCTION_USAGE (call_insn); link; link = XEXP (link, 1)) |
| { |
| rtx use = XEXP (link, 0); |
| |
| if (! (GET_CODE (use) == USE |
| && GET_CODE (XEXP (use, 0)) == REG |
| && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) |
| continue; |
| |
| arg_mode = GET_MODE (XEXP (use, 0)); |
| regno = REGNO (XEXP (use, 0)); |
| if (regno >= 23 && regno <= 26) |
| { |
| arg_regs[26 - regno] = "GR"; |
| if (arg_mode == DImode) |
| arg_regs[25 - regno] = "GR"; |
| } |
| else if (regno >= 32 && regno <= 39) |
| { |
| if (arg_mode == SFmode) |
| arg_regs[(regno - 32) / 2] = "FR"; |
| else |
| { |
| #ifndef HP_FP_ARG_DESCRIPTOR_REVERSED |
| arg_regs[(regno - 34) / 2] = "FR"; |
| arg_regs[(regno - 34) / 2 + 1] = "FU"; |
| #else |
| arg_regs[(regno - 34) / 2] = "FU"; |
| arg_regs[(regno - 34) / 2 + 1] = "FR"; |
| #endif |
| } |
| } |
| } |
| fputs ("\t.CALL ", asm_out_file); |
| for (i = 0; i < 4; i++) |
| { |
| if (arg_regs[i]) |
| { |
| if (output_flag++) |
| fputc (',', asm_out_file); |
| fprintf (asm_out_file, "ARGW%d=%s", i, arg_regs[i]); |
| } |
| } |
| fputc ('\n', asm_out_file); |
| } |
| |
| /* Return the class of any secondary reload register that is needed to |
| move IN into a register in class CLASS using mode MODE. |
| |
| Profiling has showed this routine and its descendants account for |
| a significant amount of compile time (~7%). So it has been |
| optimized to reduce redundant computations and eliminate useless |
| function calls. |
| |
| It might be worthwhile to try and make this a leaf function too. */ |
| |
| enum reg_class |
| secondary_reload_class (class, mode, in) |
| enum reg_class class; |
| enum machine_mode mode; |
| rtx in; |
| { |
| int regno, is_symbolic; |
| |
| /* Trying to load a constant into a FP register during PIC code |
| generation will require %r1 as a scratch register. */ |
| if (flag_pic == 2 |
| && GET_MODE_CLASS (mode) == MODE_INT |
| && FP_REG_CLASS_P (class) |
| && (GET_CODE (in) == CONST_INT || GET_CODE (in) == CONST_DOUBLE)) |
| return R1_REGS; |
| |
| /* Profiling showed the PA port spends about 1.3% of its compilation |
| time in true_regnum from calls inside secondary_reload_class. */ |
| |
| if (GET_CODE (in) == REG) |
| { |
| regno = REGNO (in); |
| if (regno >= FIRST_PSEUDO_REGISTER) |
| regno = true_regnum (in); |
| } |
| else if (GET_CODE (in) == SUBREG) |
| regno = true_regnum (in); |
| else |
| regno = -1; |
| |
| /* If we have something like (mem (mem (...)), we can safely assume the |
| inner MEM will end up in a general register after reloading, so there's |
| no need for a secondary reload. */ |
| if (GET_CODE (in) == MEM |
| && GET_CODE (XEXP (in, 0)) == MEM) |
| return NO_REGS; |
| |
| /* Handle out of range displacement for integer mode loads/stores of |
| FP registers. */ |
| if (((regno >= FIRST_PSEUDO_REGISTER || regno == -1) |
| && GET_MODE_CLASS (mode) == MODE_INT |
| && FP_REG_CLASS_P (class)) |
| || (class == SHIFT_REGS && (regno <= 0 || regno >= 32))) |
| return GENERAL_REGS; |
| |
| if (GET_CODE (in) == HIGH) |
| in = XEXP (in, 0); |
| |
| /* Profiling has showed GCC spends about 2.6% of its compilation |
| time in symbolic_operand from calls inside secondary_reload_class. |
| |
| We use an inline copy and only compute its return value once to avoid |
| useless work. */ |
| switch (GET_CODE (in)) |
| { |
| rtx tmp; |
| |
| case SYMBOL_REF: |
| case LABEL_REF: |
| is_symbolic = 1; |
| break; |
| case CONST: |
| tmp = XEXP (in, 0); |
| is_symbolic = ((GET_CODE (XEXP (tmp, 0)) == SYMBOL_REF |
| || GET_CODE (XEXP (tmp, 0)) == LABEL_REF) |
| && GET_CODE (XEXP (tmp, 1)) == CONST_INT); |
| break; |
| |
| default: |
| is_symbolic = 0; |
| break; |
| } |
| |
| if (!flag_pic |
| && is_symbolic |
| && read_only_operand (in)) |
| return NO_REGS; |
| |
| if (class != R1_REGS && is_symbolic) |
| return R1_REGS; |
| |
| return NO_REGS; |
| } |
| |
| enum direction |
| function_arg_padding (mode, type) |
| enum machine_mode mode; |
| tree type; |
| { |
| int size; |
| |
| if (mode == BLKmode) |
| { |
| if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST) |
| size = int_size_in_bytes (type) * BITS_PER_UNIT; |
| else |
| return upward; /* Don't know if this is right, but */ |
| /* same as old definition. */ |
| } |
| else |
| size = GET_MODE_BITSIZE (mode); |
| if (size < PARM_BOUNDARY) |
| return downward; |
| else if (size % PARM_BOUNDARY) |
| return upward; |
| else |
| return none; |
| } |
| |
| |
| /* Do what is necessary for `va_start'. ARGLIST is ignored; the type of |
| the current function decides whether the argument offset has to be |
| advanced by one extra word. Four argument registers starting at |
| register 23 are stored into the block at offset -16 from the internal |
| argument pointer, and a register holding the internal argument pointer |
| plus the (possibly adjusted) argument offset is returned. */ |
| |
| struct rtx_def * |
| hppa_builtin_saveregs (arglist) |
| tree arglist ATTRIBUTE_UNUSED; |
| { |
| tree parm_types = TYPE_ARG_TYPES (TREE_TYPE (current_function_decl)); |
| rtx offset = current_function_arg_offset_rtx; |
| rtx save_area, va_addr; |
| int argadj = 0; |
| |
| /* Skip one extra word when the function type has no argument list at |
| all, or when that list ends in void_type_node (presumably the |
| old-style varargs case -- confirm against the callers). */ |
| if (parm_types == 0 |
| || TREE_VALUE (tree_last (parm_types)) == void_type_node) |
| argadj = UNITS_PER_WORD; |
| |
| if (argadj != 0) |
| offset = plus_constant (offset, argadj); |
| |
| /* Store general registers on the stack. */ |
| save_area |
| = gen_rtx_MEM (BLKmode, |
| plus_constant (current_function_internal_arg_pointer, -16)); |
| move_block_from_reg (23, save_area, 4, 4 * UNITS_PER_WORD); |
| |
| /* move_block_from_reg stores the argument registers one at a time as |
| scalar stores, yet later insns may read the same addresses as a |
| structure (passing a struct to a varargs routine). The alias code |
| assumes such aliasing never happens, so a blockage insn is emitted |
| to keep memory references from moving above the last register |
| store. */ |
| emit_insn (gen_blockage ()); |
| |
| if (current_function_check_memory_usage) |
| emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3, |
| save_area, ptr_mode, |
| GEN_INT (4 * UNITS_PER_WORD), TYPE_MODE (sizetype), |
| GEN_INT (MEMORY_USE_RW), |
| TYPE_MODE (integer_type_node)); |
| |
| va_addr = expand_binop (Pmode, add_optab, |
| current_function_internal_arg_pointer, |
| offset, 0, 0, OPTAB_LIB_WIDEN); |
| return copy_to_reg (va_addr); |
| } |
| |
| /* This routine handles all the normal conditional branch sequences we |
| might need to generate. It handles compare immediate vs compare |
| register, nullification of delay slots, varying length branches, |
| negated branches, and all combinations of the above. It returns the |
| output appropriate to emit the branch corresponding to all given |
| parameters. |
| |
| OPERANDS are the operands of the branch; the templates built here use |
| %0 as the branch target, %1 and %2 as the values compared and %3 |
| (through the %S and %B modifiers) as the condition. |
| NULLIFY is nonzero if the delay slot is to be nullified; it may be |
| turned on here when the delay slot is unfilled. |
| LENGTH is the length of the insn in bytes: 4, 8, 20 or 28. Any other |
| value aborts. |
| NEGATED is nonzero if the sense of the condition is to be reversed. |
| INSN is the branch insn itself. |
| |
| For lengths 4 and 8 the result is built in a static buffer, so it is |
| only valid until the next call. For lengths 20 and 28 most of the |
| sequence is written directly with output_asm_insn and only the insn |
| for the delay slot is returned. */ |
| |
| char * |
| output_cbranch (operands, nullify, length, negated, insn) |
| rtx *operands; |
| int nullify, length, negated; |
| rtx insn; |
| { |
| static char buf[100]; /* Template handed back for lengths 4 and 8. */ |
| int useskip = 0; /* Nonzero: emit a comclr skip instead of a branch. */ |
| |
| /* A conditional branch to the following instruction (eg the delay slot) is |
| asking for a disaster. This can happen when not optimizing. |
| |
| In such cases it is safe to emit nothing. */ |
| |
| if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) |
| return ""; |
| |
| /* If this is a long branch with its delay slot unfilled, set `nullify' |
| as it can nullify the delay slot and save a nop. */ |
| if (length == 8 && dbr_sequence_length () == 0) |
| nullify = 1; |
| |
| /* If this is a short forward conditional branch which did not get |
| its delay slot filled, the delay slot can still be nullified. */ |
| if (! nullify && length == 4 && dbr_sequence_length () == 0) |
| nullify = forward_branch_p (insn); |
| |
| /* A forward branch over a single nullified insn can be done with a |
| comclr instruction. This avoids a single cycle penalty due to |
| mis-predicted branch if we fall through (branch not taken). |
| The test below requires the next real insn to be 4 bytes long and |
| the branch target to be the insn right after it. */ |
| if (length == 4 |
| && next_real_insn (insn) != 0 |
| && get_attr_length (next_real_insn (insn)) == 4 |
| && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn)) |
| && nullify) |
| useskip = 1; |
| |
| switch (length) |
| { |
| /* All short conditional branches except backwards with an unfilled |
| delay slot. */ |
| case 4: |
| if (useskip) |
| strcpy (buf, "com%I2clr,"); |
| else |
| strcpy (buf, "com%I2b,"); |
| if (negated) |
| strcat (buf, "%B3"); |
| else |
| strcat (buf, "%S3"); |
| if (useskip) |
| strcat (buf, " %2,%r1,0"); |
| else if (nullify) |
| strcat (buf, ",n %2,%r1,%0"); |
| else |
| strcat (buf, " %2,%r1,%0"); |
| break; |
| |
| /* All long conditionals. Note a short backward branch with an |
| unfilled delay slot is treated just like a long backward branch |
| with an unfilled delay slot. */ |
| case 8: |
| /* Handle weird backwards branch with a filled delay slot |
| which is nullified. The condition is the opposite of the one |
| used for length 4, and the com*b jumps over the bl. */ |
| if (dbr_sequence_length () != 0 |
| && ! forward_branch_p (insn) |
| && nullify) |
| { |
| strcpy (buf, "com%I2b,"); |
| if (negated) |
| strcat (buf, "%S3"); |
| else |
| strcat (buf, "%B3"); |
| strcat (buf, ",n %2,%r1,.+12\n\tbl %0,0"); |
| } |
| /* Handle short backwards branch with an unfilled delay slot. |
| Using a comb;nop rather than comiclr;bl saves 1 cycle for both |
| taken and untaken branches. This is only done when insn |
| addresses are available and the displacement passes |
| VAL_14_BITS_P. */ |
| else if (dbr_sequence_length () == 0 |
| && ! forward_branch_p (insn) |
| && insn_addresses |
| && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] |
| - insn_addresses[INSN_UID (insn)] - 8)) |
| { |
| strcpy (buf, "com%I2b,"); |
| if (negated) |
| strcat (buf, "%B3 %2,%r1,%0%#"); |
| else |
| strcat (buf, "%S3 %2,%r1,%0%#"); |
| } |
| else /* Otherwise: com*clr on the opposite condition, then a bl. */ |
| { |
| strcpy (buf, "com%I2clr,"); |
| if (negated) |
| strcat (buf, "%S3"); |
| else |
| strcat (buf, "%B3"); |
| if (nullify) |
| strcat (buf, " %2,%r1,0\n\tbl,n %0,0"); |
| else |
| strcat (buf, " %2,%r1,0\n\tbl %0,0"); |
| } |
| break; |
| |
| case 20: |
| /* Very long branch. Right now we only handle these when not |
| optimizing. See "jump" pattern in pa.md for details. */ |
| if (optimize) |
| abort (); |
| |
| /* Create a reversed conditional branch which branches around |
| the following insns. */ |
| if (negated) |
| strcpy (buf, "com%I2b,%S3,n %2,%r1,.+20"); |
| else |
| strcpy (buf, "com%I2b,%B3,n %2,%r1,.+20"); |
| output_asm_insn (buf, operands); |
| |
| /* Output an insn to save %r1. */ |
| output_asm_insn ("stw %%r1,-16(%%r30)", operands); |
| |
| /* Now output a very long branch to the original target. */ |
| output_asm_insn ("ldil L'%l0,%%r1\n\tbe R'%l0(%%sr4,%%r1)", operands); |
| |
| /* Now restore the value of %r1 in the delay slot. We're not |
| optimizing so we know nothing else can be in the delay slot. */ |
| return "ldw -16(%%r30),%%r1"; |
| |
| case 28: |
| /* Very long branch when generating PIC code. Right now we only |
| handle these when not optimizing. See "jump" pattern in pa.md |
| for details. */ |
| if (optimize) |
| abort (); |
| |
| /* Create a reversed conditional branch which branches around |
| the following insns. */ |
| if (negated) |
| strcpy (buf, "com%I2b,%S3,n %2,%r1,.+28"); |
| else |
| strcpy (buf, "com%I2b,%B3,n %2,%r1,.+28"); |
| output_asm_insn (buf, operands); |
| |
| /* Output an insn to save %r1. */ |
| output_asm_insn ("stw %%r1,-16(%%r30)", operands); |
| |
| /* Now output a very long PIC branch to the original target. |
| xoperands[4] is a fresh label emitted between the addil and the |
| ldo; the target address is formed relative to it. */ |
| { |
| rtx xoperands[5]; |
| |
| xoperands[0] = operands[0]; |
| xoperands[1] = operands[1]; |
| xoperands[2] = operands[2]; |
| xoperands[3] = operands[3]; |
| xoperands[4] = gen_label_rtx (); |
| |
| output_asm_insn ("bl .+8,%%r1\n\taddil L'%l0-%l4,%%r1", xoperands); |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", |
| CODE_LABEL_NUMBER (xoperands[4])); |
| output_asm_insn ("ldo R'%l0-%l4(%%r1),%%r1\n\tbv 0(%%r1)", xoperands); |
| } |
| |
| /* Now restore the value of %r1 in the delay slot. We're not |
| optimizing so we know nothing else can be in the delay slot. */ |
| return "ldw -16(%%r30),%%r1"; |
| |
| default: |
| abort(); |
| } |
| return buf; |
| } |
| |
| /* This routine handles all the branch-on-bit conditional branch sequences we |
| might need to generate. It handles nullification of delay slots, |
| varying length branches, negated branches and all combinations of the |
| above. It returns the appropriate output template to emit the branch. |
| |
| OPERANDS is not examined here; the templates refer to %0 and %1 as the |
| first two operands of the bb/extrs insn, and to %2 or %3 as the branch |
| target. |
| NULLIFY is nonzero if the delay slot is to be nullified; it may be |
| turned on here when the delay slot is unfilled. |
| LENGTH is the length of the insn in bytes, 4 or 8. Any other value |
| aborts. |
| NEGATED is nonzero if the branch is negated; the target is then %3 |
| rather than %2. |
| INSN is the branch insn itself. |
| WHICH (0 or 1) together with NEGATED selects between the `<' and `>=' |
| conditions. |
| |
| The template is built in a static buffer, so it is only valid until |
| the next call. */ |
| |
| char * |
| output_bb (operands, nullify, length, negated, insn, which) |
| rtx *operands ATTRIBUTE_UNUSED; |
| int nullify, length, negated; |
| rtx insn; |
| int which; |
| { |
| static char buf[100]; /* Template handed back to the caller. */ |
| int useskip = 0; /* Nonzero: emit an extrs skip instead of a branch. */ |
| |
| /* A conditional branch to the following instruction (eg the delay slot) is |
| asking for a disaster. I do not think this can happen as this pattern |
| is only used when optimizing; jump optimization should eliminate the |
| jump. But be prepared just in case. */ |
| |
| if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) |
| return ""; |
| |
| /* If this is a long branch with its delay slot unfilled, set `nullify' |
| as it can nullify the delay slot and save a nop. */ |
| if (length == 8 && dbr_sequence_length () == 0) |
| nullify = 1; |
| |
| /* If this is a short forward conditional branch which did not get |
| its delay slot filled, the delay slot can still be nullified. */ |
| if (! nullify && length == 4 && dbr_sequence_length () == 0) |
| nullify = forward_branch_p (insn); |
| |
| /* A forward branch over a single nullified insn can be done with a |
| extrs instruction. This avoids a single cycle penalty due to |
| mis-predicted branch if we fall through (branch not taken). */ |
| |
| if (length == 4 |
| && next_real_insn (insn) != 0 |
| && get_attr_length (next_real_insn (insn)) == 4 |
| && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn)) |
| && nullify) |
| useskip = 1; |
| |
| switch (length) |
| { |
| |
| /* All short conditional branches except backwards with an unfilled |
| delay slot. */ |
| case 4: |
| if (useskip) |
| strcpy (buf, "extrs,"); |
| else |
| strcpy (buf, "bb,"); |
| if ((which == 0 && negated) |
| || (which == 1 && ! negated)) |
| strcat (buf, ">="); |
| else |
| strcat (buf, "<"); |
| if (useskip) |
| strcat (buf, " %0,%1,1,0"); |
| else if (nullify && negated) |
| strcat (buf, ",n %0,%1,%3"); |
| else if (nullify && ! negated) |
| strcat (buf, ",n %0,%1,%2"); |
| else if (! nullify && negated) |
| /* The blank separating the condition from the operands is required |
| here just as in the non-negated form below. */ |
| strcat (buf, " %0,%1,%3"); |
| else if (! nullify && ! negated) |
| strcat (buf, " %0,%1,%2"); |
| break; |
| |
| /* All long conditionals. Note a short backward branch with an |
| unfilled delay slot is treated just like a long backward branch |
| with an unfilled delay slot. */ |
| case 8: |
| /* Handle weird backwards branch with a filled delay slot |
| which is nullified. The condition is the opposite of the one |
| used for length 4, and the bb jumps over the bl. */ |
| if (dbr_sequence_length () != 0 |
| && ! forward_branch_p (insn) |
| && nullify) |
| { |
| strcpy (buf, "bb,"); |
| if ((which == 0 && negated) |
| || (which == 1 && ! negated)) |
| strcat (buf, "<"); |
| else |
| strcat (buf, ">="); |
| if (negated) |
| strcat (buf, ",n %0,%1,.+12\n\tbl %3,0"); |
| else |
| strcat (buf, ",n %0,%1,.+12\n\tbl %2,0"); |
| } |
| /* Handle short backwards branch with an unfilled delay slot. |
| Using a bb;nop rather than extrs;bl saves 1 cycle for both |
| taken and untaken branches. This is only done when insn |
| addresses are available and the displacement passes |
| VAL_14_BITS_P. */ |
| else if (dbr_sequence_length () == 0 |
| && ! forward_branch_p (insn) |
| && insn_addresses |
| && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] |
| - insn_addresses[INSN_UID (insn)] - 8)) |
| { |
| strcpy (buf, "bb,"); |
| if ((which == 0 && negated) |
| || (which == 1 && ! negated)) |
| strcat (buf, ">="); |
| else |
| strcat (buf, "<"); |
| if (negated) |
| strcat (buf, " %0,%1,%3%#"); |
| else |
| strcat (buf, " %0,%1,%2%#"); |
| } |
| else /* Otherwise: extrs on the opposite condition, then a bl. */ |
| { |
| strcpy (buf, "extrs,"); |
| if ((which == 0 && negated) |
| || (which == 1 && ! negated)) |
| strcat (buf, "<"); |
| else |
| strcat (buf, ">="); |
| if (nullify && negated) |
| strcat (buf, " %0,%1,1,0\n\tbl,n %3,0"); |
| else if (nullify && ! negated) |
| strcat (buf, " %0,%1,1,0\n\tbl,n %2,0"); |
| else if (negated) |
| strcat (buf, " %0,%1,1,0\n\tbl %3,0"); |
| else |
| strcat (buf, " %0,%1,1,0\n\tbl %2,0"); |
| } |
| break; |
| |
| default: |
| abort(); |
| } |
| return buf; |
| } |
| |
| /* This routine handles all the branch-on-variable-bit conditional branch |
| sequences we might need to generate. It handles nullification of delay |
| slots, varying length branches, negated branches and all combinations |
| of the above. It returns the appropriate output template to emit the |
| branch. |
| |
| OPERANDS is not examined here; the templates refer to %0 as the first |
| operand of the bvb/vextrs insn, and to %2 or %3 as the branch target. |
| NULLIFY is nonzero if the delay slot is to be nullified; it may be |
| turned on here when the delay slot is unfilled. |
| LENGTH is the length of the insn in bytes, 4 or 8. Any other value |
| aborts. |
| NEGATED is nonzero if the branch is negated; the target is then %3 |
| rather than %2. |
| INSN is the branch insn itself. |
| WHICH (0 or 1) together with NEGATED selects between the `<' and `>=' |
| conditions. |
| |
| The template is built in a static buffer, so it is only valid until |
| the next call. */ |
| |
| char * |
| output_bvb (operands, nullify, length, negated, insn, which) |
| rtx *operands ATTRIBUTE_UNUSED; |
| int nullify, length, negated; |
| rtx insn; |
| int which; |
| { |
| static char buf[100]; /* Template handed back to the caller. */ |
| int useskip = 0; /* Nonzero: emit a vextrs skip instead of a branch. */ |
| |
| /* A conditional branch to the following instruction (eg the delay slot) is |
| asking for a disaster. I do not think this can happen as this pattern |
| is only used when optimizing; jump optimization should eliminate the |
| jump. But be prepared just in case. */ |
| |
| if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) |
| return ""; |
| |
| /* If this is a long branch with its delay slot unfilled, set `nullify' |
| as it can nullify the delay slot and save a nop. */ |
| if (length == 8 && dbr_sequence_length () == 0) |
| nullify = 1; |
| |
| /* If this is a short forward conditional branch which did not get |
| its delay slot filled, the delay slot can still be nullified. */ |
| if (! nullify && length == 4 && dbr_sequence_length () == 0) |
| nullify = forward_branch_p (insn); |
| |
| /* A forward branch over a single nullified insn can be done with a |
| vextrs instruction. This avoids a single cycle penalty due to |
| mis-predicted branch if we fall through (branch not taken). */ |
| |
| if (length == 4 |
| && next_real_insn (insn) != 0 |
| && get_attr_length (next_real_insn (insn)) == 4 |
| && JUMP_LABEL (insn) == next_nonnote_insn (next_real_insn (insn)) |
| && nullify) |
| useskip = 1; |
| |
| switch (length) |
| { |
| |
| /* All short conditional branches except backwards with an unfilled |
| delay slot. */ |
| case 4: |
| if (useskip) |
| strcpy (buf, "vextrs,"); |
| else |
| strcpy (buf, "bvb,"); |
| if ((which == 0 && negated) |
| || (which == 1 && ! negated)) |
| strcat (buf, ">="); |
| else |
| strcat (buf, "<"); |
| if (useskip) |
| strcat (buf, " %0,1,0"); |
| else if (nullify && negated) |
| strcat (buf, ",n %0,%3"); |
| else if (nullify && ! negated) |
| strcat (buf, ",n %0,%2"); |
| else if (! nullify && negated) |
| /* The blank separating the condition from the operands is required |
| here just as in the non-negated form below. */ |
| strcat (buf, " %0,%3"); |
| else if (! nullify && ! negated) |
| strcat (buf, " %0,%2"); |
| break; |
| |
| /* All long conditionals. Note a short backward branch with an |
| unfilled delay slot is treated just like a long backward branch |
| with an unfilled delay slot. */ |
| case 8: |
| /* Handle weird backwards branch with a filled delay slot |
| which is nullified. The condition is the opposite of the one |
| used for length 4, and the bvb jumps over the bl. */ |
| if (dbr_sequence_length () != 0 |
| && ! forward_branch_p (insn) |
| && nullify) |
| { |
| strcpy (buf, "bvb,"); |
| if ((which == 0 && negated) |
| || (which == 1 && ! negated)) |
| strcat (buf, "<"); |
| else |
| strcat (buf, ">="); |
| if (negated) |
| strcat (buf, ",n %0,.+12\n\tbl %3,0"); |
| else |
| strcat (buf, ",n %0,.+12\n\tbl %2,0"); |
| } |
| /* Handle short backwards branch with an unfilled delay slot. |
| Using a bvb;nop rather than vextrs;bl saves 1 cycle for both |
| taken and untaken branches. This is only done when insn |
| addresses are available and the displacement passes |
| VAL_14_BITS_P. */ |
| else if (dbr_sequence_length () == 0 |
| && ! forward_branch_p (insn) |
| && insn_addresses |
| && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] |
| - insn_addresses[INSN_UID (insn)] - 8)) |
| { |
| strcpy (buf, "bvb,"); |
| if ((which == 0 && negated) |
| || (which == 1 && ! negated)) |
| strcat (buf, ">="); |
| else |
| strcat (buf, "<"); |
| if (negated) |
| strcat (buf, " %0,%3%#"); |
| else |
| strcat (buf, " %0,%2%#"); |
| } |
| else /* Otherwise: vextrs on the opposite condition, then a bl. */ |
| { |
| strcpy (buf, "vextrs,"); |
| if ((which == 0 && negated) |
| || (which == 1 && ! negated)) |
| strcat (buf, "<"); |
| else |
| strcat (buf, ">="); |
| if (nullify && negated) |
| strcat (buf, " %0,1,0\n\tbl,n %3,0"); |
| else if (nullify && ! negated) |
| strcat (buf, " %0,1,0\n\tbl,n %2,0"); |
| else if (negated) |
| strcat (buf, " %0,1,0\n\tbl %3,0"); |
| else |
| strcat (buf, " %0,1,0\n\tbl %2,0"); |
| } |
| break; |
| |
| default: |
| abort(); |
| } |
| return buf; |
| } |
| |
| /* Return the output template for emitting a dbra type insn. |
| |
| Note it may perform some output operations on its own before |
| returning the final output string. |
| |
| OPERANDS are the operands of the insn. In the templates %0 is the |
| counter that is updated, %1 the amount added to it, %2 the condition, |
| %3 the branch target and %4 a general register used as scratch by |
| alternatives 1 and 2. |
| INSN is the branch insn itself. |
| WHICH_ALTERNATIVE selects where the counter lives: 0 uses it directly |
| in addib/addi, 1 is the reload from an FP register, anything else is |
| the reload from memory. */ |
| char * |
| output_dbra (operands, insn, which_alternative) |
| rtx *operands; |
| rtx insn; |
| int which_alternative; |
| { |
| |
| /* A conditional branch to the following instruction (eg the delay slot) is |
| asking for a disaster. Be prepared! Only the addition is emitted |
| in that case; no branch is output. */ |
| |
| if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) |
| { |
| if (which_alternative == 0) |
| return "ldo %1(%0),%0"; |
| else if (which_alternative == 1) |
| { |
| output_asm_insn ("fstws %0,-16(0,%%r30)",operands); |
| output_asm_insn ("ldw -16(0,%%r30),%4",operands); |
| output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands); |
| return "fldws -16(0,%%r30),%0"; |
| } |
| else |
| { |
| output_asm_insn ("ldw %0,%4", operands); |
| return "ldo %1(%4),%4\n\tstw %4,%0"; |
| } |
| } |
| |
| if (which_alternative == 0) |
| { |
| int nullify = INSN_ANNULLED_BRANCH_P (insn); |
| int length = get_attr_length (insn); |
| |
| /* If this is a long branch with its delay slot unfilled, set `nullify' |
| as it can nullify the delay slot and save a nop. */ |
| if (length == 8 && dbr_sequence_length () == 0) |
| nullify = 1; |
| |
| /* If this is a short forward conditional branch which did not get |
| its delay slot filled, the delay slot can still be nullified. */ |
| if (! nullify && length == 4 && dbr_sequence_length () == 0) |
| nullify = forward_branch_p (insn); |
| |
| /* Handle short versions first. */ |
| if (length == 4 && nullify) |
| return "addib,%C2,n %1,%0,%3"; |
| else if (length == 4 && ! nullify) |
| return "addib,%C2 %1,%0,%3"; |
| else if (length == 8) |
| { |
| /* Handle weird backwards branch with a filled delay slot |
| which is nullified. */ |
| if (dbr_sequence_length () != 0 |
| && ! forward_branch_p (insn) |
| && nullify) |
| return "addib,%N2,n %1,%0,.+12\n\tbl %3,0"; |
| /* Handle short backwards branch with an unfilled delay slot. |
| Using a addb;nop rather than addi;bl saves 1 cycle for both |
| taken and untaken branches. */ |
| else if (dbr_sequence_length () == 0 |
| && ! forward_branch_p (insn) |
| && insn_addresses |
| && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] |
| - insn_addresses[INSN_UID (insn)] - 8)) |
| return "addib,%C2 %1,%0,%3%#"; |
| |
| /* Handle normal cases. */ |
| if (nullify) |
| return "addi,%N2 %1,%0,%0\n\tbl,n %3,0"; |
| else |
| return "addi,%N2 %1,%0,%0\n\tbl %3,0"; |
| } |
| else |
| abort(); /* Only lengths 4 and 8 are handled for alternative 0. */ |
| } |
| /* Deal with gross reload from FP register case. */ |
| else if (which_alternative == 1) |
| { |
| /* Move loop counter from FP register to MEM then into a GR, |
| increment the GR, store the GR into MEM, and finally reload |
| the FP register from MEM from within the branch's delay slot. |
| A length of 24 selects the single comb; any other length gets |
| the comclr;bl pair. */ |
| output_asm_insn ("fstws %0,-16(0,%%r30)\n\tldw -16(0,%%r30),%4",operands); |
| output_asm_insn ("ldo %1(%4),%4\n\tstw %4,-16(0,%%r30)", operands); |
| if (get_attr_length (insn) == 24) |
| return "comb,%S2 0,%4,%3\n\tfldws -16(0,%%r30),%0"; |
| else |
| return "comclr,%B2 0,%4,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0"; |
| } |
| /* Deal with gross reload from memory case. */ |
| else |
| { |
| /* Reload loop counter from memory, the store back to memory |
| happens in the branch's delay slot. A length of 12 selects the |
| single addib; any other length gets the addi;bl pair. */ |
| output_asm_insn ("ldw %0,%4", operands); |
| if (get_attr_length (insn) == 12) |
| return "addib,%C2 %1,%4,%3\n\tstw %4,%0"; |
| else |
| return "addi,%N2 %1,%4,%4\n\tbl %3,0\n\tstw %4,%0"; |
| } |
| } |
| |
| /* Return the output template for emitting a movb type insn. |
| |
| Note it may perform some output operations on its own before |
| returning the final output string. |
| |
| OPERANDS are the operands of the insn. In the templates %0 is the |
| destination, %1 the source register, %2 the condition and %3 the |
| branch target. |
| INSN is the branch insn itself. |
| WHICH_ALTERNATIVE selects the kind of destination: 0 is handled with |
| movb/or directly, 1 is loaded with fldws through a stack slot, 2 is |
| stored to with stw, and anything else is SAR (set with mtsar). |
| REVERSE_COMPARISON is nonzero to reverse the condition; the code of |
| operands[2] is then changed in place. */ |
| char * |
| output_movb (operands, insn, which_alternative, reverse_comparison) |
| rtx *operands; |
| rtx insn; |
| int which_alternative; |
| int reverse_comparison; |
| { |
| |
| /* A conditional branch to the following instruction (eg the delay slot) is |
| asking for a disaster. Be prepared! Only the move is emitted in |
| that case; no branch is output. */ |
| |
| if (next_active_insn (JUMP_LABEL (insn)) == next_active_insn (insn)) |
| { |
| if (which_alternative == 0) |
| return "copy %1,%0"; |
| else if (which_alternative == 1) |
| { |
| output_asm_insn ("stw %1,-16(0,%%r30)",operands); |
| return "fldws -16(0,%%r30),%0"; |
| } |
| else if (which_alternative == 2) |
| return "stw %1,%0"; |
| else |
| return "mtsar %r1"; |
| } |
| |
| /* Support the second variant. Note this modifies operands[2] itself |
| rather than a copy. */ |
| if (reverse_comparison) |
| PUT_CODE (operands[2], reverse_condition (GET_CODE (operands[2]))); |
| |
| if (which_alternative == 0) |
| { |
| int nullify = INSN_ANNULLED_BRANCH_P (insn); |
| int length = get_attr_length (insn); |
| |
| /* If this is a long branch with its delay slot unfilled, set `nullify' |
| as it can nullify the delay slot and save a nop. */ |
| if (length == 8 && dbr_sequence_length () == 0) |
| nullify = 1; |
| |
| /* If this is a short forward conditional branch which did not get |
| its delay slot filled, the delay slot can still be nullified. */ |
| if (! nullify && length == 4 && dbr_sequence_length () == 0) |
| nullify = forward_branch_p (insn); |
| |
| /* Handle short versions first. */ |
| if (length == 4 && nullify) |
| return "movb,%C2,n %1,%0,%3"; |
| else if (length == 4 && ! nullify) |
| return "movb,%C2 %1,%0,%3"; |
| else if (length == 8) |
| { |
| /* Handle weird backwards branch with a filled delay slot |
| which is nullified. */ |
| if (dbr_sequence_length () != 0 |
| && ! forward_branch_p (insn) |
| && nullify) |
| return "movb,%N2,n %1,%0,.+12\n\tbl %3,0"; |
| |
| /* Handle short backwards branch with an unfilled delay slot. |
| Using a movb;nop rather than or;bl saves 1 cycle for both |
| taken and untaken branches. */ |
| else if (dbr_sequence_length () == 0 |
| && ! forward_branch_p (insn) |
| && insn_addresses |
| && VAL_14_BITS_P (insn_addresses[INSN_UID (JUMP_LABEL (insn))] |
| - insn_addresses[INSN_UID (insn)] - 8)) |
| return "movb,%C2 %1,%0,%3%#"; |
| /* Handle normal cases. */ |
| if (nullify) |
| return "or,%N2 %1,%%r0,%0\n\tbl,n %3,0"; |
| else |
| return "or,%N2 %1,%%r0,%0\n\tbl %3,0"; |
| } |
| else |
| abort(); /* Only lengths 4 and 8 are handled for alternative 0. */ |
| } |
| /* Deal with gross reload from FP register case. */ |
| else if (which_alternative == 1) |
| { |
| /* Store the source register %1 into the stack slot at -16(%r30), |
| branch on %1, and load %0 from that slot with fldws from within |
| the branch's delay slot. A length of 12 selects the single |
| comb; any other length gets the comclr;bl pair. */ |
| output_asm_insn ("stw %1,-16(0,%%r30)",operands); |
| if (get_attr_length (insn) == 12) |
| return "comb,%S2 0,%1,%3\n\tfldws -16(0,%%r30),%0"; |
| else |
| return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tfldws -16(0,%%r30),%0"; |
| } |
| /* Deal with gross reload from memory case. */ |
| else if (which_alternative == 2) |
| { |
| /* Branch on %1; the store of %1 into the memory destination %0 |
| happens in the branch's delay slot. A length of 8 selects the |
| single comb; any other length gets the comclr;bl pair. */ |
| if (get_attr_length (insn) == 8) |
| return "comb,%S2 0,%1,%3\n\tstw %1,%0"; |
| else |
| return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tstw %1,%0"; |
| } |
| /* Handle SAR as a destination. The mtsar goes in the branch's delay |
| slot. */ |
| else |
| { |
| if (get_attr_length (insn) == 8) |
| return "comb,%S2 0,%1,%3\n\tmtsar %r1"; |
| else |
| return "comclr,%B2 0,%1,0\n\tbl %3,0\n\tmtsar %r1"; |
| } |
| } |
| |
| |
| /* INSN is a millicode call. It may have an unconditional jump in its delay |
| slot. |
| |
| CALL_DEST is the routine we are calling. |
| |
| All assembly is written here with output_asm_insn, so the returned |
| template is always the empty string. A delay slot insn that has been |
| output or folded into the sequence is turned into a deleted note so it |
| is not emitted again. */ |
| |
| char * |
| output_millicode_call (insn, call_dest) |
| rtx insn; |
| rtx call_dest; |
| { |
| int distance; |
| rtx xoperands[4]; |
| rtx seq_insn; |
| |
| /* Handle common case -- empty delay slot or no jump in the delay slot, |
| and we're sure that the branch will reach the beginning of the $CODE$ |
| subspace. */ |
| if ((dbr_sequence_length () == 0 |
| && (get_attr_length (insn) == 8 || get_attr_length (insn) == 28)) |
| || (dbr_sequence_length () != 0 |
| && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN |
| && get_attr_length (insn) == 4)) |
| { |
| xoperands[0] = call_dest; |
| output_asm_insn ("bl %0,%%r31%#", xoperands); |
| return ""; |
| } |
| |
| /* This call may not reach the beginning of the $CODE$ subspace. */ |
| if (get_attr_length (insn) > 4) |
| { |
| int delay_insn_deleted = 0; |
| rtx xoperands[2]; /* Shadows the outer xoperands within this block. */ |
| |
| /* We need to emit an inline long-call branch. */ |
| if (dbr_sequence_length () != 0 |
| && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) |
| { |
| /* A non-jump insn in the delay slot. By definition we can |
| emit this insn before the call. */ |
| final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0); |
| |
| /* Now delete the delay insn. */ |
| PUT_CODE (NEXT_INSN (insn), NOTE); |
| NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; |
| NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; |
| delay_insn_deleted = 1; |
| } |
| |
| /* If we're allowed to use be/ble instructions, then this is the |
| best sequence to use for a long millicode call. */ |
| if (TARGET_NO_SPACE_REGS || TARGET_FAST_INDIRECT_CALLS |
| || ! (flag_pic || TARGET_PORTABLE_RUNTIME)) |
| { |
| xoperands[0] = call_dest; |
| output_asm_insn ("ldil L%%%0,%%r31", xoperands); |
| output_asm_insn ("ble R%%%0(%%sr4,%%r31)", xoperands); |
| output_asm_insn ("nop", xoperands); |
| } |
| /* Pure portable runtime doesn't allow be/ble; we also don't have |
| PIC support in the assembler/linker, so this sequence is needed. */ |
| else if (TARGET_PORTABLE_RUNTIME) |
| { |
| xoperands[0] = call_dest; |
| /* Get the address of our target into %r29. */ |
| output_asm_insn ("ldil L%%%0,%%r29", xoperands); |
| output_asm_insn ("ldo R%%%0(%%r29),%%r29", xoperands); |
| |
| /* Get our return address into %r31. */ |
| output_asm_insn ("blr 0,%%r31", xoperands); |
| |
| /* Jump to our target address in %r29. */ |
| output_asm_insn ("bv,n 0(%%r29)", xoperands); |
| |
| /* Empty delay slot. Note this insn gets fetched twice and |
| executed once. To be safe we use a nop. */ |
| output_asm_insn ("nop", xoperands); |
| return ""; /* NOTE(review): unlike the other two sequences this |
| returns before the delay-slot jump handling at the end of this |
| block -- confirm a jump cannot sit in the delay slot here. */ |
| } |
| /* PIC long millicode call sequence. */ |
| else |
| { |
| xoperands[0] = call_dest; |
| xoperands[1] = gen_label_rtx (); |
| /* Get our address + 8 into %r1. */ |
| output_asm_insn ("bl .+8,%%r1", xoperands); |
| |
| /* Add %r1 to the offset of our target from the next insn. */ |
| output_asm_insn ("addil L%%%0-%1,%%r1", xoperands); |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", |
| CODE_LABEL_NUMBER (xoperands[1])); |
| output_asm_insn ("ldo R%%%0-%1(%%r1),%%r1", xoperands); |
| |
| /* Get the return address into %r31. */ |
| output_asm_insn ("blr 0,%%r31", xoperands); |
| |
| /* Branch to our target which is in %r1. */ |
| output_asm_insn ("bv,n 0(%%r1)", xoperands); |
| |
| /* Empty delay slot. Note this insn gets fetched twice and |
| executed once. To be safe we use a nop. */ |
| output_asm_insn ("nop", xoperands); |
| } |
| |
| /* If we had a jump in the call's delay slot, output it now. */ |
| if (dbr_sequence_length () != 0 |
| && !delay_insn_deleted) |
| { |
| xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); |
| output_asm_insn ("b,n %0", xoperands); |
| |
| /* Now delete the delay insn. */ |
| PUT_CODE (NEXT_INSN (insn), NOTE); |
| NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; |
| NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; |
| } |
| return ""; |
| } |
| |
| /* This call has an unconditional jump in its delay slot and the |
| call is known to reach its target or the beginning of the current |
| subspace. */ |
| |
| /* Use the containing sequence insn's address. */ |
| seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); |
| |
| distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))] |
| - insn_addresses[INSN_UID (seq_insn)] - 8; |
| |
| /* If the branch was too far away, emit a normal call followed |
| by a nop, followed by the unconditional branch. |
| |
| If the branch is close, then adjust %r31 (the return pointer used |
| by the bl below) from within the call's delay slot, using the |
| difference between the jump target and a label placed right after |
| the delay slot. */ |
| |
| xoperands[0] = call_dest; |
| xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1); |
| if (! VAL_14_BITS_P (distance)) |
| output_asm_insn ("bl %0,%%r31\n\tnop\n\tbl,n %1,%%r0", xoperands); |
| else |
| { |
| xoperands[3] = gen_label_rtx (); |
| output_asm_insn ("\n\tbl %0,%%r31\n\tldo %1-%3(%%r31),%%r31", xoperands); |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", |
| CODE_LABEL_NUMBER (xoperands[3])); |
| } |
| |
| /* Delete the jump. */ |
| PUT_CODE (NEXT_INSN (insn), NOTE); |
| NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; |
| NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; |
| return ""; |
| } |
| |
| extern struct obstack permanent_obstack; /* Holds data that must live until the end of the compilation unit. */ |
| extern struct obstack *saveable_obstack; /* Declared here; defined elsewhere. */ |
| extern struct obstack *rtl_obstack; /* Redirected to permanent_obstack by output_call while it creates a plabel. */ |
| extern struct obstack *current_obstack; /* Likewise saved, redirected and restored by output_call. */ |
| |
| /* INSN is a function call. It may have an unconditional jump |
| in its delay slot. |
| |
| CALL_DEST is the routine we are calling. */ |
| |
| char * |
| output_call (insn, call_dest) |
| rtx insn; |
| rtx call_dest; |
| { |
| int distance; |
| rtx xoperands[4]; |
| rtx seq_insn; |
| |
| /* Handle common case -- empty delay slot or no jump in the delay slot, |
| and we're sure that the branch will reach the beginning of the $CODE$ |
| subspace. */ |
| if ((dbr_sequence_length () == 0 |
| && get_attr_length (insn) == 8) |
| || (dbr_sequence_length () != 0 |
| && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN |
| && get_attr_length (insn) == 4)) |
| { |
| xoperands[0] = call_dest; |
| output_asm_insn ("bl %0,%%r2%#", xoperands); |
| return ""; |
| } |
| |
| /* This call may not reach the beginning of the $CODE$ subspace. */ |
| if (get_attr_length (insn) > 8) |
| { |
| int delay_insn_deleted = 0; |
| rtx xoperands[2]; |
| rtx link; |
| |
| /* We need to emit an inline long-call branch. Furthermore, |
| because we're changing a named function call into an indirect |
| function call well after the parameters have been set up, we |
| need to make sure any FP args appear in both the integer |
| and FP registers. Also, we need move any delay slot insn |
| out of the delay slot. And finally, we can't rely on the linker |
| being able to fix the call to $$dyncall! -- Yuk!. */ |
| if (dbr_sequence_length () != 0 |
| && GET_CODE (NEXT_INSN (insn)) != JUMP_INSN) |
| { |
| /* A non-jump insn in the delay slot. By definition we can |
| emit this insn before the call (and in fact before argument |
| relocating. */ |
| final_scan_insn (NEXT_INSN (insn), asm_out_file, optimize, 0, 0); |
| |
| /* Now delete the delay insn. */ |
| PUT_CODE (NEXT_INSN (insn), NOTE); |
| NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; |
| NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; |
| delay_insn_deleted = 1; |
| } |
| |
| /* Now copy any FP arguments into integer registers. */ |
| for (link = CALL_INSN_FUNCTION_USAGE (insn); link; link = XEXP (link, 1)) |
| { |
| int arg_mode, regno; |
| rtx use = XEXP (link, 0); |
| if (! (GET_CODE (use) == USE |
| && GET_CODE (XEXP (use, 0)) == REG |
| && FUNCTION_ARG_REGNO_P (REGNO (XEXP (use, 0))))) |
| continue; |
| |
| arg_mode = GET_MODE (XEXP (use, 0)); |
| regno = REGNO (XEXP (use, 0)); |
| /* Is it a floating point register? */ |
| if (regno >= 32 && regno <= 39) |
| { |
| /* Copy from the FP register into an integer register |
| (via memory). */ |
| if (arg_mode == SFmode) |
| { |
| xoperands[0] = XEXP (use, 0); |
| xoperands[1] = gen_rtx_REG (SImode, 26 - (regno - 32) / 2); |
| output_asm_insn ("fstws %0,-16(%%sr0,%%r30)", xoperands); |
| output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); |
| } |
| else |
| { |
| xoperands[0] = XEXP (use, 0); |
| xoperands[1] = gen_rtx_REG (DImode, 25 - (regno - 34) / 2); |
| output_asm_insn ("fstds %0,-16(%%sr0,%%r30)", xoperands); |
| output_asm_insn ("ldw -12(%%sr0,%%r30),%R1", xoperands); |
| output_asm_insn ("ldw -16(%%sr0,%%r30),%1", xoperands); |
| } |
| } |
| } |
| |
| /* Don't have to worry about TARGET_PORTABLE_RUNTIME here since |
| we don't have any direct calls in that case. */ |
| { |
| int i; |
| char *name = XSTR (call_dest, 0); |
| |
| /* See if we have already put this function on the list |
| of deferred plabels. This list is generally small, |
| so a liner search is not too ugly. If it proves too |
| slow replace it with something faster. */ |
| for (i = 0; i < n_deferred_plabels; i++) |
| if (strcmp (name, deferred_plabels[i].name) == 0) |
| break; |
| |
| /* If the deferred plabel list is empty, or this entry was |
| not found on the list, create a new entry on the list. */ |
| if (deferred_plabels == NULL || i == n_deferred_plabels) |
| { |
| struct obstack *ambient_obstack = current_obstack; |
| struct obstack *ambient_rtl_obstack = rtl_obstack; |
| char *real_name; |
| |
| /* Any RTL we create here needs to live until the end of |
| the compilation unit and therefore must live on the |
| permanent obstack. */ |
| current_obstack = &permanent_obstack; |
| rtl_obstack = &permanent_obstack; |
| |
| if (deferred_plabels == 0) |
| deferred_plabels = (struct deferred_plabel *) |
| xmalloc (1 * sizeof (struct deferred_plabel)); |
| else |
| deferred_plabels = (struct deferred_plabel *) |
| xrealloc (deferred_plabels, |
| ((n_deferred_plabels + 1) |
| * sizeof (struct deferred_plabel))); |
| |
| i = n_deferred_plabels++; |
| deferred_plabels[i].internal_label = gen_label_rtx (); |
| deferred_plabels[i].name = obstack_alloc (&permanent_obstack, |
| strlen (name) + 1); |
| strcpy (deferred_plabels[i].name, name); |
| |
| /* Switch back to normal obstack allocation. */ |
| current_obstack = ambient_obstack; |
| rtl_obstack = ambient_rtl_obstack; |
| |
| /* Gross. We have just implicitly taken the address of this |
| function, mark it as such. */ |
| STRIP_NAME_ENCODING (real_name, name); |
| TREE_SYMBOL_REFERENCED (get_identifier (real_name)) = 1; |
| } |
| |
| /* We have to load the address of the function using a procedure |
| label (plabel). Inline plabels can lose for PIC and other |
| cases, so avoid them by creating a 32bit plabel in the data |
| segment. */ |
| if (flag_pic) |
| { |
| xoperands[0] = deferred_plabels[i].internal_label; |
| xoperands[1] = gen_label_rtx (); |
| |
| output_asm_insn ("addil LT%%%0,%%r19", xoperands); |
| output_asm_insn ("ldw RT%%%0(%%r1),%%r22", xoperands); |
| output_asm_insn ("ldw 0(0,%%r22),%%r22", xoperands); |
| |
| /* Get our address + 8 into %r1. */ |
| output_asm_insn ("bl .+8,%%r1", xoperands); |
| |
| /* Add %r1 to the offset of dyncall from the next insn. */ |
| output_asm_insn ("addil L%%$$dyncall-%1,%%r1", xoperands); |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", |
| CODE_LABEL_NUMBER (xoperands[1])); |
| output_asm_insn ("ldo R%%$$dyncall-%1(%%r1),%%r1", xoperands); |
| |
| /* Get the return address into %r31. */ |
| output_asm_insn ("blr 0,%%r31", xoperands); |
| |
| /* Branch to our target which is in %r1. */ |
| output_asm_insn ("bv 0(%%r1)", xoperands); |
| |
| /* Copy the return address into %r2 also. */ |
| output_asm_insn ("copy %%r31,%%r2", xoperands); |
| } |
| else |
| { |
| xoperands[0] = deferred_plabels[i].internal_label; |
| |
| /* Get the address of our target into %r22. */ |
| output_asm_insn ("addil LR%%%0-$global$,%%r27", xoperands); |
| output_asm_insn ("ldw RR%%%0-$global$(%%r1),%%r22", xoperands); |
| |
| /* Get the high part of the address of $dyncall into %r2, then |
| add in the low part in the branch instruction. */ |
| output_asm_insn ("ldil L%%$$dyncall,%%r2", xoperands); |
| output_asm_insn ("ble R%%$$dyncall(%%sr4,%%r2)", xoperands); |
| |
| /* Copy the return pointer into both %r31 and %r2. */ |
| output_asm_insn ("copy %%r31,%%r2", xoperands); |
| } |
| } |
| |
| /* If we had a jump in the call's delay slot, output it now. */ |
| if (dbr_sequence_length () != 0 |
| && !delay_insn_deleted) |
| { |
| xoperands[0] = XEXP (PATTERN (NEXT_INSN (insn)), 1); |
| output_asm_insn ("b,n %0", xoperands); |
| |
| /* Now delete the delay insn. */ |
| PUT_CODE (NEXT_INSN (insn), NOTE); |
| NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; |
| NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; |
| } |
| return ""; |
| } |
| |
| /* This call has an unconditional jump in its delay slot and the |
| call is known to reach its target or the beginning of the current |
| subspace. */ |
| |
| /* Use the containing sequence insn's address. */ |
| seq_insn = NEXT_INSN (PREV_INSN (XVECEXP (final_sequence, 0, 0))); |
| |
| distance = insn_addresses[INSN_UID (JUMP_LABEL (NEXT_INSN (insn)))] |
| - insn_addresses[INSN_UID (seq_insn)] - 8; |
| |
| /* If the branch was too far away, emit a normal call followed |
| by a nop, followed by the unconditional branch. |
| |
| If the branch is close, then adjust %r2 from within the |
| call's delay slot. */ |
| |
| xoperands[0] = call_dest; |
| xoperands[1] = XEXP (PATTERN (NEXT_INSN (insn)), 1); |
| if (! VAL_14_BITS_P (distance)) |
| output_asm_insn ("bl %0,%%r2\n\tnop\n\tbl,n %1,%%r0", xoperands); |
| else |
| { |
| xoperands[3] = gen_label_rtx (); |
| output_asm_insn ("\n\tbl %0,%%r2\n\tldo %1-%3(%%r2),%%r2", xoperands); |
| ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", |
| CODE_LABEL_NUMBER (xoperands[3])); |
| } |
| |
| /* Delete the jump. */ |
| PUT_CODE (NEXT_INSN (insn), NOTE); |
| NOTE_LINE_NUMBER (NEXT_INSN (insn)) = NOTE_INSN_DELETED; |
| NOTE_SOURCE_FILE (NEXT_INSN (insn)) = 0; |
| return ""; |
| } |
| |
| /* In HPUX 8.0's shared library scheme, special relocations are needed |
| for function labels if they might be passed to a function |
| in a shared library (because shared libraries don't live in code |
| space), and special magic is needed to construct their address. |
| |
| For reasons too disgusting to describe storage for the new name |
| is allocated either on the saveable_obstack (released at function |
| exit) or on the permanent_obstack for things that can never change |
| (libcall names for example). */ |
| |
| void |
| hppa_encode_label (sym, permanent) |
| rtx sym; |
| int permanent; |
| { |
| char *str = XSTR (sym, 0); |
| int len = strlen (str); |
| char *newstr; |
| |
| newstr = obstack_alloc ((permanent ? &permanent_obstack : saveable_obstack), |
| len + 2); |
| |
| if (str[0] == '*') |
| *newstr++ = *str++; |
| strcpy (newstr + 1, str); |
| *newstr = '@'; |
| XSTR (sym,0) = newstr; |
| } |
| |
| int |
| function_label_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return GET_CODE (op) == SYMBOL_REF && FUNCTION_NAME_P (XSTR (op, 0)); |
| } |
| |
| /* Returns 1 if OP is a function label involved in a simple addition |
| with a constant. Used to keep certain patterns from matching |
| during instruction combination. */ |
| int |
| is_function_label_plus_const (op) |
| rtx op; |
| { |
| /* Strip off any CONST. */ |
| if (GET_CODE (op) == CONST) |
| op = XEXP (op, 0); |
| |
| return (GET_CODE (op) == PLUS |
| && function_label_operand (XEXP (op, 0), Pmode) |
| && GET_CODE (XEXP (op, 1)) == CONST_INT); |
| } |
| |
| /* Returns 1 if the 6 operands specified in OPERANDS are suitable for |
| use in fmpyadd instructions. */ |
| int |
| fmpyaddoperands (operands) |
| rtx *operands; |
| { |
| enum machine_mode mode = GET_MODE (operands[0]); |
| |
| /* Must be a floating point mode. */ |
| if (mode != SFmode && mode != DFmode) |
| return 0; |
| |
| /* All modes must be the same. */ |
| if (! (mode == GET_MODE (operands[1]) |
| && mode == GET_MODE (operands[2]) |
| && mode == GET_MODE (operands[3]) |
| && mode == GET_MODE (operands[4]) |
| && mode == GET_MODE (operands[5]))) |
| return 0; |
| |
| /* All operands must be registers. */ |
| if (! (GET_CODE (operands[1]) == REG |
| && GET_CODE (operands[2]) == REG |
| && GET_CODE (operands[3]) == REG |
| && GET_CODE (operands[4]) == REG |
| && GET_CODE (operands[5]) == REG)) |
| return 0; |
| |
| /* Only 2 real operands to the addition. One of the input operands must |
| be the same as the output operand. */ |
| if (! rtx_equal_p (operands[3], operands[4]) |
| && ! rtx_equal_p (operands[3], operands[5])) |
| return 0; |
| |
| /* Inout operand of add can not conflict with any operands from multiply. */ |
| if (rtx_equal_p (operands[3], operands[0]) |
| || rtx_equal_p (operands[3], operands[1]) |
| || rtx_equal_p (operands[3], operands[2])) |
| return 0; |
| |
| /* multiply can not feed into addition operands. */ |
| if (rtx_equal_p (operands[4], operands[0]) |
| || rtx_equal_p (operands[5], operands[0])) |
| return 0; |
| |
| /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ |
| if (mode == SFmode |
| && (REGNO (operands[0]) < 57 |
| || REGNO (operands[1]) < 57 |
| || REGNO (operands[2]) < 57 |
| || REGNO (operands[3]) < 57 |
| || REGNO (operands[4]) < 57 |
| || REGNO (operands[5]) < 57)) |
| return 0; |
| |
| /* Passed. Operands are suitable for fmpyadd. */ |
| return 1; |
| } |
| |
| /* Returns 1 if the 6 operands specified in OPERANDS are suitable for |
| use in fmpysub instructions. */ |
| int |
| fmpysuboperands (operands) |
| rtx *operands; |
| { |
| enum machine_mode mode = GET_MODE (operands[0]); |
| |
| /* Must be a floating point mode. */ |
| if (mode != SFmode && mode != DFmode) |
| return 0; |
| |
| /* All modes must be the same. */ |
| if (! (mode == GET_MODE (operands[1]) |
| && mode == GET_MODE (operands[2]) |
| && mode == GET_MODE (operands[3]) |
| && mode == GET_MODE (operands[4]) |
| && mode == GET_MODE (operands[5]))) |
| return 0; |
| |
| /* All operands must be registers. */ |
| if (! (GET_CODE (operands[1]) == REG |
| && GET_CODE (operands[2]) == REG |
| && GET_CODE (operands[3]) == REG |
| && GET_CODE (operands[4]) == REG |
| && GET_CODE (operands[5]) == REG)) |
| return 0; |
| |
| /* Only 2 real operands to the subtraction. Subtraction is not a commutative |
| operation, so operands[4] must be the same as operand[3]. */ |
| if (! rtx_equal_p (operands[3], operands[4])) |
| return 0; |
| |
| /* multiply can not feed into subtraction. */ |
| if (rtx_equal_p (operands[5], operands[0])) |
| return 0; |
| |
| /* Inout operand of sub can not conflict with any operands from multiply. */ |
| if (rtx_equal_p (operands[3], operands[0]) |
| || rtx_equal_p (operands[3], operands[1]) |
| || rtx_equal_p (operands[3], operands[2])) |
| return 0; |
| |
| /* SFmode limits the registers to the upper 32 of the 32bit FP regs. */ |
| if (mode == SFmode |
| && (REGNO (operands[0]) < 57 |
| || REGNO (operands[1]) < 57 |
| || REGNO (operands[2]) < 57 |
| || REGNO (operands[3]) < 57 |
| || REGNO (operands[4]) < 57 |
| || REGNO (operands[5]) < 57)) |
| return 0; |
| |
| /* Passed. Operands are suitable for fmpysub. */ |
| return 1; |
| } |
| |
| int |
| plus_xor_ior_operator (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == PLUS || GET_CODE (op) == XOR |
| || GET_CODE (op) == IOR); |
| } |
| |
/* Return 1 if VAL is 2, 4, or 8, and 0 for any other value.  These
   are the valid constants for shadd instructions.  */
static int
shadd_constant_p (val)
     int val;
{
  switch (val)
    {
    case 2:
    case 4:
    case 8:
      return 1;

    default:
      return 0;
    }
}
| |
| /* Return 1 if OP is a CONST_INT with the value 2, 4, or 8. These are |
| the valid constant for shadd instructions. */ |
| int |
| shadd_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == CONST_INT && shadd_constant_p (INTVAL (op))); |
| } |
| |
| /* Return 1 if OP is valid as a base register in a reg + reg address. */ |
| |
| int |
| basereg_operand (op, mode) |
| rtx op; |
| enum machine_mode mode; |
| { |
| /* cse will create some unscaled indexed addresses, however; it |
| generally isn't a win on the PA, so avoid creating unscaled |
| indexed addresses until after cse is finished. */ |
| if (!cse_not_expected) |
| return 0; |
| |
| /* Once reload has started everything is considered valid. Reload should |
| only create indexed addresses using the stack/frame pointer, and any |
| others were checked for validity when created by the combine pass. |
| |
| Also allow any register when TARGET_NO_SPACE_REGS is in effect since |
| we don't have to worry about the braindamaged implicit space register |
| selection using the basereg only (rather than effective address) |
| screwing us over. */ |
| if (TARGET_NO_SPACE_REGS || reload_in_progress || reload_completed) |
| return (GET_CODE (op) == REG); |
| |
| /* Stack is always OK for indexing. */ |
| if (op == stack_pointer_rtx) |
| return 1; |
| |
| /* While it's always safe to index off the frame pointer, it's not |
| always profitable, particularly when the frame pointer is being |
| eliminated. */ |
| if (! flag_omit_frame_pointer && op == frame_pointer_rtx) |
| return 1; |
| |
| /* The only other valid OPs are pseudo registers with |
| REGNO_POINTER_FLAG set. */ |
| if (GET_CODE (op) != REG |
| || REGNO (op) < FIRST_PSEUDO_REGISTER |
| || ! register_operand (op, mode)) |
| return 0; |
| |
| return REGNO_POINTER_FLAG (REGNO (op)); |
| } |
| |
| /* Return 1 if this operand is anything other than a hard register. */ |
| |
| int |
| non_hard_reg_operand (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return ! (GET_CODE (op) == REG && REGNO (op) < FIRST_PSEUDO_REGISTER); |
| } |
| |
| /* Return 1 if INSN branches forward. Should be using insn_addresses |
| to avoid walking through all the insns... */ |
| static int |
| forward_branch_p (insn) |
| rtx insn; |
| { |
| rtx label = JUMP_LABEL (insn); |
| |
| while (insn) |
| { |
| if (insn == label) |
| break; |
| else |
| insn = NEXT_INSN (insn); |
| } |
| |
| return (insn == label); |
| } |
| |
| /* Return 1 if OP is an equality comparison, else return 0. */ |
| int |
| eq_neq_comparison_operator (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == EQ || GET_CODE (op) == NE); |
| } |
| |
| /* Return 1 if OP is an operator suitable for use in a movb instruction. */ |
| int |
| movb_comparison_operator (op, mode) |
| rtx op; |
| enum machine_mode mode ATTRIBUTE_UNUSED; |
| { |
| return (GET_CODE (op) == EQ || GET_CODE (op) == NE |
| || GET_CODE (op) == LT || GET_CODE (op) == GE); |
| } |
| |
| /* Return 1 if INSN is in the delay slot of a call instruction. */ |
| int |
| jump_in_call_delay (insn) |
| rtx insn; |
| { |
| |
| if (GET_CODE (insn) != JUMP_INSN) |
| return 0; |
| |
| if (PREV_INSN (insn) |
| && PREV_INSN (PREV_INSN (insn)) |
| && GET_CODE (next_active_insn (PREV_INSN (PREV_INSN (insn)))) == INSN) |
| { |
| rtx test_insn = next_active_insn (PREV_INSN (PREV_INSN (insn))); |
| |
| return (GET_CODE (PATTERN (test_insn)) == SEQUENCE |
| && XVECEXP (PATTERN (test_insn), 0, 1) == insn); |
| |
| } |
| else |
| return 0; |
| } |
| |
| /* Output an unconditional move and branch insn. */ |
| |
| char * |
| output_parallel_movb (operands, length) |
| rtx *operands; |
| int length; |
| { |
| /* These are the cases in which we win. */ |
| if (length == 4) |
| return "mov%I1b,tr %1,%0,%2"; |
| |
| /* None of these cases wins, but they don't lose either. */ |
| if (dbr_sequence_length () == 0) |
| { |
| /* Nothing in the delay slot, fake it by putting the combined |
| insn (the copy or add) in the delay slot of a bl. */ |
| if (GET_CODE (operands[1]) == CONST_INT) |
| return "bl %2,0\n\tldi %1,%0"; |
| else |
| return "bl %2,0\n\tcopy %1,%0"; |
| } |
| else |
| { |
| /* Something in the delay slot, but we've got a long branch. */ |
| if (GET_CODE (operands[1]) == CONST_INT) |
| return "ldi %1,%0\n\tbl %2,0"; |
| else |
| return "copy %1,%0\n\tbl %2,0"; |
| } |
| } |
| |
| /* Output an unconditional add and branch insn. */ |
| |
| char * |
| output_parallel_addb (operands, length) |
| rtx *operands; |
| int length; |
| { |
| /* To make life easy we want operand0 to be the shared input/output |
| operand and operand1 to be the readonly operand. */ |
| if (operands[0] == operands[1]) |
| operands[1] = operands[2]; |
| |
| /* These are the cases in which we win. */ |
| if (length == 4) |
| return "add%I1b,tr %1,%0,%3"; |
| |
| /* None of these cases win, but they don't lose either. */ |
| if (dbr_sequence_length () == 0) |
| { |
| /* Nothing in the delay slot, fake it by putting the combined |
| insn (the copy or add) in the delay slot of a bl. */ |
| return "bl %3,0\n\tadd%I1 %1,%0,%0"; |
| } |
| else |
| { |
| /* Something in the delay slot, but we've got a long branch. */ |
| return "add%I1 %1,%0,%0\n\tbl %3,0"; |
| } |
| } |
| |
| /* Return nonzero if INSN (a jump insn) immediately follows a call to |
| a named function. This is used to discourage creating parallel movb/addb |
| insns since a jump which immediately follows a call can execute in the |
| delay slot of the call. |
| |
| It is also used to avoid filling the delay slot of a jump which |
| immediately follows a call since the jump can usually be eliminated |
| completely by modifying RP in the delay slot of the call. */ |
| |
| int |
| following_call (insn) |
| rtx insn; |
| { |
| /* Find the previous real insn, skipping NOTEs. */ |
| insn = PREV_INSN (insn); |
| while (insn && GET_CODE (insn) == NOTE) |
| insn = PREV_INSN (insn); |
| |
| /* Check for CALL_INSNs and millicode calls. */ |
| if (insn |
| && ((GET_CODE (insn) == CALL_INSN |
| && get_attr_type (insn) != TYPE_DYNCALL) |
| || (GET_CODE (insn) == INSN |
| && GET_CODE (PATTERN (insn)) != SEQUENCE |
| && GET_CODE (PATTERN (insn)) != USE |
| && GET_CODE (PATTERN (insn)) != CLOBBER |
| && get_attr_type (insn) == TYPE_MILLI))) |
| return 1; |
| |
| return 0; |
| } |
| |
| /* Restore any INSN_CODEs for insns with unscaled indexed addresses since |
| the INSN_CODE might be clobberd by rerecognition triggered by reorg. */ |
| |
| static void |
| restore_unscaled_index_insn_codes (insns) |
| rtx insns; |
| { |
| rtx insn; |
| |
| for (insn = insns; insn; insn = NEXT_INSN (insn)) |
| { |
| if (INSN_UID (insn) < max_unscaled_index_insn_codes_uid |
| && unscaled_index_insn_codes[INSN_UID (insn)] != -1) |
| INSN_CODE (insn) = unscaled_index_insn_codes[INSN_UID (insn)]; |
| } |
| } |
| |
| /* Severe braindamage: |
| |
| On the PA, address computations within MEM expressions are not |
| commutative because of the implicit space register selection |
| from the base register (instead of the entire effective address). |
| |
| Because of this mis-feature we have to know which register in a reg+reg |
| address is the base and which is the index. |
| |
| Before reload, the base can be identified by REGNO_POINTER_FLAG. We use |
| this to force base + index addresses to match a different insn than |
| index + base addresses. |
| |
| We assume that no pass during or after reload creates new unscaled indexed |
| addresses, so any unscaled indexed address we find after reload must have |
| at one time been recognized a base + index or index + base and we accept |
| any register as a base register. |
| |
| This scheme assumes that no pass during/after reload will rerecognize an |
| insn with an unscaled indexed address. This failed due to a reorg call |
| to rerecognize certain insns. |
| |
| So, we record if an insn uses an unscaled indexed address and which |
| register is the base (via recording of the INSN_CODE for such insns). |
| |
| Just before we output code for the function, we make sure all the insns |
| using unscaled indexed addresses have the same INSN_CODE as they did |
| immediately before delay slot scheduling. |
| |
| This is extremely gross. Long term, I'd like to be able to look at |
| REG_POINTER_FLAG to handle these kinds of problems. */ |
| |
| static void |
| record_unscaled_index_insn_codes (insns) |
| rtx insns; |
| { |
| rtx insn; |
| |
| max_unscaled_index_insn_codes_uid = get_max_uid (); |
| unscaled_index_insn_codes |
| = (int *)xmalloc (max_unscaled_index_insn_codes_uid * sizeof (int)); |
| memset (unscaled_index_insn_codes, -1, |
| max_unscaled_index_insn_codes_uid * sizeof (int)); |
| |
| for (insn = insns; insn; insn = NEXT_INSN (insn)) |
| { |
| rtx set = single_set (insn); |
| rtx mem = NULL_RTX; |
| |
| /* Ignore anything that isn't a normal SET. */ |
| if (set == NULL_RTX) |
| continue; |
| |
| /* No insns can have more than one MEM. */ |
| if (GET_CODE (SET_SRC (set)) == MEM) |
| mem = SET_SRC (set); |
| |
| if (GET_CODE (SET_DEST (set)) == MEM) |
| mem = SET_DEST (set); |
| |
| /* If neither operand is a mem, then there's nothing to do. */ |
| if (mem == NULL_RTX) |
| continue; |
| |
| if (GET_CODE (XEXP (mem, 0)) != PLUS) |
| continue; |
| |
| /* If both are REGs (or SUBREGs), then record the insn code for |
| this insn. */ |
| if (REG_P (XEXP (XEXP (mem, 0), 0)) && REG_P (XEXP (XEXP (mem, 0), 1))) |
| unscaled_index_insn_codes[INSN_UID (insn)] = INSN_CODE (insn); |
| } |
| } |
| |
| /* We use this hook to perform a PA specific optimization which is difficult |
| to do in earlier passes. |
| |
| We want the delay slots of branches within jump tables to be filled. |
| None of the compiler passes at the moment even has the notion that a |
| PA jump table doesn't contain addresses, but instead contains actual |
| instructions! |
| |
| Because we actually jump into the table, the addresses of each entry |
| must stay constant in relation to the beginning of the table (which |
| itself must stay constant relative to the instruction to jump into |
| it). I don't believe we can guarantee earlier passes of the compiler |
| will adhere to those rules. |
| |
| So, late in the compilation process we find all the jump tables, and |
| expand them into real code -- eg each entry in the jump table vector |
| will get an appropriate label followed by a jump to the final target. |
| |
| Reorg and the final jump pass can then optimize these branches and |
| fill their delay slots. We end up with smaller, more efficient code. |
| |
| The jump instructions within the table are special; we must be able |
| to identify them during assembly output (if the jumps don't get filled |
| we need to emit a nop rather than nullifying the delay slot)). We |
| identify jumps in switch tables by marking the SET with DImode. |
| |
| We also surround the jump table itself with BEGIN_BRTAB and END_BRTAB |
| insns. This serves two purposes, first it prevents jump.c from |
| noticing that the last N entries in the table jump to the instruction |
| immediately after the table and deleting the jumps. Second, those |
| insns mark where we should emit .begin_brtab and .end_brtab directives |
| when using GAS (allows for better link time optimizations). */ |
| |
| void |
| pa_reorg (insns) |
| rtx insns; |
| { |
| rtx insn; |
| |
| /* Keep track of which insns have unscaled indexed addresses, and which |
| register is the base address in such insns. */ |
| record_unscaled_index_insn_codes (insns); |
| |
| remove_useless_addtr_insns (insns, 1); |
| |
| if (pa_cpu < PROCESSOR_8000) |
| pa_combine_instructions (get_insns ()); |
| |
| |
| /* This is fairly cheap, so always run it if optimizing. */ |
| if (optimize > 0 && !TARGET_BIG_SWITCH) |
| { |
| /* Find and explode all ADDR_VEC or ADDR_DIFF_VEC insns. */ |
| insns = get_insns (); |
| for (insn = insns; insn; insn = NEXT_INSN (insn)) |
| { |
| rtx pattern, tmp, location; |
| unsigned int length, i; |
| |
| /* Find an ADDR_VEC or ADDR_DIFF_VEC insn to explode. */ |
| if (GET_CODE (insn) != JUMP_INSN |
| || (GET_CODE (PATTERN (insn)) != ADDR_VEC |
| && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) |
| continue; |
| |
| /* Emit marker for the beginning of the branch table. */ |
| emit_insn_before (gen_begin_brtab (), insn); |
| |
| pattern = PATTERN (insn); |
| location = PREV_INSN (insn); |
| length = XVECLEN (pattern, GET_CODE (pattern) == ADDR_DIFF_VEC); |
| |
| for (i = 0; i < length; i++) |
| { |
| /* Emit a label before each jump to keep jump.c from |
| removing this code. */ |
| tmp = gen_label_rtx (); |
| LABEL_NUSES (tmp) = 1; |
| emit_label_after (tmp, location); |
| location = NEXT_INSN (location); |
| |
| if (GET_CODE (pattern) == ADDR_VEC) |
| { |
| /* Emit the jump itself. */ |
| tmp = gen_jump (XEXP (XVECEXP (pattern, 0, i), 0)); |
| tmp = emit_jump_insn_after (tmp, location); |
| JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 0, i), 0); |
| /* It is easy to rely on the branch table markers |
| during assembly output to trigger the correct code |
| for a switch table jump with an unfilled delay slot, |
| |
| However, that requires state and assumes that we look |
| at insns in order. |
| |
| We can't make such assumptions when computing the length |
| of instructions. Ugh. We could walk the insn chain to |
| determine if this instruction is in a branch table, but |
| that can get rather expensive, particularly during the |
| branch shortening phase of the compiler. |
| |
| So instead we mark this jump as being special. This is |
| far from ideal and knows that no code after this will |
| muck around with the mode of the JUMP_INSN itself. */ |
| PUT_MODE (tmp, SImode); |
| LABEL_NUSES (JUMP_LABEL (tmp))++; |
| location = NEXT_INSN (location); |
| } |
| else |
| { |
| /* Emit the jump itself. */ |
| tmp = gen_jump (XEXP (XVECEXP (pattern, 1, i), 0)); |
| tmp = emit_jump_insn_after (tmp, location); |
| JUMP_LABEL (tmp) = XEXP (XVECEXP (pattern, 1, i), 0); |
| /* It is easy to rely on the branch table markers |
| during assembly output to trigger the correct code |
| for a switch table jump with an unfilled delay slot, |
| |
| However, that requires state and assumes that we look |
| at insns in order. |
| |
| We can't make such assumptions when computing the length |
| of instructions. Ugh. We could walk the insn chain to |
| determine if this instruction is in a branch table, but |
| that can get rather expensive, particularly during the |
| branch shortening phase of the compiler. |
| |
| So instead we mark this jump as being special. This is |
| far from ideal and knows that no code after this will |
| muck around with the mode of the JUMP_INSN itself. */ |
| PUT_MODE (tmp, SImode); |
| LABEL_NUSES (JUMP_LABEL (tmp))++; |
| location = NEXT_INSN (location); |
| } |
| |
| /* Emit a BARRIER after the jump. */ |
| emit_barrier_after (location); |
| location = NEXT_INSN (location); |
| } |
| |
| /* Emit marker for the end of the branch table. */ |
| emit_insn_before (gen_end_brtab (), location); |
| location = NEXT_INSN (location); |
| emit_barrier_after (location); |
| |
| /* Delete the ADDR_VEC or ADDR_DIFF_VEC. */ |
| delete_insn (insn); |
| } |
| } |
| else |
| { |
| /* Sill need an end_brtab insn. */ |
| insns = get_insns (); |
| for (insn = insns; insn; insn = NEXT_INSN (insn)) |
| { |
| /* Find an ADDR_VEC insn. */ |
| if (GET_CODE (insn) != JUMP_INSN |
| || (GET_CODE (PATTERN (insn)) != ADDR_VEC |
| && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)) |
| continue; |
| |
| /* Now generate markers for the beginning and end of the |
| branch table. */ |
| emit_insn_before (gen_begin_brtab (), insn); |
| emit_insn_after (gen_end_brtab (), insn); |
| } |
| } |
| } |
| |
| /* The PA has a number of odd instructions which can perform multiple |
| tasks at once. On first generation PA machines (PA1.0 and PA1.1) |
| it may be profitable to combine two instructions into one instruction |
| with two outputs. It's not profitable PA2.0 machines because the |
| two outputs would take two slots in the reorder buffers. |
| |
| This routine finds instructions which can be combined and combines |
| them. We only support some of the potential combinations, and we |
| only try common ways to find suitable instructions. |
| |
| * addb can add two registers or a register and a small integer |
| and jump to a nearby (+-8k) location. Normally the jump to the |
| nearby location is conditional on the result of the add, but by |
| using the "true" condition we can make the jump unconditional. |
| Thus addb can perform two independent operations in one insn. |
| |
| * movb is similar to addb in that it can perform a reg->reg |
| or small immediate->reg copy and jump to a nearby (+-8k location). |
| |
| * fmpyadd and fmpysub can perform a FP multiply and either an |
| FP add or FP sub if the operands of the multiply and add/sub are |
| independent (there are other minor restrictions). Note both |
| the fmpy and fadd/fsub can in theory move to better spots according |
| to data dependencies, but for now we require the fmpy stay at a |
| fixed location. |
| |
| * Many of the memory operations can perform pre & post updates |
| of index registers. GCC's pre/post increment/decrement addressing |
| is far too simple to take advantage of all the possibilities. This |
| pass may not be suitable since those insns may not be independent. |
| |
| * comclr can compare two ints or an int and a register, nullify |
| the following instruction and zero some other register. This |
| is more difficult to use as it's harder to find an insn which |
| will generate a comclr than finding something like an unconditional |
| branch. (conditional moves & long branches create comclr insns). |
| |
| * Most arithmetic operations can conditionally skip the next |
| instruction. They can be viewed as "perform this operation |
| and conditionally jump to this nearby location" (where nearby |
| is an insns away). These are difficult to use due to the |
| branch length restrictions. */ |
| |
| static void |
| pa_combine_instructions (insns) |
| rtx insns ATTRIBUTE_UNUSED; |
| { |
| rtx anchor, new; |
| |
| /* This can get expensive since the basic algorithm is on the |
| order of O(n^2) (or worse). Only do it for -O2 or higher |
| levels of optimization. */ |
| if (optimize < 2) |
| return; |
| |
| /* Walk down the list of insns looking for "anchor" insns which |
| may be combined with "floating" insns. As the name implies, |
| "anchor" instructions don't move, while "floating" insns may |
| move around. */ |
| new = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, NULL_RTX, NULL_RTX)); |
| new = make_insn_raw (new); |
| |
| for (anchor = get_insns (); anchor; anchor = NEXT_INSN (anchor)) |
| { |
| enum attr_pa_combine_type anchor_attr; |
| enum attr_pa_combine_type floater_attr; |
| |
| /* We only care about INSNs, JUMP_INSNs, and CALL_INSNs. |
| Also ignore any special USE insns. */ |
| if ((GET_CODE (anchor) != INSN |
| && GET_CODE (anchor) != JUMP_INSN |
| && GET_CODE (anchor) != CALL_INSN) |
| || GET_CODE (PATTERN (anchor)) == USE |
| || GET_CODE (PATTERN (anchor)) == CLOBBER |
| || GET_CODE (PATTERN (anchor)) == ADDR_VEC |
| || GET_CODE (PATTERN (anchor)) == ADDR_DIFF_VEC) |
| continue; |
| |
| anchor_attr = get_attr_pa_combine_type (anchor); |
| /* See if anchor is an insn suitable for combination. */ |
| if (anchor_attr == PA_COMBINE_TYPE_FMPY |
| || anchor_attr == PA_COMBINE_TYPE_FADDSUB |
| || (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH |
| && ! forward_branch_p (anchor))) |
| { |
| rtx floater; |
| |
| for (floater = PREV_INSN (anchor); |
| floater; |
| floater = PREV_INSN (floater)) |
| { |
| if (GET_CODE (floater) == NOTE |
| || (GET_CODE (floater) == INSN |
| && (GET_CODE (PATTERN (floater)) == USE |
| || GET_CODE (PATTERN (floater)) == CLOBBER))) |
| continue; |
| |
| /* Anything except a regular INSN will stop our search. */ |
| if (GET_CODE (floater) != INSN |
| || GET_CODE (PATTERN (floater)) == ADDR_VEC |
| || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) |
| { |
| floater = NULL_RTX; |
| break; |
| } |
| |
| /* See if FLOATER is suitable for combination with the |
| anchor. */ |
| floater_attr = get_attr_pa_combine_type (floater); |
| if ((anchor_attr == PA_COMBINE_TYPE_FMPY |
| && floater_attr == PA_COMBINE_TYPE_FADDSUB) |
| || (anchor_attr == PA_COMBINE_TYPE_FADDSUB |
| && floater_attr == PA_COMBINE_TYPE_FMPY)) |
| { |
| /* If ANCHOR and FLOATER can be combined, then we're |
| done with this pass. */ |
| if (pa_can_combine_p (new, anchor, floater, 0, |
| SET_DEST (PATTERN (floater)), |
| XEXP (SET_SRC (PATTERN (floater)), 0), |
| XEXP (SET_SRC (PATTERN (floater)), 1))) |
| break; |
| } |
| |
| else if (anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH |
| && floater_attr == PA_COMBINE_TYPE_ADDMOVE) |
| { |
| if (GET_CODE (SET_SRC (PATTERN (floater))) == PLUS) |
| { |
| if (pa_can_combine_p (new, anchor, floater, 0, |
| SET_DEST (PATTERN (floater)), |
| XEXP (SET_SRC (PATTERN (floater)), 0), |
| XEXP (SET_SRC (PATTERN (floater)), 1))) |
| break; |
| } |
| else |
| { |
| if (pa_can_combine_p (new, anchor, floater, 0, |
| SET_DEST (PATTERN (floater)), |
| SET_SRC (PATTERN (floater)), |
| SET_SRC (PATTERN (floater)))) |
| break; |
| } |
| } |
| } |
| |
| /* If we didn't find anything on the backwards scan try forwards. */ |
| if (!floater |
| && (anchor_attr == PA_COMBINE_TYPE_FMPY |
| || anchor_attr == PA_COMBINE_TYPE_FADDSUB)) |
| { |
| for (floater = anchor; floater; floater = NEXT_INSN (floater)) |
| { |
| if (GET_CODE (floater) == NOTE |
| || (GET_CODE (floater) == INSN |
| && (GET_CODE (PATTERN (floater)) == USE |
| || GET_CODE (PATTERN (floater)) == CLOBBER))) |
| |
| continue; |
| |
| /* Anything except a regular INSN will stop our search. */ |
| if (GET_CODE (floater) != INSN |
| || GET_CODE (PATTERN (floater)) == ADDR_VEC |
| || GET_CODE (PATTERN (floater)) == ADDR_DIFF_VEC) |
| { |
| floater = NULL_RTX; |
| break; |
| } |
| |
| /* See if FLOATER is suitable for combination with the |
| anchor. */ |
| floater_attr = get_attr_pa_combine_type (floater); |
| if ((anchor_attr == PA_COMBINE_TYPE_FMPY |
| && floater_attr == PA_COMBINE_TYPE_FADDSUB) |
| || (anchor_attr == PA_COMBINE_TYPE_FADDSUB |
| && floater_attr == PA_COMBINE_TYPE_FMPY)) |
| { |
| /* If ANCHOR and FLOATER can be combined, then we're |
| done with this pass. */ |
| if (pa_can_combine_p (new, anchor, floater, 1, |
| SET_DEST (PATTERN (floater)), |
| XEXP (SET_SRC (PATTERN(floater)),0), |
| XEXP(SET_SRC(PATTERN(floater)),1))) |
| break; |
| } |
| } |
| } |
| |
| /* FLOATER will be nonzero if we found a suitable floating |
| insn for combination with ANCHOR. */ |
| if (floater |
| && (anchor_attr == PA_COMBINE_TYPE_FADDSUB |
| || anchor_attr == PA_COMBINE_TYPE_FMPY)) |
| { |
| /* Emit the new instruction and delete the old anchor. */ |
| emit_insn_before (gen_rtx_PARALLEL (VOIDmode, |
| gen_rtvec (2, |
| PATTERN (anchor), |
| PATTERN (floater))), |
| anchor); |
| PUT_CODE (anchor, NOTE); |
| NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED; |
| NOTE_SOURCE_FILE (anchor) = 0; |
| |
| /* Emit a special USE insn for FLOATER, then delete |
| the floating insn. */ |
| emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); |
| delete_insn (floater); |
| |
| continue; |
| } |
| else if (floater |
| && anchor_attr == PA_COMBINE_TYPE_UNCOND_BRANCH) |
| { |
| rtx temp; |
| /* Emit the new_jump instruction and delete the old anchor. */ |
| temp = emit_jump_insn_before (gen_rtx_PARALLEL (VOIDmode, |
| gen_rtvec (2, PATTERN (anchor), |
| PATTERN (floater))), |
| anchor); |
| JUMP_LABEL (temp) = JUMP_LABEL (anchor); |
| PUT_CODE (anchor, NOTE); |
| NOTE_LINE_NUMBER (anchor) = NOTE_INSN_DELETED; |
| NOTE_SOURCE_FILE (anchor) = 0; |
| |
| /* Emit a special USE insn for FLOATER, then delete |
| the floating insn. */ |
| emit_insn_before (gen_rtx_USE (VOIDmode, floater), floater); |
| delete_insn (floater); |
| continue; |
| } |
| } |
| } |
| } |
| |
| int |
| pa_can_combine_p (new, anchor, floater, reversed, dest, src1, src2) |
| rtx new, anchor, floater; |
| int reversed; |
| rtx dest, src1, src2; |
| { |
| int insn_code_number; |
| rtx start, end; |
| |
| /* Create a PARALLEL with the patterns of ANCHOR and |
| FLOATER, try to recognize it, then test constraints |
| for the resulting pattern. |
| |
| If the pattern doesn't match or the constraints |
| aren't met keep searching for a suitable floater |
| insn. */ |
| XVECEXP (PATTERN (new), 0, 0) = PATTERN (anchor); |
| XVECEXP (PATTERN (new), 0, 1) = PATTERN (floater); |
| INSN_CODE (new) = -1; |
| insn_code_number = recog_memoized (new); |
| if (insn_code_number < 0 |
| || !constrain_operands (insn_code_number, 1)) |
| return 0; |
| |
| if (reversed) |
| { |
| start = anchor; |
| end = floater; |
| } |
| else |
| { |
| start = floater; |
| end = anchor; |
| } |
| |
| /* There's up to three operands to consider. One |
| output and two inputs. |
| |
| The output must not be used between FLOATER & ANCHOR |
| exclusive. The inputs must not be set between |
| FLOATER and ANCHOR exclusive. */ |
| |
| if (reg_used_between_p (dest, start, end)) |
| return 0; |
| |
| if (reg_set_between_p (src1, start, end)) |
| return 0; |
| |
| if (reg_set_between_p (src2, start, end)) |
| return 0; |
| |
| /* If we get here, then everything is good. */ |
| return 1; |
| } |
| |
| /* Return nonzero if sets and references for INSN are delayed. |
| |
| Millicode insns are actually function calls with some special |
| constraints on arguments and register usage. |
| |
| Millicode calls always expect their arguments in the integer argument |
| registers, and always return their result in %r29 (ret1). They |
| are expected to clobber their arguments, %r1, %r29, and %r31 and |
| nothing else. |
| |
| By considering this effects delayed reorg reorg can put insns |
| which set the argument registers into the delay slot of the millicode |
| call -- thus they act more like traditional CALL_INSNs. |
| |
| get_attr_type will try to recognize the given insn, so make sure to |
| filter out things it will not accept -- SEQUENCE, USE and CLOBBER insns |
| in particular. */ |
| int |
| insn_sets_and_refs_are_delayed (insn) |
| rtx insn; |
| { |
| return ((GET_CODE (insn) == INSN |
| && GET_CODE (PATTERN (insn)) != SEQUENCE |
| && GET_CODE (PATTERN (insn)) != USE |
| && GET_CODE (PATTERN (insn)) != CLOBBER |
| && get_attr_type (insn) == TYPE_MILLI)); |
| } |