| /* Subroutines for insn-output.c for SPARC. |
| Copyright (C) 1987-2021 Free Software Foundation, Inc. |
| Contributed by Michael Tiemann (tiemann@cygnus.com) |
| 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans, |
| at Cygnus Support. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #define IN_TARGET_CODE 1 |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "memmodel.h" |
| #include "gimple.h" |
| #include "df.h" |
| #include "tm_p.h" |
| #include "stringpool.h" |
| #include "attribs.h" |
| #include "expmed.h" |
| #include "optabs.h" |
| #include "regs.h" |
| #include "emit-rtl.h" |
| #include "recog.h" |
| #include "diagnostic-core.h" |
| #include "alias.h" |
| #include "fold-const.h" |
| #include "stor-layout.h" |
| #include "calls.h" |
| #include "varasm.h" |
| #include "output.h" |
| #include "insn-attr.h" |
| #include "explow.h" |
| #include "expr.h" |
| #include "debug.h" |
| #include "cfgrtl.h" |
| #include "common/common-target.h" |
| #include "gimplify.h" |
| #include "langhooks.h" |
| #include "reload.h" |
| #include "tree-pass.h" |
| #include "context.h" |
| #include "builtins.h" |
| #include "tree-vector-builder.h" |
| #include "opts.h" |
| |
| /* This file should be included last. */ |
| #include "target-def.h" |
| |
| /* Processor costs */ |
| |
| struct processor_costs { |
| /* Integer load */ |
| const int int_load; |
| |
| /* Integer signed load */ |
| const int int_sload; |
| |
| /* Integer zeroed load */ |
| const int int_zload; |
| |
| /* Float load */ |
| const int float_load; |
| |
| /* fmov, fneg, fabs */ |
| const int float_move; |
| |
| /* fadd, fsub */ |
| const int float_plusminus; |
| |
| /* fcmp */ |
| const int float_cmp; |
| |
| /* fmov, fmovr */ |
| const int float_cmove; |
| |
| /* fmul */ |
| const int float_mul; |
| |
| /* fdivs */ |
| const int float_div_sf; |
| |
| /* fdivd */ |
| const int float_div_df; |
| |
| /* fsqrts */ |
| const int float_sqrt_sf; |
| |
| /* fsqrtd */ |
| const int float_sqrt_df; |
| |
| /* umul/smul */ |
| const int int_mul; |
| |
| /* mulX */ |
| const int int_mulX; |
| |
| /* integer multiply cost for each bit set past the most |
| significant 3, so the formula for multiply cost becomes: |
| |
| if (rs1 < 0) |
| highest_bit = highest_clear_bit(rs1); |
| else |
| highest_bit = highest_set_bit(rs1); |
| if (highest_bit < 3) |
| highest_bit = 3; |
| cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor); |
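| |
| For example, following the formula above with int_mul_bit_factor = 2, a |
| positive rs1 whose highest set bit is bit 11 gives |
| cost = int_mul{,X} + (11 - 3) / 2 = int_mul{,X} + 4. |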
| |
| A value of zero indicates that the multiply cost is fixed, |
| not variable. */ |
| const int int_mul_bit_factor; |
| |
| /* udiv/sdiv */ |
| const int int_div; |
| |
| /* divX */ |
| const int int_divX; |
| |
| /* movcc, movr */ |
| const int int_cmove; |
| |
| /* penalty for shifts, due to scheduling rules etc. */ |
| const int shift_penalty; |
| |
| /* cost of a (predictable) branch. */ |
| const int branch_cost; |
| }; |
| |
| static const |
| struct processor_costs cypress_costs = { |
| COSTS_N_INSNS (2), /* int load */ |
| COSTS_N_INSNS (2), /* int signed load */ |
| COSTS_N_INSNS (2), /* int zeroed load */ |
| COSTS_N_INSNS (2), /* float load */ |
| COSTS_N_INSNS (5), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (5), /* fadd, fsub */ |
| COSTS_N_INSNS (1), /* fcmp */ |
| COSTS_N_INSNS (1), /* fmov, fmovr */ |
| COSTS_N_INSNS (7), /* fmul */ |
| COSTS_N_INSNS (37), /* fdivs */ |
| COSTS_N_INSNS (37), /* fdivd */ |
| COSTS_N_INSNS (63), /* fsqrts */ |
| COSTS_N_INSNS (63), /* fsqrtd */ |
| COSTS_N_INSNS (1), /* imul */ |
| COSTS_N_INSNS (1), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (1), /* idiv */ |
| COSTS_N_INSNS (1), /* idivX */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 3 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs supersparc_costs = { |
| COSTS_N_INSNS (1), /* int load */ |
| COSTS_N_INSNS (1), /* int signed load */ |
| COSTS_N_INSNS (1), /* int zeroed load */ |
| COSTS_N_INSNS (0), /* float load */ |
| COSTS_N_INSNS (3), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (3), /* fadd, fsub */ |
| COSTS_N_INSNS (3), /* fcmp */ |
| COSTS_N_INSNS (1), /* fmov, fmovr */ |
| COSTS_N_INSNS (3), /* fmul */ |
| COSTS_N_INSNS (6), /* fdivs */ |
| COSTS_N_INSNS (9), /* fdivd */ |
| COSTS_N_INSNS (12), /* fsqrts */ |
| COSTS_N_INSNS (12), /* fsqrtd */ |
| COSTS_N_INSNS (4), /* imul */ |
| COSTS_N_INSNS (4), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (4), /* idiv */ |
| COSTS_N_INSNS (4), /* idivX */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 1, /* shift penalty */ |
| 3 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs hypersparc_costs = { |
| COSTS_N_INSNS (1), /* int load */ |
| COSTS_N_INSNS (1), /* int signed load */ |
| COSTS_N_INSNS (1), /* int zeroed load */ |
| COSTS_N_INSNS (1), /* float load */ |
| COSTS_N_INSNS (1), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (1), /* fadd, fsub */ |
| COSTS_N_INSNS (1), /* fcmp */ |
| COSTS_N_INSNS (1), /* fmov, fmovr */ |
| COSTS_N_INSNS (1), /* fmul */ |
| COSTS_N_INSNS (8), /* fdivs */ |
| COSTS_N_INSNS (12), /* fdivd */ |
| COSTS_N_INSNS (17), /* fsqrts */ |
| COSTS_N_INSNS (17), /* fsqrtd */ |
| COSTS_N_INSNS (17), /* imul */ |
| COSTS_N_INSNS (17), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (17), /* idiv */ |
| COSTS_N_INSNS (17), /* idivX */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 3 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs leon_costs = { |
| COSTS_N_INSNS (1), /* int load */ |
| COSTS_N_INSNS (1), /* int signed load */ |
| COSTS_N_INSNS (1), /* int zeroed load */ |
| COSTS_N_INSNS (1), /* float load */ |
| COSTS_N_INSNS (1), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (1), /* fadd, fsub */ |
| COSTS_N_INSNS (1), /* fcmp */ |
| COSTS_N_INSNS (1), /* fmov, fmovr */ |
| COSTS_N_INSNS (1), /* fmul */ |
| COSTS_N_INSNS (15), /* fdivs */ |
| COSTS_N_INSNS (15), /* fdivd */ |
| COSTS_N_INSNS (23), /* fsqrts */ |
| COSTS_N_INSNS (23), /* fsqrtd */ |
| COSTS_N_INSNS (5), /* imul */ |
| COSTS_N_INSNS (5), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (5), /* idiv */ |
| COSTS_N_INSNS (5), /* idivX */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 3 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs leon3_costs = { |
| COSTS_N_INSNS (1), /* int load */ |
| COSTS_N_INSNS (1), /* int signed load */ |
| COSTS_N_INSNS (1), /* int zeroed load */ |
| COSTS_N_INSNS (1), /* float load */ |
| COSTS_N_INSNS (1), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (1), /* fadd, fsub */ |
| COSTS_N_INSNS (1), /* fcmp */ |
| COSTS_N_INSNS (1), /* fmov, fmovr */ |
| COSTS_N_INSNS (1), /* fmul */ |
| COSTS_N_INSNS (14), /* fdivs */ |
| COSTS_N_INSNS (15), /* fdivd */ |
| COSTS_N_INSNS (22), /* fsqrts */ |
| COSTS_N_INSNS (23), /* fsqrtd */ |
| COSTS_N_INSNS (5), /* imul */ |
| COSTS_N_INSNS (5), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (35), /* idiv */ |
| COSTS_N_INSNS (35), /* idivX */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 3 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs leon5_costs = { |
| COSTS_N_INSNS (1), /* int load */ |
| COSTS_N_INSNS (1), /* int signed load */ |
| COSTS_N_INSNS (1), /* int zeroed load */ |
| COSTS_N_INSNS (1), /* float load */ |
| COSTS_N_INSNS (1), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (1), /* fadd, fsub */ |
| COSTS_N_INSNS (1), /* fcmp */ |
| COSTS_N_INSNS (1), /* fmov, fmovr */ |
| COSTS_N_INSNS (1), /* fmul */ |
| COSTS_N_INSNS (17), /* fdivs */ |
| COSTS_N_INSNS (18), /* fdivd */ |
| COSTS_N_INSNS (25), /* fsqrts */ |
| COSTS_N_INSNS (26), /* fsqrtd */ |
| COSTS_N_INSNS (4), /* imul */ |
| COSTS_N_INSNS (4), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (35), /* idiv */ |
| COSTS_N_INSNS (35), /* idivX */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 3 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs sparclet_costs = { |
| COSTS_N_INSNS (3), /* int load */ |
| COSTS_N_INSNS (3), /* int signed load */ |
| COSTS_N_INSNS (1), /* int zeroed load */ |
| COSTS_N_INSNS (1), /* float load */ |
| COSTS_N_INSNS (1), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (1), /* fadd, fsub */ |
| COSTS_N_INSNS (1), /* fcmp */ |
| COSTS_N_INSNS (1), /* fmov, fmovr */ |
| COSTS_N_INSNS (1), /* fmul */ |
| COSTS_N_INSNS (1), /* fdivs */ |
| COSTS_N_INSNS (1), /* fdivd */ |
| COSTS_N_INSNS (1), /* fsqrts */ |
| COSTS_N_INSNS (1), /* fsqrtd */ |
| COSTS_N_INSNS (5), /* imul */ |
| COSTS_N_INSNS (5), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (5), /* idiv */ |
| COSTS_N_INSNS (5), /* idivX */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 3 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs ultrasparc_costs = { |
| COSTS_N_INSNS (2), /* int load */ |
| COSTS_N_INSNS (3), /* int signed load */ |
| COSTS_N_INSNS (2), /* int zeroed load */ |
| COSTS_N_INSNS (2), /* float load */ |
| COSTS_N_INSNS (1), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (4), /* fadd, fsub */ |
| COSTS_N_INSNS (1), /* fcmp */ |
| COSTS_N_INSNS (2), /* fmov, fmovr */ |
| COSTS_N_INSNS (4), /* fmul */ |
| COSTS_N_INSNS (13), /* fdivs */ |
| COSTS_N_INSNS (23), /* fdivd */ |
| COSTS_N_INSNS (13), /* fsqrts */ |
| COSTS_N_INSNS (23), /* fsqrtd */ |
| COSTS_N_INSNS (4), /* imul */ |
| COSTS_N_INSNS (4), /* imulX */ |
| 2, /* imul bit factor */ |
| COSTS_N_INSNS (37), /* idiv */ |
| COSTS_N_INSNS (68), /* idivX */ |
| COSTS_N_INSNS (2), /* movcc/movr */ |
| 2, /* shift penalty */ |
| 2 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs ultrasparc3_costs = { |
| COSTS_N_INSNS (2), /* int load */ |
| COSTS_N_INSNS (3), /* int signed load */ |
| COSTS_N_INSNS (3), /* int zeroed load */ |
| COSTS_N_INSNS (2), /* float load */ |
| COSTS_N_INSNS (3), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (4), /* fadd, fsub */ |
| COSTS_N_INSNS (5), /* fcmp */ |
| COSTS_N_INSNS (3), /* fmov, fmovr */ |
| COSTS_N_INSNS (4), /* fmul */ |
| COSTS_N_INSNS (17), /* fdivs */ |
| COSTS_N_INSNS (20), /* fdivd */ |
| COSTS_N_INSNS (20), /* fsqrts */ |
| COSTS_N_INSNS (29), /* fsqrtd */ |
| COSTS_N_INSNS (6), /* imul */ |
| COSTS_N_INSNS (6), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (40), /* idiv */ |
| COSTS_N_INSNS (71), /* idivX */ |
| COSTS_N_INSNS (2), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 2 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs niagara_costs = { |
| COSTS_N_INSNS (3), /* int load */ |
| COSTS_N_INSNS (3), /* int signed load */ |
| COSTS_N_INSNS (3), /* int zeroed load */ |
| COSTS_N_INSNS (9), /* float load */ |
| COSTS_N_INSNS (8), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (8), /* fadd, fsub */ |
| COSTS_N_INSNS (26), /* fcmp */ |
| COSTS_N_INSNS (8), /* fmov, fmovr */ |
| COSTS_N_INSNS (29), /* fmul */ |
| COSTS_N_INSNS (54), /* fdivs */ |
| COSTS_N_INSNS (83), /* fdivd */ |
| COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */ |
| COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */ |
| COSTS_N_INSNS (11), /* imul */ |
| COSTS_N_INSNS (11), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (72), /* idiv */ |
| COSTS_N_INSNS (72), /* idivX */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 4 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs niagara2_costs = { |
| COSTS_N_INSNS (3), /* int load */ |
| COSTS_N_INSNS (3), /* int signed load */ |
| COSTS_N_INSNS (3), /* int zeroed load */ |
| COSTS_N_INSNS (3), /* float load */ |
| COSTS_N_INSNS (6), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (6), /* fadd, fsub */ |
| COSTS_N_INSNS (6), /* fcmp */ |
| COSTS_N_INSNS (6), /* fmov, fmovr */ |
| COSTS_N_INSNS (6), /* fmul */ |
| COSTS_N_INSNS (19), /* fdivs */ |
| COSTS_N_INSNS (33), /* fdivd */ |
| COSTS_N_INSNS (19), /* fsqrts */ |
| COSTS_N_INSNS (33), /* fsqrtd */ |
| COSTS_N_INSNS (5), /* imul */ |
| COSTS_N_INSNS (5), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */ |
| COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 5 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs niagara3_costs = { |
| COSTS_N_INSNS (3), /* int load */ |
| COSTS_N_INSNS (3), /* int signed load */ |
| COSTS_N_INSNS (3), /* int zeroed load */ |
| COSTS_N_INSNS (3), /* float load */ |
| COSTS_N_INSNS (9), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (9), /* fadd, fsub */ |
| COSTS_N_INSNS (9), /* fcmp */ |
| COSTS_N_INSNS (9), /* fmov, fmovr */ |
| COSTS_N_INSNS (9), /* fmul */ |
| COSTS_N_INSNS (23), /* fdivs */ |
| COSTS_N_INSNS (37), /* fdivd */ |
| COSTS_N_INSNS (23), /* fsqrts */ |
| COSTS_N_INSNS (37), /* fsqrtd */ |
| COSTS_N_INSNS (9), /* imul */ |
| COSTS_N_INSNS (9), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */ |
| COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 5 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs niagara4_costs = { |
| COSTS_N_INSNS (5), /* int load */ |
| COSTS_N_INSNS (5), /* int signed load */ |
| COSTS_N_INSNS (5), /* int zeroed load */ |
| COSTS_N_INSNS (5), /* float load */ |
| COSTS_N_INSNS (11), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (11), /* fadd, fsub */ |
| COSTS_N_INSNS (11), /* fcmp */ |
| COSTS_N_INSNS (11), /* fmov, fmovr */ |
| COSTS_N_INSNS (11), /* fmul */ |
| COSTS_N_INSNS (24), /* fdivs */ |
| COSTS_N_INSNS (37), /* fdivd */ |
| COSTS_N_INSNS (24), /* fsqrts */ |
| COSTS_N_INSNS (37), /* fsqrtd */ |
| COSTS_N_INSNS (12), /* imul */ |
| COSTS_N_INSNS (12), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */ |
| COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 2 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs niagara7_costs = { |
| COSTS_N_INSNS (5), /* int load */ |
| COSTS_N_INSNS (5), /* int signed load */ |
| COSTS_N_INSNS (5), /* int zeroed load */ |
| COSTS_N_INSNS (5), /* float load */ |
| COSTS_N_INSNS (11), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (11), /* fadd, fsub */ |
| COSTS_N_INSNS (11), /* fcmp */ |
| COSTS_N_INSNS (11), /* fmov, fmovr */ |
| COSTS_N_INSNS (11), /* fmul */ |
| COSTS_N_INSNS (24), /* fdivs */ |
| COSTS_N_INSNS (37), /* fdivd */ |
| COSTS_N_INSNS (24), /* fsqrts */ |
| COSTS_N_INSNS (37), /* fsqrtd */ |
| COSTS_N_INSNS (12), /* imul */ |
| COSTS_N_INSNS (12), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */ |
| COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 1 /* branch cost */ |
| }; |
| |
| static const |
| struct processor_costs m8_costs = { |
| COSTS_N_INSNS (3), /* int load */ |
| COSTS_N_INSNS (3), /* int signed load */ |
| COSTS_N_INSNS (3), /* int zeroed load */ |
| COSTS_N_INSNS (3), /* float load */ |
| COSTS_N_INSNS (9), /* fmov, fneg, fabs */ |
| COSTS_N_INSNS (9), /* fadd, fsub */ |
| COSTS_N_INSNS (9), /* fcmp */ |
| COSTS_N_INSNS (9), /* fmov, fmovr */ |
| COSTS_N_INSNS (9), /* fmul */ |
| COSTS_N_INSNS (26), /* fdivs */ |
| COSTS_N_INSNS (30), /* fdivd */ |
| COSTS_N_INSNS (33), /* fsqrts */ |
| COSTS_N_INSNS (41), /* fsqrtd */ |
| COSTS_N_INSNS (12), /* imul */ |
| COSTS_N_INSNS (10), /* imulX */ |
| 0, /* imul bit factor */ |
| COSTS_N_INSNS (57), /* udiv/sdiv */ |
| COSTS_N_INSNS (30), /* udivx/sdivx */ |
| COSTS_N_INSNS (1), /* movcc/movr */ |
| 0, /* shift penalty */ |
| 1 /* branch cost */ |
| }; |
| |
| static const struct processor_costs *sparc_costs = &cypress_costs; |
| |
| #ifdef HAVE_AS_RELAX_OPTION |
| /* If 'as' and 'ld' are relaxing tail call insns into branch always, always |
| use "or %o7,%g0,X; call Y; or X,%g0,%o7" so that the sequence can be |
| optimized.  With sethi/jmp, neither 'as' nor 'ld' has an easy way to find |
| out whether anything branches between the sethi and the jmp. */ |
| #define LEAF_SIBCALL_SLOT_RESERVED_P 1 |
| #else |
| #define LEAF_SIBCALL_SLOT_RESERVED_P \ |
| ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic) |
| #endif |
| |
| /* Vector, indexed by hard register number, which contains 1 |
| for a register that is allowable in a candidate for leaf |
| function treatment. */ |
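| /* The first 32 entries correspond to the integer registers (%g, %o, %l |
| and %i groups of 8, in that order); the remaining entries cover the |
| floating-point and special registers. */ |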
| char sparc_leaf_regs[] = |
| { 1, 1, 1, 1, 1, 1, 1, 1, |
| 0, 0, 0, 0, 0, 0, 1, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, |
| 1, 1, 1, 1, 1, 1, 0, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1, 1, |
| 1, 1, 1, 1, 1, 1, 1}; |
| |
| struct GTY(()) machine_function |
| { |
| /* Size of the frame of the function. */ |
| HOST_WIDE_INT frame_size; |
| |
| /* Size of the frame of the function minus the register window save area |
| and the outgoing argument area. */ |
| HOST_WIDE_INT apparent_frame_size; |
| |
| /* Register we pretend the frame pointer is allocated to. Normally, this |
| is %fp, but if we are in a leaf procedure, this is (%sp + offset). We |
| record "offset" separately as it may be too big for (reg + disp). */ |
| rtx frame_base_reg; |
| HOST_WIDE_INT frame_base_offset; |
| |
| /* Number of global or FP registers to be saved (as 4-byte quantities). */ |
| int n_global_fp_regs; |
| |
| /* True if the current function is leaf and uses only leaf regs, |
| so that the SPARC leaf function optimization can be applied. |
| Private version of crtl->uses_only_leaf_regs, see |
| sparc_expand_prologue for the rationale. */ |
| int leaf_function_p; |
| |
| /* True if the prologue saves local or in registers. */ |
| bool save_local_in_regs_p; |
| |
| /* True if the data calculated by sparc_expand_prologue are valid. */ |
| bool prologue_data_valid_p; |
| }; |
| |
| #define sparc_frame_size cfun->machine->frame_size |
| #define sparc_apparent_frame_size cfun->machine->apparent_frame_size |
| #define sparc_frame_base_reg cfun->machine->frame_base_reg |
| #define sparc_frame_base_offset cfun->machine->frame_base_offset |
| #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs |
| #define sparc_leaf_function_p cfun->machine->leaf_function_p |
| #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p |
| #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p |
| |
| /* 1 if the next opcode is to be specially indented. */ |
| int sparc_indent_opcode = 0; |
| |
| static void sparc_option_override (void); |
| static void sparc_init_modes (void); |
| static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode, |
| const_tree, bool, bool, int *, int *); |
| |
| static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int); |
| static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int); |
| static int leon5_adjust_cost (rtx_insn *, int, rtx_insn *, int); |
| |
| static void sparc_emit_set_const32 (rtx, rtx); |
| static void sparc_emit_set_const64 (rtx, rtx); |
| static void sparc_output_addr_vec (rtx); |
| static void sparc_output_addr_diff_vec (rtx); |
| static void sparc_output_deferred_case_vectors (void); |
| static bool sparc_legitimate_address_p (machine_mode, rtx, bool); |
| static bool sparc_legitimate_constant_p (machine_mode, rtx); |
| static rtx sparc_builtin_saveregs (void); |
| static int epilogue_renumber (rtx *, int); |
| static bool sparc_assemble_integer (rtx, unsigned int, int); |
| static int set_extends (rtx_insn *); |
| static void sparc_asm_function_prologue (FILE *); |
| static void sparc_asm_function_epilogue (FILE *); |
| #ifdef TARGET_SOLARIS |
| static void sparc_solaris_elf_asm_named_section (const char *, unsigned int, |
| tree) ATTRIBUTE_UNUSED; |
| #endif |
| static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int); |
| static int sparc_issue_rate (void); |
| static void sparc_sched_init (FILE *, int, int); |
| static int sparc_use_sched_lookahead (void); |
| |
| static void emit_soft_tfmode_libcall (const char *, int, rtx *); |
| static void emit_soft_tfmode_binop (enum rtx_code, rtx *); |
| static void emit_soft_tfmode_unop (enum rtx_code, rtx *); |
| static void emit_soft_tfmode_cvt (enum rtx_code, rtx *); |
| static void emit_hard_tfmode_operation (enum rtx_code, rtx *); |
| |
| static bool sparc_function_ok_for_sibcall (tree, tree); |
| static void sparc_init_libfuncs (void); |
| static void sparc_init_builtins (void); |
| static void sparc_fpu_init_builtins (void); |
| static void sparc_vis_init_builtins (void); |
| static tree sparc_builtin_decl (unsigned, bool); |
| static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int); |
| static tree sparc_fold_builtin (tree, int, tree *, bool); |
| static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, |
| HOST_WIDE_INT, tree); |
| static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT, |
| HOST_WIDE_INT, const_tree); |
| static struct machine_function * sparc_init_machine_status (void); |
| static bool sparc_cannot_force_const_mem (machine_mode, rtx); |
| static rtx sparc_tls_get_addr (void); |
| static rtx sparc_tls_got (void); |
| static int sparc_register_move_cost (machine_mode, |
| reg_class_t, reg_class_t); |
| static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool); |
| static machine_mode sparc_promote_function_mode (const_tree, machine_mode, |
| int *, const_tree, int); |
| static bool sparc_strict_argument_naming (cumulative_args_t); |
| static void sparc_va_start (tree, rtx); |
| static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *); |
| static bool sparc_vector_mode_supported_p (machine_mode); |
| static bool sparc_tls_referenced_p (rtx); |
| static rtx sparc_legitimize_tls_address (rtx); |
| static rtx sparc_legitimize_pic_address (rtx, rtx); |
| static rtx sparc_legitimize_address (rtx, rtx, machine_mode); |
| static rtx sparc_delegitimize_address (rtx); |
| static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t); |
| static bool sparc_pass_by_reference (cumulative_args_t, |
| const function_arg_info &); |
| static void sparc_function_arg_advance (cumulative_args_t, |
| const function_arg_info &); |
| static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &); |
| static rtx sparc_function_incoming_arg (cumulative_args_t, |
| const function_arg_info &); |
| static pad_direction sparc_function_arg_padding (machine_mode, const_tree); |
| static unsigned int sparc_function_arg_boundary (machine_mode, |
| const_tree); |
| static int sparc_arg_partial_bytes (cumulative_args_t, |
| const function_arg_info &); |
| static bool sparc_return_in_memory (const_tree, const_tree); |
| static rtx sparc_struct_value_rtx (tree, int); |
| static rtx sparc_function_value (const_tree, const_tree, bool); |
| static rtx sparc_libcall_value (machine_mode, const_rtx); |
| static bool sparc_function_value_regno_p (const unsigned int); |
| static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void); |
| static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; |
| static void sparc_file_end (void); |
| static bool sparc_frame_pointer_required (void); |
| static bool sparc_can_eliminate (const int, const int); |
| static void sparc_conditional_register_usage (void); |
| static bool sparc_use_pseudo_pic_reg (void); |
| static void sparc_init_pic_reg (void); |
| #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING |
| static const char *sparc_mangle_type (const_tree); |
| #endif |
| static void sparc_trampoline_init (rtx, tree, rtx); |
| static machine_mode sparc_preferred_simd_mode (scalar_mode); |
| static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass); |
| static bool sparc_lra_p (void); |
| static bool sparc_print_operand_punct_valid_p (unsigned char); |
| static void sparc_print_operand (FILE *, rtx, int); |
| static void sparc_print_operand_address (FILE *, machine_mode, rtx); |
| static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t, |
| machine_mode, |
| secondary_reload_info *); |
| static bool sparc_secondary_memory_needed (machine_mode, reg_class_t, |
| reg_class_t); |
| static machine_mode sparc_secondary_memory_needed_mode (machine_mode); |
| static scalar_int_mode sparc_cstore_mode (enum insn_code icode); |
| static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *); |
| static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *); |
| static unsigned int sparc_min_arithmetic_precision (void); |
| static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode); |
| static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode); |
| static bool sparc_modes_tieable_p (machine_mode, machine_mode); |
| static bool sparc_can_change_mode_class (machine_mode, machine_mode, |
| reg_class_t); |
| static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT); |
| static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, |
| const vec_perm_indices &); |
| static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *); |
| static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET); |
| |
| #ifdef SUBTARGET_ATTRIBUTE_TABLE |
| /* Table of valid machine attributes. */ |
| static const struct attribute_spec sparc_attribute_table[] = |
| { |
| /* { name, min_len, max_len, decl_req, type_req, fn_type_req, |
| affects_type_identity, handler, exclude } */ |
| SUBTARGET_ATTRIBUTE_TABLE, |
| { NULL, 0, 0, false, false, false, false, NULL, NULL } |
| }; |
| #endif |
| |
| char sparc_hard_reg_printed[8]; |
| |
| /* Initialize the GCC target structure. */ |
| |
| /* The default is to use .half rather than .short for aligned HI objects. */ |
| #undef TARGET_ASM_ALIGNED_HI_OP |
| #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t" |
| |
| #undef TARGET_ASM_UNALIGNED_HI_OP |
| #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t" |
| #undef TARGET_ASM_UNALIGNED_SI_OP |
| #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t" |
| #undef TARGET_ASM_UNALIGNED_DI_OP |
| #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t" |
| |
| /* The target hook has to handle DI-mode values. */ |
| #undef TARGET_ASM_INTEGER |
| #define TARGET_ASM_INTEGER sparc_assemble_integer |
| |
| #undef TARGET_ASM_FUNCTION_PROLOGUE |
| #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue |
| #undef TARGET_ASM_FUNCTION_EPILOGUE |
| #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue |
| |
| #undef TARGET_SCHED_ADJUST_COST |
| #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost |
| #undef TARGET_SCHED_ISSUE_RATE |
| #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate |
| #undef TARGET_SCHED_INIT |
| #define TARGET_SCHED_INIT sparc_sched_init |
| #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD |
| #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead |
| |
| #undef TARGET_FUNCTION_OK_FOR_SIBCALL |
| #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall |
| |
| #undef TARGET_INIT_LIBFUNCS |
| #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs |
| |
| #undef TARGET_LEGITIMIZE_ADDRESS |
| #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address |
| #undef TARGET_DELEGITIMIZE_ADDRESS |
| #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address |
| #undef TARGET_MODE_DEPENDENT_ADDRESS_P |
| #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p |
| |
| #undef TARGET_INIT_BUILTINS |
| #define TARGET_INIT_BUILTINS sparc_init_builtins |
| #undef TARGET_BUILTIN_DECL |
| #define TARGET_BUILTIN_DECL sparc_builtin_decl |
| #undef TARGET_EXPAND_BUILTIN |
| #define TARGET_EXPAND_BUILTIN sparc_expand_builtin |
| #undef TARGET_FOLD_BUILTIN |
| #define TARGET_FOLD_BUILTIN sparc_fold_builtin |
| |
| #if TARGET_TLS |
| #undef TARGET_HAVE_TLS |
| #define TARGET_HAVE_TLS true |
| #endif |
| |
| #undef TARGET_CANNOT_FORCE_CONST_MEM |
| #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem |
| |
| #undef TARGET_ASM_OUTPUT_MI_THUNK |
| #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk |
| #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK |
| #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk |
| |
| #undef TARGET_RTX_COSTS |
| #define TARGET_RTX_COSTS sparc_rtx_costs |
| #undef TARGET_ADDRESS_COST |
| #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 |
| #undef TARGET_REGISTER_MOVE_COST |
| #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost |
| |
| #undef TARGET_PROMOTE_FUNCTION_MODE |
| #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode |
| #undef TARGET_STRICT_ARGUMENT_NAMING |
| #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming |
| |
| #undef TARGET_MUST_PASS_IN_STACK |
| #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size |
| #undef TARGET_PASS_BY_REFERENCE |
| #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference |
| #undef TARGET_ARG_PARTIAL_BYTES |
| #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes |
| #undef TARGET_FUNCTION_ARG_ADVANCE |
| #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance |
| #undef TARGET_FUNCTION_ARG |
| #define TARGET_FUNCTION_ARG sparc_function_arg |
| #undef TARGET_FUNCTION_INCOMING_ARG |
| #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg |
| #undef TARGET_FUNCTION_ARG_PADDING |
| #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding |
| #undef TARGET_FUNCTION_ARG_BOUNDARY |
| #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary |
| |
| #undef TARGET_RETURN_IN_MEMORY |
| #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory |
| #undef TARGET_STRUCT_VALUE_RTX |
| #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx |
| #undef TARGET_FUNCTION_VALUE |
| #define TARGET_FUNCTION_VALUE sparc_function_value |
| #undef TARGET_LIBCALL_VALUE |
| #define TARGET_LIBCALL_VALUE sparc_libcall_value |
| #undef TARGET_FUNCTION_VALUE_REGNO_P |
| #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p |
| |
| #undef TARGET_EXPAND_BUILTIN_SAVEREGS |
| #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs |
| |
| #undef TARGET_ASAN_SHADOW_OFFSET |
| #define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset |
| |
| #undef TARGET_EXPAND_BUILTIN_VA_START |
| #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start |
| #undef TARGET_GIMPLIFY_VA_ARG_EXPR |
| #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg |
| |
| #undef TARGET_VECTOR_MODE_SUPPORTED_P |
| #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p |
| |
| #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE |
| #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode |
| |
| #ifdef SUBTARGET_INSERT_ATTRIBUTES |
| #undef TARGET_INSERT_ATTRIBUTES |
| #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES |
| #endif |
| |
| #ifdef SUBTARGET_ATTRIBUTE_TABLE |
| #undef TARGET_ATTRIBUTE_TABLE |
| #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table |
| #endif |
| |
| #undef TARGET_OPTION_OVERRIDE |
| #define TARGET_OPTION_OVERRIDE sparc_option_override |
| |
| #ifdef TARGET_THREAD_SSP_OFFSET |
| #undef TARGET_STACK_PROTECT_GUARD |
| #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null |
| #endif |
| |
| #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL) |
| #undef TARGET_ASM_OUTPUT_DWARF_DTPREL |
| #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel |
| #endif |
| |
| #undef TARGET_ASM_FILE_END |
| #define TARGET_ASM_FILE_END sparc_file_end |
| |
| #undef TARGET_FRAME_POINTER_REQUIRED |
| #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required |
| |
| #undef TARGET_CAN_ELIMINATE |
| #define TARGET_CAN_ELIMINATE sparc_can_eliminate |
| |
| #undef TARGET_PREFERRED_RELOAD_CLASS |
| #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class |
| |
| #undef TARGET_SECONDARY_RELOAD |
| #define TARGET_SECONDARY_RELOAD sparc_secondary_reload |
| #undef TARGET_SECONDARY_MEMORY_NEEDED |
| #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed |
| #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE |
| #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode |
| |
| #undef TARGET_CONDITIONAL_REGISTER_USAGE |
| #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage |
| |
| #undef TARGET_INIT_PIC_REG |
| #define TARGET_INIT_PIC_REG sparc_init_pic_reg |
| |
| #undef TARGET_USE_PSEUDO_PIC_REG |
| #define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg |
| |
| #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING |
| #undef TARGET_MANGLE_TYPE |
| #define TARGET_MANGLE_TYPE sparc_mangle_type |
| #endif |
| |
| #undef TARGET_LRA_P |
| #define TARGET_LRA_P sparc_lra_p |
| |
| #undef TARGET_LEGITIMATE_ADDRESS_P |
| #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p |
| |
| #undef TARGET_LEGITIMATE_CONSTANT_P |
| #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p |
| |
| #undef TARGET_TRAMPOLINE_INIT |
| #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init |
| |
| #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P |
| #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p |
| #undef TARGET_PRINT_OPERAND |
| #define TARGET_PRINT_OPERAND sparc_print_operand |
| #undef TARGET_PRINT_OPERAND_ADDRESS |
| #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address |
| |
| /* The value stored by LDSTUB. */ |
| #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL |
| #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff |
| |
| #undef TARGET_CSTORE_MODE |
| #define TARGET_CSTORE_MODE sparc_cstore_mode |
| |
| #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV |
| #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv |
| |
| #undef TARGET_FIXED_CONDITION_CODE_REGS |
| #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs |
| |
| #undef TARGET_MIN_ARITHMETIC_PRECISION |
| #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision |
| |
| #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS |
| #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1 |
| |
| #undef TARGET_HARD_REGNO_NREGS |
| #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs |
| #undef TARGET_HARD_REGNO_MODE_OK |
| #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok |
| |
| #undef TARGET_MODES_TIEABLE_P |
| #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p |
| |
| #undef TARGET_CAN_CHANGE_MODE_CLASS |
| #define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class |
| |
| #undef TARGET_CONSTANT_ALIGNMENT |
| #define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment |
| |
| #undef TARGET_VECTORIZE_VEC_PERM_CONST |
| #define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const |
| |
| #undef TARGET_CAN_FOLLOW_JUMP |
| #define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump |
| |
| #undef TARGET_ZERO_CALL_USED_REGS |
| #define TARGET_ZERO_CALL_USED_REGS sparc_zero_call_used_regs |
| |
| #ifdef SPARC_GCOV_TYPE_SIZE |
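| /* Implement TARGET_GCOV_TYPE_SIZE. */ |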
| static HOST_WIDE_INT |
| sparc_gcov_type_size (void) |
| { |
| return SPARC_GCOV_TYPE_SIZE; |
| } |
| |
| #undef TARGET_GCOV_TYPE_SIZE |
| #define TARGET_GCOV_TYPE_SIZE sparc_gcov_type_size |
| #endif |
| |
| struct gcc_target targetm = TARGET_INITIALIZER; |
| |
| /* Return the memory reference contained in X if any, zero otherwise. */ |
| |
| static rtx |
| mem_ref (rtx x) |
| { |
| if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND) |
| x = XEXP (x, 0); |
| |
| if (MEM_P (x)) |
| return x; |
| |
| return NULL_RTX; |
| } |
| |
| /* True if any of INSN's source register(s) is REG. */ |
| |
| static bool |
| insn_uses_reg_p (rtx_insn *insn, unsigned int reg) |
| { |
| extract_insn (insn); |
| return ((REG_P (recog_data.operand[1]) |
| && REGNO (recog_data.operand[1]) == reg) |
| || (recog_data.n_operands == 3 |
| && REG_P (recog_data.operand[2]) |
| && REGNO (recog_data.operand[2]) == reg)); |
| } |
| |
| /* True if INSN is a floating-point division or square-root. */ |
| |
| static bool |
| div_sqrt_insn_p (rtx_insn *insn) |
| { |
| if (GET_CODE (PATTERN (insn)) != SET) |
| return false; |
| |
| switch (get_attr_type (insn)) |
| { |
| case TYPE_FPDIVS: |
| case TYPE_FPSQRTS: |
| case TYPE_FPDIVD: |
| case TYPE_FPSQRTD: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| /* True if INSN is a floating-point instruction. */ |
| |
| static bool |
| fpop_insn_p (rtx_insn *insn) |
| { |
| if (GET_CODE (PATTERN (insn)) != SET) |
| return false; |
| |
| switch (get_attr_type (insn)) |
| { |
| case TYPE_FPMOVE: |
| case TYPE_FPCMOVE: |
| case TYPE_FP: |
| case TYPE_FPCMP: |
| case TYPE_FPMUL: |
| case TYPE_FPDIVS: |
| case TYPE_FPSQRTS: |
| case TYPE_FPDIVD: |
| case TYPE_FPSQRTD: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| /* True if INSN is an atomic instruction. */ |
| |
| static bool |
| atomic_insn_for_leon3_p (rtx_insn *insn) |
| { |
| switch (INSN_CODE (insn)) |
| { |
| case CODE_FOR_swapsi: |
| case CODE_FOR_ldstub: |
| case CODE_FOR_atomic_compare_and_swap_leon3_1: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| /* True if INSN is a store instruction. */ |
| |
| static bool |
| store_insn_p (rtx_insn *insn) |
| { |
| if (GET_CODE (PATTERN (insn)) != SET) |
| return false; |
| |
| switch (get_attr_type (insn)) |
| { |
| case TYPE_STORE: |
| case TYPE_FPSTORE: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| /* True if INSN is a load instruction. */ |
| |
| static bool |
| load_insn_p (rtx_insn *insn) |
| { |
| if (GET_CODE (PATTERN (insn)) != SET) |
| return false; |
| |
| switch (get_attr_type (insn)) |
| { |
| case TYPE_LOAD: |
| case TYPE_SLOAD: |
| case TYPE_FPLOAD: |
| return true; |
| default: |
| return false; |
| } |
| } |
| |
| /* We use a machine specific pass to enable workarounds for errata. |
| |
| We need to have the (essentially) final form of the insn stream in order |
| to properly detect the various hazards. Therefore, this machine specific |
| pass runs as late as possible. */ |
| |
| /* True if INSN is an md pattern or an asm statement. */ |
| #define USEFUL_INSN_P(INSN) \ |
| (NONDEBUG_INSN_P (INSN) \ |
| && GET_CODE (PATTERN (INSN)) != USE \ |
| && GET_CODE (PATTERN (INSN)) != CLOBBER) |
| |
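| /* Return the next active insn after INSN, skipping UNSPEC_VOLATILE and |
| ASM_INPUT patterns as well as asm statements with an empty assembly |
| string; return NULL if there is none. */ |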
| rtx_insn * |
| next_active_non_empty_insn (rtx_insn *insn) |
| { |
| insn = next_active_insn (insn); |
| |
| while (insn |
| && (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE |
| || GET_CODE (PATTERN (insn)) == ASM_INPUT |
| || (USEFUL_INSN_P (insn) |
| && (asm_noperands (PATTERN (insn)) >= 0) |
| && !strcmp (decode_asm_operands (PATTERN (insn), |
| NULL, NULL, NULL, |
| NULL, NULL), "")))) |
| insn = next_active_insn (insn); |
| |
| return insn; |
| } |
| |
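| /* Main function of the errata work-around pass: scan the insn stream and |
| insert NOPs where the sparc_fix_* workarounds require them. */ |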
| static unsigned int |
| sparc_do_work_around_errata (void) |
| { |
| rtx_insn *insn, *next; |
| bool find_first_useful = true; |
| |
| /* Force all instructions to be split into their final form. */ |
| split_all_insns_noflow (); |
| |
| /* Now look for specific patterns in the insn stream. */ |
| for (insn = get_insns (); insn; insn = next) |
| { |
| bool insert_nop = false; |
| rtx set; |
| rtx_insn *jump; |
| rtx_sequence *seq; |
| |
| /* Look into the instruction in a delay slot. */ |
| if (NONJUMP_INSN_P (insn) |
| && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))) |
| { |
| jump = seq->insn (0); |
| insn = seq->insn (1); |
| } |
| else if (JUMP_P (insn)) |
| jump = insn; |
| else |
| jump = NULL; |
| |
| /* Do not begin a function with an atomic instruction. */ |
| if (sparc_fix_ut700 |
| && find_first_useful |
| && USEFUL_INSN_P (insn)) |
| { |
| find_first_useful = false; |
| if (atomic_insn_for_leon3_p (insn)) |
| emit_insn_before (gen_nop (), insn); |
| } |
| |
| /* Place a NOP at the branch target of an integer branch if it is a |
| floating-point operation or a floating-point branch. */ |
| if (sparc_fix_gr712rc |
| && jump |
| && jump_to_label_p (jump) |
| && get_attr_branch_type (jump) == BRANCH_TYPE_ICC) |
| { |
| rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump)); |
| if (target |
| && (fpop_insn_p (target) |
| || (JUMP_P (target) |
| && get_attr_branch_type (target) == BRANCH_TYPE_FCC))) |
| emit_insn_before (gen_nop (), target); |
| } |
| |
| /* Insert a NOP between a load instruction and an atomic instruction.  Also |
| insert a NOP at the branch target if there is a load in the delay slot |
| and an atomic instruction at the branch target. */ |
| if (sparc_fix_ut700 |
| && NONJUMP_INSN_P (insn) |
| && load_insn_p (insn)) |
| { |
| if (jump && jump_to_label_p (jump)) |
| { |
| rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump)); |
| if (target && atomic_insn_for_leon3_p (target)) |
| emit_insn_before (gen_nop (), target); |
| } |
| |
| next = next_active_non_empty_insn (insn); |
| if (!next) |
| break; |
| |
| if (atomic_insn_for_leon3_p (next)) |
| insert_nop = true; |
| } |
| |
| /* Look for a sequence that starts with an fdiv or fsqrt instruction and |
| ends with another fdiv or fsqrt instruction with no dependencies on |
| the former, along with an appropriate pattern in between. */ |
| if (sparc_fix_lost_divsqrt |
| && NONJUMP_INSN_P (insn) |
| && div_sqrt_insn_p (insn)) |
| { |
| int i; |
| int fp_found = 0; |
| rtx_insn *after; |
| |
| const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn))); |
| |
| next = next_active_insn (insn); |
| if (!next) |
| break; |
| |
| for (after = next, i = 0; i < 4; i++) |
| { |
| /* Count floating-point operations. */ |
| if (i != 3 && fpop_insn_p (after)) |
| { |
| /* If the insn uses the destination register of |
| the div/sqrt, then it cannot be problematic. */ |
| if (insn_uses_reg_p (after, dest_reg)) |
| break; |
| fp_found++; |
| } |
| |
| /* Count floating-point loads. */ |
| if (i != 3 |
| && (set = single_set (after)) != NULL_RTX |
| && REG_P (SET_DEST (set)) |
| && REGNO (SET_DEST (set)) > 31) |
| { |
| /* If the insn uses the destination register of |
| the div/sqrt, then it cannot be problematic. */ |
| if (REGNO (SET_DEST (set)) == dest_reg) |
| break; |
| fp_found++; |
| } |
| |
| /* Check if this is a problematic sequence. */ |
| if (i > 1 |
| && fp_found >= 2 |
| && div_sqrt_insn_p (after)) |
| { |
| /* If this is the short version of the problematic |
| sequence we add two NOPs in a row to also prevent |
| the long version. */ |
| if (i == 2) |
| emit_insn_before (gen_nop (), next); |
| insert_nop = true; |
| break; |
| } |
| |
| /* No need to scan past a second div/sqrt. */ |
| if (div_sqrt_insn_p (after)) |
| break; |
| |
| /* Insert NOP before branch. */ |
| if (i < 3 |
| && (!NONJUMP_INSN_P (after) |
| || GET_CODE (PATTERN (after)) == SEQUENCE)) |
| { |
| insert_nop = true; |
| break; |
| } |
| |
| after = next_active_insn (after); |
| if (!after) |
| break; |
| } |
| } |
| |
| /* Look for either of these two sequences: |
| |
| Sequence A: |
| 1. store of word size or less (e.g. st / stb / sth / stf) |
| 2. any single instruction that is not a load or store |
| 3. any store instruction (e.g. st / stb / sth / stf / std / stdf) |
| |
| Sequence B: |
| 1. store of double word size (e.g. std / stdf) |
| 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */ |
| if (sparc_fix_b2bst |
| && NONJUMP_INSN_P (insn) |
| && (set = single_set (insn)) != NULL_RTX |
| && store_insn_p (insn)) |
| { |
| /* Sequence B begins with a double-word store. */ |
| bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8; |
| rtx_insn *after; |
| int i; |
| |
| next = next_active_non_empty_insn (insn); |
| if (!next) |
| break; |
| |
| for (after = next, i = 0; i < 2; i++) |
| { |
| /* If the insn is a branch, then it cannot be problematic. */ |
| if (!NONJUMP_INSN_P (after) |
| || GET_CODE (PATTERN (after)) == SEQUENCE) |
| break; |
| |
| /* Sequence B is only two instructions long. */ |
| if (seq_b) |
| { |
| /* Add NOP if followed by a store. */ |
| if (store_insn_p (after)) |
| insert_nop = true; |
| |
| /* Otherwise it is ok. */ |
| break; |
| } |
| |
| /* If the second instruction is a load or a store, |
| then the sequence cannot be problematic. */ |
| if (i == 0) |
| { |
| if ((set = single_set (after)) != NULL_RTX |
| && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set)))) |
| break; |
| |
| after = next_active_non_empty_insn (after); |
| if (!after) |
| break; |
| } |
| |
| /* Add NOP if third instruction is a store. */ |
| if (i == 1 |
| && store_insn_p (after)) |
| insert_nop = true; |
| } |
| } |
| |
| /* Look for a single-word load into an odd-numbered FP register. */ |
| else if (sparc_fix_at697f |
| && NONJUMP_INSN_P (insn) |
| && (set = single_set (insn)) != NULL_RTX |
| && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4 |
| && mem_ref (SET_SRC (set)) |
| && REG_P (SET_DEST (set)) |
| && REGNO (SET_DEST (set)) > 31 |
| && REGNO (SET_DEST (set)) % 2 != 0) |
| { |
| /* The wrong dependency is on the enclosing double register. */ |
| const unsigned int x = REGNO (SET_DEST (set)) - 1; |
| unsigned int src1, src2, dest; |
| int code; |
| |
| next = next_active_insn (insn); |
| if (!next) |
| break; |
| /* If the insn is a branch, then it cannot be problematic. */ |
| if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE) |
| continue; |
| |
| extract_insn (next); |
| code = INSN_CODE (next); |
| |
| switch (code) |
| { |
| case CODE_FOR_adddf3: |
| case CODE_FOR_subdf3: |
| case CODE_FOR_muldf3: |
| case CODE_FOR_divdf3: |
| dest = REGNO (recog_data.operand[0]); |
| src1 = REGNO (recog_data.operand[1]); |
| src2 = REGNO (recog_data.operand[2]); |
| if (src1 != src2) |
| { |
| /* Case [1-4]: |
| ld [address], %fx+1 |
| FPOPd %f{x,y}, %f{y,x}, %f{x,y} */ |
| if ((src1 == x || src2 == x) |
| && (dest == src1 || dest == src2)) |
| insert_nop = true; |
| } |
| else |
| { |
| /* Case 5: |
| ld [address], %fx+1 |
| FPOPd %fx, %fx, %fx */ |
| if (src1 == x |
| && dest == src1 |
| && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3)) |
| insert_nop = true; |
| } |
| break; |
| |
| case CODE_FOR_sqrtdf2: |
| dest = REGNO (recog_data.operand[0]); |
| src1 = REGNO (recog_data.operand[1]); |
| /* Case 6: |
| ld [address], %fx+1 |
| fsqrtd %fx, %fx */ |
| if (src1 == x && dest == src1) |
| insert_nop = true; |
| break; |
| |
| default: |
| break; |
| } |
| } |
| |
| /* Look for a single-word load into an integer register. */ |
| else if (sparc_fix_ut699 |
| && NONJUMP_INSN_P (insn) |
| && (set = single_set (insn)) != NULL_RTX |
| && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4 |
| && (mem_ref (SET_SRC (set)) != NULL_RTX |
| || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op) |
| && REG_P (SET_DEST (set)) |
| && REGNO (SET_DEST (set)) < 32) |
| { |
| /* There is no problem if the second memory access has a data |
| dependency on the first single-cycle load. */ |
| rtx x = SET_DEST (set); |
| |
| next = next_active_insn (insn); |
| if (!next) |
| break; |
| /* If the insn is a branch, then it cannot be problematic. */ |
| if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE) |
| continue; |
| |
| /* Look for a second memory access to/from an integer register. */ |
| if ((set = single_set (next)) != NULL_RTX) |
| { |
| rtx src = SET_SRC (set); |
| rtx dest = SET_DEST (set); |
| rtx mem; |
| |
| /* LDD is affected. */ |
| if ((mem = mem_ref (src)) != NULL_RTX |
| && REG_P (dest) |
| && REGNO (dest) < 32 |
| && !reg_mentioned_p (x, XEXP (mem, 0))) |
| insert_nop = true; |
| |
| /* STD is *not* affected. */ |
| else if (MEM_P (dest) |
| && GET_MODE_SIZE (GET_MODE (dest)) <= 4 |
| && (src == CONST0_RTX (GET_MODE (dest)) |
| || (REG_P (src) |
| && REGNO (src) < 32 |
| && REGNO (src) != REGNO (x))) |
| && !reg_mentioned_p (x, XEXP (dest, 0))) |
| insert_nop = true; |
| |
| /* GOT accesses use LD. */ |
| else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op |
| && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1))) |
| insert_nop = true; |
| } |
| } |
| |
| /* Look for a single-word load/operation into an FP register. */ |
| else if (sparc_fix_ut699 |
| && NONJUMP_INSN_P (insn) |
| && (set = single_set (insn)) != NULL_RTX |
| && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4 |
| && REG_P (SET_DEST (set)) |
| && REGNO (SET_DEST (set)) > 31) |
| { |
| /* Number of instructions in the problematic window. */ |
| const int n_insns = 4; |
| /* The problematic combination is with the sibling FP register. */ |
| const unsigned int x = REGNO (SET_DEST (set)); |
| const unsigned int y = x ^ 1; |
| rtx_insn *after; |
| int i; |
| |
| next = next_active_insn (insn); |
| if (!next) |
| break; |
| /* If the insn is a branch, then it cannot be problematic. */ |
| if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE) |
| continue; |
| |
| /* Look for a second load/operation into the sibling FP register. */ |
| if (!((set = single_set (next)) != NULL_RTX |
| && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4 |
| && REG_P (SET_DEST (set)) |
| && REGNO (SET_DEST (set)) == y)) |
| continue; |
| |
| /* Look for a (possible) store from the FP register in the next N |
| instructions, but bail out if it is again modified or if there |
| is a store from the sibling FP register before this store. */ |
| for (after = next, i = 0; i < n_insns; i++) |
| { |
| bool branch_p; |
| |
| after = next_active_insn (after); |
| if (!after) |
| break; |
| |
| /* This is a branch with an empty delay slot. */ |
| if (!NONJUMP_INSN_P (after)) |
| { |
| if (++i == n_insns) |
| break; |
| branch_p = true; |
| after = NULL; |
| } |
| /* This is a branch with a filled delay slot. */ |
| else if (rtx_sequence *seq = |
| dyn_cast <rtx_sequence *> (PATTERN (after))) |
| { |
| if (++i == n_insns) |
| break; |
| branch_p = true; |
| after = seq->insn (1); |
| } |
| /* This is a regular instruction. */ |
| else |
| branch_p = false; |
| |
| if (after && (set = single_set (after)) != NULL_RTX) |
| { |
| const rtx src = SET_SRC (set); |
| const rtx dest = SET_DEST (set); |
| const unsigned int size = GET_MODE_SIZE (GET_MODE (dest)); |
| |
| /* If the FP register is again modified before the store, |
| then the store isn't affected. */ |
| if (REG_P (dest) |
| && (REGNO (dest) == x |
| || (REGNO (dest) == y && size == 8))) |
| break; |
| |
| if (MEM_P (dest) && REG_P (src)) |
| { |
| /* If there is a store from the sibling FP register |
| before the store, then the store is not affected. */ |
| if (REGNO (src) == y || (REGNO (src) == x && size == 8)) |
| break; |
| |
| /* Otherwise, the store is affected. */ |
| if (REGNO (src) == x && size == 4) |
| { |
| insert_nop = true; |
| break; |
| } |
| } |
| } |
| |
| /* If we have a branch in the first M instructions, then we |
| cannot see the (M+2)th instruction so we play safe. */ |
| if (branch_p && i <= (n_insns - 2)) |
| { |
| insert_nop = true; |
| break; |
| } |
| } |
| } |
| |
| else |
| next = NEXT_INSN (insn); |
| |
| if (insert_nop) |
| emit_insn_before (gen_nop (), next); |
| } |
| |
| return 0; |
| } |
| |
| namespace { |
| |
| const pass_data pass_data_work_around_errata = |
| { |
| RTL_PASS, /* type */ |
| "errata", /* name */ |
| OPTGROUP_NONE, /* optinfo_flags */ |
| TV_MACH_DEP, /* tv_id */ |
| 0, /* properties_required */ |
| 0, /* properties_provided */ |
| 0, /* properties_destroyed */ |
| 0, /* todo_flags_start */ |
| 0, /* todo_flags_finish */ |
| }; |
| |
| class pass_work_around_errata : public rtl_opt_pass |
| { |
| public: |
| pass_work_around_errata(gcc::context *ctxt) |
| : rtl_opt_pass(pass_data_work_around_errata, ctxt) |
| {} |
| |
| /* opt_pass methods: */ |
| virtual bool gate (function *) |
| { |
| return sparc_fix_at697f |
| || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc |
| || sparc_fix_b2bst || sparc_fix_lost_divsqrt; |
| } |
| |
| virtual unsigned int execute (function *) |
| { |
| return sparc_do_work_around_errata (); |
| } |
| |
| }; // class pass_work_around_errata |
| |
| } // anon namespace |
| |
| rtl_opt_pass * |
| make_pass_work_around_errata (gcc::context *ctxt) |
| { |
| return new pass_work_around_errata (ctxt); |
| } |
| |
| /* Helpers for TARGET_DEBUG_OPTIONS. */ |
| static void |
| dump_target_flag_bits (const int flags) |
| { |
| if (flags & MASK_64BIT) |
| fprintf (stderr, "64BIT "); |
| if (flags & MASK_APP_REGS) |
| fprintf (stderr, "APP_REGS "); |
| if (flags & MASK_FASTER_STRUCTS) |
| fprintf (stderr, "FASTER_STRUCTS "); |
| if (flags & MASK_FLAT) |
| fprintf (stderr, "FLAT "); |
| if (flags & MASK_FMAF) |
| fprintf (stderr, "FMAF "); |
| if (flags & MASK_FSMULD) |
| fprintf (stderr, "FSMULD "); |
| if (flags & MASK_FPU) |
| fprintf (stderr, "FPU "); |
| if (flags & MASK_HARD_QUAD) |
| fprintf (stderr, "HARD_QUAD "); |
| if (flags & MASK_POPC) |
| fprintf (stderr, "POPC "); |
| if (flags & MASK_PTR64) |
| fprintf (stderr, "PTR64 "); |
| if (flags & MASK_STACK_BIAS) |
| fprintf (stderr, "STACK_BIAS "); |
| if (flags & MASK_UNALIGNED_DOUBLES) |
| fprintf (stderr, "UNALIGNED_DOUBLES "); |
| if (flags & MASK_V8PLUS) |
| fprintf (stderr, "V8PLUS "); |
| if (flags & MASK_VIS) |
| fprintf (stderr, "VIS "); |
| if (flags & MASK_VIS2) |
| fprintf (stderr, "VIS2 "); |
| if (flags & MASK_VIS3) |
| fprintf (stderr, "VIS3 "); |
| if (flags & MASK_VIS4) |
| fprintf (stderr, "VIS4 "); |
| if (flags & MASK_VIS4B) |
| fprintf (stderr, "VIS4B "); |
| if (flags & MASK_CBCOND) |
| fprintf (stderr, "CBCOND "); |
| if (flags & MASK_DEPRECATED_V8_INSNS) |
| fprintf (stderr, "DEPRECATED_V8_INSNS "); |
| if (flags & MASK_LEON) |
| fprintf (stderr, "LEON "); |
| if (flags & MASK_LEON3) |
| fprintf (stderr, "LEON3 "); |
| if (flags & MASK_SPARCLET) |
| fprintf (stderr, "SPARCLET "); |
| if (flags & MASK_SPARCLITE) |
| fprintf (stderr, "SPARCLITE "); |
| if (flags & MASK_V8) |
| fprintf (stderr, "V8 "); |
| if (flags & MASK_V9) |
| fprintf (stderr, "V9 "); |
| } |
| |
| static void |
| dump_target_flags (const char *prefix, const int flags) |
| { |
| fprintf (stderr, "%s: (%08x) [ ", prefix, flags); |
| dump_target_flag_bits (flags); |
| fprintf(stderr, "]\n"); |
| } |
| |
| /* Validate and override various options, and do some machine dependent |
| initialization. */ |
| |
| static void |
| sparc_option_override (void) |
| { |
| /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */ |
| static struct cpu_default { |
| const int cpu; |
| const enum sparc_processor_type processor; |
| } const cpu_default[] = { |
| /* There must be one entry here for each TARGET_CPU value. */ |
| { TARGET_CPU_sparc, PROCESSOR_CYPRESS }, |
| { TARGET_CPU_v8, PROCESSOR_V8 }, |
| { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC }, |
| { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC }, |
| { TARGET_CPU_leon, PROCESSOR_LEON }, |
| { TARGET_CPU_leon3, PROCESSOR_LEON3 }, |
| { TARGET_CPU_leon5, PROCESSOR_LEON5 }, |
| { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 }, |
| { TARGET_CPU_sparclite, PROCESSOR_F930 }, |
| { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X }, |
| { TARGET_CPU_sparclet, PROCESSOR_TSC701 }, |
| { TARGET_CPU_v9, PROCESSOR_V9 }, |
| { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC }, |
| { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 }, |
| { TARGET_CPU_niagara, PROCESSOR_NIAGARA }, |
| { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 }, |
| { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 }, |
| { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 }, |
| { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 }, |
| { TARGET_CPU_m8, PROCESSOR_M8 }, |
| { -1, PROCESSOR_V7 } |
| }; |
| const struct cpu_default *def; |
| /* Table of values for -m{cpu,tune}=. This must match the order of |
| the enum processor_type in sparc-opts.h. */ |
| static struct cpu_table { |
| const char *const name; |
| const int disable; |
| const int enable; |
| } const cpu_table[] = { |
| { "v7", MASK_ISA, 0 }, |
| { "cypress", MASK_ISA, 0 }, |
| { "v8", MASK_ISA, MASK_V8 }, |
| /* TI TMS390Z55 supersparc */ |
| { "supersparc", MASK_ISA, MASK_V8 }, |
| { "hypersparc", MASK_ISA, MASK_V8 }, |
| { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON }, |
| { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 }, |
| { "leon5", MASK_ISA, MASK_V8|MASK_LEON3 }, |
| { "leon3v7", MASK_ISA, MASK_LEON3 }, |
| { "sparclite", MASK_ISA, MASK_SPARCLITE }, |
| /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */ |
| { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE }, |
| /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */ |
| { "f934", MASK_ISA, MASK_SPARCLITE }, |
| { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE }, |
| { "sparclet", MASK_ISA, MASK_SPARCLET }, |
| /* TEMIC sparclet */ |
| { "tsc701", MASK_ISA, MASK_SPARCLET }, |
| { "v9", MASK_ISA, MASK_V9 }, |
| /* UltraSPARC I, II, IIi */ |
| { "ultrasparc", MASK_ISA, |
| /* Although insns using %y are deprecated, it is a clear win. */ |
| MASK_V9|MASK_DEPRECATED_V8_INSNS }, |
| /* UltraSPARC III */ |
| /* ??? Check if %y issue still holds true. */ |
| { "ultrasparc3", MASK_ISA, |
| MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 }, |
| /* UltraSPARC T1 */ |
| { "niagara", MASK_ISA, |
| MASK_V9|MASK_DEPRECATED_V8_INSNS }, |
| /* UltraSPARC T2 */ |
| { "niagara2", MASK_ISA, |
| MASK_V9|MASK_POPC|MASK_VIS2 }, |
| /* UltraSPARC T3 */ |
| { "niagara3", MASK_ISA, |
| MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF }, |
| /* UltraSPARC T4 */ |
| { "niagara4", MASK_ISA, |
| MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND }, |
| /* UltraSPARC M7 */ |
| { "niagara7", MASK_ISA, |
| MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }, |
| /* UltraSPARC M8 */ |
| { "m8", MASK_ISA, |
| MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC } |
| }; |
| const struct cpu_table *cpu; |
| unsigned int i; |
| |
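|   /* sparc_debug_string holds the argument of the -mdebug= option: a |
|      comma-separated list in which each element may be prefixed with '!' |
|      to clear rather than set the corresponding debug bits.  For instance |
|      "all,!options" first sets every debug bit and then clears the |
|      options bit.  */ |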
| if (sparc_debug_string != NULL) |
| { |
| const char *q; |
| char *p; |
| |
| p = ASTRDUP (sparc_debug_string); |
| while ((q = strtok (p, ",")) != NULL) |
| { |
| bool invert; |
| int mask; |
| |
| p = NULL; |
| if (*q == '!') |
| { |
| invert = true; |
| q++; |
| } |
| else |
| invert = false; |
| |
| if (! strcmp (q, "all")) |
| mask = MASK_DEBUG_ALL; |
| else if (! strcmp (q, "options")) |
| mask = MASK_DEBUG_OPTIONS; |
| else |
| error ("unknown %<-mdebug-%s%> switch", q); |
| |
| if (invert) |
| sparc_debug &= ~mask; |
| else |
| sparc_debug |= mask; |
| } |
| } |
| |
| /* Enable the FsMULd instruction by default if not explicitly specified by |
| the user. It may be later disabled by the CPU (explicitly or not). */ |
| if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD)) |
| target_flags |= MASK_FSMULD; |
| |
| if (TARGET_DEBUG_OPTIONS) |
| { |
|       dump_target_flags ("Initial target_flags", target_flags); |
|       dump_target_flags ("target_flags_explicit", target_flags_explicit); |
| } |
| |
| #ifdef SUBTARGET_OVERRIDE_OPTIONS |
| SUBTARGET_OVERRIDE_OPTIONS; |
| #endif |
| |
| #ifndef SPARC_BI_ARCH |
| /* Check for unsupported architecture size. */ |
| if (!TARGET_64BIT != DEFAULT_ARCH32_P) |
| error ("%s is not supported by this configuration", |
| DEFAULT_ARCH32_P ? "-m64" : "-m32"); |
| #endif |
| |
|   /* We force all 64-bit archs to use 128-bit long double.  */ |
| if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128) |
| { |
| error ("%<-mlong-double-64%> not allowed with %<-m64%>"); |
| target_flags |= MASK_LONG_DOUBLE_128; |
| } |
| |
| /* Check that -fcall-saved-REG wasn't specified for out registers. */ |
| for (i = 8; i < 16; i++) |
| if (!call_used_regs [i]) |
| { |
| error ("%<-fcall-saved-REG%> is not supported for out registers"); |
| call_used_regs [i] = 1; |
| } |
| |
| /* Set the default CPU if no -mcpu option was specified. */ |
| if (!global_options_set.x_sparc_cpu_and_features) |
| { |
| for (def = &cpu_default[0]; def->cpu != -1; ++def) |
| if (def->cpu == TARGET_CPU_DEFAULT) |
| break; |
| gcc_assert (def->cpu != -1); |
| sparc_cpu_and_features = def->processor; |
| } |
| |
| /* Set the default CPU if no -mtune option was specified. */ |
| if (!global_options_set.x_sparc_cpu) |
| sparc_cpu = sparc_cpu_and_features; |
| |
| cpu = &cpu_table[(int) sparc_cpu_and_features]; |
| |
| if (TARGET_DEBUG_OPTIONS) |
| { |
| fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name); |
| dump_target_flags ("cpu->disable", cpu->disable); |
| dump_target_flags ("cpu->enable", cpu->enable); |
| } |
| |
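|   /* Start from the selected CPU: clear the bits it disables, then set |
|      the bits it enables, except for features that the configured |
|      assembler cannot emit and for any feature bit the user set or |
|      cleared explicitly on the command line |
|      (target_flags_explicit & MASK_FEATURES).  */ |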
| target_flags &= ~cpu->disable; |
| target_flags |= (cpu->enable |
| #ifndef HAVE_AS_FMAF_HPC_VIS3 |
| & ~(MASK_FMAF | MASK_VIS3) |
| #endif |
| #ifndef HAVE_AS_SPARC4 |
| & ~MASK_CBCOND |
| #endif |
| #ifndef HAVE_AS_SPARC5_VIS4 |
| & ~(MASK_VIS4 | MASK_SUBXC) |
| #endif |
| #ifndef HAVE_AS_SPARC6 |
| & ~(MASK_VIS4B) |
| #endif |
| #ifndef HAVE_AS_LEON |
| & ~(MASK_LEON | MASK_LEON3) |
| #endif |
| & ~(target_flags_explicit & MASK_FEATURES) |
| ); |
| |
| /* FsMULd is a V8 instruction. */ |
| if (!TARGET_V8 && !TARGET_V9) |
| target_flags &= ~MASK_FSMULD; |
| |
| /* -mvis2 implies -mvis. */ |
| if (TARGET_VIS2) |
| target_flags |= MASK_VIS; |
| |
| /* -mvis3 implies -mvis2 and -mvis. */ |
| if (TARGET_VIS3) |
| target_flags |= MASK_VIS2 | MASK_VIS; |
| |
| /* -mvis4 implies -mvis3, -mvis2 and -mvis. */ |
| if (TARGET_VIS4) |
| target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS; |
| |
|   /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis.  */ |
| if (TARGET_VIS4B) |
| target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS; |
| |
| /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if |
| FPU is disabled. */ |
| if (!TARGET_FPU) |
| target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4 |
| | MASK_VIS4B | MASK_FMAF | MASK_FSMULD); |
| |
| /* -mvis assumes UltraSPARC+, so we are sure v9 instructions |
| are available; -m64 also implies v9. */ |
| if (TARGET_VIS || TARGET_ARCH64) |
| { |
| target_flags |= MASK_V9; |
| target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE); |
| } |
| |
| /* -mvis also implies -mv8plus on 32-bit. */ |
| if (TARGET_VIS && !TARGET_ARCH64) |
| target_flags |= MASK_V8PLUS; |
| |
| /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */ |
| if (TARGET_V9 && TARGET_ARCH32) |
| target_flags |= MASK_DEPRECATED_V8_INSNS; |
| |
| /* V8PLUS requires V9 and makes no sense in 64-bit mode. */ |
| if (!TARGET_V9 || TARGET_ARCH64) |
| target_flags &= ~MASK_V8PLUS; |
| |
| /* Don't use stack biasing in 32-bit mode. */ |
| if (TARGET_ARCH32) |
| target_flags &= ~MASK_STACK_BIAS; |
| |
| /* Use LRA instead of reload, unless otherwise instructed. */ |
| if (!(target_flags_explicit & MASK_LRA)) |
| target_flags |= MASK_LRA; |
| |
| /* Enable applicable errata workarounds for LEON3FT. */ |
| if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc) |
| { |
| sparc_fix_b2bst = 1; |
| sparc_fix_lost_divsqrt = 1; |
| } |
| |
| /* Disable FsMULd for the UT699 since it doesn't work correctly. */ |
| if (sparc_fix_ut699) |
| target_flags &= ~MASK_FSMULD; |
| |
| #ifdef TARGET_DEFAULT_LONG_DOUBLE_128 |
| if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) |
| target_flags |= MASK_LONG_DOUBLE_128; |
| #endif |
| |
| if (TARGET_DEBUG_OPTIONS) |
| dump_target_flags ("Final target_flags", target_flags); |
| |
| /* Set the code model if no -mcmodel option was specified. */ |
| if (global_options_set.x_sparc_code_model) |
| { |
| if (TARGET_ARCH32) |
| error ("%<-mcmodel=%> is not supported in 32-bit mode"); |
| } |
| else |
| { |
| if (TARGET_ARCH32) |
| sparc_code_model = CM_32; |
| else |
| sparc_code_model = SPARC_DEFAULT_CMODEL; |
| } |
| |
| /* Set the memory model if no -mmemory-model option was specified. */ |
| if (!global_options_set.x_sparc_memory_model) |
| { |
| /* Choose the memory model for the operating system. */ |
| enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL; |
| if (os_default != SMM_DEFAULT) |
| sparc_memory_model = os_default; |
| /* Choose the most relaxed model for the processor. */ |
| else if (TARGET_V9) |
| sparc_memory_model = SMM_RMO; |
| else if (TARGET_LEON3) |
| sparc_memory_model = SMM_TSO; |
| else if (TARGET_LEON) |
| sparc_memory_model = SMM_SC; |
| else if (TARGET_V8) |
| sparc_memory_model = SMM_PSO; |
| else |
| sparc_memory_model = SMM_SC; |
| } |
| |
| /* Supply a default value for align_functions. */ |
| if (flag_align_functions && !str_align_functions) |
| { |
| if (sparc_cpu == PROCESSOR_ULTRASPARC |
| || sparc_cpu == PROCESSOR_ULTRASPARC3 |
| || sparc_cpu == PROCESSOR_NIAGARA |
| || sparc_cpu == PROCESSOR_NIAGARA2 |
| || sparc_cpu == PROCESSOR_NIAGARA3 |
| || sparc_cpu == PROCESSOR_NIAGARA4) |
| str_align_functions = "32"; |
| else if (sparc_cpu == PROCESSOR_NIAGARA7 |
| || sparc_cpu == PROCESSOR_M8) |
| str_align_functions = "64"; |
| } |
| |
| /* Validate PCC_STRUCT_RETURN. */ |
| if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN) |
| flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1); |
| |
| /* Only use .uaxword when compiling for a 64-bit target. */ |
| if (!TARGET_ARCH64) |
| targetm.asm_out.unaligned_op.di = NULL; |
| |
| /* Set the processor costs. */ |
| switch (sparc_cpu) |
| { |
| case PROCESSOR_V7: |
| case PROCESSOR_CYPRESS: |
| sparc_costs = &cypress_costs; |
| break; |
| case PROCESSOR_V8: |
| case PROCESSOR_SPARCLITE: |
| case PROCESSOR_SUPERSPARC: |
| sparc_costs = &supersparc_costs; |
| break; |
| case PROCESSOR_F930: |
| case PROCESSOR_F934: |
| case PROCESSOR_HYPERSPARC: |
| case PROCESSOR_SPARCLITE86X: |
| sparc_costs = &hypersparc_costs; |
| break; |
| case PROCESSOR_LEON: |
| sparc_costs = &leon_costs; |
| break; |
| case PROCESSOR_LEON3: |
| case PROCESSOR_LEON3V7: |
| sparc_costs = &leon3_costs; |
| break; |
| case PROCESSOR_LEON5: |
| sparc_costs = &leon5_costs; |
| break; |
| case PROCESSOR_SPARCLET: |
| case PROCESSOR_TSC701: |
| sparc_costs = &sparclet_costs; |
| break; |
| case PROCESSOR_V9: |
| case PROCESSOR_ULTRASPARC: |
| sparc_costs = &ultrasparc_costs; |
| break; |
| case PROCESSOR_ULTRASPARC3: |
| sparc_costs = &ultrasparc3_costs; |
| break; |
| case PROCESSOR_NIAGARA: |
| sparc_costs = &niagara_costs; |
| break; |
| case PROCESSOR_NIAGARA2: |
| sparc_costs = &niagara2_costs; |
| break; |
| case PROCESSOR_NIAGARA3: |
| sparc_costs = &niagara3_costs; |
| break; |
| case PROCESSOR_NIAGARA4: |
| sparc_costs = &niagara4_costs; |
| break; |
| case PROCESSOR_NIAGARA7: |
| sparc_costs = &niagara7_costs; |
| break; |
| case PROCESSOR_M8: |
| sparc_costs = &m8_costs; |
| break; |
| case PROCESSOR_NATIVE: |
| gcc_unreachable (); |
| }; |
| |
| /* param_simultaneous_prefetches is the number of prefetches that |
| can run at the same time. More important, it is the threshold |
| defining when additional prefetches will be dropped by the |
| hardware. |
| |
| The UltraSPARC-III features a documented prefetch queue with a |
| size of 8. Additional prefetches issued in the cpu are |
| dropped. |
| |
| Niagara processors are different. In these processors prefetches |
| are handled much like regular loads. The L1 miss buffer is 32 |
| entries, but prefetches start getting affected when 30 entries |
| become occupied. That occupation could be a mix of regular loads |
| and prefetches though. And that buffer is shared by all threads. |
| Once the threshold is reached, if the core is running a single |
| thread the prefetch will retry. If more than one thread is |
| running, the prefetch will be dropped. |
| |
|      All this makes it very difficult to determine how many prefetches |
|      can be issued simultaneously, even in a single-threaded program. |
|      Experimental results show that setting this parameter to 32 works |
|      well when the number of threads is not high.  */ |
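|   /* In concrete terms: 2 for UltraSPARC-I/II and Niagara-1 through -4, |
|      8 for UltraSPARC-III, 32 for the M7 (niagara7) and M8, and 3 for |
|      everything else.  */ |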
| SET_OPTION_IF_UNSET (&global_options, &global_options_set, |
| param_simultaneous_prefetches, |
| ((sparc_cpu == PROCESSOR_ULTRASPARC |
| || sparc_cpu == PROCESSOR_NIAGARA |
| || sparc_cpu == PROCESSOR_NIAGARA2 |
| || sparc_cpu == PROCESSOR_NIAGARA3 |
| || sparc_cpu == PROCESSOR_NIAGARA4) |
| ? 2 |
| : (sparc_cpu == PROCESSOR_ULTRASPARC3 |
| ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7 |
| || sparc_cpu == PROCESSOR_M8) |
| ? 32 : 3)))); |
| |
| /* param_l1_cache_line_size is the size of the L1 cache line, in |
| bytes. |
| |
| The Oracle SPARC Architecture (previously the UltraSPARC |
| Architecture) specification states that when a PREFETCH[A] |
| instruction is executed an implementation-specific amount of data |
| is prefetched, and that it is at least 64 bytes long (aligned to |
| at least 64 bytes). |
| |
| However, this is not correct. The M7 (and implementations prior |
| to that) does not guarantee a 64B prefetch into a cache if the |
| line size is smaller. A single cache line is all that is ever |
| prefetched. So for the M7, where the L1D$ has 32B lines and the |
| L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the |
| L2 and L3, but only 32B are brought into the L1D$. (Assuming it |
| is a read_n prefetch, which is the only type which allocates to |
| the L1.) */ |
| SET_OPTION_IF_UNSET (&global_options, &global_options_set, |
| param_l1_cache_line_size, |
| (sparc_cpu == PROCESSOR_M8 ? 64 : 32)); |
| |
|   /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use |
|      Harvard level-1 caches) in kilobytes.  Both UltraSPARC and |
|      Niagara processors feature an L1D$ of 16KB.  */ |
| SET_OPTION_IF_UNSET (&global_options, &global_options_set, |
| param_l1_cache_size, |
| ((sparc_cpu == PROCESSOR_ULTRASPARC |
| || sparc_cpu == PROCESSOR_ULTRASPARC3 |
| || sparc_cpu == PROCESSOR_NIAGARA |
| || sparc_cpu == PROCESSOR_NIAGARA2 |
| || sparc_cpu == PROCESSOR_NIAGARA3 |
| || sparc_cpu == PROCESSOR_NIAGARA4 |
| || sparc_cpu == PROCESSOR_NIAGARA7 |
| || sparc_cpu == PROCESSOR_M8) |
| ? 16 : 64)); |
| |
|   /* param_l2_cache_size is the size of the L2 in kilobytes.  Note |
|      that 512 is the default in params.def.  */ |
| SET_OPTION_IF_UNSET (&global_options, &global_options_set, |
| param_l2_cache_size, |
| ((sparc_cpu == PROCESSOR_NIAGARA4 |
| || sparc_cpu == PROCESSOR_M8) |
| ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7 |
| ? 256 : 512))); |
| |
| /* Disable save slot sharing for call-clobbered registers by default. |
| The IRA sharing algorithm works on single registers only and this |
| pessimizes for double floating-point registers. */ |
| if (!global_options_set.x_flag_ira_share_save_slots) |
| flag_ira_share_save_slots = 0; |
| |
| /* Only enable REE by default in 64-bit mode where it helps to eliminate |
| redundant 32-to-64-bit extensions. */ |
| if (!global_options_set.x_flag_ree && TARGET_ARCH32) |
| flag_ree = 0; |
| |
| /* Do various machine dependent initializations. */ |
| sparc_init_modes (); |
| |
| /* Set up function hooks. */ |
| init_machine_status = sparc_init_machine_status; |
| } |
| |
| /* Miscellaneous utilities. */ |
| |
| /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move |
| or branch on register contents instructions. */ |
| |
| int |
| v9_regcmp_p (enum rtx_code code) |
| { |
| return (code == EQ || code == NE || code == GE || code == LT |
| || code == LE || code == GT); |
| } |
| |
| /* Nonzero if OP is a floating point constant which can |
| be loaded into an integer register using a single |
| sethi instruction. */ |
| |
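| /* For instance (illustrative), 1.0f has the SFmode image 0x3f800000, whose |
|    low 10 bits are clear, so a single sethi is enough and fp_sethi_p returns |
|    nonzero for it.  */ |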
| int |
| fp_sethi_p (rtx op) |
| { |
| if (GET_CODE (op) == CONST_DOUBLE) |
| { |
| long i; |
| |
| REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); |
| return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i); |
| } |
| |
| return 0; |
| } |
| |
| /* Nonzero if OP is a floating point constant which can |
| be loaded into an integer register using a single |
| mov instruction. */ |
| |
| int |
| fp_mov_p (rtx op) |
| { |
| if (GET_CODE (op) == CONST_DOUBLE) |
| { |
| long i; |
| |
| REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); |
| return SPARC_SIMM13_P (i); |
| } |
| |
| return 0; |
| } |
| |
| /* Nonzero if OP is a floating point constant which can |
| be loaded into an integer register using a high/losum |
| instruction sequence. */ |
| |
| int |
| fp_high_losum_p (rtx op) |
| { |
| /* The constraints calling this should only be in |
| SFmode move insns, so any constant which cannot |
| be moved using a single insn will do. */ |
| if (GET_CODE (op) == CONST_DOUBLE) |
| { |
| long i; |
| |
| REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i); |
| return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i); |
| } |
| |
| return 0; |
| } |
| |
| /* Return true if the address of LABEL can be loaded by means of the |
| mov{si,di}_pic_label_ref patterns in PIC mode. */ |
| |
| static bool |
| can_use_mov_pic_label_ref (rtx label) |
| { |
| /* VxWorks does not impose a fixed gap between segments; the run-time |
| gap can be different from the object-file gap. We therefore can't |
| assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we |
| are absolutely sure that X is in the same segment as the GOT. |
| Unfortunately, the flexibility of linker scripts means that we |
| can't be sure of that in general, so assume that GOT-relative |
| accesses are never valid on VxWorks. */ |
| if (TARGET_VXWORKS_RTP) |
| return false; |
| |
| /* Similarly, if the label is non-local, it might end up being placed |
| in a different section than the current one; now mov_pic_label_ref |
| requires the label and the code to be in the same section. */ |
| if (LABEL_REF_NONLOCAL_P (label)) |
| return false; |
| |
|   /* Finally, if we are reordering basic blocks and partitioning into hot |
|      and cold sections, this might happen for any label.  */ |
| if (flag_reorder_blocks_and_partition) |
| return false; |
| |
| return true; |
| } |
| |
| /* Expand a move instruction. Return true if all work is done. */ |
| |
| bool |
| sparc_expand_move (machine_mode mode, rtx *operands) |
| { |
| /* Handle sets of MEM first. */ |
| if (GET_CODE (operands[0]) == MEM) |
| { |
| /* 0 is a register (or a pair of registers) on SPARC. */ |
| if (register_or_zero_operand (operands[1], mode)) |
| return false; |
| |
| if (!reload_in_progress) |
| { |
| operands[0] = validize_mem (operands[0]); |
| operands[1] = force_reg (mode, operands[1]); |
| } |
| } |
| |
| /* Fix up TLS cases. */ |
| if (TARGET_HAVE_TLS |
| && CONSTANT_P (operands[1]) |
| && sparc_tls_referenced_p (operands [1])) |
| { |
| operands[1] = sparc_legitimize_tls_address (operands[1]); |
| return false; |
| } |
| |
| /* Fix up PIC cases. */ |
| if (flag_pic && CONSTANT_P (operands[1])) |
| { |
| if (pic_address_needs_scratch (operands[1])) |
| operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX); |
| |
| /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */ |
| if ((GET_CODE (operands[1]) == LABEL_REF |
| && can_use_mov_pic_label_ref (operands[1])) |
| || (GET_CODE (operands[1]) == CONST |
| && GET_CODE (XEXP (operands[1], 0)) == PLUS |
| && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF |
| && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT |
| && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0)))) |
| { |
| if (mode == SImode) |
| { |
| emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1])); |
| return true; |
| } |
| |
| if (mode == DImode) |
| { |
| emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1])); |
| return true; |
| } |
| } |
| |
| if (symbolic_operand (operands[1], mode)) |
| { |
| operands[1] |
| = sparc_legitimize_pic_address (operands[1], |
| reload_in_progress |
| ? operands[0] : NULL_RTX); |
| return false; |
| } |
| } |
| |
| /* If we are trying to toss an integer constant into FP registers, |
| or loading a FP or vector constant, force it into memory. */ |
| if (CONSTANT_P (operands[1]) |
| && REG_P (operands[0]) |
| && (SPARC_FP_REG_P (REGNO (operands[0])) |
| || SCALAR_FLOAT_MODE_P (mode) |
| || VECTOR_MODE_P (mode))) |
| { |
| /* emit_group_store will send such bogosity to us when it is |
| not storing directly into memory. So fix this up to avoid |
| crashes in output_constant_pool. */ |
| if (operands [1] == const0_rtx) |
| operands[1] = CONST0_RTX (mode); |
| |
| /* We can clear or set to all-ones FP registers if TARGET_VIS, and |
| always other regs. */ |
| if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG) |
| && (const_zero_operand (operands[1], mode) |
| || const_all_ones_operand (operands[1], mode))) |
| return false; |
| |
| if (REGNO (operands[0]) < SPARC_FIRST_FP_REG |
| /* We are able to build any SF constant in integer registers |
| with at most 2 instructions. */ |
| && (mode == SFmode |
| /* And any DF constant in integer registers if needed. */ |
| || (mode == DFmode && !can_create_pseudo_p ()))) |
| return false; |
| |
| operands[1] = force_const_mem (mode, operands[1]); |
| if (!reload_in_progress) |
| operands[1] = validize_mem (operands[1]); |
| return false; |
| } |
| |
| /* Accept non-constants and valid constants unmodified. */ |
| if (!CONSTANT_P (operands[1]) |
| || GET_CODE (operands[1]) == HIGH |
| || input_operand (operands[1], mode)) |
| return false; |
| |
| switch (mode) |
| { |
| case E_QImode: |
| /* All QImode constants require only one insn, so proceed. */ |
| break; |
| |
| case E_HImode: |
| case E_SImode: |
| sparc_emit_set_const32 (operands[0], operands[1]); |
| return true; |
| |
| case E_DImode: |
| /* input_operand should have filtered out 32-bit mode. */ |
| sparc_emit_set_const64 (operands[0], operands[1]); |
| return true; |
| |
| case E_TImode: |
| { |
| rtx high, low; |
| /* TImode isn't available in 32-bit mode. */ |
| split_double (operands[1], &high, &low); |
| emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode), |
| high)); |
| emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode), |
| low)); |
| } |
| return true; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| return false; |
| } |
| |
| /* Load OP1, a 32-bit constant, into OP0, a register. |
| We know it can't be done in one insn when we get |
|    here; the move expander guarantees this.  */ |
| |
| static void |
| sparc_emit_set_const32 (rtx op0, rtx op1) |
| { |
| machine_mode mode = GET_MODE (op0); |
| rtx temp = op0; |
| |
| if (can_create_pseudo_p ()) |
| temp = gen_reg_rtx (mode); |
| |
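|   /* For illustration (the constant is chosen arbitrarily): a value such |
|      as 0x12345678, which is neither a small immediate nor a sethi-only |
|      value, is built below as roughly |
| |
|         sethi   %hi(0x12345678), %temp    ! %temp = 0x12345400 |
|         or      %temp, 0x278, %op0        ! 0x12345678 & 0x3ff = 0x278  */ |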
| if (GET_CODE (op1) == CONST_INT) |
| { |
| gcc_assert (!small_int_operand (op1, mode) |
| && !const_high_operand (op1, mode)); |
| |
| /* Emit them as real moves instead of a HIGH/LO_SUM, |
| this way CSE can see everything and reuse intermediate |
| values if it wants. */ |
| emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1) |
| & ~(HOST_WIDE_INT) 0x3ff))); |
| |
| emit_insn (gen_rtx_SET (op0, |
| gen_rtx_IOR (mode, temp, |
| GEN_INT (INTVAL (op1) & 0x3ff)))); |
| } |
| else |
| { |
| /* A symbol, emit in the traditional way. */ |
| emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1))); |
| emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1))); |
| } |
| } |
| |
| /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register. |
| If TEMP is nonzero, we are forbidden to use any other scratch |
| registers. Otherwise, we are allowed to generate them as needed. |
| |
| Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY |
| or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */ |
| |
| void |
| sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp) |
| { |
| rtx cst, temp1, temp2, temp3, temp4, temp5; |
| rtx ti_temp = 0; |
| |
| /* Deal with too large offsets. */ |
| if (GET_CODE (op1) == CONST |
| && GET_CODE (XEXP (op1, 0)) == PLUS |
| && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1)) |
| && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst)) |
| { |
| gcc_assert (!temp); |
| temp1 = gen_reg_rtx (DImode); |
| temp2 = gen_reg_rtx (DImode); |
| sparc_emit_set_const64 (temp2, cst); |
| sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0), |
| NULL_RTX); |
| emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2))); |
| return; |
| } |
| |
| if (temp && GET_MODE (temp) == TImode) |
| { |
| ti_temp = temp; |
| temp = gen_rtx_REG (DImode, REGNO (temp)); |
| } |
| |
| /* SPARC-V9 code model support. */ |
| switch (sparc_code_model) |
| { |
| case CM_MEDLOW: |
| /* The range spanned by all instructions in the object is less |
| than 2^31 bytes (2GB) and the distance from any instruction |
| to the location of the label _GLOBAL_OFFSET_TABLE_ is less |
| than 2^31 bytes (2GB). |
| |
| The executable must be in the low 4TB of the virtual address |
| space. |
| |
| sethi %hi(symbol), %temp1 |
| or %temp1, %lo(symbol), %reg */ |
| if (temp) |
| temp1 = temp; /* op0 is allowed. */ |
| else |
| temp1 = gen_reg_rtx (DImode); |
| |
| emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1))); |
| emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1))); |
| break; |
| |
| case CM_MEDMID: |
| /* The range spanned by all instructions in the object is less |
| than 2^31 bytes (2GB) and the distance from any instruction |
| to the location of the label _GLOBAL_OFFSET_TABLE_ is less |
| than 2^31 bytes (2GB). |
| |
| The executable must be in the low 16TB of the virtual address |
| space. |
| |
| sethi %h44(symbol), %temp1 |
| or %temp1, %m44(symbol), %temp2 |
| sllx %temp2, 12, %temp3 |
| or %temp3, %l44(symbol), %reg */ |
| if (temp) |
| { |
| temp1 = op0; |
| temp2 = op0; |
| temp3 = temp; /* op0 is allowed. */ |
| } |
| else |
| { |
| temp1 = gen_reg_rtx (DImode); |
| temp2 = gen_reg_rtx (DImode); |
| temp3 = gen_reg_rtx (DImode); |
| } |
| |
| emit_insn (gen_seth44 (temp1, op1)); |
| emit_insn (gen_setm44 (temp2, temp1, op1)); |
| emit_insn (gen_rtx_SET (temp3, |
| gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12)))); |
| emit_insn (gen_setl44 (op0, temp3, op1)); |
| break; |
| |
| case CM_MEDANY: |
| /* The range spanned by all instructions in the object is less |
| than 2^31 bytes (2GB) and the distance from any instruction |
| to the location of the label _GLOBAL_OFFSET_TABLE_ is less |
| than 2^31 bytes (2GB). |
| |
| The executable can be placed anywhere in the virtual address |
| space. |
| |
| sethi %hh(symbol), %temp1 |
| sethi %lm(symbol), %temp2 |
| or %temp1, %hm(symbol), %temp3 |
| sllx %temp3, 32, %temp4 |
| or %temp4, %temp2, %temp5 |
| or %temp5, %lo(symbol), %reg */ |
| if (temp) |
| { |
| /* It is possible that one of the registers we got for operands[2] |
| might coincide with that of operands[0] (which is why we made |
| it TImode). Pick the other one to use as our scratch. */ |
| if (rtx_equal_p (temp, op0)) |
| { |
| gcc_assert (ti_temp); |
| temp = gen_rtx_REG (DImode, REGNO (temp) + 1); |
| } |
| temp1 = op0; |
| temp2 = temp; /* op0 is _not_ allowed, see above. */ |
| temp3 = op0; |
| temp4 = op0; |
| temp5 = op0; |
| } |
| else |
| { |
| temp1 = gen_reg_rtx (DImode); |
| temp2 = gen_reg_rtx (DImode); |
| temp3 = gen_reg_rtx (DImode); |
| temp4 = gen_reg_rtx (DImode); |
| temp5 = gen_reg_rtx (DImode); |
| } |
| |
| emit_insn (gen_sethh (temp1, op1)); |
| emit_insn (gen_setlm (temp2, op1)); |
| emit_insn (gen_sethm (temp3, temp1, op1)); |
| emit_insn (gen_rtx_SET (temp4, |
| gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); |
| emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2))); |
| emit_insn (gen_setlo (op0, temp5, op1)); |
| break; |
| |
| case CM_EMBMEDANY: |
| /* Old old old backwards compatibility kruft here. |
| Essentially it is MEDLOW with a fixed 64-bit |
| virtual base added to all data segment addresses. |
| Text-segment stuff is computed like MEDANY, we can't |
| reuse the code above because the relocation knobs |
| look different. |
| |
| Data segment: sethi %hi(symbol), %temp1 |
| add %temp1, EMBMEDANY_BASE_REG, %temp2 |
| or %temp2, %lo(symbol), %reg */ |
| if (data_segment_operand (op1, GET_MODE (op1))) |
| { |
| if (temp) |
| { |
| temp1 = temp; /* op0 is allowed. */ |
| temp2 = op0; |
| } |
| else |
| { |
| temp1 = gen_reg_rtx (DImode); |
| temp2 = gen_reg_rtx (DImode); |
| } |
| |
| emit_insn (gen_embmedany_sethi (temp1, op1)); |
| emit_insn (gen_embmedany_brsum (temp2, temp1)); |
| emit_insn (gen_embmedany_losum (op0, temp2, op1)); |
| } |
| |
| /* Text segment: sethi %uhi(symbol), %temp1 |
| sethi %hi(symbol), %temp2 |
| or %temp1, %ulo(symbol), %temp3 |
| sllx %temp3, 32, %temp4 |
| or %temp4, %temp2, %temp5 |
| or %temp5, %lo(symbol), %reg */ |
| else |
| { |
| if (temp) |
| { |
| /* It is possible that one of the registers we got for operands[2] |
| might coincide with that of operands[0] (which is why we made |
| it TImode). Pick the other one to use as our scratch. */ |
| if (rtx_equal_p (temp, op0)) |
| { |
| gcc_assert (ti_temp); |
| temp = gen_rtx_REG (DImode, REGNO (temp) + 1); |
| } |
| temp1 = op0; |
| temp2 = temp; /* op0 is _not_ allowed, see above. */ |
| temp3 = op0; |
| temp4 = op0; |
| temp5 = op0; |
| } |
| else |
| { |
| temp1 = gen_reg_rtx (DImode); |
| temp2 = gen_reg_rtx (DImode); |
| temp3 = gen_reg_rtx (DImode); |
| temp4 = gen_reg_rtx (DImode); |
| temp5 = gen_reg_rtx (DImode); |
| } |
| |
| emit_insn (gen_embmedany_textuhi (temp1, op1)); |
| emit_insn (gen_embmedany_texthi (temp2, op1)); |
| emit_insn (gen_embmedany_textulo (temp3, temp1, op1)); |
| emit_insn (gen_rtx_SET (temp4, |
| gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32)))); |
| emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2))); |
| emit_insn (gen_embmedany_textlo (op0, temp5, op1)); |
| } |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| /* These avoid problems when cross compiling. If we do not |
| go through all this hair then the optimizer will see |
| invalid REG_EQUAL notes or in some cases none at all. */ |
| static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT); |
| static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT); |
| static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT); |
| static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT); |
| |
| /* The optimizer is not allowed to assume anything about exactly |
|    which bits are set for a HIGH; they are unspecified. |
|    Unfortunately this leads to many missed optimizations |
|    during CSE.  We mask out the non-HIGH bits so that the result |
|    matches a plain movdi, which alleviates this problem.  */ |
| static rtx |
| gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val) |
| { |
| return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff)); |
| } |
| |
| static rtx |
| gen_safe_SET64 (rtx dest, HOST_WIDE_INT val) |
| { |
| return gen_rtx_SET (dest, GEN_INT (val)); |
| } |
| |
| static rtx |
| gen_safe_OR64 (rtx src, HOST_WIDE_INT val) |
| { |
| return gen_rtx_IOR (DImode, src, GEN_INT (val)); |
| } |
| |
| static rtx |
| gen_safe_XOR64 (rtx src, HOST_WIDE_INT val) |
| { |
| return gen_rtx_XOR (DImode, src, GEN_INT (val)); |
| } |
| |
| /* Worker routines for 64-bit constant formation on arch64. |
|    One of the key things to do in these emissions is |
|    to create as many temp REGs as possible.  This makes it |
| possible for half-built constants to be used later when |
| such values are similar to something required later on. |
| Without doing this, the optimizer cannot see such |
| opportunities. */ |
| |
| static void sparc_emit_set_const64_quick1 (rtx, rtx, |
| unsigned HOST_WIDE_INT, int); |
| |
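| /* Load into OP0 the 64-bit constant whose low 32 bits are LOW_BITS and |
|    whose upper 32 bits are all zeros (IS_NEG == 0) or all ones (IS_NEG != 0), |
|    using TEMP as a scratch register.  As an illustration (arbitrary value), |
|    0xfffffffffffff234 comes out roughly as |
| |
|       sethi   %hi(0xc00), %temp      ! ~0xfffff234 & ~0x3ff |
|       xor     %temp, -0x1cc, %op0    ! -0x400 | 0x234, sign-extended  */ |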
| static void |
| sparc_emit_set_const64_quick1 (rtx op0, rtx temp, |
| unsigned HOST_WIDE_INT low_bits, int is_neg) |
| { |
| unsigned HOST_WIDE_INT high_bits; |
| |
| if (is_neg) |
| high_bits = (~low_bits) & 0xffffffff; |
| else |
| high_bits = low_bits; |
| |
| emit_insn (gen_safe_HIGH64 (temp, high_bits)); |
| if (!is_neg) |
| { |
| emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff)))); |
| } |
| else |
| { |
| /* If we are XOR'ing with -1, then we should emit a one's complement |
| instead. This way the combiner will notice logical operations |
| such as ANDN later on and substitute. */ |
| if ((low_bits & 0x3ff) == 0x3ff) |
| { |
| emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp))); |
| } |
| else |
| { |
| emit_insn (gen_rtx_SET (op0, |
| gen_safe_XOR64 (temp, |
| (-(HOST_WIDE_INT)0x400 |
| | (low_bits & 0x3ff))))); |
| } |
| } |
| } |
| |
| static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, int); |
| |
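| /* Load into OP0 the constant (HIGH_BITS << SHIFT_COUNT) | LOW_IMMEDIATE, |
|    using TEMP as a scratch register.  For instance (arbitrary values), |
|    HIGH_BITS = 0x12345678, LOW_IMMEDIATE = 0 and SHIFT_COUNT = 32 comes |
|    out roughly as |
| |
|       sethi   %hi(0x12345678), %temp |
|       or      %temp, 0x278, %op0 |
|       sllx    %op0, 32, %op0  */ |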
| static void |
| sparc_emit_set_const64_quick2 (rtx op0, rtx temp, |
| unsigned HOST_WIDE_INT high_bits, |
| unsigned HOST_WIDE_INT low_immediate, |
| int shift_count) |
| { |
| rtx temp2 = op0; |
| |
| if ((high_bits & 0xfffffc00) != 0) |
| { |
| emit_insn (gen_safe_HIGH64 (temp, high_bits)); |
| if ((high_bits & ~0xfffffc00) != 0) |
| emit_insn (gen_rtx_SET (op0, |
| gen_safe_OR64 (temp, (high_bits & 0x3ff)))); |
| else |
| temp2 = temp; |
| } |
| else |
| { |
| emit_insn (gen_safe_SET64 (temp, high_bits)); |
| temp2 = temp; |
| } |
| |
| /* Now shift it up into place. */ |
| emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2, |
| GEN_INT (shift_count)))); |
| |
| /* If there is a low immediate part piece, finish up by |
| putting that in as well. */ |
| if (low_immediate != 0) |
| emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate))); |
| } |
| |
| static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT); |
| |
| /* Full 64-bit constant decomposition. Even though this is the |
| 'worst' case, we still optimize a few things away. */ |
| static void |
| sparc_emit_set_const64_longway (rtx op0, rtx temp, |
| unsigned HOST_WIDE_INT high_bits, |
| unsigned HOST_WIDE_INT low_bits) |
| { |
| rtx sub_temp = op0; |
| |
| if (can_create_pseudo_p ()) |
| sub_temp = gen_reg_rtx (DImode); |
| |
| if ((high_bits & 0xfffffc00) != 0) |
| { |
| emit_insn (gen_safe_HIGH64 (temp, high_bits)); |
| if ((high_bits & ~0xfffffc00) != 0) |
| emit_insn (gen_rtx_SET (sub_temp, |
| gen_safe_OR64 (temp, (high_bits & 0x3ff)))); |
| else |
| sub_temp = temp; |
| } |
| else |
| { |
| emit_insn (gen_safe_SET64 (temp, high_bits)); |
| sub_temp = temp; |
| } |
| |
| if (can_create_pseudo_p ()) |
| { |
| rtx temp2 = gen_reg_rtx (DImode); |
| rtx temp3 = gen_reg_rtx (DImode); |
| rtx temp4 = gen_reg_rtx (DImode); |
| |
| emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp, |
| GEN_INT (32)))); |
| |
| emit_insn (gen_safe_HIGH64 (temp2, low_bits)); |
| if ((low_bits & ~0xfffffc00) != 0) |
| { |
| emit_insn (gen_rtx_SET (temp3, |
| gen_safe_OR64 (temp2, (low_bits & 0x3ff)))); |
| emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3))); |
| } |
| else |
| { |
| emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2))); |
| } |
| } |
| else |
| { |
| rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff); |
| rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff); |
| rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff); |
| int to_shift = 12; |
| |
| /* We are in the middle of reload, so this is really |
| painful. However we do still make an attempt to |
| avoid emitting truly stupid code. */ |
| if (low1 != const0_rtx) |
| { |
| emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp, |
| GEN_INT (to_shift)))); |
| emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1))); |
| sub_temp = op0; |
| to_shift = 12; |
| } |
| else |
| { |
| to_shift += 12; |
| } |
| if (low2 != const0_rtx) |
| { |
| emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp, |
| GEN_INT (to_shift)))); |
| emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2))); |
| sub_temp = op0; |
| to_shift = 8; |
| } |
| else |
| { |
| to_shift += 8; |
| } |
| emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp, |
| GEN_INT (to_shift)))); |
| if (low3 != const0_rtx) |
| emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3))); |
| /* phew... */ |
| } |
| } |
| |
| /* Analyze a 64-bit constant for certain properties. */ |
| static void analyze_64bit_constant (unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, |
| int *, int *, int *); |
| |
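| /* For instance (illustrative values), HIGH_BITS = 0 and LOW_BITS = 0xff000 |
|    yield *LBSP = 12, *HBSP = 19 and *ABBASP = 1; clearing bit 15 in LOW_BITS |
|    would leave the same bounds but set *ABBASP to 0.  */ |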
| static void |
| analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits, |
| unsigned HOST_WIDE_INT low_bits, |
| int *hbsp, int *lbsp, int *abbasp) |
| { |
| int lowest_bit_set, highest_bit_set, all_bits_between_are_set; |
| int i; |
| |
| lowest_bit_set = highest_bit_set = -1; |
| i = 0; |
| do |
| { |
| if ((lowest_bit_set == -1) |
| && ((low_bits >> i) & 1)) |
| lowest_bit_set = i; |
| if ((highest_bit_set == -1) |
| && ((high_bits >> (32 - i - 1)) & 1)) |
| highest_bit_set = (64 - i - 1); |
| } |
| while (++i < 32 |
| && ((highest_bit_set == -1) |
| || (lowest_bit_set == -1))); |
| if (i == 32) |
| { |
| i = 0; |
| do |
| { |
| if ((lowest_bit_set == -1) |
| && ((high_bits >> i) & 1)) |
| lowest_bit_set = i + 32; |
| if ((highest_bit_set == -1) |
| && ((low_bits >> (32 - i - 1)) & 1)) |
| highest_bit_set = 32 - i - 1; |
| } |
| while (++i < 32 |
| && ((highest_bit_set == -1) |
| || (lowest_bit_set == -1))); |
| } |
|   /* If there are no bits set, this should have gone out |
|      as one instruction!  */ |
| gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1); |
| all_bits_between_are_set = 1; |
| for (i = lowest_bit_set; i <= highest_bit_set; i++) |
| { |
| if (i < 32) |
| { |
| if ((low_bits & (1 << i)) != 0) |
| continue; |
| } |
| else |
| { |
| if ((high_bits & (1 << (i - 32))) != 0) |
| continue; |
| } |
| all_bits_between_are_set = 0; |
| break; |
| } |
| *hbsp = highest_bit_set; |
| *lbsp = lowest_bit_set; |
| *abbasp = all_bits_between_are_set; |
| } |
| |
| static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT); |
| |
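| /* Return nonzero if the 64-bit constant HIGH_BITS:LOW_BITS is one of the |
|    2-insn cases handled by the emitters in this file: the high word is all |
|    zeros or all ones, the set bits form a contiguous run touching bit 0 or |
|    bit 63, or the run between the lowest and highest set bits is narrow |
|    enough for a single sethi followed by a shift.  */ |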
| static int |
| const64_is_2insns (unsigned HOST_WIDE_INT high_bits, |
| unsigned HOST_WIDE_INT low_bits) |
| { |
| int highest_bit_set, lowest_bit_set, all_bits_between_are_set; |
| |
| if (high_bits == 0 |
| || high_bits == 0xffffffff) |
| return 1; |
| |
| analyze_64bit_constant (high_bits, low_bits, |
| &highest_bit_set, &lowest_bit_set, |
| &all_bits_between_are_set); |
| |
| if ((highest_bit_set == 63 |
| || lowest_bit_set == 0) |
| && all_bits_between_are_set != 0) |
| return 1; |
| |
| if ((highest_bit_set - lowest_bit_set) < 21) |
| return 1; |
| |
| return 0; |
| } |
| |
| static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, |
| int, int); |
| |
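| /* Shift the 64-bit constant HIGH_BITS:LOW_BITS right by LOWEST_BIT_SET and |
|    then left by SHIFT, so that the interesting run of bits is rebased at bit |
|    SHIFT.  For instance (illustrative), 0x7fc000000 with LOWEST_BIT_SET = 26 |
|    and SHIFT = 10 yields 0x7fc00.  */ |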
| static unsigned HOST_WIDE_INT |
| create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits, |
| unsigned HOST_WIDE_INT low_bits, |
| int lowest_bit_set, int shift) |
| { |
| HOST_WIDE_INT hi, lo; |
| |
| if (lowest_bit_set < 32) |
| { |
| lo = (low_bits >> lowest_bit_set) << shift; |
| hi = ((high_bits << (32 - lowest_bit_set)) << shift); |
| } |
| else |
| { |
| lo = 0; |
| hi = ((high_bits >> (lowest_bit_set - 32)) << shift); |
| } |
| gcc_assert (! (hi & lo)); |
| return (hi | lo); |
| } |
| |
| /* Here we are sure to be arch64 and this is an integer constant |
| being loaded into a register. Emit the most efficient |
| insn sequence possible. Detection of all the 1-insn cases |
| has been done already. */ |
| static void |
| sparc_emit_set_const64 (rtx op0, rtx op1) |
| { |
| unsigned HOST_WIDE_INT high_bits, low_bits; |
| int lowest_bit_set, highest_bit_set; |
| int all_bits_between_are_set; |
| rtx temp = 0; |
| |
| /* Sanity check that we know what we are working with. */ |
| gcc_assert (TARGET_ARCH64 |
| && (GET_CODE (op0) == SUBREG |
| || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0))))); |
| |
| if (! can_create_pseudo_p ()) |
| temp = op0; |
| |
| if (GET_CODE (op1) != CONST_INT) |
| { |
| sparc_emit_set_symbolic_const64 (op0, op1, temp); |
| return; |
| } |
| |
| if (! temp) |
| temp = gen_reg_rtx (DImode); |
| |
| high_bits = ((INTVAL (op1) >> 32) & 0xffffffff); |
| low_bits = (INTVAL (op1) & 0xffffffff); |
| |
| /* low_bits bits 0 --> 31 |
| high_bits bits 32 --> 63 */ |
| |
| analyze_64bit_constant (high_bits, low_bits, |
| &highest_bit_set, &lowest_bit_set, |
| &all_bits_between_are_set); |
| |
| /* First try for a 2-insn sequence. */ |
| |
| /* These situations are preferred because the optimizer can |
| * do more things with them: |
| * 1) mov -1, %reg |
| * sllx %reg, shift, %reg |
| * 2) mov -1, %reg |
| * srlx %reg, shift, %reg |
| * 3) mov some_small_const, %reg |
| * sllx %reg, shift, %reg |
| */ |
| if (((highest_bit_set == 63 |
| || lowest_bit_set == 0) |
| && all_bits_between_are_set != 0) |
| || ((highest_bit_set - lowest_bit_set) < 12)) |
| { |
| HOST_WIDE_INT the_const = -1; |
| int shift = lowest_bit_set; |
| |
| if ((highest_bit_set != 63 |
| && lowest_bit_set != 0) |
| || all_bits_between_are_set == 0) |
| { |
| the_const = |
| create_simple_focus_bits (high_bits, low_bits, |
| lowest_bit_set, 0); |
| } |
| else if (lowest_bit_set == 0) |
| shift = -(63 - highest_bit_set); |
| |
| gcc_assert (SPARC_SIMM13_P (the_const)); |
| gcc_assert (shift != 0); |
| |
| emit_insn (gen_safe_SET64 (temp, the_const)); |
| if (shift > 0) |
| emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp, |
| GEN_INT (shift)))); |
| else if (shift < 0) |
| emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp, |
| GEN_INT (-shift)))); |
| return; |
| } |
| |
| /* Now a range of 22 or less bits set somewhere. |
| * 1) sethi %hi(focus_bits), %reg |
| * sllx %reg, shift, %reg |
| * 2) sethi %hi(focus_bits), %reg |
| * srlx %reg, shift, %reg |
| */ |
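|   /* E.g. (an illustrative constant) for 0x00000007fc000000, lowest_bit_set |
|      is 26 and focus_bits is 0x7fc00, so we emit roughly |
| |
|        sethi   %hi(0x7fc00), %temp |
|        sllx    %temp, 16, %op0        ! 26 - 10  */ |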
| if ((highest_bit_set - lowest_bit_set) < 21) |
| { |
| unsigned HOST_WIDE_INT focus_bits = |
| create_simple_focus_bits (high_bits, low_bits, |
| lowest_bit_set, 10); |
| |
| gcc_assert (SPARC_SETHI_P (focus_bits)); |
| gcc_assert (lowest_bit_set != 10); |
| |
| emit_insn (gen_safe_HIGH64 (temp, focus_bits)); |
| |
| /* If lowest_bit_set == 10 then a sethi alone could have done it. */ |
| if (lowest_bit_set < 10) |
| emit_insn (gen_rtx_SET (op0, |
| gen_rtx_LSHIFTRT (DImode, temp, |
| GEN_INT (10 - lowest_bit_set)))); |
| else if (lowest_bit_set > 10) |
| emit_insn (gen_rtx_SET (op0, |
| gen_rtx_ASHIFT (DImode, temp, |
| GEN_INT (lowest_bit_set - 10)))); |
| return; |
| } |
| |
| /* 1) sethi %hi(low_bits), %reg |
| * or %reg, %lo(low_bits), %reg |
| * 2) sethi %hi(~low_bits), %reg |
| * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg |
| */ |
| if (high_bits == 0 |
| || high_bits == 0xffffffff) |
| { |
| sparc_emit_set_const64_quick1 (op0, temp, low_bits, |
| (high_bits == 0xffffffff)); |
| return; |
| } |
| |
| /* Now, try 3-insn sequences. */ |
| |
| /* 1) sethi %hi(high_bits), %reg |
| * or %reg, %lo(high_bits), %reg |
| * sllx %reg, 32, %reg |
| */ |
| if (low_bits == 0) |
| { |
| sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32); |
| return; |
| } |
| |
| /* We may be able to do something quick |
| when the constant is negated, so try that. */ |
| if (const64_is_2insns ((~high_bits) & 0xffffffff, |
| (~low_bits) & 0xfffffc00)) |
| { |
| /* NOTE: The trailing bits get XOR'd so we need the |
| non-negated bits, not the negated ones. */ |
| unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff; |
| |
| if ((((~high_bits) & 0xffffffff) == 0 |
| && ((~low_bits) & 0x80000000) == 0) |
| || (((~high_bits) & 0xffffffff) == 0xffffffff |
| && ((~low_bits) & 0x80000000) != 0)) |
| { |
| |