| /* Support for avr-passes.def for AVR 8-bit microcontrollers. |
| Copyright (C) 2024-2025 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify |
| it under the terms of the GNU General Public License as published by |
| the Free Software Foundation; either version 3, or (at your option) |
| any later version. |
| |
| GCC is distributed in the hope that it will be useful, |
| but WITHOUT ANY WARRANTY; without even the implied warranty of |
| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| GNU General Public License for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #define IN_TARGET_CODE 1 |
| |
| #define INCLUDE_ARRAY |
| #define INCLUDE_VECTOR |
| #include "config.h" |
| #include "system.h" |
| #include "intl.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "diagnostic-core.h" |
| #include "cfghooks.h" |
| #include "cfganal.h" |
| #include "df.h" |
| #include "memmodel.h" |
| #include "tm_p.h" |
| #include "optabs.h" |
| #include "regs.h" |
| #include "emit-rtl.h" |
| #include "recog.h" |
| #include "explow.h" |
| #include "cfgrtl.h" |
| #include "context.h" |
| #include "tree-pass.h" |
| #include "insn-attr.h" |
| #include "tm-constrs.h" |
| |
| |
| #define CONST_INT_OR_FIXED_P(X) (CONST_INT_P (X) || CONST_FIXED_P (X)) |
| |
| #define FIRST_GPR (AVR_TINY ? REG_18 : REG_2) |
| |
| |
| // Emit pattern PAT, and ICE when the insn is not valid / not recognized. |
| |
| static rtx_insn * |
| emit_valid_insn (rtx pat) |
| { |
| rtx_insn *insn = emit_insn (pat); |
| |
| if (! valid_insn_p (insn)) // Also runs recog(). |
| fatal_insn ("emit unrecognizable insn", insn); |
| |
| return insn; |
| } |
| |
| // Emit a single_set with an optional scratch operand. This function |
| // asserts that the new insn is valid and recognized. |
| |
| static rtx_insn * |
| emit_valid_move_clobbercc (rtx dest, rtx src, rtx scratch = NULL_RTX) |
| { |
| rtx pat = scratch |
| ? gen_gen_move_clobbercc_scratch (dest, src, scratch) |
| : gen_gen_move_clobbercc (dest, src); |
| |
| return emit_valid_insn (pat); |
| } |
| |
| |
| namespace |
| { |
| |
| ///////////////////////////////////////////////////////////////////////////// |
| // Before we start with the actual code, introduce some helpers that are |
| // quite generic, though up to now only avr-fuse-add makes use of them. |
| |
| /* Get the next / previous NONDEBUG_INSN_P after INSN in basic block BB. |
| This assumes we are in CFG layout mode so that BLOCK_FOR_INSN() |
| can be used. */ |
| |
| static rtx_insn * |
| next_nondebug_insn_bb (basic_block bb, rtx_insn *insn, bool forward = true) |
| { |
| while (insn) |
| { |
| insn = forward ? NEXT_INSN (insn) : PREV_INSN (insn); |
| |
| if (insn && NONDEBUG_INSN_P (insn)) |
| return BLOCK_FOR_INSN (insn) == bb ? insn : nullptr; |
| } |
| |
| return insn; |
| } |
| |
| static rtx_insn * |
| prev_nondebug_insn_bb (basic_block bb, rtx_insn *insn) |
| { |
| return next_nondebug_insn_bb (bb, insn, false); |
| } |
| |
| |
| /* Like `single_set' with the addition that it sets REGNO_SCRATCH when the |
| insn is a single_set with a QImode scratch register. When the insn has |
| no QImode scratch or just a scratch:QI, then set REGNO_SCRATCH = 0. |
| The assumption is that the function is only used after the splits for |
| REG_CC so that the pattern is a parallel with 2 elements (INSN has no |
| scratch operand), or 3 elements (INSN does have a scratch operand). */ |
| |
| static rtx |
| single_set_with_scratch (rtx_insn *insn, int ®no_scratch) |
| { |
| regno_scratch = 0; |
| |
| if (! INSN_P (insn)) |
| return NULL_RTX; |
| |
| rtx set, clo, reg, pat = PATTERN (insn); |
| |
| // Search for SET + CLOBBER(QI) + CLOBBER(CC). |
| if (GET_CODE (pat) == PARALLEL |
| && XVECLEN (pat, 0) == 3 |
| && GET_CODE (set = XVECEXP (pat, 0, 0)) == SET |
| // At this pass, all insns are endowed with clobber(CC). |
| && GET_CODE (clo = XVECEXP (pat, 0, 2)) == CLOBBER |
| && GET_MODE (XEXP (clo, 0)) == CCmode |
| && GET_CODE (clo = XVECEXP (pat, 0, 1)) == CLOBBER |
| && REG_P (reg = XEXP (clo, 0)) |
| && GET_MODE (reg) == QImode) |
| { |
| regno_scratch = REGNO (reg); |
| return set; |
| } |
| |
| return single_set (insn); |
| } |
| |
| |
| // One bit for each GPR in REG_0 ... REG_31. |
| using gprmask_t = uint32_t; |
| |
| // True when this is a valid GPR number for ordinary code, e.g. |
| // registers wider than one byte have to start at an even regno. |
| // TMP_REG and ZERO_REG are not considered valid, even though |
| // the C source can use register vars with them. |
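| // For example, on non-Tiny devices gpr_regno_p (REG_24, 2) is true, |
| // whereas gpr_regno_p (25, 2) is false (odd start regno) and |
| // gpr_regno_p (REG_2, 5) is false (no 5-byte registers). |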
| static inline bool |
| gpr_regno_p (int regno, int n_bytes = 1) |
| { |
| return (IN_RANGE (regno, FIRST_GPR, REG_32 - n_bytes) |
| // Size in { 1, 2, 3, 4, 8 } bytes. |
| && ((1u << n_bytes) & 0x11e) |
| // Registers >= 2 bytes start at an even regno. |
| && (n_bytes == 1 || regno % 2 == 0)); |
| } |
| |
| // There are cases where the C source defines local reg vars |
| // for R1 etc. The assumption is that this is handled before |
| // calling this function, e.g. by skipping code when a register |
| // overlaps with a fixed register. |
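| // For example, regmask (REG_24, 2) has bits 24 and 25 set, i.e. 0x3000000. |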
| static inline gprmask_t |
| regmask (int regno, int size) |
| { |
| gcc_checking_assert (gpr_regno_p (regno, size)); |
| gprmask_t bits = (1u << size) - 1; |
| |
| return bits << regno; |
| } |
| |
| // Mask for hard register X that's some GPR, including fixed regs like R0. |
| static gprmask_t |
| regmask (rtx x) |
| { |
| gcc_assert (REG_P (x)); |
| gprmask_t bits = (1u << GET_MODE_SIZE (GET_MODE (x))) - 1; |
| |
| return bits << REGNO (x); |
| } |
| |
| |
| // Whether X has bits in the range [B0 ... B1] |
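| // For example, has_bits_in (regmask (REG_24, 2), 24, 31) is true, |
| // while has_bits_in (regmask (REG_24, 2), 0, 23) is false. |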
| static inline bool |
| has_bits_in (gprmask_t x, int b0, int b1) |
| { |
| if (b0 > b1 || b0 > 31 || b1 < 0) |
| return false; |
| |
| const gprmask_t m = (2u << (b1 - b0)) - 1; |
| return x & (m << b0); |
| } |
| |
| |
| template<typename T> |
| T bad_case () |
| { |
| gcc_unreachable (); |
| } |
| |
| #define select false ? bad_case |
| |
| |
| namespace AVRasm |
| { |
| // Returns true when a scratch reg is needed in order to get the |
| // (signed or unsigned) 8-bit value VAL into some GPR. |
| // When it's about costs rather than the sheer requirement for a |
| // scratch, see also AVRasm::constant_cost. |
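| // For example, the values 0, 1 and 0xff never need a scratch, whereas |
| // value 2 does when the target is a lower register. |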
| static inline bool ldi_needs_scratch (int regno, int val) |
| { |
| return regno < REG_16 && IN_RANGE (val & 0xff, 2, 254); |
| } |
| |
| // Return a byte value x >= 0 such that x <code> y == y for all y, or -1. |
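| // For example, neutral_val (AND) is 0xff since 0xff & y == y for all y. |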
| static inline int neutral_val (rtx_code code) |
| { |
| return select<int>() |
| : code == AND ? 0xff |
| : code == IOR ? 0x00 |
| : code == XOR ? 0x00 |
| : code == PLUS ? 0 |
| : -1; |
| } |
| |
| // When there exists a value x such that the image of the function |
| // y -> y <code> x has order 1, then return that x. Else return -1. |
| static inline int image1_val (rtx_code code) |
| { |
| return select<int>() |
| : code == AND ? 0x00 |
| : code == IOR ? 0xff |
| : -1; |
| } |
| |
| // Cost of 8-bit binary operation x o= VAL provided a scratch is |
| // available as needed. |
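| // For example, constant_cost (PLUS, REG_2, 1) is 1, whereas |
| // constant_cost (IOR, REG_2, 0x80) is 2 since a scratch is needed |
| // for IOR on a register below REG_16. |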
| static int constant_cost (rtx_code code, int regno, uint8_t val) |
| { |
| bool needs_scratch_p = select<bool>() |
| : code == PLUS ? regno < REG_16 && val != 1 && val != 0xff |
| : code == XOR ? val != 0xff && (regno < REG_16 || val != 0x80) |
| : code == IOR ? regno < REG_16 |
| : code == AND ? regno < REG_16 && val != 0 |
| : code == SET ? regno < REG_16 && val != 0 |
| : bad_case<bool> (); |
| |
| return val == AVRasm::neutral_val (code) |
| ? 0 |
| : 1 + needs_scratch_p; |
| } |
| }; // AVRasm |
| |
| |
| // Returns the mode mask for a mode size of SIZE bytes. |
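| // For example, size_to_mask (2) is 0xffff and size_to_mask (4) is 0xffffffff. |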
| static uint64_t size_to_mask (int size) |
| { |
| return ((uint64_t) 2 << (8 * size - 1)) - 1; |
| } |
| |
| // Return the scalar int mode for a modesize of 1, 2, 3, 4 or 8 bytes. |
| static machine_mode size_to_mode (int size) |
| { |
| return select<machine_mode>() |
| : size == 1 ? QImode |
| : size == 2 ? HImode |
| : size == 3 ? PSImode |
| : size == 4 ? SImode |
| : size == 8 ? DImode |
| : bad_case<machine_mode> (); |
| } |
| |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Optimize moves after reload: -mfuse-move=<0,23> |
| |
| /* The purpose of this pass is to perform optimizations after reload |
| like the following ones: |
| |
| Without optimization | With optimization |
| ==================== | ================= |
| |
| long long fn_zero (void) (1) |
| { |
| return 0; |
| } |
| |
| ldi r18, 0 ; movqi_insn | ldi r18, 0 ; movqi_insn |
| ldi r19, 0 ; movqi_insn | ldi r19, 0 ; movqi_insn |
| ldi r20, 0 ; movqi_insn | movw r20, r18 ; *movhi |
| ldi r21, 0 ; movqi_insn | |
| ldi r22, 0 ; movqi_insn | movw r22, r18 ; *movhi |
| ldi r23, 0 ; movqi_insn | |
| ldi r24, 0 ; movqi_insn | movw r24, r18 ; *movhi |
| ldi r25, 0 ; movqi_insn | |
| ret | ret |
| |
| int fn_eq0 (char c) (2) |
| { |
| return c == 0; |
| } |
| |
| mov r18, r24 ; movqi_insn | mov r18, r24 ; movqi_insn |
| ldi r24, 1 ; *movhi | ldi r24, 1 ; *movhi |
| ldi r25, 0 | ldi r25, 0 |
| cp r18, ZERO ; cmpqi3 | cpse r18, ZERO ; peephole |
| breq .+4 ; branch | |
| ldi r24, 0 ; *movhi | ldi r24, 0 ; movqi_insn |
| ldi r25, 0 | |
| ret | ret |
| |
| int a, b; (3) |
| |
| void fn_store_ab (void) |
| { |
| a = 1; |
| b = -1; |
| } |
| |
| ldi r24, 1 ; *movhi | ldi r24, 1 ; *movhi |
| ldi r25, 0 | ldi r25, 0 |
| sts a+1, r25 ; *movhi | sts a+1, r25 ; *movhi |
| sts a, r24 | sts a, r24 |
| ldi r24, -1 ; *movhi | sbiw r24, 2 ; *addhi3 |
| ldi r25, -1 | |
| sts b+1, r25 ; *movhi | sts b+1, r25 ; *movhi |
| sts b, r24 | sts b, r24 |
| ret | ret |
| |
| unsigned fn_crc (unsigned x, unsigned y) (4) |
| { |
| for (char i = 8; i--; x <<= 1) |
| y ^= (x ^ y) & 0x80 ? 79U : 0U; |
| return y; |
| } |
| |
| movw r18, r24 ; *movhi | movw r18, r24 ; *movhi |
| movw r24, r22 ; *movhi | movw r24, r22 ; *movhi |
| ldi r22, 8 ; movqi_insn | ldi r22, 8 ; movqi_insn |
| .L13: | .L13: |
| movw r30, r18 ; *movhi | movw r30, r18 ; *movhi |
| eor r30, r24 ; *xorqi3 | eor r30, r24 ; *xorqi3 |
| eor r31, r25 ; *xorqi3 | eor r31, r25 ; *xorqi3 |
| mov r20, r30 ; *andhi3 | mov r20, r30 ; *andqi3 |
| andi r20, 1<<7 | andi r20, 1<<7 |
| clr r21 | |
| sbrs r30, 7 ; *sbrx_branchhi | sbrc r30, 7 ; *sbrx_branchhi |
| rjmp .+4 | |
| ldi r20, 79 ; movqi_insn | ldi r20, 79 ; movqi_insn |
| ldi r21, 0 ; movqi_insn | |
| eor r24, r20 ; *xorqi3 | eor r24, r20 ; *xorqi3 |
| eor r25, r21 ; *xorqi3 | |
| lsl r18 ; *ashlhi3_const | lsl r18 ; *ashlhi3_const |
| rol r19 | rol r19 |
| subi r22, 1 ; *op8.for.cczn.p| subi r22, 1 ; *op8.for.cczn.plus |
| brne .L13 ; branch_ZN | brne .L13 ; branch_ZN |
| ret | ret |
| |
| #define SPDR (*(uint8_t volatile*) 0x2c) (5) |
| |
| void fn_PR49807 (long big) |
| { |
| SPDR = big >> 24; |
| SPDR = big >> 16; |
| SPDR = big >> 8; |
| SPDR = big; |
| } |
| |
| movw r20, r22 ; *movhi | movw r20, r22 ; *movhi |
| movw r22, r24 ; *movhi | movw r22, r24 ; *movhi |
| mov r24, r23 ; *ashrsi3_const | |
| clr r27 | |
| sbrc r24,7 | |
| com r27 | |
| mov r25, r27 | |
| mov r26, r27 | |
| out 0xc, r24 ; movqi_insn | out 0xc, r23 ; movqi_insn |
| movw r24, r22 ; *ashrsi3_const | |
| clr r27 | |
| sbrc r25, 7 | |
| com r27 | |
| mov r26, r27 | |
| out 0xc, r24 ; movqi_insn | out 0xc, r24 ; movqi_insn |
| clr r27 ; *ashrsi3_const | |
| sbrc r23, 7 | |
| dec r27 | |
| mov r26, r23 | |
| mov r25, r22 | |
| mov r24, r21 | |
| out 0xc, r24 ; movqi_insn | out 0xc, r21 ; movqi_insn |
| out 0xc, r20 ; movqi_insn | out 0xc, r20 ; movqi_insn |
| ret | ret |
| |
| The insns of each basic block are traversed from first to last. |
| Each insn is optimized on its own, or may be fused with the |
| previous insn like in example (1). |
| As the insns are traversed, memento_t keeps track of known values |
| held in the GPRs (general purpose registers) R2 ... R31 by simulating |
| the effect of the current insn in memento_t.apply_insn(). |
| The basic blocks are traversed in reverse post order so as to |
| maximize the chance that GPRs from all preceding blocks are known, |
| which is the case in example (2). The traversal of the basic block |
| is performed by bbinfo_t.optimize_one_function(). |
| bbinfo_t.optimize_one_block() traverses the insns of a BB and tries |
| the following optimizations: |
| |
| bbinfo_t::try_fuse_p |
| Try to fuse two 8-bit insns to one MOVW like in (1). |
| |
| bbinfo_t::try_simplify_p |
| Only perform the simplest optimizations that don't impede the |
| traceability of the generated code, which are: |
| - Transform operations like Rn = Rn=0 ^ Rm to Rn = Rm. |
| - Remove insns that are no-ops like Rn = Rn ^ Rm=0. |
| |
| bbinfo_t::try_bin_arg1_p |
| In insns like EOR Rn, arg1 where arg1 is known or is a reg that |
| dies in the insn, *and* there is a different register Rm that's |
| known to contain the same value, then arg1 is replaced with Rm. |
| |
| bbinfo_t::try_split_ldi_p |
| Tries to simplify loads of constants like in examples (1), (2) and (3). |
| It may use arithmetic instructions like AND with registers that |
| are holding known values when this is profitable. |
| |
| bbinfo_t::try_split_any_p |
| Split all insns where the operation can be performed on individual |
| bytes, like andsi3. In example (4) the andhi3 can be optimized |
| to an andqi3. |
| |
| bbinfo_t::try_mem0_p |
| Try to fuse a mem = reg insn to mem = __zero_reg__. |
| This should only occur when -msplit-ldst is on, but may |
| also occur with pushes since push<mode>1 splits them. |
| */ |
| |
| |
| // A basic block with additional information like the GPR state. |
| // The main entry point for the pass. Runs various strategies |
| // like try_fuse, try_simplify, try_bin_arg1, try_split_ldi, try_split_any |
| // depending on -mfuse-move=<0,23>. |
| struct bbinfo_t; |
| |
| // Additional insn information on a REG = non-memory single_set insn |
| // for quick access. Only valid when the m_size member is non-zero. |
| struct insninfo_t; |
| |
| // Helper classes with data needed by the try_xxx optimizers. |
| struct optimize_data_t; |
| struct insn_optimize_data_t; |
| |
| // Records which GPRs R0 ... R31 are holding a known value, |
| // and which values these are. |
| struct memento_t; |
| |
| // Abstract Interpretation of expressions. |
| // absint_val_t represents an 8-bit value that equals the content of |
| // some GPR, or equals some known value (or both, or none of them). |
| // absint_byte_t represents an 8-bit entity that is equivalent to |
| // an absint_val_t, or is equivalent to some (unary or binary) operation |
| // on absint_val_t's like NOT, AND, IOR, XOR that operate bit-wise (and |
| // hence also byte-wise). |
| // absint_t represents an array of absint_byte_t's. When some insn is applied |
| // to a GPR state, then memento_t.apply_insn() represents the RHS of |
| // a single_set as an absint_t, and then applies that result to the GPRs. |
| // For example, in int y = x << 8 the representation is x = [r25; r24] |
| // and RHS = [r24; 00]. |
| struct absint_val_t; |
| class absint_byte_t; |
| struct absint_t; |
| |
| // A ply_t is a potential step towards an optimal sequence to load a constant |
| // value into a multi-byte register. A ply_t loosely relates to one AVR |
| // instruction, but it may also represent a sequence of instructions. |
| // For example, loading a constant into a lower register when no scratch reg |
| // is available may take up to 4 instructions. There is no 1:1 correspondence |
| // to insns, either. |
| // try_split_ldi determines the best sequence of ply_t's by means of a |
| // brute-force search with tree pruning: It's much too complicated to |
| // construct a good sequence directly, but there are many conditions that a |
| // good sequence will satisfy, implemented in bbinfo_t::find_plies. |
| struct ply_t; |
| struct plies_t; |
| |
| // The maximal number of ply_t's in any conceivable optimal solution |
| // that is better than what a vanilla mov<mode> generates. |
| // This is 6 for modes <= 4 and 8 for modes == 8. |
| static constexpr int N_BEST_PLYS = 8; |
| |
| #define FUSE_MOVE_MAX_MODESIZE 8 |
| |
| #include "avr-passes-fuse-move.h" |
| |
| // Static members. |
| |
| gprmask_t memento_t::fixed_regs_mask; |
| |
| // Statistics. |
| int ply_t::n_ply_ts; |
| int ply_t::max_n_ply_ts; |
| int plies_t::max_n_plies; |
| |
| bbinfo_t *bbinfo_t::bb_info; |
| int bbinfo_t::tick; |
| bbinfo_t::find_plies_data_t *bbinfo_t::fpd; |
| |
| // Which optimizations should be performed. |
| bool bbinfo_t::try_fuse_p; |
| bool bbinfo_t::try_bin_arg1_p; |
| bool bbinfo_t::try_split_ldi_p; |
| bool bbinfo_t::try_split_any_p; |
| bool bbinfo_t::try_simplify_p; |
| bool bbinfo_t::use_arith_p; |
| bool bbinfo_t::use_set_some_p; |
| bool bbinfo_t::try_mem0_p; |
| |
| |
| // Abstract Interpretation of expressions. |
| // A bunch of absint_byte_t's. |
| |
| struct absint_t |
| { |
| static constexpr int eq_size = FUSE_MOVE_MAX_MODESIZE; |
| std::array<absint_byte_t, eq_size> eq; |
| |
| rtx xexp = NULL_RTX; |
| rtx xexp_new = NULL_RTX; |
| |
| absint_byte_t &operator[] (int i) |
| { |
| gcc_assert (IN_RANGE (i, 0, absint_t::eq_size - 1)); |
| return eq[i]; |
| } |
| |
| const absint_byte_t &operator[] (int i) const |
| { |
| gcc_assert (IN_RANGE (i, 0, absint_t::eq_size - 1)); |
| return eq[i]; |
| } |
| |
| absint_t () {} |
| |
| absint_t (rtx xold) |
| : xexp(xold) |
| {} |
| |
| absint_t (rtx xold, rtx xnew, int n_bytes) |
| : xexp(xold), xexp_new(xnew) |
| { |
| gcc_assert (n_bytes <= eq_size); |
| if (xnew) |
| for (int i = 0; i < n_bytes; ++i) |
| eq[i].learn_val8 (avr_uint8 (xnew, i)); |
| } |
| |
| // CODE != UNKNOWN: Maximal index of a byte with code CODE, or -1. |
| // CODE == UNKNOWN: Maximal index of a byte with known content, or -1. |
| int max_knows (rtx_code code = UNKNOWN) const |
| { |
| for (int i = eq_size - 1; i >= 0; --i) |
| if ((code == UNKNOWN && ! eq[i].can (UNKNOWN)) |
| || (code != UNKNOWN && eq[i].can (code))) |
| return i; |
| return -1; |
| } |
| |
| // CODE != UNKNOWN: Maximal i such that all bytes < i have code CODE. |
| // CODE == UNKNOWN: Maximal i such that all bytes < i have code != UNKNOWN. |
| int end_knows (rtx_code code = UNKNOWN) const |
| { |
| for (int i = 0; i < eq_size; ++i) |
| if ((code == UNKNOWN && eq[i].can (UNKNOWN)) |
| || (code != UNKNOWN && ! eq[i].can (code))) |
| return i; |
| return eq_size; |
| } |
| |
| // Number of bytes for which there is usable information. |
| int popcount () const |
| { |
| int pop = 0; |
| for (int i = 0; i < eq_size; ++i) |
| pop += ! eq[i].can (UNKNOWN); |
| return pop; |
| } |
| |
| // Get the value under the assumption that all eq[].val8 are known. |
| uint64_t get_value (int n_bytes, bool strict = true) const |
| { |
| gcc_assert (IN_RANGE (n_bytes, 1, eq_size)); |
| gcc_assert (! strict || end_knows (CONST_INT) >= n_bytes); |
| |
| uint64_t val = 0; |
| for (int i = n_bytes - 1; i >= 0; --i) |
| val = 256 * val + eq[i].val8 (strict); |
| return val; |
| } |
| |
| // Get n-byte value as a const_int, or NULL_RTX when (partially) unknown. |
| rtx get_value_as_const_int (int n_bytes) const |
| { |
| gcc_checking_assert (gpr_regno_p (REG_24, n_bytes)); |
| |
| if (end_knows (CONST_INT) < n_bytes) |
| return NULL_RTX; |
| |
| const uint64_t val = get_value (n_bytes); |
| const machine_mode mode = size_to_mode (n_bytes); |
| |
| return gen_int_mode (val, mode); |
| } |
| |
| // Find a 16-bit register that contains the same value as held |
| // in positions I1 and I2 (if any). Return 0 when nothing appropriate |
| // for a MOVW is found. |
| int reg16_with_value (int i1, int i2, const memento_t &memo) const |
| { |
| if (i1 == (i2 ^ 1)) |
| { |
| const int lo8 = eq[i1 & ~1].val8 (false); |
| const int hi8 = eq[i1 | 1].val8 (false); |
| if (lo8 >= 0 && hi8 >= 0) |
| return memo.reg16_with_value (lo8, hi8, 0); |
| } |
| return 0; |
| } |
| |
| // When X is a REG rtx with a known content as of MEMO, then return |
| // the respective value as a constant for mode MODE. |
| // If X is NULL_RTX, or not a REG, or not known, then return NULL_RTX. |
| static rtx maybe_fold (rtx x, const memento_t &memo) |
| { |
| int n_bytes; |
| |
| if (x != NULL_RTX |
| && REG_P (x) |
| && (n_bytes = GET_MODE_SIZE (GET_MODE (x))) <= FUSE_MOVE_MAX_MODESIZE |
| && gpr_regno_p (REGNO (x), n_bytes)) |
| { |
| rtx xval = memo.get_value_as_const_int (REGNO (x), n_bytes); |
| if (xval) |
| return avr_chunk (GET_MODE (x), xval, 0); |
| } |
| |
| return NULL_RTX; |
| } |
| |
| // Try to conclude about the bytes that comprise X. DEST_MODE is the |
| // context mode that is used when X is CONST_INT and has VOIDmode. |
| static absint_t explore (rtx x, const memento_t &memo, |
| machine_mode dest_mode = VOIDmode) |
| { |
| const rtx_code code = GET_CODE (x); |
| bool worth_dumping = dump_file && (dump_flags & TDF_FOLDING); |
| |
| const machine_mode mode = GET_MODE (x) == VOIDmode |
| ? dest_mode |
| : GET_MODE (x); |
| |
| const int n_bytes = mode == VOIDmode && CONST_INT_P (x) |
| ? absint_t::eq_size |
| : GET_MODE_SIZE (mode); |
| |
| if (! IN_RANGE (n_bytes, 1, absint_t::eq_size)) |
| return absint_t (x); |
| |
| // Eat our own dog food as produced by try_split_ldi. |
| |
| rtx xop0 = BINARY_P (x) || UNARY_P (x) ? XEXP (x, 0) : NULL_RTX; |
| rtx xval0 = xop0 && CONST_INT_OR_FIXED_P (xop0) |
| ? xop0 |
| : absint_t::maybe_fold (xop0, memo); |
| |
| if (UNARY_P (x) |
| && REG_P (xop0) |
| && GET_MODE (xop0) == mode |
| && xval0) |
| { |
| rtx y = simplify_unary_operation (code, mode, xval0, mode); |
| if (y && CONST_INT_OR_FIXED_P (y)) |
| return absint_t (x, y, n_bytes); |
| } |
| |
| rtx xop1 = BINARY_P (x) ? XEXP (x, 1) : NULL_RTX; |
| rtx xval1 = xop1 && CONST_INT_OR_FIXED_P (xop1) |
| ? xop1 |
| : absint_t::maybe_fold (xop1, memo); |
| |
| if (BINARY_P (x) |
| && xval0 && xval1) |
| { |
| rtx y = simplify_binary_operation (code, mode, xval0, xval1); |
| if (y && CONST_INT_OR_FIXED_P (y)) |
| return absint_t (x, y, n_bytes); |
| } |
| |
| // No fold to a constant value was found: |
| // Look at the individual bytes more closely. |
| |
| absint_t ai (x); |
| |
| switch (code) |
| { |
| default: |
| worth_dumping = false; |
| break; |
| |
| case REG: |
| if (END_REGNO (x) <= REG_32 |
| && ! (regmask (x) & memento_t::fixed_regs_mask)) |
| for (unsigned r = REGNO (x); r < END_REGNO (x); ++r) |
| { |
| ai[r - REGNO (x)].learn_regno (r); |
| if (memo.knows (r)) |
| ai[r - REGNO (x)].learn_val8 (memo.value (r)); |
| } |
| break; |
| |
| CASE_CONST_UNIQUE: |
| ai = absint_t (x, x, n_bytes); |
| break; |
| |
| case ASHIFT: |
| case ASHIFTRT: |
| case LSHIFTRT: |
| case ROTATE: |
| case ROTATERT: |
| if ((CONST_INT_P (xop1) && INTVAL (xop1) >= 8) |
| // DImode shift offsets for transparent calls are shipped in R16. |
| || n_bytes == 8) |
| ai = explore_shift (x, memo); |
| break; |
| |
| case AND: |
| case IOR: |
| case XOR: |
| { |
| const absint_t ai0 = absint_t::explore (xop0, memo, mode); |
| const absint_t ai1 = absint_t::explore (xop1, memo, mode); |
| for (int i = 0; i < n_bytes; ++i) |
| ai[i] = absint_byte_t (code, ai0[i], ai1[i]); |
| } |
| break; |
| |
| case NOT: |
| { |
| const absint_t ai0 = absint_t::explore (xop0, memo); |
| for (int i = 0; i < n_bytes; ++i) |
| ai[i] = absint_byte_t (NOT, ai0[i]); |
| } |
| break; |
| |
| case ZERO_EXTEND: |
| case SIGN_EXTEND: |
| { |
| const absint_t ai0 = absint_t::explore (xop0, memo); |
| const int ai0_size = GET_MODE_SIZE (GET_MODE (xop0)); |
| const absint_byte_t b_signs = ai0[ai0_size - 1].get_signs (code); |
| for (int i = 0; i < n_bytes; ++i) |
| ai[i] = i < ai0_size ? ai0[i] : b_signs; |
| } |
| break; |
| |
| case PLUS: |
| case MINUS: |
| if (SCALAR_INT_MODE_P (mode) |
| || ALL_SCALAR_FIXED_POINT_MODE_P (mode)) |
| { |
| const absint_t ai0 = absint_t::explore (xop0, memo, mode); |
| const absint_t ai1 = absint_t::explore (xop1, memo, mode); |
| if (code == MINUS) |
| for (int i = 0; i < n_bytes && ai1[i].val8 (false) == 0; ++i) |
| ai[i] = ai0[i]; |
| |
| if (code == PLUS) |
| for (int i = 0; i < n_bytes; ++i) |
| { |
| if (ai0[i].val8 (false) == 0) |
| ai[i] = ai1[i]; |
| else if (ai1[i].val8 (false) == 0) |
| ai[i] = ai0[i]; |
| else |
| { |
| ai[i] = absint_byte_t (code, ai0[i], ai1[i]); |
| break; |
| } |
| } |
| |
| if (code == PLUS |
| && GET_CODE (xop0) == ZERO_EXTEND |
| && CONST_INT_P (xop1)) |
| { |
| rtx exop = XEXP (xop0, 0); |
| int exsize = GET_MODE_SIZE (GET_MODE (exop)); |
| rtx lo_xop1 = avr_chunk (GET_MODE (exop), xop1, 0); |
| if (lo_xop1 == const0_rtx) |
| for (int i = exsize; i < n_bytes; ++i) |
| ai[i] = ai1[i]; |
| } |
| } |
| break; // PLUS, MINUS |
| |
| case MULT: |
| if (GET_MODE (xop0) == mode |
| && SCALAR_INT_MODE_P (mode)) |
| { |
| // The constant may be located in xop0's zero_extend... |
| const absint_t ai0 = absint_t::explore (xop0, memo, mode); |
| const absint_t ai1 = absint_t::explore (xop1, memo, mode); |
| const int end0 = ai0.end_knows (CONST_INT); |
| const int end1 = ai1.end_knows (CONST_INT); |
| const uint64_t mul0 = end0 > 0 ? ai0.get_value (end0) : 1; |
| const uint64_t mul1 = end1 > 0 ? ai1.get_value (end1) : 1; |
| // Shifting in off/8 zero bytes from the right. |
| const int off = mul0 * mul1 != 0 ? ctz_hwi (mul0 * mul1) : 0; |
| for (int i = 0; i < off / 8; ++i) |
| ai[i].learn_val8 (0); |
| } |
| break; // MULT |
| |
| case BSWAP: |
| if (GET_MODE (xop0) == mode) |
| { |
| const absint_t ai0 = absint_t::explore (xop0, memo); |
| for (int i = 0; i < n_bytes; ++i) |
| ai[i] = ai0[n_bytes - 1 - i]; |
| } |
| break; |
| } // switch code |
| |
| if (worth_dumping) |
| { |
| avr_dump (";; AI.explore %C:%m ", code, mode); |
| ai.dump (); |
| } |
| |
| for (int i = 0; i < n_bytes; ++i) |
| gcc_assert (ai[i].check ()); |
| |
| return ai; |
| } |
| |
| // Helper for the method above. |
| static absint_t explore_shift (rtx x, const memento_t &memo) |
| { |
| absint_t ai (x); |
| |
| const rtx_code code = GET_CODE (x); |
| const machine_mode mode = GET_MODE (x); |
| const int n_bytes = GET_MODE_SIZE (mode); |
| |
| if (! BINARY_P (x)) |
| return ai; |
| |
| rtx xop0 = XEXP (x, 0); |
| rtx xop1 = XEXP (x, 1); |
| |
| // Look at shift offsets of DImode more closely; |
| // they are in R16 for __lshrdi3 etc. Patch xop1 on success. |
| if (n_bytes == 8 |
| && ! CONST_INT_P (xop1) |
| && GET_MODE (xop0) == mode) |
| { |
| const int n_off = GET_MODE_SIZE (GET_MODE (xop1)); |
| const absint_t aoff = absint_t::explore (xop1, memo); |
| xop1 = aoff.get_value_as_const_int (n_off); |
| } |
| |
| if (! xop1 |
| || GET_MODE (xop0) != mode |
| || ! IN_RANGE (n_bytes, 1, FUSE_MOVE_MAX_MODESIZE) |
| || ! CONST_INT_P (xop1) |
| || ! IN_RANGE (INTVAL (xop1), 8, 8 * n_bytes - 1)) |
| return ai; |
| |
| const int off = INTVAL (xop1); |
| const absint_t ai0 = absint_t::explore (xop0, memo); |
| |
| switch (GET_CODE (x)) |
| { |
| default: |
| break; |
| |
| case ASHIFT: |
| // Shifting in 0x00's from the right. |
| for (int i = 0; i < off / 8; ++i) |
| ai[i].learn_val8 (0); |
| break; |
| |
| case LSHIFTRT: |
| case ASHIFTRT: |
| { |
| // Shifting in 0x00's or signs from the left. |
| absint_byte_t b_signs = ai0[n_bytes - 1].get_signs (GET_CODE (x)); |
| for (int i = n_bytes - off / 8; i < n_bytes; ++i) |
| ai[i] = b_signs; |
| if (off == 8 * n_bytes - 1) |
| if (code == ASHIFTRT) |
| ai[0] = b_signs; |
| } |
| break; |
| } |
| |
| if (off % 8 != 0 |
| || ai0.popcount () == 0) |
| return ai; |
| |
| // For shift offsets that are a multiple of 8, record the |
| // action on the constituent bytes. |
| |
| // Bytes are moving left by this offset (or zero for "none"). |
| const int boffL = select<int>() |
| : code == ROTATE || code == ASHIFT ? off / 8 |
| : code == ROTATERT ? n_bytes - off / 8 |
| : 0; |
| |
| // Bytes are moving right by this offset (or zero for "none"). |
| const int boffR = select<int>() |
| : code == ROTATERT || code == ASHIFTRT || code == LSHIFTRT ? off / 8 |
| : code == ROTATE ? n_bytes - off / 8 |
| : 0; |
| |
| if (dump_flags & TDF_FOLDING) |
| { |
| avr_dump (";; AI.explore_shift %C:%m ", code, mode); |
| if (boffL) |
| avr_dump ("<< %d%s", 8 * boffL, boffL && boffR ? ", " : ""); |
| if (boffR) |
| avr_dump (">> %d", 8 * boffR); |
| avr_dump ("\n"); |
| } |
| |
| if (boffL) |
| for (int i = 0; i < n_bytes - boffL; ++i) |
| ai[i + boffL] = ai0[i]; |
| |
| if (boffR) |
| for (int i = boffR; i < n_bytes; ++i) |
| ai[i - boffR] = ai0[i]; |
| |
| return ai; |
| } |
| |
| void dump (const char *msg = nullptr, FILE *f = dump_file) const |
| { |
| if (f) |
| dump (NULL_RTX, msg, f); |
| } |
| |
| void dump (rtx dest, const char *msg = nullptr, FILE *f = dump_file) const |
| { |
| if (f) |
| { |
| int regno = dest && REG_P (dest) ? REGNO (dest) : 0; |
| |
| msg = msg && msg[0] ? msg : "AI=[%s]\n"; |
| const char *const xs = strstr (msg, "%s"); |
| gcc_assert (xs); |
| |
| fprintf (f, "%.*s", (int) (xs - msg), msg); |
| for (int i = max_knows (); i >= 0; --i) |
| { |
| const int sub_regno = eq[i].regno (false /*nonstrict*/); |
| const bool nop = regno && sub_regno == regno + i; |
| eq[i].dump (nop ? "%s=nop" : "%s", f); |
| fprintf (f, "%s", i ? "; " : ""); |
| } |
| fprintf (f, "%s", xs + strlen ("%s")); |
| } |
| } |
| }; // absint_t |
| |
| |
| // Information for a REG = non-memory single_set. |
| |
| struct insninfo_t |
| { |
| // This is an insn that sets the m_size bytes of m_regno to either |
| // - A compile time constant m_isrc (m_code = CONST_INT), or |
| // - The contents of register number m_rsrc (m_code = REG). |
| int m_size = 0; |
| int m_regno; |
| int m_rsrc; |
| rtx_code m_code; |
| uint64_t m_isrc; |
| rtx_insn *m_insn = nullptr; |
| rtx m_set = NULL_RTX; |
| rtx m_src = NULL_RTX; |
| int m_scratch = 0; // 0 or the register number of a QImode scratch. |
| rtx_code m_old_code = UNKNOWN; |
| |
| // Knowledge about the bytes of the SET_SRC: A byte may have a known |
| // value, may be known to equal some register (e.g. with BSWAP), |
| // or both, or may be unknown. |
| absint_t m_ai; |
| |
| // May be set for binary operations. |
| absint_byte_t m_new_src; |
| |
| bool init1 (insn_optimize_data_t &, int max_size, const char *purpose); |
| |
| // Upper bound for the cost (in words) of a move<mode> insn that |
| // performs a REG = CONST_XXX = .m_isrc move of modesize .m_size. |
| int cost () const; |
| bool combine (const insninfo_t &prev, const insninfo_t &curr); |
| int emit_insn () const; |
| |
| bool needs_scratch () const |
| { |
| gcc_assert (m_code == CONST_INT); |
| |
| for (int i = 0; i < m_size; ++i) |
| if (AVRasm::ldi_needs_scratch (m_regno, m_isrc >> (8 * i))) |
| return true; |
| |
| return false; |
| } |
| |
| int hamming (const memento_t &memo) const |
| { |
| gcc_assert (m_code == CONST_INT); |
| |
| int h = 0; |
| for (int i = 0; i < m_size; ++i) |
| h += ! memo.have_value (m_regno + i, 1, 0xff & (m_isrc >> (8 * i))); |
| |
| return h; |
| } |
| |
| // Upper bound for the number of ply_t's of a solution, given Hamming |
| // distance of HAMM (-1 for unknown). |
| int n_best_plys (int hamm = -1) const |
| { |
| gcc_assert (m_code == CONST_INT); |
| |
| if (m_size == 8) |
| return (hamm >= 0 ? hamm : m_size); |
| else if (hamm <= 4) |
| return (hamm >= 0 ? hamm : m_size) |
| // The following terms give the max number of MOVWs with a |
| // Hamming difference of less than 2. |
| + (AVR_HAVE_MOVW && m_regno < REG_14) * m_size / 2 |
| + (AVR_HAVE_MOVW && m_regno == REG_14) * std::max (0, m_size - 2) |
| - (AVR_HAVE_MOVW && hamm == 4 && (uint32_t) m_isrc % 0x10001 == 0); |
| else |
| gcc_unreachable (); |
| } |
| }; // insninfo_t |
| |
| |
| struct insn_optimize_data_t |
| { |
| // Known values held in GPRs prior to the action of .insn / .ii. |
| memento_t ®s; |
| rtx_insn *insn; |
| insninfo_t ii; |
| bool unused; |
| |
| insn_optimize_data_t () = delete; |
| |
| insn_optimize_data_t (memento_t &memo) |
| : regs(memo) |
| {} |
| }; // insn_optimize_data_t |
| |
| struct optimize_data_t |
| { |
| insn_optimize_data_t prev; |
| insn_optimize_data_t curr; |
| |
| // Number >= 0 of new insns that replace the curr insn and maybe also the |
| // prev insn. -1 when no replacement has been found. |
| int n_new_insns = -1; |
| |
| // .prev will be removed provided we have (potentially zero) new insns. |
| bool delete_prev_p = false; |
| |
| // Ignore these GPRs when comparing the simulation results of |
| // old and new insn sequences. Usually some scratch reg(s). |
| gprmask_t ignore_mask = 0; |
| |
| optimize_data_t () = delete; |
| |
| optimize_data_t (memento_t &prev_regs, memento_t &curr_regs) |
| : prev(prev_regs), curr(curr_regs) |
| {} |
| |
| bool try_fuse (bbinfo_t *); |
| bool try_mem0 (bbinfo_t *); |
| bool try_bin_arg1 (bbinfo_t *); |
| bool try_simplify (bbinfo_t *); |
| bool try_split_ldi (bbinfo_t *); |
| bool try_split_any (bbinfo_t *); |
| bool fail (const char *reason); |
| bool emit_signs (int r_sign, gprmask_t); |
| void emit_move_mask (int dest, int src, int n_bytes, gprmask_t &); |
| rtx_insn *emit_sequence (basic_block, rtx_insn *); |
| bool get_2ary_operands (rtx_code &, const absint_byte_t &, |
| insn_optimize_data_t &, int r_dest, |
| absint_val_t &, absint_val_t &, int &ex_cost); |
| rtx_insn *emit_and_apply_move (memento_t &, rtx dest, rtx src); |
| |
| // M2 is the state of GPRs as the sequence starts; M1 is the state one before. |
| static void apply_sequence (const std::vector<rtx_insn *> &insns, |
| memento_t &m1, memento_t &m2) |
| { |
| gcc_assert (insns.size () >= 1); |
| |
| for (auto &i : insns) |
| { |
| m1 = m2; |
| m2.apply_insn (i, false); |
| } |
| } |
| }; // optimize_data_t |
| |
| |
| // Emit INSNS before .curr.insn, replacing .curr.insn and also .prev.insn when |
| // .delete_prev_p is on. Adjusts .curr.regs and .prev.regs accordingly. |
| rtx_insn * |
| optimize_data_t::emit_sequence (basic_block bb, rtx_insn *insns) |
| { |
| gcc_assert (n_new_insns >= 0); |
| |
| // The old insns will be replaced by and simulated... |
| const std::vector<rtx_insn *> old_insns = delete_prev_p |
| ? std::vector<rtx_insn *> { prev.insn, curr.insn } |
| : std::vector<rtx_insn *> { curr.insn }; |
| |
| // ...against the new insns. |
| std::vector<rtx_insn *> new_insns; |
| for (rtx_insn *i = insns; i; i = NEXT_INSN (i)) |
| new_insns.push_back (i); |
| |
| rtx_insn *new_curr_insn; |
| |
| memento_t &m1 = prev.regs; |
| memento_t &m2 = curr.regs; |
| |
| if (new_insns.empty ()) |
| { |
| if (delete_prev_p) |
| { |
| m2 = m1; |
| m1.known = 0; |
| new_curr_insn = prev_nondebug_insn_bb (bb, prev.insn); |
| } |
| else |
| new_curr_insn = prev.insn; |
| } |
| else |
| { |
| // We are going to emit at least one new insn. Simulate the effect of |
| // the new sequence and compare it against the effect of the old one. |
| // Both effects must be the same (modulo scratch regs). |
| |
| memento_t n1 = m1; |
| memento_t n2 = m2; |
| |
| if (delete_prev_p) |
| { |
| m2 = m1, m1.known = 0; |
| n2 = n1, n1.known = 0; |
| } |
| |
| avr_dump (";; Applying new route...\n"); |
| optimize_data_t::apply_sequence (new_insns, n1, n2); |
| |
| avr_dump (";; Applying old route...\n"); |
| optimize_data_t::apply_sequence (old_insns, m1, m2); |
| avr_dump ("\n"); |
| |
| if (! m2.equals (n2, ignore_mask)) |
| { |
| // When we come here, then |
| // - We have a genuine bug, and/or |
| // - We did produce insns that are opaque to absint_t's explore(). |
| avr_dump ("INCOMPLETE APPLICATION:\n"); |
| m2.dump ("regs old route=%s\n\n"); |
| n2.dump ("regs new route=%s\n\n"); |
| avr_dump ("The new insns are:\n%L", insns); |
| |
| fatal_insn ("incomplete application of insn", insns); |
| } |
| |
| // Use N1 and N2 as the new GPR states. Even though they are equal |
| // modulo ignore_mask, N2 may know more about GPRs when it doesn't |
| // clobber the scratch reg. |
| m1 = n1; |
| m2 = n2; |
| |
| emit_insn_before (insns, curr.insn); |
| |
| new_curr_insn = new_insns.back (); |
| } |
| |
| if (delete_prev_p) |
| SET_INSN_DELETED (prev.insn); |
| |
| SET_INSN_DELETED (curr.insn); |
| |
| return new_curr_insn; |
| } |
| |
| |
| const pass_data avr_pass_data_fuse_move = |
| { |
| RTL_PASS, // type |
| "", // name (will be patched) |
| OPTGROUP_NONE, // optinfo_flags |
| TV_MACH_DEP, // tv_id |
| 0, // properties_required |
| 0, // properties_provided |
| 0, // properties_destroyed |
| 0, // todo_flags_start |
| TODO_df_finish | TODO_df_verify // todo_flags_finish |
| }; |
| |
| |
| class avr_pass_fuse_move : public rtl_opt_pass |
| { |
| public: |
| avr_pass_fuse_move (gcc::context *ctxt, const char *name) |
| : rtl_opt_pass (avr_pass_data_fuse_move, ctxt) |
| { |
| this->name = name; |
| } |
| |
| unsigned int execute (function *func) final override |
| { |
| if (optimize > 0 && avropt_fuse_move > 0) |
| { |
| df_note_add_problem (); |
| df_analyze (); |
| |
| bbinfo_t::optimize_one_function (func); |
| } |
| |
| return 0; |
| } |
| }; // avr_pass_fuse_move |
| |
| |
| // Append PLY to .plies[]. A SET or BLD ply may start a new sequence of |
| // SETs or BLDs and gets assigned the overhead of the sequence like for an |
| // initial SET or CLT instruction. A SET ply may be added in two flavours: |
| // One that starts a sequence of single_sets, and one that represents the |
| // payload of a set_some insn. MEMO is the GPR state prior to PLY. |
| void |
| plies_t::add (ply_t ply, const ply_t *prev, const memento_t &memo, |
| bool maybe_set_some) |
| { |
| if (ply.code == SET) |
| { |
| if (prev && prev->code == SET) |
| { |
| // Proceed with the SET sequence flavour. |
| ply.in_set_some = prev->in_set_some; |
| |
| if (ply.in_set_some) |
| ply.scratch = 0; |
| else if (! ply.scratch && ply.needs_scratch ()) |
| ply.cost += 2; |
| } |
| else |
| { |
| // The 1st SET in a sequence. May use set_some to set |
| // all bytes in one insn, or a bunch of single_sets. |
| |
| // Route1: Bunch of single_sets. |
| const int ply_cost = ply.cost; |
| if (! ply.scratch && ply.needs_scratch ()) |
| ply.cost += 2; |
| ply.in_set_some = false; |
| |
| add (ply); |
| |
| if (maybe_set_some) |
| { |
| // Route 2: One set_some: The 1st SET gets all the overhead. |
| ply.scratch = 0; |
| ply.cost = ply_cost + 1 + ! memo.known_dregno (); |
| ply.in_set_some = true; |
| } |
| } |
| } // SET |
| else if (ply.is_bld ()) |
| { |
| // The first BLD in a series of BLDs gets the extra costs |
| // for the SET / CLT that precedes the BLDs. |
| ply.cost += ! ply.is_same_bld (prev); |
| } |
| |
| add (ply); |
| } |
| |
| |
| // Emit insns for .plies[] and return the number of emitted insns. |
| // The emitted insns represent the effect of II with MEMO, which |
| // is the GPR knowledge before II is executed. |
| int |
| plies_t::emit_insns (const insninfo_t &ii, const memento_t &memo) const |
| { |
| int n_insns = 0; |
| |
| for (int i = 0; i < n_plies; ++i) |
| { |
| const ply_t &p = plies[i]; |
| |
| // SETs and BLDs are dumped by their emit_xxxs(). |
| if (p.code != SET && ! p.is_bld ()) |
| p.dump (); |
| |
| rtx src1 = NULL_RTX; |
| rtx src2 = NULL_RTX; |
| rtx dest = NULL_RTX; |
| rtx xscratch = NULL_RTX; |
| rtx_code code = p.code; |
| |
| switch (p.code) |
| { |
| default: |
| avr_dump ("\n\n;; Bad ply_t:\n"); |
| p.dump (i + 1); |
| gcc_unreachable (); |
| break; |
| |
| case REG: // *movhi = MOVW; movqi_insn = MOV |
| dest = gen_rtx_REG (p.size == 1 ? QImode : HImode, p.regno); |
| src1 = gen_rtx_REG (p.size == 1 ? QImode : HImode, p.arg); |
| break; |
| |
| case SET: // movqi_insn = LDI, CLR; set_some = (LDI + MOV) ** size. |
| i += emit_sets (ii, n_insns, memo, i) - 1; |
| continue; |
| |
| case MOD: // *ior<mode>3, *and<mode>3 = SET + BLD... / CLT + BLD... |
| i += emit_blds (ii, n_insns, i) - 1; |
| continue; |
| |
| case MINUS: // *subqi3 = SUB |
| case PLUS: // *addqi3 = ADD |
| case AND: // *andqi3 = AND |
| case IOR: // *iorqi3 = OR |
| case XOR: // *xorqi3 = EOR |
| dest = gen_rtx_REG (QImode, p.regno); |
| src2 = gen_rtx_REG (QImode, p.arg); |
| break; |
| |
| case PRE_INC: // *addqi3 = INC |
| case PRE_DEC: // *addqi3 = DEC |
| code = PLUS; |
| dest = gen_rtx_REG (QImode, p.regno); |
| src2 = p.code == PRE_INC ? const1_rtx : constm1_rtx; |
| break; |
| |
| case NEG: // *negqi2 = NEG |
| case NOT: // *one_cmplqi2 = COM |
| dest = gen_rtx_REG (QImode, p.regno); |
| src1 = dest; |
| break; |
| |
| case ROTATE: // *rotlqi3 = SWAP |
| case ASHIFT: // *ashlqi3 = LSL |
| case ASHIFTRT: // *ashrqi3 = ASR |
| case LSHIFTRT: // *lshrqi3 = LSR |
| dest = gen_rtx_REG (QImode, p.regno); |
| src2 = GEN_INT (code == ROTATE ? 4 : 1); |
| break; |
| |
| case SS_PLUS: // *addhi3 = ADIW, SBIW |
| code = PLUS; |
| dest = gen_rtx_REG (HImode, p.regno); |
| src2 = gen_int_mode (p.arg, HImode); |
| break; |
| } // switch p.code |
| |
| gcc_assert (dest && (! src1) + (! src2) == 1); |
| |
| rtx src = code == REG || code == SET |
| ? src1 |
| : (src2 |
| ? gen_rtx_fmt_ee (code, GET_MODE (dest), dest, src2) |
| : gen_rtx_fmt_e (code, GET_MODE (dest), src1)); |
| |
| emit_valid_move_clobbercc (dest, src, xscratch); |
| n_insns += 1; |
| } |
| |
| return n_insns; |
| } |
| |
| |
| // Helper for .emit_insns(). Emit an ior<mode>3 or and<mode>3 insn |
| // that's equivalent to a sequence of contiguous BLDs starting at |
| // .plies[ISTART]. Updates N_INSNS according to the number of insns |
| // emitted and returns the number of consumed plys in .plies[]. |
| int |
| plies_t::emit_blds (const insninfo_t &ii, int &n_insns, int istart) const |
| { |
| const ply_t &first = plies[istart]; |
| |
| gcc_assert (ii.m_size <= 4); |
| gcc_assert (first.is_bld ()); |
| |
| const rtx_code code = first.is_setbld () ? IOR : AND; |
| const machine_mode mode = size_to_mode (ii.m_size); |
| |
| // Determine mask and number of BLDs. |
| |
| uint32_t mask = 0; |
| int n_blds = 0; |
| |
| for (int i = istart; i < n_plies; ++i, ++n_blds) |
| { |
| const ply_t &p = plies[i]; |
| if (! p.is_bld () || ! p.is_same_bld (& first)) |
| break; |
| |
| // For AND, work on the 1-complement of the mask, |
| // i.e. 1's specify which bits to clear. |
| uint8_t mask8 = code == IOR ? p.arg : ~p.arg; |
| mask |= mask8 << (8 * (p.regno - ii.m_regno)); |
| } |
| |
| mask = GET_MODE_MASK (mode) & (code == IOR ? mask : ~mask); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, ";; emit_blds[%d...%d] R%d[%d]%s=%0*x\n", |
| istart, istart + n_blds - 1, ii.m_regno, ii.m_size, |
| code == IOR ? "|" : "&", 2 * ii.m_size, (int) mask); |
| } |
| |
| for (int i = 0; i < n_blds; ++i) |
| plies[i + istart].dump (); |
| |
| rtx dest = gen_rtx_REG (mode, ii.m_regno); |
| rtx src = gen_rtx_fmt_ee (code, mode, dest, gen_int_mode (mask, mode)); |
| rtx xscratch = mode == QImode ? NULL_RTX : gen_rtx_SCRATCH (QImode); |
| |
| emit_valid_move_clobbercc (dest, src, xscratch); |
| n_insns += 1; |
| |
| return n_blds; |
| } |
| |
| |
| // Emit insns for a contiguous sequence of SET ply_t's starting at |
| // .plies[ISTART]. Advances N_INSNS by the number of emitted insns. |
| // MEMO is the state of the GPRs before II is executed, where II |
| // represents the insn under optimization. |
| // The emitted insns are "movqi_insn" or "*reload_inqi" |
| // when .plies[ISTART].in_set_some is not set, and one "set_some" insn |
| // when .plies[ISTART].in_set_some is set. |
| int |
| plies_t::emit_sets (const insninfo_t &ii, int &n_insns, const memento_t &memo, |
| int istart) const |
| { |
| gcc_assert (plies[istart].code == SET); |
| |
| const bool in_set_some = plies[istart].in_set_some; |
| |
| // Some d-regno that holds a compile-time constant, or 0. |
| const int known_dregno = memo.known_dregno (); |
| |
| // Determine number of contiguous SETs, |
| // and sort them in ps[] such that smaller regnos come first. |
| |
| const ply_t *ps[FUSE_MOVE_MAX_MODESIZE]; |
| int n_sets = 0; |
| |
| for (int i = istart; i < n_plies && plies[i].code == SET; ++i) |
| ps[n_sets++] = & plies[i]; |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, ";; emit_sets[%d...%d] R%d[%d]=%0*" PRIx64, |
| istart, istart + n_sets - 1, ii.m_regno, ii.m_size, |
| 2 * ii.m_size, ii.m_isrc); |
| fprintf (dump_file, ", scratch=%s%d", "R" + ! ii.m_scratch, ii.m_scratch); |
| fprintf (dump_file, ", known_dreg=%s%d, set_some=%d\n", |
| "R" + ! known_dregno, known_dregno, in_set_some); |
| } |
| |
| for (int i = 0; i < n_sets; ++i) |
| ps[i]->dump (); |
| |
| // Sort. This is most useful on regs like (reg:SI REG_14). |
| for (int i = 0; i < n_sets - 1; ++i) |
| for (int j = i + 1; j < n_sets; ++j) |
| if (ps[i]->regno > ps[j]->regno) |
| std::swap (ps[i], ps[j]); |
| |
| // Prepare operands. |
| rtx dst[FUSE_MOVE_MAX_MODESIZE]; |
| rtx src[FUSE_MOVE_MAX_MODESIZE]; |
| for (int i = 0; i < n_sets; ++i) |
| { |
| dst[i] = gen_rtx_REG (QImode, ps[i]->regno); |
| src[i] = gen_int_mode (ps[i]->arg, QImode); |
| } |
| |
| if (in_set_some) |
| { |
| // Emit a "set_some" insn that sets all of the collected 8-bit SETs. |
| // This is a parallel with n_sets QImode SETs as payload. |
| |
| gcc_assert (! known_dregno || memo.knows (known_dregno)); |
| |
| // A scratch reg... |
| rtx op1 = known_dregno |
| ? gen_rtx_REG (QImode, known_dregno) |
| : const0_rtx; |
| // ...with a known content, so it can be restored without saving. |
| rtx op2 = known_dregno |
| ? gen_int_mode (memo.values[known_dregno], QImode) |
| : const0_rtx; |
| // Target register envelope. |
| rtx op3 = GEN_INT (ii.m_regno); |
| rtx op4 = GEN_INT (ii.m_size); |
| |
| // Payload. |
| for (int i = 0; i < n_sets; ++i) |
| dst[i] = gen_rtx_SET (dst[i], src[i]); |
| |
| rtvec vec = gen_rtvec (5 + n_sets, |
| gen_rtx_USE (VOIDmode, op1), |
| gen_rtx_USE (VOIDmode, op2), |
| gen_rtx_USE (VOIDmode, op3), |
| gen_rtx_USE (VOIDmode, op4), |
| gen_rtx_CLOBBER (VOIDmode, cc_reg_rtx), |
| dst[0], dst[1], dst[2], dst[3]); |
| rtx pattern = gen_rtx_PARALLEL (VOIDmode, vec); |
| |
| emit_valid_insn (pattern); |
| n_insns += 1; |
| } |
| else |
| { |
| // Emit a bunch of movqi_insn / *reload_inqi insns. |
| |
| for (int i = 0; i < n_sets; ++i) |
| if (ii.m_scratch |
| && AVRasm::constant_cost (SET, ps[i]->regno, ps[i]->arg) > 1) |
| { |
| rtx scratch = gen_rtx_REG (QImode, ii.m_scratch); |
| bool use_reload_inqi = true; |
| if (use_reload_inqi) |
| { |
| emit_valid_move_clobbercc (dst[i], src[i], scratch); |
| n_insns += 1; |
| } |
| else |
| { |
| emit_valid_move_clobbercc (scratch, src[i]); |
| emit_valid_move_clobbercc (dst[i], scratch); |
| n_insns += 2; |
| } |
| } |
| else |
| { |
| emit_valid_move_clobbercc (dst[i], src[i]); |
| n_insns += 1; |
| } |
| } |
| |
| return n_sets; |
| } |
| |
| |
| // Try to find an operation such that Y = op (X). |
| // Shifts and rotates are regarded as unary operations with |
| // an implied 2nd operand of 1 or 4, respectively. |
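| // For example, find_arith (0x21, 0x12) is ROTATE (a SWAP), and |
| // find_arith (0x80, 0x40) is ASHIFT (an LSL). |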
| static rtx_code |
| find_arith (uint8_t y, uint8_t x) |
| { |
| #define RETIF(ex, code) y == (0xff & (ex)) ? code |
| return select<rtx_code>() |
| : RETIF (x + 1, PRE_INC) |
| : RETIF (x - 1, PRE_DEC) |
| : RETIF ((x << 4) | (x >> 4), ROTATE) |
| : RETIF (-x, NEG) |
| : RETIF (~x, NOT) |
| : RETIF (x >> 1, LSHIFTRT) |
| : RETIF (x << 1, ASHIFT) |
| : RETIF ((x >> 1) | (x & 0x80), ASHIFTRT) |
| : UNKNOWN; |
| #undef RETIF |
| } |
| |
| |
| // Try to find an operation such that Z = X op Y. |
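| // For example, find_arith2 (0x16, 0x0c, 0x0a) is PLUS, and |
| // find_arith2 (0x08, 0x0c, 0x0a) is AND. |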
| static rtx_code |
| find_arith2 (uint8_t z, uint8_t x, uint8_t y) |
| { |
| #define RETIF(ex, code) z == (0xff & (ex)) ? code |
| return select<rtx_code>() |
| : RETIF (x + y, PLUS) |
| : RETIF (x - y, MINUS) |
| : RETIF (x & y, AND) |
| : RETIF (x | y, IOR) |
| : RETIF (x ^ y, XOR) |
| : UNKNOWN; |
| #undef RETIF |
| } |
| |
| |
| // Add plies to .plies[] that represent a MOVW, but only ones that reduce |
| // the Hamming distance from REGNO[SIZE] to VAL by exactly DHAMM. |
| void |
| plies_t::add_plies_movw (int regno, int size, uint64_t val, |
| int dhamm, const memento_t &memo) |
| { |
| if (! AVR_HAVE_MOVW || size < 2) |
| return; |
| |
| for (int i = 0; i < size - 1; i += 2) |
| { |
| // A MOVW that sets fewer than 2 regs to the target value is |
| // not needed for the upper regs. |
| if (dhamm != 2 && regno + i >= REG_16) |
| continue; |
| |
| const uint16_t val16 = val >> (8 * i); |
| const uint8_t lo8 = val16; |
| const uint8_t hi8 = val16 >> 8; |
| |
| // When one of the target bytes is already as expected, then |
| // no MOVW is needed for an optimal sequence. |
| if (memo.have_value (regno + i, 1, lo8) |
| || memo.have_value (regno + i + 1, 1, hi8)) |
| continue; |
| |
| const int h_old = memo.hamming (regno + i, 2, val16); |
| |
| // Record MOVWs that reduce the Hamming distance by DHAMM as requested. |
| for (int j = FIRST_GPR; j < REG_32; j += 2) |
| if (j != regno + i |
| && memo.knows (j, 2)) |
| { |
| const int h_new = memo.hamming (j, 2, val16); |
| if (h_new == h_old - dhamm) |
| add (ply_t { regno + i, 2, REG, j, 1, dhamm }); |
| } |
| } |
| } |
| |
| |
| // Set PS to plys that reduce the Hamming distance from II.m_regno to |
| // compile-time constant II.m_isrc by 2, 1 or 0. PREV is NULL or points |
| // to a previous ply_t. MEMO is the GPR state after PREV and prior to the |
| // added plys. |
| void |
| bbinfo_t::get_plies (plies_t &ps, const insninfo_t &ii, const memento_t &memo, |
| const ply_t *prev) |
| { |
| ps.reset (); |
| |
| fpd->n_get_plies += 1; |
| |
| const bool maybe_set_some = (bbinfo_t::use_set_some_p && ii.needs_scratch ()); |
| |
| // Start with cheap plies, then continue to more expensive ones. |
| const int regno = ii.m_regno; |
| const int size = ii.m_size; |
| const uint64_t val = ii.m_isrc; |
| |
| // Find MOVW with a Hamming delta of 2. |
| ps.add_plies_movw (regno, size, val, 2, memo); |
| |
| // Find ADIW / SBIW |
| if (AVR_HAVE_ADIW && size >= 2) |
| for (int i = 0; i < size - 1; i += 2) |
| if (regno + i >= REG_24 |
| && memo.knows (regno + i, 2)) |
| { |
| const int16_t value16 = memo[regno + i] + 256 * memo[regno + i + 1]; |
| const int16_t lo16 = val >> (8 * i); |
| const int16_t delta = lo16 - value16; |
| const uint8_t lo8 = val >> (8 * i); |
| const uint8_t hi8 = val >> (8 * i + 8); |
| if (IN_RANGE (delta, -63, 63) |
| && lo8 != memo[regno + i] |
| && hi8 != memo[regno + i + 1]) |
| { |
| ps.add (ply_t { regno + i, 2, SS_PLUS, delta, 1, 2 }); |
| } |
| } |
| |
| // Find 1-reg plies. In an optimal sequence, each 1-reg ply will decrease |
| // the Hamming distance. Thus we only have to consider plies that set |
| // one of the target bytes to the target value VAL. Start with the |
| // high registers since that is the canonical order when two plies commute. |
| |
| for (int i = size - 1; i >= 0; --i) |
| { |
| const uint8_t val8 = val >> (8 * i); |
| |
| // Nothing to do for this byte when its value is already as desired. |
| if (memo.have_value (regno + i, 1, val8)) |
| continue; |
| |
| // LDI or CLR. |
| if (regno + i >= REG_16 || val8 == 0) |
| ps.add (ply_t { regno + i, 1, SET, val8, 1 }, prev, memo, |
| maybe_set_some); |
| |
| // We may only need to MOV non-zero values since there is CLR, |
| // and only when there is no LDI. |
| if (val8 != 0 |
| && regno + i < REG_16) |
| { |
| // MOV where the source register is one of the target regs. |
| for (int j = 0; j < size; ++j) |
| if (j != i) |
| if (memo.have_value (regno + j, 1, val8)) |
| ps.add (ply_t { regno + i, 1, REG, regno + j, 1 }); |
| |
| // MOV where the source register is not a target reg. |
| // FIXME: ticks. |
| for (int j = FIRST_GPR; j < REG_32; ++j) |
| if (! IN_RANGE (j, regno, regno + size - 1)) |
| if (memo.have_value (j, 1, val8)) |
| ps.add (ply_t { regno + i, 1, REG, j, 1 }); |
| |
| // LDI + MOV. |
| if (regno + i < REG_16 && val8 != 0) |
| { |
| ply_t p { regno + i, 1, SET, val8, 2 }; |
| p.scratch = ii.m_scratch; |
| ps.add (p, prev, memo, maybe_set_some); |
| } |
| } |
| } |
| |
| // Arithmetic like INC, DEC or ASHIFT. |
| for (int i = size - 1; i >= 0; --i) |
| if (bbinfo_t::use_arith_p |
| && regno + i < REG_16 |
| && memo.knows (regno + i)) |
| { |
| const uint8_t y = val >> (8 * i); |
| const uint8_t x = memo[regno + i]; |
| rtx_code code; |
| |
| if (y == 0 || y == x) |
| continue; |
| |
| // INC, DEC, SWAP, LSL, NEG, ... |
| if (UNKNOWN != (code = find_arith (y, x))) |
| { |
| ps.add (ply_t { regno + i, 1, code, x /* dummy */, 1 }); |
| continue; |
| } |
| |
| // ADD, AND, ... |
| for (int r = FIRST_GPR; r < REG_32; ++r) |
| if (r != regno + i |
| && memo.knows (r) |
| && memo[r] != 0 |
| && UNKNOWN != (code = find_arith2 (y, x, memo[r]))) |
| { |
| ps.add (ply_t { regno + i, 1, code, r, 1 }); |
| } |
| |
| if (size < 2 || size > 4) |
| continue; |
| |
| // SET + BLD |
| if ((x & y) == x && popcount_hwi (x ^ y) == 1) |
| ps.add (ply_t { regno + i, 1, MOD, x ^ y, 1 }, |
| prev, memo, maybe_set_some); |
| |
| // CLT + BLD |
| if ((x & y) == y && popcount_hwi (x ^ y) == 1) |
| ps.add (ply_t { regno + i, 1, MOD, x ^ y ^ 0xff, 1 }, |
| prev, memo, maybe_set_some); |
| } |
| |
| if (bbinfo_t::use_arith_p |
| // For 8-byte values, don't use ply_t's with only a partial reduction |
| // of the Hamming distance. |
| && size <= 4) |
| { |
| // Find MOVW with a Hamming delta of 1, then 0. |
| ps.add_plies_movw (regno, size, val, 1, memo); |
| ps.add_plies_movw (regno, size, val, 0, memo); |
| } |
| |
| plies_t::max_n_plies = std::max (plies_t::max_n_plies, ps.n_plies); |
| } |
| |
| |
| // Try to combine two 8-bit insns PREV and CURR that (effectively) |
| // are REG = CONST_INT to one 16-bit such insn. Returns true on success. |
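| // For example, R14 = 0x34 followed by R15 = 0x12 combines to the |
| // 16-bit constant load R14[2] = 0x1234. |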
| bool |
| insninfo_t::combine (const insninfo_t &prev, const insninfo_t &curr) |
| { |
| if (prev.m_size == 1 && curr.m_size == 1 |
| && prev.m_regno == (1 ^ curr.m_regno) |
| && curr.m_code == CONST_INT |
| && prev.m_code == CONST_INT) |
| { |
| m_regno = curr.m_regno & ~1; |
| m_code = CONST_INT; |
| m_size = 2; |
| m_scratch = std::max (curr.m_scratch, prev.m_scratch); |
| m_isrc = m_regno == prev.m_regno |
| ? (uint8_t) prev.m_isrc + 256 * (uint8_t) curr.m_isrc |
| : (uint8_t) curr.m_isrc + 256 * (uint8_t) prev.m_isrc; |
| |
| return true; |
| } |
| |
| return false; |
| } |
| |
| |
| // Return the cost (in terms of words) of the respective mov<mode> insn. |
| // This can be used as an upper bound for the ply_t's cost. |
| int |
| insninfo_t::cost () const |
| { |
| if (m_code != CONST_INT) |
| return m_size; |
| |
| if (m_regno >= REG_16 || m_isrc == 0) |
| return m_size |
| // MOVW can save one instruction. |
| - (AVR_HAVE_MOVW && m_size == 4 && (uint32_t) m_isrc % 0x10001 == 0); |
| |
| // LDI + MOV to a lower reg. |
| if (m_scratch && m_size == 1) |
| return 2; |
| |
| if (m_size == 8) |
| { |
| int len = m_size; |
| for (int i = 0; i < m_size; ++i) |
| len += m_regno + i < REG_16 && (0xff & (m_isrc >> (8 * i))) != 0; |
| return len; |
| } |
| |
| // All other cases are complicated. Ask the output oracle. |
| const machine_mode mode = size_to_mode (m_size); |
| rtx xscratch = m_scratch ? all_regs_rtx[m_scratch] : NULL_RTX; |
| rtx xop[] = { gen_rtx_REG (mode, m_regno), gen_int_mode (m_isrc, mode) }; |
| int len; |
| if (m_size == 4) |
| output_reload_insisf (xop, xscratch, &len); |
| else |
| output_reload_in_const (xop, xscratch, &len, false); |
| |
| return len; |
| } |
| |
| // Emit the corresponding REG = REG-or-CONST_INT insn. Returns the number |
| // of emitted insns, or aborts when the insn is not of that form. |
| int |
| insninfo_t::emit_insn () const |
| { |
| int n_insns = 0; |
| |
| machine_mode mode = size_to_mode (m_size); |
| rtx xsrc = NULL_RTX; |
| rtx xscratch = NULL_RTX; |
| |
| gcc_assert (m_size > 0); |
| |
| switch (m_code) |
| { |
| default: |
| gcc_unreachable (); |
| |
| case CONST_INT: |
| xsrc = gen_int_mode (m_isrc, mode); |
| if (m_scratch && m_regno < REG_16) |
| xscratch = gen_rtx_REG (QImode, m_scratch); |
| break; |
| |
| case REG: |
| gcc_assert (gpr_regno_p (m_rsrc, m_size)); |
| if (m_regno != m_rsrc) |
| xsrc = gen_rtx_REG (mode, m_rsrc); |
| break; |
| } |
| |
| if (xsrc) |
| { |
| rtx dest = gen_rtx_REG (mode, m_regno); |
| emit_valid_move_clobbercc (dest, xsrc, xscratch); |
| n_insns += 1; |
| } |
| |
| return n_insns; |
| } |
| |
| |
| // Entering a basic block means combining known register values from |
| // all incoming BBs. |
| void |
| bbinfo_t::enter () |
| { |
| avr_dump ("\n;; Entering [bb %d]\n", bb->index); |
| |
| gcc_assert (! done); |
| |
| edge e; |
| edge_iterator ei; |
| gprmask_t pred_known_mask = ~0u; |
| bbinfo_t *bbi = nullptr; |
| |
| // A quick iteration over all predecessors / incoming edges to reveal |
| // whether this BB is worth a closer look. |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| { |
| basic_block pred = e->src; |
| bbi = & bb_info[pred->index]; |
| |
| pred_known_mask &= bbi->regs.known; |
| |
| if (dump_file) |
| { |
| avr_dump (";; [bb %d] <- [bb %d] ", e->dest->index, e->src->index); |
| if (bbi->done) |
| bbi->regs.dump (); |
| else |
| avr_dump (" (unknown)\n"); |
| } |
| } |
| |
// Only if all predecessors have already been handled can we
// have known values as we are entering the current BB.
| if (pred_known_mask != 0 |
| && bbi != nullptr) |
| { |
| // Initialize current BB info from BI, an arbitrary predecessor. |
| |
| regs = bbi->regs; |
| |
// Coalesce the output values from all predecessor BBs. At the
| // start of the current BB, a value is only known if it is known |
| // in *all* predecessors and *all* these values are the same. |
| FOR_EACH_EDGE (e, ei, bb->preds) |
| { |
| regs.coalesce (bb_info[e->src->index].regs); |
| } |
| } |
| |
| if (dump_file) |
| { |
| avr_dump (";; [bb %d] known at start: ", bb->index); |
| if (regs.known) |
| regs.dump (); |
| else |
| avr_dump (" (none)\n"); |
| avr_dump ("\n"); |
| } |
| } |
| |
| |
| void |
| bbinfo_t::leave () |
| { |
| done = true; |
| |
| if (dump_file) |
| fprintf (dump_file, ";; Leaving [bb %d]\n\n", bb->index); |
| } |
| |
| |
| /* Initialize according to INSN which is a 1-byte single_set that's |
| (effectively) a reg = reg or reg = const move. INSN may be the result |
| of the current pass's optimization, e.g. something like INC R2 where R2 |
| has a known content. MEMO is the state prior to INSN. Only CONST |
cases are recorded, plus cases that are non-trivial, for example when
an XOR decays to a move. */
| |
| bool |
| insninfo_t::init1 (insn_optimize_data_t &iod, int max_size, |
| const char *purpose = "") |
| { |
| m_size = 0; |
| m_insn = iod.insn; |
| m_old_code = UNKNOWN; |
| iod.unused = false; |
| |
| if (! iod.insn |
| || ! (m_set = single_set_with_scratch (iod.insn, m_scratch))) |
| return false; |
| |
| rtx dest = SET_DEST (m_set); |
| machine_mode mode = GET_MODE (dest); |
| const int n_bytes = GET_MODE_SIZE (mode); |
| max_size = std::min (max_size, FUSE_MOVE_MAX_MODESIZE); |
| |
| if (! REG_P (dest) |
| || END_REGNO (dest) > REG_32 |
| || n_bytes > max_size) |
| return false; |
| |
| // Omit insns that (explicitly) touch fixed GPRs in any way. |
| using elt0_getter_HRS = elt0_getter<HARD_REG_SET, HARD_REG_ELT_TYPE>; |
| HARD_REG_SET hregs; |
| CLEAR_HARD_REG_SET (hregs); |
| find_all_hard_regs (PATTERN (iod.insn), & hregs); |
| if (memento_t::fixed_regs_mask & (gprmask_t) elt0_getter_HRS::get (hregs)) |
| { |
| avr_dump (";; %sinit1 has fixed GPRs\n", purpose); |
| return false; |
| } |
| |
| if ((iod.unused = find_reg_note (iod.insn, REG_UNUSED, dest))) |
| return false; |
| |
| m_src = SET_SRC (m_set); |
| m_regno = REGNO (dest); |
| const rtx_code src_code = GET_CODE (m_src); |
| |
| m_ai = absint_t::explore (m_src, iod.regs, mode); |
| |
| if (m_ai.popcount ()) |
| { |
| if (m_ai.end_knows (CONST_INT) >= n_bytes) |
| { |
| m_code = CONST_INT; |
| m_old_code = CONSTANT_P (m_src) ? UNKNOWN : src_code; |
| m_isrc = m_ai.get_value (n_bytes); |
| m_size = n_bytes; |
| } |
| else if (! REG_P (m_src) |
| && n_bytes == 1 |
| && m_ai.end_knows (REG) >= n_bytes) |
| { |
| m_code = REG; |
| m_old_code = src_code; |
| m_rsrc = m_ai[0].regno (); |
| m_size = n_bytes; |
| } |
| else if (n_bytes == 1) |
| { |
| absint_byte_t &aib = m_new_src; |
| aib = m_ai[0].find_alternative_binary (iod.regs); |
| |
| if (aib.arity () == 2 |
| && aib.arg (0).regno == m_regno) |
| { |
| m_old_code = src_code; |
| m_code = aib.get_code (); |
| m_size = n_bytes; |
| } |
| } |
| else if (n_bytes >= 2 |
| && m_ai.end_knows (VALUE) >= n_bytes) |
| { |
| m_code = src_code; |
| m_size = n_bytes; |
| } |
| |
| if (dump_file && m_size != 0) |
| { |
| avr_dump (";; %sinit1 (%C", purpose, |
| m_old_code ? m_old_code : m_code); |
| if (m_old_code) |
| avr_dump ("-> %C", m_code); |
| avr_dump (") insn %d to R%d[%d] := %C:%m = ", INSN_UID (iod.insn), |
| m_regno, n_bytes, src_code, mode); |
| |
| m_ai.dump (dest); |
| |
| if (dump_flags & TDF_FOLDING) |
| avr_dump ("\n"); |
| } |
| } |
| |
| return m_size != 0; |
| } |
| |
| |
| // The private worker for .apply_insn(). |
| void |
| memento_t::apply_insn1 (rtx_insn *insn, bool unused) |
| { |
| gcc_assert (NONDEBUG_INSN_P (insn)); |
| |
| if (INSN_CODE (insn) == CODE_FOR_set_some) |
| { |
| // This insn only sets some selected bytes of register $3 of |
| // modesize $4. If non-0, then $1 is a QImode scratch d-reg with |
| // a known value of $2. |
| |
| const auto &xop = recog_data.operand; |
| extract_insn (insn); |
| gcc_assert (recog_data.n_operands == 7); |
| gcc_assert (set_some_operation (xop[0], VOIDmode)); |
| |
| const rtx &xscratch = xop[1]; |
| const rtx &xscratch_value = xop[2]; |
| const int sets_start = 5; |
| |
| for (int i = sets_start; i < XVECLEN (xop[0], 0); ++i) |
| { |
| rtx xset = XVECEXP (xop[0], 0, i); |
| avr_dump (";; set_some %r = %r\n", XEXP (xset, 0), XEXP (xset, 1)); |
| set_values (XEXP (xset, 0), XEXP (xset, 1)); |
| } |
| |
| if (REG_P (xscratch)) |
| { |
| avr_dump (";; set_some %r = %r restore\n", xscratch, xscratch_value); |
| set_values (xscratch, xscratch_value); |
| } |
| |
| return; |
| } // CODE_FOR_set_some |
| |
| memento_t mold = *this; |
| |
| // When insn changes a register in whatever way, set it to "unknown". |
| |
| HARD_REG_SET rset; |
| find_all_hard_reg_sets (insn, &rset, true /* implicit */); |
| (*this) &= ~rset; |
| |
| rtx set = single_set (insn); |
| rtx dest; |
| |
| if (! set |
| || ! REG_P (dest = SET_DEST (set)) |
| || END_REGNO (dest) > REG_32 |
| || (regmask (dest) & memento_t::fixed_regs_mask)) |
| return; |
| |
| rtx src = SET_SRC (set); |
| const rtx_code src_code = GET_CODE (src); |
| const machine_mode mode = GET_MODE (dest); |
| const int n_bytes = GET_MODE_SIZE (mode); |
| |
| // Insns that are too complicated or have a poor yield. |
// Just record which regs are clobbered / changed.
| if (n_bytes > FUSE_MOVE_MAX_MODESIZE |
| || MEM_P (src) |
| || (REG_P (src) && END_REGNO (src) > REG_32)) |
| { |
// Comparisons may clobber the compared reg when it is unused afterwards.
| if (src_code == COMPARE |
| && REG_P (XEXP (src, 0)) |
| && CONSTANT_P (XEXP (src, 1))) |
| { |
| rtx reg = XEXP (src, 0); |
| for (unsigned r = REGNO (reg); r < END_REGNO (reg); ++r) |
| set_unknown (r); |
| } |
| return; |
| } |
| |
| if (unused) |
| return; |
| |
| // Simulate the effect of some selected insns that are likely to produce |
| // or propagate known values. |
| |
| // Get an abstract representation of src. Bytes may be unknown, |
| // known to equal some 8-bit compile-time constant (CTC) value, |
// or known to equal some 8-bit register.
// TODO: Currently, only the ai[].val8 knowledge is used.
| // What's the best way to make use of ai[].regno ? |
| |
| absint_t ai = absint_t::explore (src, mold, mode); |
| |
| if (ai.popcount ()) |
| { |
| avr_dump (";; apply_insn %d R%d[%d] := %C:%m = ", INSN_UID (insn), |
| REGNO (dest), n_bytes, src_code, mode); |
| ai.dump (); |
| |
| for (int i = 0; i < n_bytes; ++i) |
| if (ai[i].can (CONST_INT)) |
| set_value (i + REGNO (dest), ai[i].val8 ()); |
| } |
| } |
| |
| |
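// Apply the effect of ply P to the tracked register values. For example
// (illustrative): when R24:R25 is known to hold 0x00ff and P is an ADIW ply
// with arg = 2, then R24 = 0x01 and R25 = 0x01 afterwards, i.e. the pair
// holds 0x0101.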
| void |
| memento_t::apply (const ply_t &p) |
| { |
| if (p.is_movw ()) |
| { |
| copy_value (p.regno, p.arg); |
| copy_value (p.regno + 1, p.arg + 1); |
| } |
| else if (p.is_adiw ()) |
| { |
| int val = p.arg + values[p.regno] + 256 * values[1 + p.regno]; |
| set_value (p.regno, val); |
| set_value (p.regno + 1, val >> 8); |
| } |
| else if (p.size == 1) |
| { |
| switch (p.code) |
| { |
| default: |
| gcc_unreachable (); |
| break; |
| |
| case REG: |
| copy_value (p.regno, p.arg); |
| break; |
| |
| case SET: |
| set_value (p.regno, p.arg); |
| if (p.scratch >= REG_16) |
| set_unknown (p.scratch); |
| break; |
| |
| case MOD: // BLD |
| gcc_assert (knows (p.regno)); |
| if (popcount_hwi (p.arg) == 1) |
| values[p.regno] |= p.arg; |
| else if (popcount_hwi (p.arg) == 7) |
| values[p.regno] &= p.arg; |
| else |
| gcc_unreachable (); |
| break; |
| |
| #define DO_ARITH1(code, expr) \ |
| case code: \ |
| gcc_assert (knows (p.regno)); \ |
| { \ |
| const int x = values[p.regno]; \ |
| set_value (p.regno, expr); \ |
| } \ |
| break |
| |
| #define DO_ARITH2(code, expr) \ |
| case code: \ |
| gcc_assert (knows (p.regno)); \ |
| gcc_assert (knows (p.arg)); \ |
| { \ |
| const int x = values[p.regno]; \ |
| const int y = values[p.arg]; \ |
| set_value (p.regno, expr); \ |
| } \ |
| break |
| |
| DO_ARITH1 (NEG, -x); |
| DO_ARITH1 (NOT, ~x); |
| DO_ARITH1 (PRE_INC, x + 1); |
| DO_ARITH1 (PRE_DEC, x - 1); |
| DO_ARITH1 (ROTATE, (x << 4) | (x >> 4)); |
| DO_ARITH1 (ASHIFT, x << 1); |
| DO_ARITH1 (LSHIFTRT, x >> 1); |
| DO_ARITH1 (ASHIFTRT, (x >> 1) | (x & 0x80)); |
| |
| DO_ARITH2 (AND, x & y); |
| DO_ARITH2 (IOR, x | y); |
| DO_ARITH2 (XOR, x ^ y); |
| DO_ARITH2 (PLUS, x + y); |
| DO_ARITH2 (MINUS, x - y); |
| #undef DO_ARITH1 |
| #undef DO_ARITH2 |
| } |
| } // size == 1 |
| else |
| gcc_unreachable (); |
| } |
| |
| |
// Try to find a sequence of ply_t's that represents an II.m_regno = II.m_isrc
// insn that sets a reg to a compile-time constant, and that is more
// efficient than just a move insn. (When try_split_any_p is on, then
// solutions that perform the same as a move insn are also allowed.)
| // MEMO0 is the GPR state before II runs. A solution has been found |
| // when .fpd->solution has at least one entry. LEN specifies the |
| // depth of recursion, which works on the LEN-th ply_t. |
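// Example (illustrative): when R24:R25 is already known to hold 100 and II
// asks for R24:R25 = 101, a single ADIW ply bridges the Hamming distance
// of 1 and beats the two LDIs of a full constant reload; the recursion
// below enumerates such candidates and keeps the cheapest sequence in
// fpd->solution.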
| void |
| bbinfo_t::find_plies (int len, const insninfo_t &ii, const memento_t &memo0) |
| { |
| if (len > fpd->n_best_plys) |
| return; |
| |
| memento_t memo = memo0; |
| bool ply_applied_p = false; |
| |
| //!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! |
| const bool extra = dump_file && (dump_flags & TDF_FOLDING); |
| |
| if (extra) |
| { |
| fprintf (dump_file, ";; #%d (HAM=%d): get_plies R%d[%d] = ", len, |
| ii.hamming (fpd->regs0), ii.m_regno, ii.m_size); |
| fprintf (dump_file, "0x%0*" PRIx64 "\n", |
| 2 * ii.m_size, ii.m_isrc & size_to_mask (ii.m_size)); |
| } |
| |
| plies_t &ps = fpd->plies[len - 1]; |
| |
| const ply_t *const prev = len >= 2 ? fpd->ply_stack[len - 2] : nullptr; |
| const ply_t *const prev2 = len >= 3 ? fpd->ply_stack[len - 3] : nullptr; |
| |
| bbinfo_t::get_plies (ps, ii, memo0, prev); |
| |
| #define NEXT(reason) \ |
| do { \ |
| if (extra) \ |
| fprintf (dump_file, ";; cont=%s\n", reason); \ |
| goto next; \ |
| } while (0) |
| |
| for (int ip = 0; ip < ps.n_plies; ++ip) |
| { |
| const ply_t &p = ps.plies[ip]; |
| |
| fpd->ply_stack[len - 1] = &p; |
| |
| if (0) |
| next: continue; |
| |
| if (extra) |
| ply_t::dump_plys (dump_file, len, 1, fpd->ply_stack + len - 1, memo0); |
| |
| // A MOVW with a Hamming distance of < 2 requires more plys. |
| if (p.is_movw () && len + (2 - p.dhamming) > fpd->n_best_plys) |
| NEXT ("movw.plys"); |
| |
| if (len >= 2) |
| { |
| // Destroying (parts of) the results of the previous ply |
| // won't yield an optimal sequence. |
| if (p.overrides (prev)) |
| NEXT ("overrides"); |
| |
| // When two plys are independent of each other, then only |
| // investigate sequences that operate on the higher reg first. |
// This canonicalization reduces the number of candidates.
| if (p.commutes_with (prev, ii.m_scratch) |
| && p.regno > prev->regno) |
| NEXT ("noncanonic"); |
| |
| // Two subsequent BLDs touching the same register. |
| if (p.is_bld () |
| && prev->is_bld () |
| && p.changes_result_of (prev)) |
| NEXT ("2bld"); |
| |
| // When there is a BLD, then at least 2 of the same kind |
| // shall occur in a row. |
| if (prev->is_bld () |
| && ! p.is_bld () |
| && (len == 2 |
| || (prev->is_setbld () && ! prev2->is_setbld ()) |
| || (prev->is_cltbld () && ! prev2->is_cltbld ()))) |
| NEXT ("1bld"); |
| } |
| |
| // The hamming delta of a MOVW may be less than 2, namely 0 or 1. |
| // When the latter is the case, then a reasonable sequence must |
| // modify the result of the MOVW. |
| if (len >= 2 |
| && prev->is_movw () |
| && prev->dhamming == 1 |
| && ! p.changes_result_of (prev)) |
| NEXT ("movw.dh=1"); |
| |
| if (len >= 3 |
| && prev2->is_movw () |
| && prev2->dhamming == 0 |
| && ! p.changes_result_of (prev2)) |
| NEXT ("movw.dh=0"); |
| |
| // When setting an n-byte destination, then at most n/2 MOVWs |
| // will occur in an optimal sequence. |
| int n_movw = 0; |
| for (int i = 0; i < len; ++i) |
| n_movw += fpd->ply_stack[i]->is_movw (); |
| if (n_movw > ii.m_size / 2) |
| NEXT ("movws"); |
| |
| if (ply_applied_p) |
| memo = memo0; |
| |
| memo.apply (p); |
| |
| ply_applied_p = true; |
| |
| // Calculate the cost of the sequence we have so far. Scale by some |
| // factor so that we can express that ADIW is more expensive than MOVW |
| // because it is slower, but without defeating MOVW. |
| const int SCALE = 4; |
| |
| int penal = 0; |
| int cost = SCALE * 0; |
| |
| bool movw_p = 0; |
| for (int i = 0; i < len; ++i) |
| { |
| bool adiw_p = fpd->ply_stack[i]->is_adiw (); |
| cost += SCALE * fpd->ply_stack[i]->cost + adiw_p; |
| penal += adiw_p; |
| movw_p |= fpd->ply_stack[i]->is_movw (); |
| } |
| penal += movw_p; |
| |
| const int hamm = ii.hamming (memo); |
| |
// The current Hamming distance yields a lower bound on how many
// plys are still required. Take that future cost into account already now.
| int future_cost = AVR_HAVE_MOVW || (AVR_HAVE_ADIW && ii.m_regno >= REG_22) |
| ? (1 + hamm) / 2 |
| : hamm; |
| |
| // Similarly, when MOVW doesn't decrease the Hamming distance by 2, |
| // then we know that at least 2 - dhamming plys must follow in the |
| // future. (MOVW + ADIW will not occur.) |
| if (p.is_movw ()) |
| future_cost = std::max (future_cost, 2 - p.dhamming); |
| |
| if (extra && future_cost) |
| avr_dump (";; future cost = %d, dh=%d\n", future_cost, hamm); |
| |
| cost += SCALE * future_cost; |
| |
| bool profitable = (cost < SCALE * fpd->max_ply_cost |
| || (bbinfo_t::try_split_any_p |
| && fpd->solution.n_plies == 0 |
| && cost / SCALE <= fpd->max_ply_cost |
| && cost / SCALE == fpd->movmode_cost)); |
| if (! profitable) |
| { |
| if (extra) |
| avr_dump (";; cont=cost %d+%d/%d\n", cost / SCALE, penal, SCALE); |
| continue; |
| } |
| |
| if (hamm) |
| { |
| // Go down that rabbit hole. |
| gcc_assert (ply_applied_p); |
| bbinfo_t::find_plies (1 + len, ii, memo); |
| continue; |
| } |
| |
| // Found a solution that's better than everything so far. |
| |
| // Reduce the upper cost bound according to the found solution. |
| // No future solution will be more expensive. |
| fpd->max_ply_cost = cost / SCALE; |
| |
| fpd->solution = plies_t (len, fpd->ply_stack); |
| |
| if (dump_file) |
| { |
| avr_dump (";; #%d FOUND COST = %d%s\n", len, cost / SCALE, |
| penal ? " with penalty" : ""); |
| ply_t::dump_plys (dump_file, 0, len, fpd->ply_stack, fpd->regs0); |
| if (extra) |
| avr_dump (";; END\n"); |
| } |
| } // for ply_t's |
| |
| #undef NEXT |
| } |
| |
| |
| // Run .find_plies() and return true when .fpd->solution is a sequence of ply_t's |
| // that represents II, a REG = CONST insn. MEMO is the GPR state prior to II. |
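// Example (illustrative): when the plain mov<mode> would cost 4 words, only
// ply sequences costing at most 3 words are accepted (with try_split_any_p
// also an equally expensive one), and each solution found further lowers
// the bound for the remaining search.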
| bool |
| bbinfo_t::run_find_plies (const insninfo_t &ii, const memento_t &memo) const |
| { |
| fpd->solution.reset (); |
| fpd->regs0 = memo; |
| fpd->n_get_plies = 0; |
| |
| const int hamm = ii.hamming (memo); |
| |
| if (hamm == 0) |
| { |
| avr_dump (";; Found redundant insn %d\n", |
| ii.m_insn ? INSN_UID (ii.m_insn) : 0); |
| return true; |
| } |
| |
| // Upper bound (in words) for any solution that's better than mov<mode>. |
// Will be decreased by find_plies() as it finds better solutions.
| fpd->movmode_cost = ii.cost (); |
| fpd->max_ply_cost = fpd->movmode_cost; |
| |
| // With a non-zero Hamming distance, this insn will require at least one |
| // instruction. When the upper bound for required instructions is that |
| // small, then the current insn is good enough. |
| if (fpd->max_ply_cost <= 1) |
| return false; |
| |
| fpd->n_best_plys = ii.n_best_plys (hamm); |
| gcc_assert (fpd->n_best_plys <= N_BEST_PLYS); |
| |
| if (dump_file) |
| { |
| const uint64_t mask = size_to_mask (ii.m_size); |
| fprintf (dump_file, ";; find_plies R%d[%d] = 0x%0*" PRIx64, |
| ii.m_regno, ii.m_size, 2 * ii.m_size, ii.m_isrc & mask); |
| if (ii.m_scratch) |
| fprintf (dump_file, ", scratch=r%d", ii.m_scratch); |
| memo.dump ("\n;; regs%s\n"); |
| } |
| |
| avr_dump (";; mov<mode> cost = %d\n", fpd->max_ply_cost); |
| avr_dump (";; max plys = %d\n", fpd->n_best_plys); |
| ply_t::n_ply_ts = 0; |
| |
| find_plies (1, ii, memo); |
| |
| avr_dump (";; get_plies called %d times\n", fpd->n_get_plies); |
| avr_dump (";; n_ply_ts = %d\n", ply_t::n_ply_ts); |
| ply_t::max_n_ply_ts = std::max (ply_t::max_n_ply_ts, ply_t::n_ply_ts); |
| |
| return fpd->solution.n_plies != 0; |
| } |
| |
| |
| // Try to propagate __zero_reg__ to a mem = reg insn's source. |
| // Returns true on success and sets .n_new_insns. |
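// Example (illustrative): for a store like  *X = R22  in HImode where R22
// and R23 are known to be 0, emit  *X = 0  instead; the backend prints such
// a store by means of __zero_reg__, so the source registers are no longer
// referenced.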
| bool |
| optimize_data_t::try_mem0 (bbinfo_t *) |
| { |
| rtx_insn *insn = curr.ii.m_insn; |
| rtx set, mem, reg; |
| machine_mode mode; |
| |
| if (insn |
| && (set = single_set (insn)) |
| && MEM_P (mem = SET_DEST (set)) |
| && REG_P (reg = SET_SRC (set)) |
| && GET_MODE_SIZE (mode = GET_MODE (mem)) <= 4 |
| && END_REGNO (reg) <= REG_32 |
| && ! (regmask (reg) & memento_t::fixed_regs_mask) |
| && curr.regs.have_value (REGNO (reg), GET_MODE_SIZE (mode), 0x0)) |
| { |
| avr_dump (";; Found insn %d: mem:%m = 0 = r%d\n", INSN_UID (insn), |
| mode, REGNO (reg)); |
| |
| // Some insns like PUSHes don't clobber REG_CC. |
| bool clobbers_cc = GET_CODE (PATTERN (insn)) == PARALLEL; |
| |
| if (clobbers_cc) |
| emit_valid_move_clobbercc (mem, CONST0_RTX (mode)); |
| else |
| emit_valid_insn (gen_rtx_SET (mem, CONST0_RTX (mode))); |
| |
| n_new_insns = 1; |
| |
| return true; |
| } |
| |
| return false; |
| } |
| |
| |
| // Try to fuse two 1-byte insns .prev and .curr to one 2-byte insn (MOVW). |
| // Returns true on success, and sets .n_new_insns, .ignore_mask etc. |
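// Example (illustrative, non-fixed register numbers assumed): PREV is
// R2 = 0x01 and CURR is R3 = 0x01. They combine to R2:R3 = 0x0101, and
// when, say, R10 is known to hold 0x01, the pair can be rebuilt by two
// MOVs from R10 (or one MOVW when R10:R11 holds 0x0101), which is cheaper
// than reloading the 16-bit constant through a scratch register.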
| bool |
| optimize_data_t::try_fuse (bbinfo_t *bbi) |
| { |
| insninfo_t comb; |
| |
| if (! prev.ii.m_size |
| || ! curr.ii.m_size |
| || ! comb.combine (prev.ii, curr.ii)) |
| return false; |
| |
| avr_dump (";; Working on fuse of insn %d + insn %d = 0x%04x\n", |
| INSN_UID (prev.insn), INSN_UID (curr.insn), |
| (unsigned) comb.m_isrc); |
| |
| bool found = bbi->run_find_plies (comb, prev.regs); |
| if (found) |
| { |
| avr_dump (";; Found fuse of insns %d and %d\n", |
| INSN_UID (prev.insn), INSN_UID (curr.insn)); |
| |
| n_new_insns = bbinfo_t::fpd->solution.emit_insns (comb, prev.regs); |
| delete_prev_p = true; |
| |
| if (prev.ii.m_scratch) |
| ignore_mask |= regmask (prev.ii.m_scratch, 1); |
| if (curr.ii.m_scratch) |
| ignore_mask |= regmask (curr.ii.m_scratch, 1); |
| ignore_mask &= ~regmask (comb.m_regno, comb.m_size); |
| } |
| |
| return found; |
| } |
| |
| |
| // Try to replace an arithmetic 1-byte insn by a reg-reg move. |
| // Returns true on success, and sets .n_new_insns etc. |
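// Example (illustrative): with R24 known to hold 0, the 1-byte insn
// R24 = R24 ^ R25 decays to the plain move R24 = R25, and that move is
// emitted instead of the EOR.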
| bool |
| optimize_data_t::try_simplify (bbinfo_t *) |
| { |
| if (curr.ii.m_size == 1 |
| && curr.ii.m_old_code != REG |
| && curr.ii.m_code == REG) |
| { |
| avr_dump (";; Found simplify of insn %d\n", INSN_UID (curr.insn)); |
| |
| n_new_insns = curr.ii.emit_insn (); |
| |
| return true; |
| } |
| |
| return false; |
| } |
| |
| |
| // Try to replace XEXP (*, 1) of a binary operation by a cheaper expression. |
| // Returns true on success; sets .n_new_insns, .ignore_mask, .delete_prev_p. |
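// Example (illustrative): R24 = R24 + 2 would be output as two INCs; when
// some GPR, say R18, is known to hold 2, the cheaper R24 = R24 + R18 (one
// ADD) is emitted instead. Likewise, when the 2nd operand is a reload of
// a constant that dies here, the reload can be bypassed and deleted
// (delete_prev_p).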
| bool |
| optimize_data_t::try_bin_arg1 (bbinfo_t *) |
| { |
| if (curr.ii.m_size != 1 |
| || curr.ii.m_new_src.arity () != 2 |
| || curr.unused) |
| return false; |
| |
| avr_dump (";; Working on bin_arg1 insn %d\n", INSN_UID (curr.insn)); |
| |
| gcc_assert (curr.ii.m_src && BINARY_P (curr.ii.m_src)); |
| rtx xarg1_old = XEXP (curr.ii.m_src, 1); |
| |
| const absint_byte_t &aib = curr.ii.m_new_src; |
| const absint_val_t &arg0 = aib.arg (0); |
| const absint_val_t &arg1 = aib.arg (1); |
| const absint_val_t &arg1_old = curr.ii.m_ai[0].arg (1); |
| |
| rtx src = NULL_RTX; |
| |
| if (CONSTANT_P (xarg1_old)) |
| { |
| // Sometimes, we allow expensive constants as 2nd operand like |
| // in R2 += 2 which produces two INCs. When we have the |
| // constant handy in a reg, then use that instead of the constant. |
| const rtx_code code = aib.get_code (); |
| gcc_assert (arg1.val8 == (INTVAL (xarg1_old) & 0xff)); |
| |
| if (AVRasm::constant_cost (code, arg0.regno, arg1.val8) > 1) |
| src = aib.to_rtx (); |
| } |
| else if (REG_P (xarg1_old) |
| && dead_or_set_p (curr.insn, xarg1_old)) |
| { |
| src = aib.to_rtx (); |
| |
| // The 2nd operand is a reg with a known content that dies |
| // at the current insn. Chances are high that the register |
| // holds a reload value only used by the current insn. |
| if (prev.ii.m_size == 1 |
| && rtx_equal_p (xarg1_old, SET_DEST (prev.ii.m_set)) |
| && CONSTANT_P (prev.ii.m_src)) |
| { |
| avr_dump (";; Found dying reload insn %d\n", INSN_UID (prev.insn)); |
| |
| delete_prev_p = true; |
| ignore_mask = regmask (arg1_old.regno, 1); |
| } |
| } |
| |
| if (src) |
| { |
| rtx dest = SET_DEST (curr.ii.m_set); |
| |
| avr_dump (";; Found bin_arg1 for insn %d: ", INSN_UID (curr.insn)); |
| avr_dump ("%C:%m %r", curr.ii.m_code, GET_MODE (dest), xarg1_old); |
| aib.dump (" = %s\n"); |
| |
| emit_valid_move_clobbercc (dest, src); |
| n_new_insns = 1; |
| } |
| |
| return src != NULL_RTX; |
| } |
| |
| |
| // Try to replace a REG = CONST insn by a cheaper sequence. |
| // Returns true on success, and sets .n_new_insns, .ignore_mask etc. |
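// Example (illustrative): R24:R25 = 0x0100 where R25 is already known to
// hold 0x01 has a Hamming distance of 1; a single instruction that sets
// R24 to zero is enough, whereas the plain 16-bit constant load would
// cost two.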
| bool |
| optimize_data_t::try_split_ldi (bbinfo_t *bbi) |
| { |
| if (! curr.ii.m_size |
| || curr.unused |
| || curr.ii.m_code != CONST_INT |
| || (! bbinfo_t::try_split_any_p |
| // Finding plys will only ever succeed when there are |
| // regs with a known value. |
| && ! (curr.regs.known |
| || (AVR_HAVE_MOVW |
| && curr.ii.m_regno < REG_16 && curr.ii.m_size == 4)))) |
| return false; |
| |
| avr_dump (";; Working on split_ldi insn %d\n", INSN_UID (curr.insn)); |
| |
| bool found = bbi->run_find_plies (curr.ii, curr.regs); |
| if (found) |
| { |
| avr_dump (";; Found split for ldi insn %d\n", INSN_UID (curr.insn)); |
| |
| n_new_insns = bbinfo_t::fpd->solution.emit_insns (curr.ii, curr.regs); |
| |
| if (curr.ii.m_scratch) |
| ignore_mask = regmask (curr.ii.m_scratch, 1); |
| } |
| |
| return found; |
| } |
| |
| |
| // Helper for try_split_any(). |
| bool |
| optimize_data_t::fail (const char *reason) |
| { |
| n_new_insns = -1; |
| |
| if (dump_file) |
| fprintf (dump_file, ";; Giving up split_any: %s\n", reason); |
| |
| return false; |
| } |
| |
| |
| // Helper for try_split_any(). |
| rtx_insn * |
| optimize_data_t::emit_and_apply_move (memento_t &memo, rtx dest, rtx src) |
| { |
| rtx_insn *insn = emit_valid_move_clobbercc (dest, src); |
| n_new_insns += 1; |
| memo.apply_insn (insn, false); |
| |
| return insn; |
| } |
| |
| |
// Set X0 and X1 so that they are operands valid for an andqi3, iorqi3,
// xorqi3 or addqi3 insn with destination R_DEST. The method loads X1 into
// a scratch reg as needed and records the GPR effect in IOD.regs.
// EXTRA_COST accumulates extra costs, in units of words, of insns that
// cost more than one instruction. This is a helper for try_split_any().
| bool |
| optimize_data_t |
| ::get_2ary_operands (rtx_code &code, const absint_byte_t &aib, |
| insn_optimize_data_t &iod, int r_dest, |
| absint_val_t &x0, absint_val_t &x1, int &extra_cost) |
| { |
| if (code != IOR && code != AND && code != XOR && code != PLUS) |
| return fail ("2ary: unknown code"); |
| |
| x0 = aib.arg (0); |
| x1 = aib.arg (1); |
| |
| if (! x0.knows_regno () |
| || x1.clueless ()) |
| return fail ("2ary: clueless"); |
| |
| int val8 = x1.val8; |
| int val8_cost = val8 < 0 ? 100 : AVRasm::constant_cost (code, r_dest, val8); |
| |
| if (x0.regno == r_dest |
| && (x1.knows_regno () |
| || val8_cost <= 1)) |
| { |
| if (code == XOR |
| && val8 == 0x80 |
| && x0.regno >= REG_16) |
| { |
// xorxi3 can only do "r,0,r".
| // x0 ^ 0x80 <=> x0 - 0x80. |
| x1.regno = 0; |
| code = MINUS; |
| } |
| return true; |
| } |
| |
| const bool and_1_bit = code == AND && popcount_hwi (val8) == 1; |
| // andqi3 has a "r,r,Cb1" alternative where Cb1 has exactly 1 bit set. |
| // This can accommodate bytes of higher AND Cb<N> alternatives. |
| if (x0.regno != r_dest) |
| { |
| if (and_1_bit) |
| { |
| extra_cost += 1 + (r_dest < REG_16); |
| return true; |
| } |
| else if (x1.regno == r_dest) |
| { |
| std::swap (x0, x1); |
| return true; |
| } |
| return fail ("2ary is a 3-operand insn"); |
| } |
| |
| // Now we have: |
| // 1) r_dest = x0.regno, and |
| // 2) x1 is val8, and |
| // 3) x1 costs 2. |
| |
| const bool needs_scratch_p = select<bool>() |
| : code == XOR ? true |
| : code == AND ? popcount_hwi (val8) != 7 |
| : code == IOR ? popcount_hwi (val8) != 1 |
| : code == PLUS ? IN_RANGE (val8, 3, 0xff - 3) |
| : bad_case<bool> (); |
| |
| const int r_val8 = iod.regs.regno_with_value (val8, 0 /* excludes: none */); |
| if (r_val8) |
| { |
| // Found a reg that already holds the constant. |
| x1.val8 = -1; |
| x1.regno = r_val8; |
| return true; |
| } |
| else if (iod.ii.m_scratch) |
| { |
| // Using the insn's scratch reg. |
| rtx xdst = gen_rtx_REG (QImode, iod.ii.m_scratch); |
| rtx xsrc = gen_int_mode (x1.val8, QImode); |
| emit_and_apply_move (iod.regs, xdst, xsrc); |
| |
| x1.regno = iod.ii.m_scratch; |
| x1.val8 = -1; |
| |
| return true; |
| } |
| else if (! needs_scratch_p) |
| { |
| // Some constants (1 and -1) can be loaded without a scratch. |
| extra_cost += 1; |
| return true; |
| } |
| else if (and_1_bit) |
| { |
| // This can always fall back to BST + CLR + BLD, but may be cheaper. |
| extra_cost += 1 + (r_dest < REG_16); |
| return true; |
| } |
| |
| return fail ("2ary: expensive constant"); |
| } |
| |
| |
| static inline bool |
| any_shift_p (rtx_code code) |
| { |
| return code == LSHIFTRT || code == ASHIFTRT || code == ASHIFT; |
| } |
| |
| // Try to split .curr into a sequence of 1-byte insns. |
| // Returns true on success. Sets .n_new_insns and .ignore_mask. |
| bool |
| optimize_data_t::try_split_any (bbinfo_t *) |
| { |
| if (curr.ii.m_size < 2 |
| // Constants are split by split_ldi. |
| || CONSTANT_P (curr.ii.m_src) |
| // Splitting requires knowledge about what to do with each byte. |
| || curr.ii.m_ai.end_knows (VALUE) < curr.ii.m_size) |
| return false; |
| |
| avr_dump (";; Working on split_any %C:%m insn %d\n", curr.ii.m_code, |
| GET_MODE (SET_DEST (curr.ii.m_set)), INSN_UID (curr.insn)); |
| |
| const insninfo_t &ii = curr.ii; |
| const int n_bytes = ii.m_size; |
| int extra_cost = 0; |
| int binop_cost = -1; |
| |
| // For plain AND, IOR, XOR get the current cost in units of words. |
| if (BINARY_P (curr.ii.m_src)) |
| { |
| const rtx_code code = curr.ii.m_code; |
| if ((code == IOR || code == AND || code == XOR) |
| && REG_P (XEXP (curr.ii.m_src, 0)) |
| && CONSTANT_P (XEXP (curr.ii.m_src, 1))) |
| { |
| binop_cost = get_attr_length (curr.insn); |
| avr_dump (";; Competing against %C:%m cost = %d\n", code, |
| GET_MODE (curr.ii.m_src), binop_cost); |
| } |
| } |
| |
| // Step 1: Work out conflicts and which sign extends to perform. |
| |
| const gprmask_t regs_dest = regmask (ii.m_regno, n_bytes); |
| int r_sign = 0; |
| gprmask_t regs_signs = 0; |
| bool has_lsl = false; |
| bool has_lsr = false; |
| |
| for (int i = 0; i < n_bytes; ++i) |
| { |
| const absint_byte_t &aib = ii.m_ai[i]; |
| const int r_dest = ii.m_regno + i; |
| const gprmask_t regs_src = aib.reg_mask (); |
| |
| // When only regs to the right are used, or only regs to the left |
// are used, then there's no conflict like the one arising for rotates.
| // For now, only implement conflict-free splits. |
| has_lsl |= has_bits_in (regs_src & regs_dest, 0, r_dest - 1); |
| has_lsr |= has_bits_in (regs_src & regs_dest, r_dest + 1, 31); |
| if (has_lsl && has_lsr) |
| return fail ("has both << and >>"); |
| |
| if (aib.get_code () == SIGN_EXTEND) |
| { |
| const absint_val_t x0 = aib.arg (0); |
| if (! r_sign) |
| r_sign = x0.regno; |
| else if (r_sign != x0.regno) |
| return fail ("too many signs"); |
| |
| // Signs are handled below after all the other bytes. |
| regs_signs |= regmask (r_dest, 1); |
| } |
| } |
| |
| // Step 2: Work on the individual bytes and emit according insns. |
| |
| n_new_insns = 0; |
| memento_t memo = curr.regs; |
| |
| const int step = has_lsl ? -1 : 1; |
| const int istart = step == 1 ? 0 : n_bytes - 1; |
| const int iend = step == 1 ? n_bytes : -1; |
| |
| for (int i = istart; i != iend; i += step) |
| { |
| const absint_byte_t &aib = ii.m_ai[i]; |
| const int r_dest = ii.m_regno + i; |
| rtx_code code = aib.get_code (); |
| rtx xsrc = NULL_RTX; |
| rtx xdest = gen_rtx_REG (QImode, r_dest); |
| |
| if (code == SET) |
| { |
| const int r_src = aib.regno (false); |
| const int val8 = aib.val8 (false); |
| int r16; |
| |
| // A no-op... |
| if (r_dest == r_src) |
| continue; |
| // ...or an existing 16-bit constant... |
| else if (AVR_HAVE_MOVW |
| && i + step != iend |
| // Next is not a no-op. |
| && ii.m_ai[i + step].regno (false) != r_dest + step |
| // Eligible for MOVW. |
| && r_dest + step == (r_dest ^ 1) |
| && r_dest % 2 == i % 2 |
| && (r16 = ii.m_ai.reg16_with_value (i, i + step, memo))) |
| { |
| xdest = gen_rtx_REG (HImode, r_dest & ~1); |
| xsrc = gen_rtx_REG (HImode, r16); |
| i += step; |
| } |
| // ...or a reg-reg move from a multi-byte move... |
| else if (r_src |
| // Prefer a reg-reg move over a (potential) load |
| // of a constant, because the subsequent RTL |
| // peephole pass may combine it to a MOVW again. |
| && AVR_HAVE_MOVW |
| && REG_P (curr.ii.m_src)) |
| xsrc = gen_rtx_REG (QImode, r_src); |
| // ...or a cheap constant... |
| else if (val8 >= 0 |
| && AVRasm::constant_cost (SET, r_dest, val8) <= 1) |
| xsrc = gen_int_mode (val8, QImode); |
| // ...or a reg-reg move... |
| else if (r_src) |
| xsrc = gen_rtx_REG (QImode, r_src); |
| // ...or a costly constant that already exists in some reg... |
| else if (memo.regno_with_value (val8, 0 /* excludes: none */)) |
| xsrc = gen_rtx_REG (QImode, memo.regno_with_value (val8, 0)); |
| // ...or a costly constant loaded into curr.insn's scratch reg... |
| else if (ii.m_scratch) |
| { |
| rtx xscratch = gen_rtx_REG (QImode, ii.m_scratch); |
| rtx xval8 = gen_int_mode (val8, QImode); |
| emit_and_apply_move (memo, xscratch, xval8); |
| xsrc = xscratch; |
| } |
| // ...or a costly constant (1 or -1) that doesn't need a scratch. |
| else if (! AVRasm::ldi_needs_scratch (r_dest, val8)) |
| { |
| extra_cost += 1; |
| xsrc = gen_int_mode (val8, QImode); |
| } |
| else |
| return fail ("expensive val8"); |
| } // SET |
| else if (aib.arity () == 1) |
| { |
| if (aib.get_code () == SIGN_EXTEND) |
| // Signs are handled after all the others. |
| continue; |
| else |
| { |
| const absint_val_t x0 = aib.arg (0); |
| rtx xop0 = gen_rtx_REG (QImode, x0.regno); |
| xsrc = gen_rtx_fmt_e (code, QImode, xop0); |
| } |
| } // unary |
| else if (aib.arity () == 2) |
| { |
| absint_val_t x0; |
| absint_val_t x1; |
| insn_optimize_data_t iod (memo); |
| iod.ii = curr.ii; |
| |
| if (! get_2ary_operands (code, aib, iod, r_dest, x0, x1, extra_cost)) |
| return false; |
| rtx xop0 = gen_rtx_REG (QImode, x0.regno); |
| rtx xop1 = x1.knows_val8 () |
| ? gen_int_mode (x1.val8, QImode) |
| : gen_rtx_REG (QImode, x1.regno); |
| |
| xsrc = gen_rtx_fmt_ee (code, QImode, xop0, xop1); |
| } // binary |
| |
| if (! xsrc) |
| return fail ("no source found"); |
| |
| if (r_sign |
| && (regmask (xdest) & regmask (r_sign, 1))) |
| return fail ("clobbered r_sign"); |
| |
| emit_and_apply_move (memo, xdest, xsrc); |
| } |
| |
| // Step 3: Emit insns for sign extend. |
| // No more need to track memo beyond this point. |
| |
| if (! emit_signs (r_sign, regs_signs)) |
| return false; |
| |
| if (binop_cost >= 0) |
| { |
| avr_dump (";; Expected cost: %d + %d\n", n_new_insns, extra_cost); |
| if (n_new_insns + extra_cost > binop_cost) |
| return fail ("too expensive"); |
| } |
| |
| if (ii.m_scratch) |
| ignore_mask = regmask (ii.m_scratch, 1); |
| |
| return true; |
| } |
| |
| |
| // A helper for try_split_any() above. |
| // Emit sign extends from R_MSB.7 to all regs in REGS_SIGNS. |
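// Example (illustrative): extending the sign of R24 into R25 and R26 first
// emits R25 = R24 >> 7 (arithmetic shift), then copies R25 to R26; when
// MOVW is available and a whole even register pair needs the sign, that
// pair is copied in one go.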
| bool |
| optimize_data_t::emit_signs (const int r_msb, gprmask_t regs_signs) |
| { |
| if (! regs_signs) |
| return true; |
| else if (! r_msb) |
| return fail ("fatal: no r_msb given"); |
| |
| // Pick an arbitrary reg from the sign destinations when the source |
| // isn't one of the signs. |
| const int r_signs = regs_signs & regmask (r_msb, 1) |
| ? r_msb |
| : ctz_hwi (regs_signs); |
| |
| // Set all bits in r_signs according to the sign of r_msb using the |
| // r,r,C07 alternative of ashrqi3. |
| rtx xsrc = gen_rtx_fmt_ee (ASHIFTRT, QImode, |
| gen_rtx_REG (QImode, r_msb), GEN_INT (7)); |
| emit_valid_move_clobbercc (gen_rtx_REG (QImode, r_signs), xsrc); |
| regs_signs &= ~regmask (r_signs, 1); |
| |
| // Set up a 16-bit sign register if possible. |
| int r16_signs = 0; |
| if (regs_signs & regmask (r_signs ^ 1, 1)) |
| { |
| emit_move_mask (r_signs ^ 1, r_signs, 1, regs_signs); |
| r16_signs = r_signs & ~1; |
| } |
| |
| // Handle all 16-bit signs regs provided MOVW. |
| if (AVR_HAVE_MOVW) |
| for (int r = FIRST_GPR; r < REG_32; r += 2) |
| { |
| const gprmask_t m = regmask (r, 2); |
| if ((m & regs_signs) == m) |
| { |
| if (r16_signs) |
| emit_move_mask (r, r16_signs, 2, regs_signs); |
| else |
| { |
| emit_move_mask (r + 0, r_signs, 1, regs_signs); |
| emit_move_mask (r + 1, r_signs, 1, regs_signs); |
| r16_signs = r; |
| } |
| } |
| } |
| |
| // Handle all remaining signs. |
| while (regs_signs) |
| emit_move_mask (ctz_hwi (regs_signs), r_signs, 1, regs_signs); |
| |
| return true; |
| } |
| |
| // Helper for the method above. Move N_BYTES registers from R_SRC to R_DST, |
| // keeping track of which regs are still to be done in MASK. |
| void |
| optimize_data_t::emit_move_mask (int r_dst, int r_src, int n_bytes, |
| gprmask_t &mask) |
| { |
| const gprmask_t mask_dst = regmask (r_dst, n_bytes); |
| const gprmask_t mask_src = regmask (r_src, n_bytes); |
| gcc_assert ((mask_dst & mask) == mask_dst); |
| gcc_assert ((mask_src & mask) == 0); |
| rtx xdst = gen_rtx_REG (size_to_mode (n_bytes), r_dst); |
| rtx xsrc = gen_rtx_REG (size_to_mode (n_bytes), r_src); |
| emit_valid_move_clobbercc (xdst, xsrc); |
| n_new_insns += 1; |
| mask &= ~mask_dst; |
| } |
| |
| |
| void |
| bbinfo_t::optimize_one_block (bool &changed) |
| { |
| memento_t prev_regs; |
| |
| rtx_insn *insn = next_nondebug_insn_bb (bb, BB_HEAD (bb)); |
| |
| for (rtx_insn *next_insn; insn; insn = next_insn) |
| { |
| next_insn = next_nondebug_insn_bb (bb, insn); |
| |
| avr_dump ("\n;; Working on insn %d\n%r\n\n", INSN_UID (insn), insn); |
| |
| optimize_data_t od (prev_regs, regs); |
| |
| od.prev.insn = prev_nondebug_insn_bb (bb, insn); |
| od.curr.insn = insn; |
| |
| od.prev.ii.init1 (od.prev, 1, "IIprev "); |
| od.curr.ii.init1 (od.curr, 8, "IIcurr "); |
| |
| start_sequence (); |
| |
| bool found = ((bbinfo_t::try_fuse_p && od.try_fuse (this)) |
| || (bbinfo_t::try_bin_arg1_p && od.try_bin_arg1 (this)) |
| || (bbinfo_t::try_simplify_p && od.try_simplify (this)) |
| || (bbinfo_t::try_split_ldi_p && od.try_split_ldi (this)) |
| || (bbinfo_t::try_split_any_p && od.try_split_any (this)) |
| || (bbinfo_t::try_mem0_p && od.try_mem0 (this))); |
| |
| rtx_insn *new_insns = end_sequence (); |
| |
| gcc_assert (found == (od.n_new_insns >= 0)); |
| |
| ++tick; |
| |
| // This insn will become the previous one in the next loop iteration. |
| // Just used in dumps. |
| rtx_insn *new_curr_insn; |
| |
| if (! found) |
| { |
| // Nothing changed. |
| avr_dump (";; Keeping old route.\n"); |
| gcc_assert (! od.delete_prev_p); |
| |
| prev_regs = regs; |
| regs.apply_insn (insn, false); |
| |
| new_curr_insn = insn; |
| } |
| else |
| { |
| // We have new_insns. |
| changed = true; |
| |
| if (dump_file) |
| { |
| avr_dump ("\n;; EMIT %d new insn%s replacing ", |
| od.n_new_insns, "s" + (od.n_new_insns == 1)); |
| if (od.delete_prev_p) |
| avr_dump ("insn %d and ", INSN_UID (od.prev.insn)); |
| avr_dump ("insn %d, delete_prev=%d:\n%L\n", INSN_UID (insn), |
| od.delete_prev_p, new_insns); |
| } |
| |
| new_curr_insn = od.emit_sequence (bb, new_insns); |
| } // found |
| |
| if (dump_file && new_curr_insn) |
| { |
| avr_dump ("\n"); |
| |
| const int d = regs.distance_to (prev_regs); |
| if (d || new_curr_insn != insn) |
| avr_dump (";; %d regs changed state:\n", d); |
| |
| if (new_curr_insn != insn) |
| { |
| avr_dump (";; Befor insn %d", INSN_UID (new_curr_insn)); |
| prev_regs.dump (); |
| } |
| |
| avr_dump (";; After insn %d", INSN_UID (new_curr_insn)); |
| regs.dump (); |
| } |
| } // for BB insns |
| } |
| |
| |
| void |
| bbinfo_t::optimize_one_function (function *func) |
| { |
| bbinfo_t::fpd = XNEW (bbinfo_t::find_plies_data_t); |
| bbinfo_t::bb_info = XCNEWVEC (bbinfo_t, last_basic_block_for_fn (func)); |
| int *post_order = XNEWVEC (int, n_basic_blocks_for_fn (func)); |
| |
| plies_t::max_n_plies = 0; |
| |
| using elt0_getter_HRS = elt0_getter<HARD_REG_SET, HARD_REG_ELT_TYPE>; |
| memento_t::fixed_regs_mask = (gprmask_t) elt0_getter_HRS::get (fixed_reg_set); |
| |
| // Option -mfuse-move=<0,23> provides a 3:2:2:2 mixed radix value: |
// -mfuse-move=  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23   Digit
// fuse             1     1     1     1     1     1     1     1     1     1     1     1      0
// bin_arg1            1  1        1  1        1  1        1  1        1  1        1  1      1
// split_any                 1  1  1  1              1  1  1  1              1  1  1  1      2
// split_ldi                             1  1  1  1  1  1  1  1  1  1  1  1  1  1  1  1      3
// use arith                                                     1  1  1  1  1  1  1  1      3
| |
| // Which optimization(s) to perform. |
| bbinfo_t::try_fuse_p = avropt_fuse_move & 0x1; // Digit 0 in [0, 1]. |
| bbinfo_t::try_mem0_p = avropt_fuse_move & 0x1; // Digit 0 in [0, 1]. |
| bbinfo_t::try_bin_arg1_p = avropt_fuse_move & 0x2; // Digit 1 in [0, 1]. |
| bbinfo_t::try_split_any_p = avropt_fuse_move & 0x4; // Digit 2 in [0, 1]. |
| bbinfo_t::try_split_ldi_p = avropt_fuse_move >> 3; // Digit 3 in [0, 2]. |
| bbinfo_t::use_arith_p = (avropt_fuse_move >> 3) >= 2; // Digit 3 in [0, 2]. |
| bbinfo_t::use_set_some_p = bbinfo_t::try_split_ldi_p; // Digit 3 in [0, 2]. |
| bbinfo_t::try_simplify_p = avropt_fuse_move != 0; |
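// For example, -mfuse-move=23 (= 0b10111) enables everything above:
// fuse / mem0, bin_arg1, split_any, and split_ldi with digit value 2,
// which in turn switches on use_arith and use_set_some.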
| |
| // Topologically sort BBs from last to first. |
| |
| const int n_post_order = post_order_compute (post_order, false, false); |
| bool changed = false; |
| |
| // Traverse the BBs from first to last in order to increase the chance |
| // that register values from all incoming edges are known. |
| |
| for (int n = n_post_order - 1; n >= 0; --n) |
| { |
| basic_block bb = BASIC_BLOCK_FOR_FN (func, post_order[n]); |
| |
| bbinfo_t::bb_info[bb->index].bb = bb; |
| bbinfo_t::bb_info[bb->index].enter (); |
| bbinfo_t::bb_info[bb->index].optimize_one_block (changed); |
| bbinfo_t::bb_info[bb->index].leave (); |
| } |
| |
| if (plies_t::max_n_plies) |
| avr_dump (";; max_n_plies=%d\n", (int) plies_t::max_n_plies); |
| |
| if (changed) |
| { |
| df_note_add_problem (); |
| df_analyze (); |
| } |
| |
| XDELETEVEC (post_order); |
| XDELETEVEC (bbinfo_t::bb_info); |
| XDELETE (bbinfo_t::fpd); |
| } |
| |
| } // anonymous namespace |
| |
| |
| namespace |
| { |
| |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Try to replace 2 cbranch insns with 1 comparison and 2 branches. |
| |
| static const pass_data avr_pass_data_ifelse = |
| { |
| RTL_PASS, // type |
| "", // name (will be patched) |
| OPTGROUP_NONE, // optinfo_flags |
| TV_DF_SCAN, // tv_id |
| 0, // properties_required |
| 0, // properties_provided |
| 0, // properties_destroyed |
| 0, // todo_flags_start |
| TODO_df_finish | TODO_df_verify // todo_flags_finish |
| }; |
| |
| class avr_pass_ifelse : public rtl_opt_pass |
| { |
| public: |
| avr_pass_ifelse (gcc::context *ctxt, const char *name) |
| : rtl_opt_pass (avr_pass_data_ifelse, ctxt) |
| { |
| this->name = name; |
| } |
| |
| bool gate (function *) final override |
| { |
| return optimize > 0; |
| } |
| |
| unsigned int execute (function *func) final override; |
| }; // avr_pass_ifelse |
| |
| |
| /* Return TRUE iff comparison code CODE is explicitly signed. */ |
| |
| static bool |
| avr_strict_signed_p (rtx_code code) |
| { |
| return code == GT || code == GE || code == LT || code == LE; |
| } |
| |
| |
| /* Return TRUE iff comparison code CODE is explicitly unsigned. */ |
| |
| static bool |
| avr_strict_unsigned_p (rtx_code code) |
| { |
| return code == GTU || code == GEU || code == LTU || code == LEU; |
| } |
| |
| #include "config/avr/ranges.h" |
| |
| /* Suppose the inputs represent a code like |
| |
| if (x <CMP1> XVAL1) goto way1; |
| if (x <CMP2> XVAL2) goto way2; |
| way3:; |
| |
| with two integer mode comparisons where XVAL1 and XVAL2 are CONST_INT. |
| When this can be rewritten in the form |
| |
| if (x <cond1> xval) goto way1; |
| if (x <cond2> xval) goto way2; |
| way3:; |
| |
| then set CMP1 = cond1, CMP2 = cond2, and return xval. Else return NULL_RTX. |
| When SWAPT is returned true, then way1 and way2 must be swapped. |
When the incoming SWAPT is false, the outgoing one will be false, too. */
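
/* For example (an illustrative case, assuming unsigned x), the sequence

      if (x >= 2) goto way1;
      if (x == 0) goto way2;
      way3:;

   decomposes around the single middle value 1 and may be rewritten as

      if (x < 1) goto way2;
      if (x != 1) goto way1;
      way3:;

   where both comparisons use the same right-hand side 1 and the branch
   targets have been swapped (SWAPT). */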
| |
| static rtx |
| avr_2comparisons_rhs (rtx_code &cmp1, rtx xval1, |
| rtx_code &cmp2, rtx xval2, |
| machine_mode mode, bool &swapt) |
| { |
| const bool may_swapt = swapt; |
| swapt = false; |
| |
| ////////////////////////////////////////////////////////////////// |
| // Step 0: Decide about signedness, map xval1/2 to the range |
| // of [un]signed machine mode. |
| |
| const bool signed1_p = avr_strict_signed_p (cmp1); |
| const bool signed2_p = avr_strict_signed_p (cmp2); |
| const bool unsigned1_p = avr_strict_unsigned_p (cmp1); |
| const bool unsigned2_p = avr_strict_unsigned_p (cmp2); |
| const bool signed_p = signed1_p || signed2_p; |
| bool unsigned_p = unsigned1_p || unsigned2_p; |
| |
| using T = Ranges::scalar_type; |
| T val1 = INTVAL (xval1); |
| T val2 = INTVAL (xval2); |
| |
| if (signed_p + unsigned_p > 1) |
| { |
| // Don't go down that rabbit hole. When the RHSs are the |
| // same, we can still save one comparison. |
| return val1 == val2 ? xval1 : NULL_RTX; |
| } |
| |
| // Decide about signedness. When no explicit signedness is present, |
| // then cases that are close to the unsigned boundary like EQ 0, EQ 1 |
| // can also be optimized. |
| if (unsigned_p |
| || (! signed_p && IN_RANGE (val1, -2, 2))) |
| { |
| unsigned_p = true; |
| val1 = UINTVAL (xval1) & GET_MODE_MASK (mode); |
| val2 = UINTVAL (xval2) & GET_MODE_MASK (mode); |
| } |
| |
| // No way we can decompose the domain in a usable manner when the |
| // RHSes are too far apart. |
| if (! IN_RANGE (val1 - val2, -2, 2)) |
| return NULL_RTX; |
| |
| ////////////////////////////////////////////////////////////////// |
| // Step 1: Represent the input conditions as truth Ranges. This |
| // establishes a decomposition / coloring of the domain. |
| |
| Ranges dom = Ranges::NBitsRanges (GET_MODE_BITSIZE (mode), unsigned_p, |
| Ranges::ALL); |
| Ranges r[4] = { dom, dom.truth (cmp1, val1), dom.truth (cmp2, val2), dom }; |
| |
| // r[1] shadows r[2] shadows r[3]. r[0] is just for nice indices. |
| r[3].minus (r[2]); |
| r[3].minus (r[1]); |
| r[2].minus (r[1]); |
| |
| ////////////////////////////////////////////////////////////////// |
| // Step 2: Filter for cases where the domain decomposes into three |
| // intervals: One to the left, one to the right, and one |
| // in the middle where the latter holds exactly one value. |
| |
| for (int i = 1; i <= 3; ++i) |
| { |
| // Keep track of which Ranges is which. |
| r[i].tag = i; |
| |
| gcc_assert (r[i].check ()); |
| |
| // Filter for proper intervals. Also return for the empty set, |
| // since cases where [m_min, m_max] decomposes into two intervals |
| // or less have been sorted out by the generic optimizers already, |
| // and hence should not be seen here. And more than two intervals |
| // at a time cannot be optimized of course. |
| if (r[i].size () != 1) |
| return NULL_RTX; |
| } |
| |
| // Bubble-sort the three intervals such that: |
| // [1] is the left interval, i.e. the one taken by LT[U]. |
| // [2] is the middle interval, i.e. the one taken by EQ. |
| // [3] is the right interval, i.e. the one taken by GT[U]. |
| Ranges::sort2 (r[1], r[3]); |
| Ranges::sort2 (r[2], r[3]); |
| Ranges::sort2 (r[1], r[2]); |
| |
| if (dump_file) |
| fprintf (dump_file, |
| ";; Decomposed: .%d=[%ld, %ld] .%d=[%ld, %ld] .%d=[%ld, %ld]\n", |
| r[1].tag, (long) r[1].ranges[0].lo, (long) r[1].ranges[0].hi, |
| r[2].tag, (long) r[2].ranges[0].lo, (long) r[2].ranges[0].hi, |
| r[3].tag, (long) r[3].ranges[0].lo, (long) r[3].ranges[0].hi); |
| |
| // EQ / NE can handle only one value. |
| if (r[2].cardinality (0) != 1) |
| return NULL_RTX; |
| |
| // Success! This is the sought for xval. |
| const T val = r[2].ranges[0].lo; |
| |
| ////////////////////////////////////////////////////////////////// |
| // Step 3: Work out which label gets which condition, trying to |
| // avoid the expensive codes GT[U] and LE[U] if possible. |
| // Avoiding expensive codes is always possible when labels |
| // way1 and way2 may be swapped. |
| |
| // The xx1 ways have an expensive GT for cmp1 which can be avoided |
| // by swapping way1 with way2. |
| swapt = may_swapt && r[3].tag == 1; |
| if (swapt) |
| std::swap (r[3], r[2].tag == 2 ? r[2] : r[1]); |
| |
| // 6 = 3! ways to assign LT, EQ, GT to the three labels. |
| const int way = 100 * r[1].tag + 10 * r[2].tag + r[3].tag; |
| |
| if (dump_file) |
| fprintf (dump_file, ";; Success: unsigned=%d, swapt=%d, way=%d, rhs=%ld\n", |
| unsigned_p, swapt, way, (long) val); |
| |
| #define WAY(w, c1, c2) \ |
| case w: \ |
| cmp1 = unsigned_p ? unsigned_condition (c1) : c1; \ |
| cmp2 = unsigned_p ? unsigned_condition (c2) : c2; \ |
| break; |
| |
| switch (way) |
| { |
| default: |
| gcc_unreachable (); |
| |
| // cmp1 gets the LT, avoid difficult branches for cmp2. |
| WAY (123, LT, EQ); |
| WAY (132, LT, NE); |
| |
| // cmp1 gets the EQ, avoid difficult branches for cmp2. |
| WAY (213, EQ, LT); |
| WAY (312, EQ, GE); |
| |
| // cmp1 gets the difficult GT, unavoidable as we may not swap way1/2. |
| WAY (231, GT, NE); |
| WAY (321, GT, EQ); |
| } |
| |
| #undef WAY |
| |
| return gen_int_mode (val, mode); |
| } |
| |
| |
| /* A helper for the next method. Suppose we have two conditional branches |
| with REG and CONST_INT operands |
| |
| if (reg <cond1> xval1) goto label1; |
| if (reg <cond2> xval2) goto label2; |
| |
| If the second comparison is redundant and there are codes <cmp1> |
| and <cmp2> such that the sequence can be performed as |
| |
| REG_CC = compare (reg, xval); |
| if (REG_CC <cmp1> 0) goto label1; |
| if (REG_CC <cmp2> 0) goto label2; |
| |
| then set COND1 to cmp1, COND2 to cmp2, SWAPT to true when the branch |
| targets have to be swapped, and return XVAL. Otherwise, return NULL_RTX. |
| This function may clobber COND1 and COND2 even when it returns NULL_RTX. |
| |
| REVERSE_COND1 can be set to reverse condition COND1. This is useful |
| when the second comparison does not follow the first one, but is |
| located after label1 like in: |
| |
| if (reg <cond1> xval1) goto label1; |
| ... |
| label1: |
| if (reg <cond2> xval2) goto label2; |
| |
| In such a case we cannot swap the labels, and we may end up with a |
| difficult branch -- though one comparison can still be optimized out. |
Getting rid of such difficult branches would require reordering blocks. */
| |
| static rtx |
| avr_redundant_compare (rtx xreg1, rtx_code &cond1, rtx xval1, |
| rtx xreg2, rtx_code &cond2, rtx xval2, |
| bool reverse_cond1, bool &swapt) |
| { |
| // Make sure we have two REG <cond> CONST_INT comparisons with the same reg. |
| if (! rtx_equal_p (xreg1, xreg2) |
| || ! CONST_INT_P (xval1) |
| || ! CONST_INT_P (xval2)) |
| return NULL_RTX; |
| |
| if (reverse_cond1) |
| cond1 = reverse_condition (cond1); |
| |
| // Allow swapping label1 <-> label2 only when ! reverse_cond1. |
| swapt = ! reverse_cond1; |
| rtx_code c1 = cond1; |
| rtx_code c2 = cond2; |
| rtx xval = avr_2comparisons_rhs (c1, xval1, |
| c2, xval2, GET_MODE (xreg1), swapt); |
| if (! xval) |
| return NULL_RTX; |
| |
| if (dump_file) |
| { |
| rtx_code a1 = reverse_cond1 ? reverse_condition (cond1) : cond1; |
| rtx_code b1 = reverse_cond1 ? reverse_condition (c1) : c1; |
| const char *s_rev1 = reverse_cond1 ? " reverse_cond1" : ""; |
| avr_dump (";; cond1: %C %r%s\n", a1, xval1, s_rev1); |
| avr_dump (";; cond2: %C %r\n", cond2, xval2); |
| avr_dump (";; => %C %d\n", b1, (int) INTVAL (xval)); |
| avr_dump (";; => %C %d\n", c2, (int) INTVAL (xval)); |
| } |
| |
| cond1 = c1; |
| cond2 = c2; |
| |
| return xval; |
| } |
| |
| |
| /* Similar to the function above, but assume that |
| |
| if (xreg1 <cond1> xval1) goto label1; |
| if (xreg2 <cond2> xval2) goto label2; |
| |
| are two subsequent REG-REG comparisons. When this can be represented as |
| |
| REG_CC = compare (reg, xval); |
| if (REG_CC <cmp1> 0) goto label1; |
| if (REG_CC <cmp2> 0) goto label2; |
| |
| then set XREG1 to reg, COND1 and COND2 accordingly, and return xval. |
Otherwise, return NULL_RTX. This optimization can be performed
| when { xreg1, xval1 } and { xreg2, xval2 } are equal as sets. |
| It can be done in such a way that no difficult branches occur. */ |
| |
| static rtx |
| avr_redundant_compare_regs (rtx &xreg1, rtx_code &cond1, rtx &xval1, |
| rtx &xreg2, rtx_code &cond2, rtx &xval2, |
| bool reverse_cond1) |
| { |
| bool swapped; |
| |
| if (! REG_P (xval1)) |
| return NULL_RTX; |
| else if (rtx_equal_p (xreg1, xreg2) |
| && rtx_equal_p (xval1, xval2)) |
| swapped = false; |
| else if (rtx_equal_p (xreg1, xval2) |
| && rtx_equal_p (xreg2, xval1)) |
| swapped = true; |
| else |
| return NULL_RTX; |
| |
| // Found a redundant REG-REG comparison. Assume that the incoming |
| // representation has been canonicalized by CANONICALIZE_COMPARISON. |
| // We can always represent this using only one comparison and in such |
| // a way that no difficult branches are required. |
| |
| if (dump_file) |
| { |
| const char *s_rev1 = reverse_cond1 ? " reverse_cond1" : ""; |
| avr_dump (";; %r %C %r%s\n", xreg1, cond1, xval1, s_rev1); |
| avr_dump (";; %r %C %r\n", xreg2, cond2, xval2); |
| } |
| |
| if (reverse_cond1) |
| cond1 = reverse_condition (cond1); |
| |
| if (swapped) |
| { |
| if (cond1 == EQ || cond1 == NE) |
| { |
| avr_dump (";; case #21\n"); |
| std::swap (xreg1, xval1); |
| } |
| else |
| { |
| std::swap (xreg2, xval2); |
| cond2 = swap_condition (cond2); |
| |
| // The swap may have introduced a difficult comparison. |
// In order to get rid of it, only a few cases need extra care.
| if ((cond1 == LT && cond2 == GT) |
| || (cond1 == LTU && cond2 == GTU)) |
| { |
| avr_dump (";; case #22\n"); |
| cond2 = NE; |
| } |
| else |
| avr_dump (";; case #23\n"); |
| } |
| } |
| else |
| avr_dump (";; case #20\n"); |
| |
| return xval1; |
| } |
| |
| |
| /* INSN1 and INSN2 are two cbranch insns for the same integer mode. |
| When FOLLOW_LABEL1 is false, then INSN2 is located in the fallthrough |
| path of INSN1. When FOLLOW_LABEL1 is true, then INSN2 is located at |
| the true edge of INSN1, INSN2 is preceded by a barrier, and no other |
| edge leads to the basic block of INSN2. |
| |
| Try to replace INSN1 and INSN2 by a compare insn and two branch insns. |
| When such a replacement has been performed, then return the insn where the |
| caller should continue scanning the insn stream. Else, return nullptr. */ |
| |
| static rtx_insn * |
| avr_optimize_2ifelse (rtx_jump_insn *insn1, |
| rtx_jump_insn *insn2, bool follow_label1) |
| { |
| avr_dump (";; Investigating jump_insn %d and jump_insn %d.\n", |
| INSN_UID (insn1), INSN_UID (insn2)); |
| |
| // Extract the operands of the insns: |
| // $0 = comparison operator ($1, $2) |
| // $1 = reg |
| // $2 = reg or const_int |
| // $3 = code_label |
| // $4 = optional SCRATCH for HI, PSI, SI cases. |
| |
| const auto &op = recog_data.operand; |
| |
| extract_insn (insn1); |
| rtx xop1[5] = { op[0], op[1], op[2], op[3], op[4] }; |
| int n_operands = recog_data.n_operands; |
| |
| extract_insn (insn2); |
| rtx xop2[5] = { op[0], op[1], op[2], op[3], op[4] }; |
| |
| rtx_code code1 = GET_CODE (xop1[0]); |
| rtx_code code2 = GET_CODE (xop2[0]); |
| bool swap_targets = false; |
| |
| // Search redundant REG-REG comparison. |
| rtx xval = avr_redundant_compare_regs (xop1[1], code1, xop1[2], |
| xop2[1], code2, xop2[2], |
| follow_label1); |
| |
| // Search redundant REG-CONST_INT comparison. |
| if (! xval) |
| xval = avr_redundant_compare (xop1[1], code1, xop1[2], |
| xop2[1], code2, xop2[2], |
| follow_label1, swap_targets); |
| if (! xval) |
| { |
| avr_dump (";; Nothing found for jump_insn %d and jump_insn %d.\n", |
| INSN_UID (insn1), INSN_UID (insn2)); |
| return nullptr; |
| } |
| |
| if (follow_label1) |
| code1 = reverse_condition (code1); |
| |
| ////////////////////////////////////////////////////// |
| // Found a replacement. |
| |
| if (dump_file) |
| { |
| avr_dump (";; => %C %r\n", code1, xval); |
| avr_dump (";; => %C %r\n", code2, xval); |
| |
| fprintf (dump_file, "\n;; Found chain of jump_insn %d and" |
| " jump_insn %d, follow_label1=%d:\n", |
| INSN_UID (insn1), INSN_UID (insn2), follow_label1); |
| print_rtl_single (dump_file, PATTERN (insn1)); |
| print_rtl_single (dump_file, PATTERN (insn2)); |
| } |
| |
| rtx_insn *next_insn |
| = next_nonnote_nondebug_insn (follow_label1 ? insn1 : insn2); |
| |
| // Pop the new branch conditions and the new comparison. |
| // Prematurely split into compare + branch so that we can drop |
| // the 2nd comparison. The following pass, split2, splits all |
| // insns for REG_CC, and it should still work as usual even when |
| // there are already some REG_CC insns around. |
| |
| rtx xcond1 = gen_rtx_fmt_ee (code1, VOIDmode, cc_reg_rtx, const0_rtx); |
| rtx xcond2 = gen_rtx_fmt_ee (code2, VOIDmode, cc_reg_rtx, const0_rtx); |
| rtx xpat1 = gen_branch (xop1[3], xcond1); |
| rtx xpat2 = gen_branch (xop2[3], xcond2); |
| rtx xcompare = NULL_RTX; |
| machine_mode mode = GET_MODE (xop1[1]); |
| |
| if (mode == QImode) |
| { |
| gcc_assert (n_operands == 4); |
| xcompare = gen_cmpqi3 (xop1[1], xval); |
| } |
| else |
| { |
| gcc_assert (n_operands == 5); |
| rtx scratch = GET_CODE (xop1[4]) == SCRATCH ? xop2[4] : xop1[4]; |
| rtx (*gen_cmp)(rtx,rtx,rtx) |
| = mode == HImode ? gen_gen_comparehi |
| : mode == PSImode ? gen_gen_comparepsi |
| : gen_gen_comparesi; // SImode |
| xcompare = gen_cmp (xop1[1], xval, scratch); |
| } |
| |
| // Emit that stuff. |
| |
| rtx_insn *cmp = emit_insn_before (xcompare, insn1); |
| rtx_jump_insn *branch1 = emit_jump_insn_after (xpat1, insn1); |
| rtx_jump_insn *branch2 = emit_jump_insn_after (xpat2, insn2); |
| |
| JUMP_LABEL (branch1) = xop1[3]; |
| JUMP_LABEL (branch2) = xop2[3]; |
| // delete_insn() decrements LABEL_NUSES when deleting a JUMP_INSN, |
// but when we emit a new JUMP_INSN, we have to increment it by hand.
| ++LABEL_NUSES (xop1[3]); |
| ++LABEL_NUSES (xop2[3]); |
| |
| delete_insn (insn1); |
| delete_insn (insn2); |
| |
| if (swap_targets) |
| { |
| gcc_assert (! follow_label1); |
| |
| basic_block to1 = BLOCK_FOR_INSN (xop1[3]); |
| basic_block to2 = BLOCK_FOR_INSN (xop2[3]); |
| edge e1 = find_edge (BLOCK_FOR_INSN (branch1), to1); |
| edge e2 = find_edge (BLOCK_FOR_INSN (branch2), to2); |
| gcc_assert (e1); |
| gcc_assert (e2); |
| redirect_edge_and_branch (e1, to2); |
| redirect_edge_and_branch (e2, to1); |
| } |
| |
| // As a side effect, also recog the new insns. |
| gcc_assert (valid_insn_p (cmp)); |
| gcc_assert (valid_insn_p (branch1)); |
| gcc_assert (valid_insn_p (branch2)); |
| |
| return next_insn; |
| } |
| |
| |
| /* Sequences like |
| |
| SREG = compare (reg, 1 + val); |
| if (SREG >= 0) goto label1; |
| SREG = compare (reg, val); |
| if (SREG == 0) goto label2; |
| |
| can be optimized to |
| |
| SREG = compare (reg, val); |
| if (SREG == 0) goto label2; |
| if (SREG >= 0) goto label1; |
| |
| Almost all cases where one of the comparisons is redundant can |
| be transformed in such a way that only one comparison is required |
| and no difficult branches are needed. */ |
| |
| unsigned int |
| avr_pass_ifelse::execute (function *) |
| { |
| rtx_insn *next_insn; |
| |
| for (rtx_insn *insn = get_insns (); insn; insn = next_insn) |
| { |
| next_insn = next_nonnote_nondebug_insn (insn); |
| |
| if (! next_insn) |
| break; |
| |
| // Search for two cbranch insns. The first one is a cbranch. |
| // Filter for "cbranch<mode>4_insn" with mode in QI, HI, PSI, SI. |
| |
| if (! JUMP_P (insn)) |
| continue; |
| |
| int icode1 = recog_memoized (insn); |
| |
| if (icode1 != CODE_FOR_cbranchqi4_insn |
| && icode1 != CODE_FOR_cbranchhi4_insn |
| && icode1 != CODE_FOR_cbranchpsi4_insn |
| && icode1 != CODE_FOR_cbranchsi4_insn) |
| continue; |
| |
| rtx_jump_insn *insn1 = as_a<rtx_jump_insn *> (insn); |
| |
| // jmp[0]: We can optimize cbranches that follow cbranch insn1. |
| rtx_insn *jmp[2] = { next_insn, nullptr }; |
| |
| // jmp[1]: A cbranch following the label of cbranch insn1. |
| if (LABEL_NUSES (JUMP_LABEL (insn1)) == 1) |
| { |
| rtx_insn *code_label1 = JUMP_LABEL_AS_INSN (insn1); |
| rtx_insn *barrier = prev_nonnote_nondebug_insn (code_label1); |
| |
| // When the target label of insn1 is used exactly once and is |
| // not a fallthrough, i.e. is preceded by a barrier, then |
| // consider the insn following that label. |
| if (barrier && BARRIER_P (barrier)) |
| jmp[1] = next_nonnote_nondebug_insn (code_label1); |
| } |
| |
| // Almost certainly, only one of the two possible jumps can be |
| // optimized with insn1, but it's hard to tell a priori which one. |
| // Just try both. In the unlikely case where both could be optimized, |
| // prefer jmp[0] because following label1 impedes the elimination |
| // of difficult branches. |
| |
| for (int j = 0; j < 2; ++j) |
| if (jmp[j] && JUMP_P (jmp[j]) |
| && recog_memoized (jmp[j]) == icode1) |
| { |
| rtx_insn *next |
| = avr_optimize_2ifelse (insn1, as_a<rtx_jump_insn *> (jmp[j]), |
| j == 1 /* follow_label1 */); |
| if (next) |
| { |
| next_insn = next; |
| break; |
| } |
| } |
| |
| } // loop insns |
| |
| return 0; |
| } |
| |
| |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Optimize results of the casesi expander for modes < SImode. |
| |
| static const pass_data avr_pass_data_casesi = |
| { |
| RTL_PASS, // type |
| "", // name (will be patched) |
| OPTGROUP_NONE, // optinfo_flags |
| TV_DF_SCAN, // tv_id |
| 0, // properties_required |
| 0, // properties_provided |
| 0, // properties_destroyed |
| 0, // todo_flags_start |
| 0 // todo_flags_finish |
| }; |
| |
| class avr_pass_casesi : public rtl_opt_pass |
| { |
| public: |
| avr_pass_casesi (gcc::context *ctxt, const char *name) |
| : rtl_opt_pass (avr_pass_data_casesi, ctxt) |
| { |
| this->name = name; |
| } |
| |
| bool gate (function *) final override |
| { |
| return optimize > 0; |
| } |
| |
| unsigned int execute (function *) final override; |
| }; // avr_pass_casesi |
| |
| |
| /* Make one parallel insn with all the patterns from insns i[0]..i[4]. */ |
| |
| static rtx_insn * |
| avr_parallel_insn_from_insns (rtx_insn *i[5]) |
| { |
| rtvec vec = gen_rtvec (5, PATTERN (i[0]), PATTERN (i[1]), PATTERN (i[2]), |
| PATTERN (i[3]), PATTERN (i[4])); |
| start_sequence (); |
| emit (gen_rtx_PARALLEL (VOIDmode, vec)); |
| return end_sequence (); |
| } |
| |
| |
| /* Return true if we see an insn stream generated by casesi expander together |
| with an extension to SImode of the switch value. |
| |
| If this is the case, fill in the insns from casesi to INSNS[1..4] and |
| the SImode extension to INSNS[0]. Moreover, extract the operands of |
| pattern casesi_<mode>_sequence forged from the sequence to recog_data. */ |
| |
| static bool |
| avr_is_casesi_sequence (basic_block bb, rtx_insn *insn, rtx_insn *insns[5]) |
| { |
| rtx set_4, set_0; |
| |
| /* A first, quick test for a casesi sequence. As a side effect of |
| the test, harvest the respective insns into INSNS[0..4]. */ |
| |
| if (!(JUMP_P (insns[4] = insn) |
| // casesi is the only insn that comes up with UNSPEC_INDEX_JMP, |
| // hence the following test ensures that we are actually dealing |
| // with code from casesi. |
| && (set_4 = single_set (insns[4])) |
| && UNSPEC == GET_CODE (SET_SRC (set_4)) |
| && UNSPEC_INDEX_JMP == XINT (SET_SRC (set_4), 1) |
| |
| && (insns[3] = prev_real_insn (insns[4])) |
| && (insns[2] = prev_real_insn (insns[3])) |
| && (insns[1] = prev_real_insn (insns[2])) |
| |
| // Insn prior to casesi. |
| && (insns[0] = prev_real_insn (insns[1])) |
| && (set_0 = single_set (insns[0])) |
| && extend_operator (SET_SRC (set_0), SImode))) |
| { |
| return false; |
| } |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, ";; Sequence from casesi in " |
| "[bb %d]:\n\n", bb->index); |
| for (int i = 0; i < 5; i++) |
| print_rtl_single (dump_file, insns[i]); |
| } |
| |
| /* We have to deal with quite a few operands. Extracting them by hand |
| would be tedious, therefore wrap the insn patterns into a parallel, |
| run recog against it and then use insn extract to get the operands. */ |
| |
| rtx_insn *xinsn = avr_parallel_insn_from_insns (insns); |
| |
| INSN_CODE (xinsn) = recog (PATTERN (xinsn), xinsn, NULL /* num_clobbers */); |
| |
| /* Failing to recognize means that someone changed the casesi expander or |
| that some passes prior to this one performed some unexpected changes. |
| Gracefully drop such situations instead of aborting. */ |
| |
| if (INSN_CODE (xinsn) < 0) |
| { |
| if (dump_file) |
| fprintf (dump_file, ";; Sequence not recognized, giving up.\n\n"); |
| |
| return false; |
| } |
| |
| gcc_assert (CODE_FOR_casesi_qi_sequence == INSN_CODE (xinsn) |
| || CODE_FOR_casesi_hi_sequence == INSN_CODE (xinsn)); |
| |
| extract_insn (xinsn); |
| |
| // Assert on the anatomy of xinsn's operands we are going to work with. |
| |
| gcc_assert (recog_data.n_operands == 12); |
| gcc_assert (recog_data.n_dups == 3); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, ";; Operands extracted:\n"); |
| for (int i = 0; i < recog_data.n_operands; i++) |
| avr_fdump (dump_file, ";; $%d = %r\n", i, recog_data.operand[i]); |
| fprintf (dump_file, "\n"); |
| } |
| |
| return true; |
| } |
| |
| |
| /* INSNS[1..4] is a sequence as generated by casesi and INSNS[0] is an |
| extension of an 8-bit or 16-bit integer to SImode. XOP contains the |
| operands of INSNS as extracted by insn_extract from pattern |
| casesi_<mode>_sequence: |
| |
| $0: SImode reg switch value as result of $10. |
| $1: Negative of smallest index in switch. |
| $2: Number of entries in switch. |
| $3: Label to table. |
| $4: Label if out-of-bounds. |
| $5: $0 + $1. |
| $6: 3-byte PC: subreg:HI ($5) + label_ref ($3) |
| 2-byte PC: subreg:HI ($5) |
| $7: HI reg index into table (Z or pseudo) |
| $8: Z or scratch:HI (to be clobbered) |
| $9: R24 or const0_rtx (to be clobbered) |
| $10: Extension to SImode of an 8-bit or 16-bit integer register $11. |
| $11: QImode or HImode register input of $10. |
| |
| Try to optimize this sequence, i.e. use the original HImode / QImode |
| switch value instead of SImode. */ |
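| |
| // For example (a sketch), for a switch on an unsigned 8-bit value the |
| // normalization $5 = $0 + $1 and the bounds check against $2 are redone |
| // in QImode on a copy of $11, and the 16-bit table index $7 is computed |
| // from that copy; the superseded insns[1..3] are then pseudo-deleted. |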
| |
| static void |
| avr_optimize_casesi (rtx_insn *insns[5], rtx *xop) |
| { |
| // Original mode of the switch value; this is QImode or HImode. |
| machine_mode mode = GET_MODE (xop[11]); |
| |
| // How the original switch value was extended to SImode; this is |
| // SIGN_EXTEND or ZERO_EXTEND. |
| rtx_code code = GET_CODE (xop[10]); |
| |
| // Lower index, upper index (plus one) and range of case values. |
| HOST_WIDE_INT low_idx = -INTVAL (xop[1]); |
| HOST_WIDE_INT num_idx = INTVAL (xop[2]); |
| HOST_WIDE_INT hig_idx = low_idx + num_idx; |
| |
| // Maximum ranges of (un)signed QImode resp. HImode. |
| unsigned umax = QImode == mode ? 0xff : 0xffff; |
| int imax = QImode == mode ? 0x7f : 0x7fff; |
| int imin = -imax - 1; |
| |
| // Testing the case range and whether it fits into the range of the |
| // (un)signed mode. This test should actually always pass because it |
| // makes no sense to have case values outside the mode range. Notice |
| // that case labels which are unreachable because they are outside the |
| // mode of the switch value (e.g. "case -1" for uint8_t) have already |
| // been thrown away by the middle-end. |
| |
| if (SIGN_EXTEND == code |
| && low_idx >= imin |
| && hig_idx <= imax) |
| { |
| // ok |
| } |
| else if (ZERO_EXTEND == code |
| && low_idx >= 0 |
| && (unsigned) hig_idx <= umax) |
| { |
| // ok |
| } |
| else |
| { |
| if (dump_file) |
| fprintf (dump_file, ";; Case ranges too big, giving up.\n\n"); |
| return; |
| } |
| |
| // Do normalization of switch value $10 and out-of-bounds check in its |
| // original mode instead of in SImode. Use a newly created pseudo. |
| // This will replace insns[1..2]. |
| |
| start_sequence (); |
| |
| rtx reg = copy_to_mode_reg (mode, xop[11]); |
| |
| rtx (*gen_add)(rtx,rtx,rtx) = QImode == mode ? gen_addqi3 : gen_addhi3; |
| rtx (*gen_cbranch)(rtx,rtx,rtx,rtx) |
| = QImode == mode ? gen_cbranchqi4 : gen_cbranchhi4; |
| |
| emit_insn (gen_add (reg, reg, gen_int_mode (-low_idx, mode))); |
| rtx op0 = reg; rtx op1 = gen_int_mode (num_idx, mode); |
| rtx labelref = copy_rtx (xop[4]); |
| rtx xbranch = gen_cbranch (gen_rtx_fmt_ee (GTU, VOIDmode, op0, op1), |
| op0, op1, labelref); |
| rtx_insn *cbranch = emit_jump_insn (xbranch); |
| JUMP_LABEL (cbranch) = xop[4]; |
| ++LABEL_NUSES (xop[4]); |
| |
| rtx_insn *last1 = get_last_insn (); |
| rtx_insn *seq1 = end_sequence (); |
| |
| emit_insn_after (seq1, insns[2]); |
| |
| // After the out-of-bounds test and corresponding branch, use a |
| // 16-bit index. If QImode is used, extend it to HImode first. |
| // This will replace insns[4]. |
| |
| start_sequence (); |
| |
| if (QImode == mode) |
| reg = force_reg (HImode, gen_rtx_fmt_e (code, HImode, reg)); |
| |
| rtx pat_4 = AVR_3_BYTE_PC |
| ? gen_movhi (xop[7], reg) |
| : gen_addhi3 (xop[7], reg, gen_rtx_LABEL_REF (VOIDmode, xop[3])); |
| |
| emit_insn (pat_4); |
| |
| rtx_insn *last2 = get_last_insn (); |
| rtx_insn *seq2 = end_sequence (); |
| |
| emit_insn_after (seq2, insns[3]); |
| |
| if (dump_file) |
| { |
| fprintf (dump_file, ";; New insns: "); |
| |
| for (rtx_insn *insn = seq1; ; insn = NEXT_INSN (insn)) |
| { |
| fprintf (dump_file, "%d, ", INSN_UID (insn)); |
| if (insn == last1) |
| break; |
| } |
| for (rtx_insn *insn = seq2; ; insn = NEXT_INSN (insn)) |
| { |
| fprintf (dump_file, "%d%s", INSN_UID (insn), |
| insn == last2 ? ".\n\n" : ", "); |
| if (insn == last2) |
| break; |
| } |
| |
| fprintf (dump_file, ";; Deleting insns: %d, %d, %d.\n\n", |
| INSN_UID (insns[1]), INSN_UID (insns[2]), INSN_UID (insns[3])); |
| } |
| |
| // Pseudo-delete the SImode and subreg of SImode insns. We don't care |
| // about the extension insns[0]: Its result is now unused and other |
| // passes will clean it up. |
| |
| SET_INSN_DELETED (insns[1]); |
| SET_INSN_DELETED (insns[2]); |
| SET_INSN_DELETED (insns[3]); |
| } |
| |
| |
| unsigned int |
| avr_pass_casesi::execute (function *func) |
| { |
| basic_block bb; |
| |
| FOR_EACH_BB_FN (bb, func) |
| { |
| rtx_insn *insn, *insns[5]; |
| |
| FOR_BB_INSNS (bb, insn) |
| { |
| if (avr_is_casesi_sequence (bb, insn, insns)) |
| { |
| avr_optimize_casesi (insns, recog_data.operand); |
| } |
| } |
| } |
| |
| return 0; |
| } |
| |
| } // anonymous namespace |
| |
| /* Perform some extra checks on operands of casesi_<mode>_sequence. |
| Not all operand dependencies can be described by means of predicates. |
| This function performs left-over checks and should always return true. |
| Returning false means that someone changed the casesi expander but did |
| not adjust casesi_<mode>_sequence. */ |
| |
| bool |
| avr_casei_sequence_check_operands (rtx *xop) |
| { |
| rtx sub_5 = NULL_RTX; |
| |
| if (AVR_HAVE_EIJMP_EICALL |
| // The last clobber op of the tablejump. |
| && xop[9] == all_regs_rtx[REG_24]) |
| { |
| // $6 is: (subreg:SI ($5) 0) |
| sub_5 = xop[6]; |
| } |
| |
| if (!AVR_HAVE_EIJMP_EICALL |
| // $6 is: (plus:HI (subreg:SI ($5) 0) |
| // (label_ref ($3))) |
| && PLUS == GET_CODE (xop[6]) |
| && LABEL_REF == GET_CODE (XEXP (xop[6], 1)) |
| && rtx_equal_p (xop[3], XEXP (XEXP (xop[6], 1), 0)) |
| // The last clobber op of the tablejump. |
| && xop[9] == const0_rtx) |
| { |
| sub_5 = XEXP (xop[6], 0); |
| } |
| |
| if (sub_5 |
| && SUBREG_P (sub_5) |
| && SUBREG_BYTE (sub_5) == 0 |
| && rtx_equal_p (xop[5], SUBREG_REG (sub_5))) |
| return true; |
| |
| if (dump_file) |
| fprintf (dump_file, "\n;; Failed condition for casesi_<mode>_sequence\n\n"); |
| |
| return false; |
| } |
| |
| namespace |
| { |
| |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Find more POST_INC and PRE_DEC cases. |
| |
| static const pass_data avr_pass_data_fuse_add = |
| { |
| RTL_PASS, // type |
| "", // name (will be patched) |
| OPTGROUP_NONE, // optinfo_flags |
| TV_MACH_DEP, // tv_id |
| 0, // properties_required |
| 0, // properties_provided |
| 0, // properties_destroyed |
| 0, // todo_flags_start |
| TODO_df_finish // todo_flags_finish |
| }; |
| |
| class avr_pass_fuse_add : public rtl_opt_pass |
| { |
| public: |
| avr_pass_fuse_add (gcc::context *ctxt, const char *name) |
| : rtl_opt_pass (avr_pass_data_fuse_add, ctxt) |
| { |
| this->name = name; |
| } |
| |
| // Cloning is required because we are running one instance of the pass |
| // before peephole2, and a second one after cprop_hardreg. |
| opt_pass * clone () final override |
| { |
| return make_avr_pass_fuse_add (m_ctxt); |
| } |
| |
| unsigned int execute (function *func) final override |
| { |
| func->machine->n_avr_fuse_add_executed += 1; |
| n_avr_fuse_add_executed = func->machine->n_avr_fuse_add_executed; |
| |
| if (optimize && avropt_fuse_add > 0) |
| return execute1 (func); |
| return 0; |
| } |
| |
| unsigned int execute1 (function *); |
| |
| struct Some_Insn |
| { |
| rtx_insn *insn = nullptr; |
| rtx dest, src; |
| bool valid () const { return insn != nullptr; } |
| void set_deleted () |
| { |
| gcc_assert (insn); |
| SET_INSN_DELETED (insn); |
| insn = nullptr; |
| } |
| }; |
| |
| // If .insn is not NULL, then this is a reg:HI += const_int |
| // of an address register. |
| struct Add_Insn : Some_Insn |
| { |
| rtx addend; |
| int regno; |
| Add_Insn () {} |
| Add_Insn (rtx_insn *insn); |
| }; |
| |
| // If .insn is not NULL, then this sets an address register |
| // to a constant value. |
| struct Ldi_Insn : Some_Insn |
| { |
| int regno; |
| Ldi_Insn () {} |
| Ldi_Insn (rtx_insn *insn); |
| }; |
| |
| // If .insn is not NULL, then this is a load or store insn where the |
| // address is REG or POST_INC with an address register. |
| struct Mem_Insn : Some_Insn |
| { |
| rtx reg_or_0, mem, addr, addr_reg; |
| int addr_regno; |
| rtx_code addr_code; |
| machine_mode mode; |
| addr_space_t addr_space; |
| bool store_p, volatile_p; |
| Mem_Insn () {} |
| Mem_Insn (rtx_insn *insn); |
| }; |
| |
| rtx_insn *fuse_ldi_add (Ldi_Insn &prev_ldi, Add_Insn &add); |
| rtx_insn *fuse_add_add (Add_Insn &prev_add, Add_Insn &add); |
| rtx_insn *fuse_add_mem (Add_Insn &prev_add, Mem_Insn &mem); |
| rtx_insn *fuse_mem_add (Mem_Insn &prev_mem, Add_Insn &add); |
| }; // avr_pass_fuse_add |
| |
| |
| /* Describe properties of AVR's indirect load and store instructions |
| LD, LDD, ST, STD, LPM, ELPM depending on register number, volatility etc. |
| Rules for "volatile" accesses are: |
| |
| | Xmega | non-Xmega |
| ------+-----------------+---------------- |
| load | read LSB first | read LSB first |
| store | write LSB first | write MSB first |
| */ |
| |
| struct AVR_LdSt_Props |
| { |
| bool has_postinc, has_predec, has_ldd; |
| // The insn printers will use POST_INC or PRE_DEC addressing, no matter |
| // what addressing modes we are feeding into them. |
| bool want_postinc, want_predec; |
| |
| AVR_LdSt_Props (int regno, bool store_p, bool volatile_p, addr_space_t as) |
| { |
| bool generic_p = ADDR_SPACE_GENERIC_P (as); |
| bool flashx_p = (! generic_p |
| && as != ADDR_SPACE_MEMX && as != ADDR_SPACE_FLASHX); |
| has_postinc = generic_p || (flashx_p && regno == REG_Z); |
| has_predec = generic_p; |
| has_ldd = ! AVR_TINY && generic_p && (regno == REG_Y || regno == REG_Z); |
| want_predec = volatile_p && generic_p && ! AVR_XMEGA && store_p; |
| want_postinc = volatile_p && generic_p && (AVR_XMEGA || ! store_p); |
| want_postinc |= flashx_p && regno == REG_Z; |
| } |
| |
| AVR_LdSt_Props (const avr_pass_fuse_add::Mem_Insn &m) |
| : AVR_LdSt_Props (m.addr_regno, m.store_p, m.volatile_p, m.addr_space) |
| { |
| gcc_assert (m.valid ()); |
| } |
| }; |
| |
| |
| /* Emit a single_set that clobbers REG_CC. */ |
| |
| static rtx_insn * |
| emit_move_ccc (rtx dest, rtx src) |
| { |
| return emit_insn (gen_gen_move_clobbercc (dest, src)); |
| } |
| |
| |
| /* Emit a single_set that clobbers REG_CC after insn AFTER. */ |
| |
| static rtx_insn * |
| emit_move_ccc_after (rtx dest, rtx src, rtx_insn *after) |
| { |
| return emit_insn_after (gen_gen_move_clobbercc (dest, src), after); |
| } |
| |
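| // Return true when REG is used or set by some insn strictly between |
| // FROM and TO. |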
| static bool |
| reg_seen_between_p (const_rtx reg, const rtx_insn *from, const rtx_insn *to) |
| { |
| return (reg_used_between_p (reg, from, to) |
| || reg_set_between_p (reg, from, to)); |
| } |
| |
| |
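| // If ADDEND is non-zero, INSN adjusts frame pointer REG, and avropt_fuse_add |
| // is 3, then mark INSN as frame-related and attach a REG_CFA_ADJUST_CFA |
| // note so that the unwind information remains correct. |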
| static void |
| avr_maybe_adjust_cfa (rtx_insn *insn, rtx reg, int addend) |
| { |
| if (addend |
| && frame_pointer_needed |
| && REGNO (reg) == FRAME_POINTER_REGNUM |
| && avropt_fuse_add == 3) |
| { |
| rtx plus = plus_constant (Pmode, reg, addend); |
| RTX_FRAME_RELATED_P (insn) = 1; |
| add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (reg, plus)); |
| } |
| } |
| |
| |
| // If successful, this represents a SET of a pointer register to a constant. |
| avr_pass_fuse_add::Ldi_Insn::Ldi_Insn (rtx_insn *insn) |
| { |
| rtx set = single_set (insn); |
| if (!set) |
| return; |
| |
| src = SET_SRC (set); |
| dest = SET_DEST (set); |
| |
| if (REG_P (dest) |
| && GET_MODE (dest) == Pmode |
| && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z) |
| && CONSTANT_P (src)) |
| { |
| this->insn = insn; |
| } |
| } |
| |
| // If successful, this represents a PLUS with CONST_INT of a pointer |
| // register X, Y or Z. Otherwise, the object is not valid(). |
| avr_pass_fuse_add::Add_Insn::Add_Insn (rtx_insn *insn) |
| { |
| rtx set = single_set (insn); |
| if (!set) |
| return; |
| |
| src = SET_SRC (set); |
| dest = SET_DEST (set); |
| if (REG_P (dest) |
| // We are only interested in PLUSes that change address regs. |
| && GET_MODE (dest) == Pmode |
| && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z) |
| && PLUS == GET_CODE (src) |
| && rtx_equal_p (XEXP (src, 0), dest) |
| && CONST_INT_P (XEXP (src, 1))) |
| { |
| // This is reg:HI += const_int. |
| addend = XEXP (src, 1); |
| this->insn = insn; |
| } |
| } |
| |
| // If successful, this represents a load or store insn where the addressing |
| // mode uses pointer register X, Y or Z. Otherwise, the object is not valid(). |
| avr_pass_fuse_add::Mem_Insn::Mem_Insn (rtx_insn *insn) |
| { |
| rtx set = single_set (insn); |
| if (!set) |
| return; |
| |
| src = SET_SRC (set); |
| dest = SET_DEST (set); |
| mode = GET_MODE (dest); |
| |
| if (MEM_P (dest) |
| && (REG_P (src) || src == CONST0_RTX (mode))) |
| { |
| reg_or_0 = src; |
| mem = dest; |
| } |
| else if (REG_P (dest) && MEM_P (src)) |
| { |
| reg_or_0 = dest; |
| mem = src; |
| } |
| else |
| return; |
| |
| if (avr_mem_memx_p (mem) |
| || avr_load_libgcc_p (mem)) |
| return; |
| |
| addr = XEXP (mem, 0); |
| addr_code = GET_CODE (addr); |
| |
| if (addr_code == REG) |
| addr_reg = addr; |
| else if (addr_code == POST_INC || addr_code == PRE_DEC) |
| addr_reg = XEXP (addr, 0); |
| else |
| return; |
| |
| addr_regno = REGNO (addr_reg); |
| |
| if (avropt_fuse_add == 2 |
| && frame_pointer_needed |
| && addr_regno == FRAME_POINTER_REGNUM) |
| MEM_VOLATILE_P (mem) = 0; |
| |
| if (reg_overlap_mentioned_p (reg_or_0, addr) // Can handle CONSTANT_P. |
| || addr_regno > REG_Z |
| || avr_mem_memx_p (mem) |
| // The following optimizations only handle REG and POST_INC, |
| // so that's all what we allow here. |
| || (addr_code != REG && addr_code != POST_INC)) |
| return; |
| |
| addr_space = MEM_ADDR_SPACE (mem); |
| volatile_p = MEM_VOLATILE_P (mem); |
| store_p = MEM_P (dest); |
| |
| // Turn this "valid". |
| this->insn = insn; |
| } |
| |
| /* Try to combine a Ldi insn with a PLUS CONST_INT addend to one Ldi insn. |
| If LDI is valid, then it precedes ADD in the same block. |
| When a replacement is found, a new insn is emitted and the old insns |
| are pseudo-deleted. The returned insn is the point where the calling |
| scanner should continue. When no replacement is found, nullptr is |
| returned and nothing changed. */ |
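| |
| // For example (a sketch; the pointer register and offsets are only |
| // illustrative), within one basic block |
| // |
| //      Z = sym              ;; Ldi |
| //      ...                  ;; no use / change of Z here |
| //      Z += 4               ;; Add |
| // |
| // is replaced by a single  Z = sym + 4  emitted after the old Ldi. |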
| |
| rtx_insn * |
| avr_pass_fuse_add::fuse_ldi_add (Ldi_Insn &ldi, Add_Insn &add) |
| { |
| if (! ldi.valid () |
| || reg_seen_between_p (ldi.dest, ldi.insn, add.insn)) |
| { |
| // If something is between the Ldi and the current insn, we can |
| // set the Ldi invalid to speed future scans. |
| return ldi.insn = nullptr; |
| } |
| |
| // Found an Ldi with a constant and a PLUS insn in the same BB, |
| // and with no interfering insns between them. |
| |
| // Emit new Ldi with the sum of the original offsets after the old Ldi. |
| rtx xval = plus_constant (Pmode, ldi.src, INTVAL (add.addend)); |
| |
| rtx_insn *insn = emit_move_ccc_after (ldi.dest, xval, ldi.insn); |
| avr_dump (";; new Ldi[%d] insn %d after %d: R%d = %r\n\n", ldi.regno, |
| INSN_UID (insn), INSN_UID (ldi.insn), ldi.regno, xval); |
| |
| rtx_insn *next = NEXT_INSN (add.insn); |
| ldi.set_deleted (); |
| add.set_deleted (); |
| |
| return next; |
| } |
| |
| /* Try to combine two PLUS insns with CONST_INT addend to one such insn. |
| If PREV_ADD is valid, then it precedes ADD in the same basic block. |
| When a replacement is found, a new insn is emitted and the old insns |
| are pseudo-deleted. The returned insn is the point where the calling |
| scanner should continue. When no replacement is found, nullptr is |
| returned and nothing changed. */ |
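| |
| // For example (a sketch),  Y += 5  followed by  Y += -3  becomes a single |
| // Y += 2  emitted after the second add, whereas  Y += 5  followed by |
| // Y += -5  cancels out and both insns are just deleted. |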
| |
| rtx_insn * |
| avr_pass_fuse_add::fuse_add_add (Add_Insn &prev_add, Add_Insn &add) |
| { |
| if (! prev_add.valid () |
| || reg_seen_between_p (add.dest, prev_add.insn, add.insn)) |
| { |
| // If something is between the previous Add and the current insn, |
| // we can set the previous Add invalid to speed future scans. |
| return prev_add.insn = nullptr; |
| } |
| |
| // Found two PLUS insns in the same BB, and with no interfering |
| // insns between them. |
| rtx plus = plus_constant (Pmode, add.src, INTVAL (prev_add.addend)); |
| |
| rtx_insn *next; |
| if (REG_P (plus)) |
| { |
| avr_dump (";; Add[%d] from %d annihilates %d\n\n", add.regno, |
| INSN_UID (prev_add.insn), INSN_UID (add.insn)); |
| next = NEXT_INSN (add.insn); |
| } |
| else |
| { |
| // Emit after the current insn, so that it will be picked |
| // up as next valid Add insn. |
| next = emit_move_ccc_after (add.dest, plus, add.insn); |
| avr_dump (";; #1 new Add[%d] insn %d after %d: R%d += %d\n\n", |
| add.regno, INSN_UID (next), INSN_UID (add.insn), |
| add.regno, (int) INTVAL (XEXP (plus, 1))); |
| gcc_assert (GET_CODE (plus) == PLUS); |
| } |
| |
| add.set_deleted (); |
| prev_add.set_deleted (); |
| |
| return next; |
| } |
| |
| /* Try to combine a PLUS of the address register with a load or store insn. |
| If ADD is valid, then it precedes MEM in the same basic block. |
| When a replacement is found, a new insn is emitted and the old insns |
| are pseudo-deleted. The returned insn is the point where the calling |
| scanner should continue. When no replacement is found, nullptr is |
| returned and nothing changed. */ |
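| |
| // For example (a sketch with a 2-byte access; registers are only |
| // illustrative), |
| // |
| //      Y += -2 |
| //      R24:25 = mem:HI (Y) |
| // |
| // becomes a pre-decrement access |
| // |
| //      R24:25 = mem:HI (pre_dec Y) |
| // |
| // and the explicit subtraction is consumed. |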
| |
| rtx_insn * |
| avr_pass_fuse_add::fuse_add_mem (Add_Insn &add, Mem_Insn &mem) |
| { |
| if (! add.valid () |
| || reg_seen_between_p (add.dest, add.insn, mem.insn)) |
| { |
| // If something is between the Add and the current insn, we can |
| // set the Add invalid to speed future scans. |
| return add.insn = nullptr; |
| } |
| |
| AVR_LdSt_Props ap { mem }; |
| |
| int msize = GET_MODE_SIZE (mem.mode); |
| |
| // The mem insn really wants PRE_DEC. |
| bool case1 = ((mem.addr_code == REG || mem.addr_code == POST_INC) |
| && msize > 1 && ap.want_predec && ! ap.has_ldd); |
| |
| // The offset can be consumed by a PRE_DEC. |
| bool case2 = (- INTVAL (add.addend) == msize |
| && (mem.addr_code == REG || mem.addr_code == POST_INC) |
| && ap.has_predec && ! ap.want_postinc); |
| |
| if (! case1 && ! case2) |
| return nullptr; |
| |
| // Change from REG or POST_INC to PRE_DEC. |
| rtx xmem = change_address (mem.mem, mem.mode, |
| gen_rtx_PRE_DEC (Pmode, mem.addr_reg)); |
| rtx dest = mem.store_p ? xmem : mem.reg_or_0; |
| rtx src = mem.store_p ? mem.reg_or_0 : xmem; |
| |
| rtx_insn *next = emit_move_ccc_after (dest, src, mem.insn); |
| add_reg_note (next, REG_INC, mem.addr_reg); |
| avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", mem.addr_regno, |
| INSN_UID (next), INSN_UID (mem.insn), dest, src); |
| |
| // Changing REG or POST_INC -> PRE_DEC means that the addend before |
| // the memory access must be increased by the size of the access. |
| rtx plus = plus_constant (Pmode, add.src, msize); |
| if (! REG_P (plus)) |
| { |
| rtx_insn *insn = emit_move_ccc_after (add.dest, plus, add.insn); |
| avr_dump (";; #2 new Add[%d] insn %d after %d: R%d += %d\n\n", |
| add.regno, INSN_UID (insn), INSN_UID (add.insn), |
| add.regno, (int) INTVAL (XEXP (plus, 1))); |
| gcc_assert (GET_CODE (plus) == PLUS); |
| } |
| else |
| avr_dump (";; Add[%d] insn %d consumed into %d\n\n", |
| add.regno, INSN_UID (add.insn), INSN_UID (next)); |
| |
| // Changing POST_INC -> PRE_DEC means that the addend after the mem has to be |
| // the size of the access. The hope is that this new add insn may be unused. |
| if (mem.addr_code == POST_INC) |
| { |
| plus = plus_constant (Pmode, add.dest, msize); |
| rtx_insn *next2 = emit_move_ccc_after (add.dest, plus, next); |
| avr_dump (";; #3 new Add[%d] insn %d after %d: R%d += %d\n\n", add.regno, |
| INSN_UID (next2), INSN_UID (next), add.regno, msize); |
| next = next2; |
| } |
| |
| add.set_deleted (); |
| mem.set_deleted (); |
| |
| return next; |
| } |
| |
| /* Try to combine a load or store insn with a PLUS of the address register. |
| If MEM is valid, then it precedes ADD in the same basic block. |
| When a replacement is found, a new insn is emitted and the old insns |
| are pseudo-deleted. The returned insn is the point where the calling |
| scanner should continue. When no replacement is found, nullptr is |
| returned and nothing changed. */ |
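| |
| // For example (a sketch with a 2-byte access; registers are only |
| // illustrative), |
| // |
| //      R24:25 = mem:HI (X) |
| //      X += 2 |
| // |
| // becomes a post-increment access |
| // |
| //      R24:25 = mem:HI (post_inc X) |
| // |
| // and the explicit addition is consumed. |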
| |
| rtx_insn * |
| avr_pass_fuse_add::fuse_mem_add (Mem_Insn &mem, Add_Insn &add) |
| { |
| if (! mem.valid () |
| || reg_seen_between_p (add.dest, mem.insn, add.insn)) |
| { |
| // If something is between the Mem and the current insn, we can |
| // set the Mem invalid to speed future scans. |
| return mem.insn = nullptr; |
| } |
| |
| AVR_LdSt_Props ap { mem }; |
| |
| int msize = GET_MODE_SIZE (mem.mode); |
| |
| // The add insn can be consumed by a POST_INC. |
| bool case1 = (mem.addr_code == REG |
| && INTVAL (add.addend) == msize |
| && ap.has_postinc && ! ap.want_predec); |
| |
| // There are cases where even a partial consumption of the offset is better. |
| // These are the cases where no LD+offset addressing is available, because |
| // the address register is obviously used after the mem insn, and a mem insn |
| // with REG addressing mode will have to restore the address. |
| bool case2 = (mem.addr_code == REG |
| && msize > 1 && ap.want_postinc && ! ap.has_ldd); |
| |
| if (! case1 && ! case2) |
| return nullptr; |
| |
| // Change addressing mode from REG to POST_INC. |
| rtx xmem = change_address (mem.mem, mem.mode, |
| gen_rtx_POST_INC (Pmode, mem.addr_reg)); |
| rtx dest = mem.store_p ? xmem : mem.reg_or_0; |
| rtx src = mem.store_p ? mem.reg_or_0 : xmem; |
| |
| rtx_insn *insn = emit_move_ccc_after (dest, src, mem.insn); |
| add_reg_note (insn, REG_INC, mem.addr_reg); |
| avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", add.regno, |
| INSN_UID (insn), INSN_UID (mem.insn), dest, src); |
| |
| rtx_insn *next = NEXT_INSN (add.insn); |
| |
| // Changing REG -> POST_INC means that the post addend must be |
| // decreased by the size of the access. |
| rtx plus = plus_constant (Pmode, add.src, -msize); |
| if (! REG_P (plus)) |
| { |
| next = emit_move_ccc_after (mem.addr_reg, plus, add.insn); |
| avr_dump (";; #4 new Add[%d] insn %d after %d: R%d += %d\n\n", |
| add.regno, INSN_UID (next), INSN_UID (add.insn), |
| add.regno, (int) INTVAL (XEXP (plus, 1))); |
| gcc_assert (GET_CODE (plus) == PLUS); |
| } |
| else |
| avr_dump (";; Add[%d] insn %d consumed into %d\n\n", |
| add.regno, INSN_UID (add.insn), INSN_UID (insn)); |
| |
| add.set_deleted (); |
| mem.set_deleted (); |
| |
| return next; |
| } |
| |
| /* Try to post-reload combine PLUS with CONST_INT of pointer registers with: |
| - Sets to a constant address. |
| - PLUS insns of that kind. |
| - Indirect loads and stores. |
| In almost all cases, combine opportunities arise from the preparation |
| done by `avr_split_fake_addressing_move', but in some rare cases combinations |
| are found for the ordinary cores, too. |
| As we consider at most one Mem insn per try, there may still be missed |
| optimizations; e.g. POST_INC + PLUS + POST_INC might be performed |
| as PRE_DEC + PRE_DEC for two adjacent locations. */ |
| |
| unsigned int |
| avr_pass_fuse_add::execute1 (function *func) |
| { |
| df_note_add_problem (); |
| df_analyze (); |
| |
| int n_add = 0, n_mem = 0, n_ldi = 0; |
| basic_block bb; |
| |
| FOR_EACH_BB_FN (bb, func) |
| { |
| Ldi_Insn prev_ldi_insns[REG_32]; |
| Add_Insn prev_add_insns[REG_32]; |
| Mem_Insn prev_mem_insns[REG_32]; |
| rtx_insn *insn, *curr; |
| |
| avr_dump ("\n;; basic block %d\n\n", bb->index); |
| |
| FOR_BB_INSNS_SAFE (bb, insn, curr) |
| { |
| rtx_insn *next = nullptr; |
| Ldi_Insn ldi_insn { insn }; |
| Add_Insn add_insn { insn }; |
| Mem_Insn mem_insn { insn }; |
| |
| if (add_insn.valid ()) |
| { |
| // Found reg:HI += const_int |
| avr_dump (";; insn %d: Add[%d]: R%d += %d\n\n", |
| INSN_UID (add_insn.insn), add_insn.regno, |
| add_insn.regno, (int) INTVAL (add_insn.addend)); |
| Ldi_Insn &prev_ldi_insn = prev_ldi_insns[add_insn.regno]; |
| Add_Insn &prev_add_insn = prev_add_insns[add_insn.regno]; |
| Mem_Insn &prev_mem_insn = prev_mem_insns[add_insn.regno]; |
| if ((next = fuse_ldi_add (prev_ldi_insn, add_insn))) |
| curr = next, n_ldi += 1; |
| else if ((next = fuse_add_add (prev_add_insn, add_insn))) |
| curr = next, n_add += 1; |
| else if ((next = fuse_mem_add (prev_mem_insn, add_insn))) |
| curr = next, n_mem += 1; |
| else |
| prev_add_insn = add_insn; |
| } |
| else if (mem_insn.valid ()) |
| { |
| int addr_regno = REGNO (mem_insn.addr_reg); |
| avr_dump (";; insn %d: Mem[%d]: %r = %r\n\n", |
| INSN_UID (mem_insn.insn), addr_regno, |
| mem_insn.dest, mem_insn.src); |
| Add_Insn &prev_add_insn = prev_add_insns[addr_regno]; |
| if ((next = fuse_add_mem (prev_add_insn, mem_insn))) |
| curr = next, n_mem += 1; |
| else |
| prev_mem_insns[addr_regno] = mem_insn; |
| } |
| else if (ldi_insn.valid ()) |
| { |
| if (! CONST_INT_P (ldi_insn.src)) |
| avr_dump (";; insn %d: Ldi[%d]: R%d = %r\n\n", |
| INSN_UID (ldi_insn.insn), ldi_insn.regno, |
| ldi_insn.regno, ldi_insn.src); |
| prev_ldi_insns[ldi_insn.regno] = ldi_insn; |
| } |
| } // for insns |
| } // for BBs |
| |
| avr_dump (";; Function %f: Found %d changes: %d ldi, %d add, %d mem.\n", |
| n_ldi + n_add + n_mem, n_ldi, n_add, n_mem); |
| |
| return 0; |
| } |
| |
| |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Fuse 2 move insns after combine. |
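| // |
| // For example (a sketch), after combine we may see |
| // |
| //      pseudo = <src>      ;; insn1 |
| //      reg    = pseudo     ;; insn2 |
| // |
| // where the pseudo has no other use; then <src> is propagated into the |
| // second insn and insn1 is deleted. |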
| |
| static const pass_data avr_pass_data_2moves = |
| { |
| RTL_PASS, // type |
| "", // name (will be patched) |
| OPTGROUP_NONE, // optinfo_flags |
| TV_DF_SCAN, // tv_id |
| 0, // properties_required |
| 0, // properties_provided |
| 0, // properties_destroyed |
| 0, // todo_flags_start |
| 0 // todo_flags_finish |
| }; |
| |
| class avr_pass_2moves : public rtl_opt_pass |
| { |
| public: |
| avr_pass_2moves (gcc::context *ctxt, const char *name) |
| : rtl_opt_pass (avr_pass_data_2moves, ctxt) |
| { |
| this->name = name; |
| } |
| |
| unsigned int execute (function *func) final override |
| { |
| if (optimize && avropt_fuse_move2) |
| { |
| bool changed = false; |
| basic_block bb; |
| |
| FOR_EACH_BB_FN (bb, func) |
| { |
| changed |= optimize_2moves_bb (bb); |
| } |
| |
| if (changed) |
| { |
| df_note_add_problem (); |
| df_analyze (); |
| } |
| } |
| |
| return 0; |
| } |
| |
| bool optimize_2moves (rtx_insn *, rtx_insn *); |
| bool optimize_2moves_bb (basic_block); |
| }; // avr_pass_2moves |
| |
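| // Try optimize_2moves on pairs of adjacent insns of BB. |
| // Return true when some insns changed. |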
| bool |
| avr_pass_2moves::optimize_2moves_bb (basic_block bb) |
| { |
| bool changed = false; |
| rtx_insn *insn1 = nullptr; |
| rtx_insn *insn2 = nullptr; |
| rtx_insn *curr; |
| |
| FOR_BB_INSNS (bb, curr) |
| { |
| if (insn1 && INSN_P (insn1) |
| && insn2 && INSN_P (insn2)) |
| changed |= optimize_2moves (insn1, insn2); |
| |
| insn1 = insn2; |
| insn2 = curr; |
| } |
| |
| return changed; |
| } |
| |
| bool |
| avr_pass_2moves::optimize_2moves (rtx_insn *insn1, rtx_insn *insn2) |
| { |
| bool good = false; |
| bool bad = false; |
| rtx set1, dest1, src1; |
| rtx set2, dest2, src2; |
| |
| if ((set1 = single_set (insn1)) |
| && (set2 = single_set (insn2)) |
| && (src1 = SET_SRC (set1)) |
| && REG_P (src2 = SET_SRC (set2)) |
| && REG_P (dest1 = SET_DEST (set1)) |
| && REG_P (dest2 = SET_DEST (set2)) |
| && rtx_equal_p (dest1, src2) |
| // Now we have: |
| // insn1: dest1 = src1 |
| // insn2: dest2 = dest1 |
| && REGNO (dest1) >= FIRST_PSEUDO_REGISTER |
| // Paranoia. |
| && GET_CODE (PATTERN (insn1)) != PARALLEL |
| && GET_CODE (PATTERN (insn2)) != PARALLEL |
| && (rtx_equal_p (dest2, src1) |
| || !reg_overlap_mentioned_p (dest2, src1))) |
| { |
| avr_dump ("\n;; Found 2moves:\n%r\n%r\n", insn1, insn2); |
| avr_dump (";; reg %d: insn uses uids:", REGNO (dest1)); |
| |
| // Go check that dest1 is used exactly once, namely by insn2. |
| |
| df_ref use = DF_REG_USE_CHAIN (REGNO (dest1)); |
| for (; use; use = DF_REF_NEXT_REG (use)) |
| { |
| rtx_insn *user = DF_REF_INSN (use); |
| avr_dump (" %d", INSN_UID (user)); |
| good |= INSN_UID (user) == INSN_UID (insn2); |
| bad |= INSN_UID (user) != INSN_UID (insn2); |
| } |
| avr_dump (".\n"); |
| |
| if (good && !bad |
| // Propagate src1 to insn2: |
| // insn1: # Deleted |
| // insn2: dest2 = src1 |
| && validate_change (insn2, &SET_SRC (set2), src1, false)) |
| { |
| SET_INSN_DELETED (insn1); |
| return true; |
| } |
| } |
| |
| if (good && !bad) |
| avr_dump (";; Failed\n"); |
| |
| return false; |
| } |
| |
| |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Split insns with nonzero_bits() after combine. |
| |
| static const pass_data avr_pass_data_split_nzb = |
| { |
| RTL_PASS, // type |
| "", // name (will be patched) |
| OPTGROUP_NONE, // optinfo_flags |
| TV_DF_SCAN, // tv_id |
| 0, // properties_required |
| 0, // properties_provided |
| 0, // properties_destroyed |
| 0, // todo_flags_start |
| 0 // todo_flags_finish |
| }; |
| |
| class avr_pass_split_nzb : public rtl_opt_pass |
| { |
| public: |
| avr_pass_split_nzb (gcc::context *ctxt, const char *name) |
| : rtl_opt_pass (avr_pass_data_split_nzb, ctxt) |
| { |
| this->name = name; |
| } |
| |
| unsigned int execute (function *) final override |
| { |
| if (avropt_use_nonzero_bits) |
| split_nzb_insns (); |
| return 0; |
| } |
| |
| void split_nzb_insns (); |
| |
| }; // avr_pass_split_nzb |
| |
| |
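| // Split all insns whose "nzb" insn attribute is NZB_YES, i.e. insns whose |
| // split relies on nonzero_bits ().  Such insns must split; if one does not, |
| // then something is broken and we ICE. |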
| void |
| avr_pass_split_nzb::split_nzb_insns () |
| { |
| rtx_insn *next; |
| |
| for (rtx_insn *insn = get_insns (); insn; insn = next) |
| { |
| next = NEXT_INSN (insn); |
| |
| if (INSN_P (insn) |
| && single_set (insn) |
| && get_attr_nzb (insn) == NZB_YES) |
| { |
| rtx_insn *last = try_split (PATTERN (insn), insn, 1 /*last*/); |
| |
| // The nonzero_bits() insns *must* split. If not: ICE. |
| if (last == insn) |
| { |
| debug_rtx (insn); |
| internal_error ("failed to split insn"); |
| } |
| } |
| } |
| } |
| |
| |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Split shift insns after peephole2 / before avr-fuse-move. |
| |
| static const pass_data avr_pass_data_split_after_peephole2 = |
| { |
| RTL_PASS, // type |
| "", // name (will be patched) |
| OPTGROUP_NONE, // optinfo_flags |
| TV_DF_SCAN, // tv_id |
| 0, // properties_required |
| 0, // properties_provided |
| 0, // properties_destroyed |
| 0, // todo_flags_start |
| 0 // todo_flags_finish |
| }; |
| |
| class avr_pass_split_after_peephole2 : public rtl_opt_pass |
| { |
| public: |
| avr_pass_split_after_peephole2 (gcc::context *ctxt, const char *name) |
| : rtl_opt_pass (avr_pass_data_split_after_peephole2, ctxt) |
| { |
| this->name = name; |
| } |
| |
| unsigned int execute (function *) final override |
| { |
| if (avr_shift_is_3op ()) |
| split_all_insns (); |
| return 0; |
| } |
| |
| }; // avr_pass_split_after_peephole2 |
| |
| } // anonymous namespace |
| |
| |
| /* Whether some shift insn alternatives are a `3op' 3-operand insn. |
| These 3op alternatives allow the source and the destination register |
| of the shift to be different right from the start, because the splitter |
| will split the 3op shift into a 3-operand byte shift and a 2-operand |
| residual bit shift. (When the residual shift has an offset of one |
| less than the bitsize, then the residual shift is also a 3op insn.) */ |
| |
| bool |
| avr_shift_is_3op () |
| { |
| // Don't split for OPTIMIZE_SIZE_MAX (-Oz). |
| // For OPTIMIZE_SIZE_BALANCED (-Os), we still split because |
| // the size overhead (if at all) is marginal. |
| |
| return (avropt_split_bit_shift |
| && optimize > 0 |
| && avr_optimize_size_level () < OPTIMIZE_SIZE_MAX); |
| } |
| |
| |
| /* Implement constraints `C2a', `C2l', `C2r' ... `C4a', `C4l', `C4r'. |
| Whether we split an N_BYTES shift of code CODE in { ASHIFTRT, |
| LSHIFTRT, ASHIFT } into a byte shift and a residual bit shift. */ |
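| |
| // For example, with -msplit-bit-shift a 4-byte ASHIFT by 17 is split, |
| // whereas 4-byte shifts by a multiple of 8 (plain byte shifts) and |
| // shifts by 15 are not. |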
| |
| bool |
| avr_split_shift_p (int n_bytes, int offset, rtx_code code) |
| { |
| gcc_assert (n_bytes == 4 || n_bytes == 3 || n_bytes == 2); |
| |
| if (! avr_shift_is_3op () |
| || offset % 8 == 0) |
| return false; |
| |
| if (n_bytes == 4) |
| return select<bool>() |
| : code == ASHIFT ? IN_RANGE (offset, 9, 31) && offset != 15 |
| : code == ASHIFTRT ? IN_RANGE (offset, 9, 29) && offset != 15 |
| : code == LSHIFTRT ? IN_RANGE (offset, 9, 31) && offset != 15 |
| : bad_case<bool> (); |
| |
| if (n_bytes == 3) |
| return select<bool>() |
| : code == ASHIFT ? IN_RANGE (offset, 9, 23) && offset != 15 |
| : code == ASHIFTRT ? IN_RANGE (offset, 9, 21) && offset != 15 |
| : code == LSHIFTRT ? IN_RANGE (offset, 9, 23) && offset != 15 |
| : bad_case<bool> (); |
| |
| if (n_bytes == 2) |
| return select<bool>() |
| : code == ASHIFT ? IN_RANGE (offset, 9, 15) |
| : code == ASHIFTRT ? IN_RANGE (offset, 9, 13) |
| : code == LSHIFTRT ? IN_RANGE (offset, 9, 15) |
| : bad_case<bool> (); |
| |
| return false; |
| } |
| |
| |
| /* Emit a DEST = SRC <code> OFF shift of QImode, HImode or PSImode. |
| SCRATCH is a QImode d-register, scratch:QI, or NULL_RTX. |
| This function is used to emit shifts that have been split into |
| a byte shift and a residual bit shift that operates on a mode |
| strictly smaller than the original shift. */ |
| |
| static void |
| avr_emit_shift (rtx_code code, rtx dest, rtx src, int off, rtx scratch) |
| { |
| const machine_mode mode = GET_MODE (dest); |
| const int n_bits = GET_MODE_BITSIZE (mode); |
| rtx xoff = GEN_INT (off); |
| |
| // Work out which alternatives can handle 3 operands independent |
| // of options. |
| |
| const bool b8_is_3op = off == 6; |
| |
| const bool b16_is_3op = select<bool>() |
| : code == ASHIFT ? (satisfies_constraint_C7c (xoff) // 7...12 |
| // The "C05 C06" alternative of *ashlhi3_const. |
| || (AVR_HAVE_MUL && scratch && (off == 5 || off == 6))) |
| : code == LSHIFTRT ? satisfies_constraint_C7c (xoff) |
| : code == ASHIFTRT ? off == 7 |
| : bad_case<bool> (); |
| |
| const bool b24_is_3op = select<bool>() |
| : code == ASHIFT ? off == 15 |
| : code == LSHIFTRT ? off == 15 |
| : code == ASHIFTRT ? false |
| : bad_case<bool> (); |
| |
| const bool is_3op = (off % 8 == 0 |
| || off == n_bits - 1 |
| || (code == ASHIFTRT && off == n_bits - 2) |
| || (n_bits == 8 && b8_is_3op) |
| || (n_bits == 16 && b16_is_3op) |
| || (n_bits == 24 && b24_is_3op)); |
| rtx shift; |
| |
| if (is_3op) |
| { |
| shift = gen_rtx_fmt_ee (code, mode, src, xoff); |
| } |
| else |
| { |
| if (REGNO (dest) != REGNO (src)) |
| emit_valid_move_clobbercc (dest, src); |
| shift = gen_rtx_fmt_ee (code, mode, dest, xoff); |
| } |
| |
| if (n_bits == 8) |
| // 8-bit shifts don't have a scratch operand. |
| scratch = NULL_RTX; |
| else if (! scratch && n_bits == 24) |
| // 24-bit shifts always have a scratch operand. |
| scratch = gen_rtx_SCRATCH (QImode); |
| |
| emit_valid_move_clobbercc (dest, shift, scratch); |
| } |
| |
| |
| /* Handle the 4-byte case of avr_split_shift below: |
| Split 4-byte shift DEST = SRC <code> IOFF into a 3-operand |
| byte shift and a residual shift in a smaller mode if possible. |
| SCRATCH is a QImode upper scratch register or NULL_RTX. */ |
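| |
| // For example (a sketch), a 4-byte ASHIFT by 26 becomes |
| // |
| //      dest.3 = src.0 << 2      ;; residual QImode shift |
| //      dest.2 = 0 |
| //      dest.1 = dest.0 = 0 |
| // |
| // where dest.i / src.i denote the i-th byte. |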
| |
| static bool |
| avr_split_shift4 (rtx dest, rtx src, int ioff, rtx scratch, rtx_code code) |
| { |
| gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 4); |
| |
| if (code == ASHIFT) |
| { |
| if (IN_RANGE (ioff, 25, 31)) |
| { |
| rtx dst8 = avr_byte (dest, 3); |
| rtx src8 = avr_byte (src, 0); |
| avr_emit_shift (code, dst8, src8, ioff - 24, NULL_RTX); |
| emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx); |
| emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx); |
| return true; |
| } |
| else if (IN_RANGE (ioff, 17, 23)) |
| { |
| rtx dst16 = avr_word (dest, 2); |
| rtx src16 = avr_word (src, 0); |
| avr_emit_shift (code, dst16, src16, ioff - 16, scratch); |
| emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx); |
| return true; |
| } |
| // ...the 9...14 cases are only handled by define_split because |
| // for now, we don't exploit that the low byte is zero. |
| } |
| else if (code == ASHIFTRT |
| || code == LSHIFTRT) |
| { |
| if (IN_RANGE (ioff, 25, 30 + (code == LSHIFTRT))) |
| { |
| rtx dst8 = avr_byte (dest, 0); |
| rtx src8 = avr_byte (src, 3); |
| avr_emit_shift (code, dst8, src8, ioff - 24, NULL_RTX); |
| if (code == ASHIFTRT) |
| { |
| rtx signs = avr_byte (dest, 1); |
| avr_emit_shift (code, signs, src8, 7, NULL_RTX); |
| emit_valid_move_clobbercc (avr_byte (dest, 2), signs); |
| emit_valid_move_clobbercc (avr_byte (dest, 3), signs); |
| } |
| else |
| { |
| emit_valid_move_clobbercc (avr_byte (dest, 1), const0_rtx); |
| emit_valid_move_clobbercc (avr_word (dest, 2), const0_rtx); |
| } |
| return true; |
| } |
| else if (IN_RANGE (ioff, 17, 23)) |
| { |
| rtx dst16 = avr_word (dest, 0); |
| rtx src16 = avr_word (src, 2); |
| avr_emit_shift (code, dst16, src16, ioff - 16, scratch); |
| if (code == ASHIFTRT) |
| { |
| rtx msb = avr_byte (src, 3); |
| rtx signs = avr_byte (dest, 2); |
| avr_emit_shift (code, signs, msb, 7, NULL_RTX); |
| emit_valid_move_clobbercc (avr_byte (dest, 3), signs); |
| } |
| else |
| emit_valid_move_clobbercc (avr_word (dest, 2), const0_rtx); |
| |
| return true; |
| } |
| else if (IN_RANGE (ioff, 9, 15)) |
| { |
| avr_emit_shift (code, dest, src, 8, scratch); |
| rtx dst24 = avr_chunk (PSImode, dest, 0); |
| rtx src24 = avr_chunk (PSImode, dest, 0); |
| avr_emit_shift (code, dst24, src24, ioff - 8, scratch); |
| return true; |
| } |
| } |
| else |
| gcc_unreachable (); |
| |
| return false; |
| } |
| |
| |
| /* Handle the 3-byte case of avr_split_shift below: |
| Split 3-byte shift DEST = SRC <code> IOFF into a 3-operand |
| byte shift and a residual shift in a smaller mode if possible. |
| SCRATCH is a QImode upper scratch register or NULL_RTX. */ |
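| |
| // For example (a sketch), a 3-byte LSHIFTRT by 18 becomes |
| // |
| //      dest.0 = src.2 >> 2      ;; residual QImode shift |
| //      dest.2 = dest.1 = 0 |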
| |
| static bool |
| avr_split_shift3 (rtx dest, rtx src, int ioff, rtx scratch, rtx_code code) |
| { |
| gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 3); |
| |
| if (code == ASHIFT) |
| { |
| if (IN_RANGE (ioff, 17, 23)) |
| { |
| rtx dst8 = avr_byte (dest, 2); |
| rtx src8 = avr_byte (src, 0); |
| avr_emit_shift (code, dst8, src8, ioff - 16, NULL_RTX); |
| emit_valid_move_clobbercc (avr_word (dest, 0), const0_rtx); |
| return true; |
| } |
| // ...the 9...14 cases are only handled by define_split because |
| // for now, we don't exploit that the low byte is zero. |
| } |
| else if (code == ASHIFTRT |
| || code == LSHIFTRT) |
| { |
| if (IN_RANGE (ioff, 17, 22 + (code == LSHIFTRT))) |
| { |
| rtx dst8 = avr_byte (dest, 0); |
| rtx src8 = avr_byte (src, 2); |
| avr_emit_shift (code, dst8, src8, ioff - 16, NULL_RTX); |
| if (code == ASHIFTRT) |
| { |
| rtx signs = avr_byte (dest, 1); |
| avr_emit_shift (code, signs, src8, 7, NULL_RTX); |
| emit_valid_move_clobbercc (avr_byte (dest, 2), signs); |
| } |
| else |
| { |
| emit_valid_move_clobbercc (avr_byte (dest, 1), const0_rtx); |
| emit_valid_move_clobbercc (avr_byte (dest, 2), const0_rtx); |
| } |
| return true; |
| } |
| else if (IN_RANGE (ioff, 9, 15)) |
| { |
| avr_emit_shift (code, dest, src, 8, scratch); |
| rtx dst16 = avr_chunk (HImode, dest, 0); |
| rtx src16 = avr_chunk (HImode, dest, 0); |
| avr_emit_shift (code, dst16, src16, ioff - 8, scratch); |
| return true; |
| } |
| } |
| else |
| gcc_unreachable (); |
| |
| return false; |
| } |
| |
| |
| /* Handle the 2-byte case of avr_split_shift below: |
| Split 2-byte shift DEST = SRC <code> IOFF into a 3-operand |
| byte shift and a residual shift in a smaller mode if possible. |
| SCRATCH is a QImode upper scratch register or NULL_RTX. */ |
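| |
| // For example (a sketch), a 2-byte ASHIFTRT by 9 becomes |
| // |
| //      dest.0 = src.1 >> 1      ;; residual QImode arithmetic shift |
| //      dest.1 = src.1 >> 7      ;; copies of the sign bit |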
| |
| static bool |
| avr_split_shift2 (rtx dest, rtx src, int ioff, rtx /*scratch*/, rtx_code code) |
| { |
| gcc_assert (GET_MODE_SIZE (GET_MODE (dest)) == 2); |
| |
| if (code == ASHIFT) |
| { |
| if (IN_RANGE (ioff, 9, 15)) |
| { |
| rtx dst8 = avr_byte (dest, 1); |
| rtx src8 = avr_byte (src, 0); |
| avr_emit_shift (code, dst8, src8, ioff - 8, NULL_RTX); |
| emit_valid_move_clobbercc (avr_byte (dest, 0), const0_rtx); |
| return true; |
| } |
| } |
| else if (code == ASHIFTRT |
| || code == LSHIFTRT) |
| { |
| if (IN_RANGE (ioff, 9, 14 + (code == LSHIFTRT))) |
| { |
| rtx dst8 = avr_byte (dest, 0); |
| rtx src8 = avr_byte (src, 1); |
| rtx signs = const0_rtx; |
| avr_emit_shift (code, dst8, src8, ioff - 8, NULL_RTX); |
| if (code == ASHIFTRT) |
| { |
| signs = avr_byte (dest, 1); |
| avr_emit_shift (code, signs, src8, 7, NULL_RTX); |
| } |
| emit_valid_move_clobbercc (avr_byte (dest, 1), signs); |
| return true; |
| } |
| } |
| else |
| gcc_unreachable (); |
| |
| return false; |
| } |
| |
| |
| /* Worker for a define_split that runs when -msplit-bit-shift is on. |
| Split a shift of code CODE into a 3op byte shift and a residual bit shift. |
| Return 'true' when a split has been performed and insns have been emitted. |
| Otherwise, return 'false'. */ |
| |
| bool |
| avr_split_shift (rtx xop[], rtx scratch, rtx_code code) |
| { |
| scratch = scratch && REG_P (scratch) ? scratch : NULL_RTX; |
| rtx dest = xop[0]; |
| rtx src = xop[1]; |
| int ioff = INTVAL (xop[2]); |
| int n_bytes = GET_MODE_SIZE (GET_MODE (dest)); |
| |
| return select<bool>() |
| : n_bytes == 2 ? avr_split_shift2 (dest, src, ioff, scratch, code) |
| : n_bytes == 3 ? avr_split_shift3 (dest, src, ioff, scratch, code) |
| : n_bytes == 4 ? avr_split_shift4 (dest, src, ioff, scratch, code) |
| : bad_case<bool> (); |
| } |
| |
| |
| namespace |
| { |
| |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Determine whether an ISR may use the __gcc_isr pseudo-instruction. |
| |
| static const pass_data avr_pass_data_pre_proep = |
| { |
| RTL_PASS, // type |
| "", // name (will be patched) |
| OPTGROUP_NONE, // optinfo_flags |
| TV_DF_SCAN, // tv_id |
| 0, // properties_required |
| 0, // properties_provided |
| 0, // properties_destroyed |
| 0, // todo_flags_start |
| 0 // todo_flags_finish |
| }; |
| |
| class avr_pass_pre_proep : public rtl_opt_pass |
| { |
| public: |
| avr_pass_pre_proep (gcc::context *ctxt, const char *name) |
| : rtl_opt_pass (avr_pass_data_pre_proep, ctxt) |
| { |
| this->name = name; |
| } |
| |
| void compute_maybe_gasisr (function *); |
| |
| unsigned int execute (function *fun) final override |
| { |
| if (avropt_gasisr_prologues |
| // Whether this function is an ISR worth scanning at all. |
| && !fun->machine->is_no_gccisr |
| && (fun->machine->is_interrupt |
| || fun->machine->is_signal) |
| && !cfun->machine->is_naked |
| // Paranoia: Non-local gotos and labels that might escape. |
| && !cfun->calls_setjmp |
| && !cfun->has_nonlocal_label |
| && !cfun->has_forced_label_in_static) |
| { |
| compute_maybe_gasisr (fun); |
| } |
| |
| return 0; |
| } |
| |
| }; // avr_pass_pre_proep |
| |
| |
| /* Set fun->machine->gasisr.maybe provided we don't find anything that |
| prohibits GAS generating parts of ISR prologues / epilogues for us. */ |
| |
| void |
| avr_pass_pre_proep::compute_maybe_gasisr (function *fun) |
| { |
| // Don't use BB iterators so that we see JUMP_TABLE_DATA. |
| |
| for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) |
| { |
| // Transparent calls always use [R]CALL and are filtered out by GAS. |
| // ISRs don't use -mcall-prologues, hence what remains to be filtered |
| // out are open coded (tail) calls. |
| |
| if (CALL_P (insn)) |
| return; |
| |
| // __tablejump2__ clobbers something and is targeted by JMP so |
| // that GAS won't see its usage. |
| |
| if (AVR_HAVE_JMP_CALL |
| && JUMP_TABLE_DATA_P (insn)) |
| return; |
| |
| // Non-local gotos not seen in *FUN. |
| |
| if (JUMP_P (insn) |
| && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX)) |
| return; |
| } |
| |
| fun->machine->gasisr.maybe = 1; |
| } |
| |
| |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Late recomputation of notes so we can use `reg_unused_after()' and friends. |
| |
| static const pass_data avr_pass_data_recompute_notes = |
| { |
| RTL_PASS, // type |
| "", // name (will be patched) |
| OPTGROUP_NONE, // optinfo_flags |
| TV_DF_SCAN, // tv_id |
| 0, // properties_required |
| 0, // properties_provided |
| 0, // properties_destroyed |
| 0, // todo_flags_start |
| TODO_df_finish | TODO_df_verify // todo_flags_finish |
| }; |
| |
| class avr_pass_recompute_notes : public rtl_opt_pass |
| { |
| public: |
| avr_pass_recompute_notes (gcc::context *ctxt, const char *name) |
| : rtl_opt_pass (avr_pass_data_recompute_notes, ctxt) |
| { |
| this->name = name; |
| } |
| |
| unsigned int execute (function *) final override |
| { |
| df_note_add_problem (); |
| df_analyze (); |
| |
| return 0; |
| } |
| }; // avr_pass_recompute_notes |
| |
| } // anonymous namespace |
| |
| |
| |
| ////////////////////////////////////////////////////////////////////////////// |
| // Function visible and used outside this module. |
| |
| /* During reload, we allow many more addresses than Reduced Tiny actually |
| supports. Split them after reload in order to get closer to the |
| core's capabilities. This sets the stage for pass .avr-fuse-add. */ |
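| |
| // For example (a sketch with Z as base register; registers and offsets |
| // are only illustrative), on Reduced Tiny |
| // |
| //      R24:25 = mem:HI (Z + 5) |
| // |
| // is split into |
| // |
| //      Z += 5 |
| //      R24:25 = mem:HI (post_inc Z) |
| //      Z += -7 |
| // |
| // and pass .avr-fuse-add may clean up the remaining additions later. |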
| |
| bool |
| avr_split_fake_addressing_move (rtx_insn * /*insn*/, rtx *xop) |
| { |
| bool store_p = false; |
| rtx mem, reg_or_0; |
| |
| if (REG_P (xop[0]) && MEM_P (xop[1])) |
| { |
| reg_or_0 = xop[0]; |
| mem = xop[1]; |
| } |
| else if (MEM_P (xop[0]) |
| && (REG_P (xop[1]) |
| || xop[1] == CONST0_RTX (GET_MODE (xop[0])))) |
| { |
| mem = xop[0]; |
| reg_or_0 = xop[1]; |
| store_p = true; |
| } |
| else |
| return false; |
| |
| machine_mode mode = GET_MODE (mem); |
| rtx base, addr = XEXP (mem, 0); |
| rtx_code addr_code = GET_CODE (addr); |
| |
| if (REG_P (reg_or_0) |
| && reg_overlap_mentioned_p (reg_or_0, addr)) |
| return false; |
| else if (addr_code == PLUS || addr_code == PRE_DEC || addr_code == POST_INC) |
| base = XEXP (addr, 0); |
| else if (addr_code == REG) |
| base = addr; |
| else |
| return false; |
| |
| if (REGNO (base) > REG_Z) |
| return false; |
| |
| if (! AVR_TINY |
| // Only keep base registers that can't do PLUS addressing. |
| && ((REGNO (base) != REG_X |
| && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))) |
| || avr_load_libgcc_p (mem) |
| || avr_mem_memx_p (mem))) |
| return false; |
| |
| bool volatile_p = MEM_VOLATILE_P (mem); |
| bool mem_volatile_p = false; |
| if (frame_pointer_needed |
| && REGNO (base) == FRAME_POINTER_REGNUM) |
| { |
| if (avropt_fuse_add < 2 |
| // Be a projection (we always split PLUS). |
| || (avropt_fuse_add == 2 && volatile_p && addr_code != PLUS)) |
| return false; |
| |
| // Changing the frame pointer locally may confuse later passes |
| // like .dse2 which don't track changes of FP, not even when |
| // respective CFA notes are present. An example is pr22141-1.c. |
| if (avropt_fuse_add == 2) |
| mem_volatile_p = true; |
| } |
| |
| rtx_code new_code = UNKNOWN; |
| HOST_WIDE_INT add = 0, sub = 0; |
| int msize = GET_MODE_SIZE (mode); |
| |
| AVR_LdSt_Props ap { (int) REGNO (base), store_p, volatile_p, |
| ADDR_SPACE_GENERIC }; |
| |
| switch (addr_code) |
| { |
| default: |
| return false; |
| |
| case PLUS: |
| add = INTVAL (XEXP (addr, 1)); |
| if (msize == 1) |
| { |
| new_code = REG; |
| sub = -add; |
| } |
| else if (ap.want_predec) |
| { |
| // volatile stores prefer PRE_DEC (MSB first) |
| sub = -add; |
| add += msize; |
| new_code = PRE_DEC; |
| } |
| else |
| { |
| new_code = POST_INC; |
| sub = -add - msize; |
| } |
| break; |
| |
| case POST_INC: |
| // volatile stores prefer PRE_DEC (MSB first) |
| if (msize > 1 && ap.want_predec) |
| { |
| add = msize; |
| new_code = PRE_DEC; |
| sub = msize; |
| break; |
| } |
| return false; |
| |
| case PRE_DEC: |
| // volatile loads prefer POST_INC (LSB first) |
| if (msize > 1 && ap.want_postinc) |
| { |
| add = -msize; |
| new_code = POST_INC; |
| sub = -msize; |
| break; |
| } |
| return false; |
| |
| case REG: |
| if (msize == 1) |
| return false; |
| |
| if (ap.want_predec) |
| { |
| add = msize; |
| new_code = PRE_DEC; |
| sub = 0; |
| } |
| else |
| { |
| add = 0; |
| new_code = POST_INC; |
| sub = -msize; |
| } |
| break; |
| } // switch addr_code |
| |
| rtx_insn *insn; |
| |
| if (add) |
| { |
| insn = emit_move_ccc (base, plus_constant (Pmode, base, add)); |
| avr_maybe_adjust_cfa (insn, base, add); |
| } |
| |
| rtx new_addr = new_code == REG |
| ? base |
| : gen_rtx_fmt_e (new_code, Pmode, base); |
| |
| rtx new_mem = change_address (mem, mode, new_addr); |
| if (mem_volatile_p) |
| MEM_VOLATILE_P (new_mem) = 1; |
| |
| insn = emit_move_ccc (store_p ? new_mem : reg_or_0, |
| store_p ? reg_or_0 : new_mem); |
| if (auto_inc_p (new_addr)) |
| { |
| add_reg_note (insn, REG_INC, base); |
| int off = new_code == POST_INC ? msize : -msize; |
| avr_maybe_adjust_cfa (insn, base, off); |
| } |
| |
| if (sub) |
| { |
| insn = emit_move_ccc (base, plus_constant (Pmode, base, sub)); |
| avr_maybe_adjust_cfa (insn, base, sub); |
| } |
| |
| return true; |
| } |
| |
| |
| /* Given memory reference mem(ADDR), return true when it can be split into |
| single-byte moves, and all resulting addresses are natively supported. |
| ADDR is in addr-space generic. */ |
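| |
| // For example, on non-Tiny devices a 4-byte access to  Y + 10  is |
| // splittable since displacements 10...13 are all valid for LDD. |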
| |
| static bool |
| splittable_address_p (rtx addr, int n_bytes) |
| { |
| if (CONSTANT_ADDRESS_P (addr) |
| || GET_CODE (addr) == PRE_DEC |
| || GET_CODE (addr) == POST_INC) |
| return true; |
| |
| if (! AVR_TINY) |
| { |
| rtx base = select<rtx>() |
| : REG_P (addr) ? addr |
| : GET_CODE (addr) == PLUS ? XEXP (addr, 0) |
| : NULL_RTX; |
| |
| int off = select<int>() |
| : REG_P (addr) ? 0 |
| : GET_CODE (addr) == PLUS ? (int) INTVAL (XEXP (addr, 1)) |
| : -1; |
| |
| return (base && REG_P (base) |
| && (REGNO (base) == REG_Y || REGNO (base) == REG_Z) |
| && IN_RANGE (off, 0, 64 - n_bytes)); |
| } |
| |
| return false; |
| } |
| |
| |
| /* Like avr_byte(), but also knows how to split POST_INC and PRE_DEC |
| memory references. */ |
| |
| static rtx |
| avr_byte_maybe_mem (rtx x, int n) |
| { |
| rtx addr, b; |
| if (MEM_P (x) |
| && (GET_CODE (addr = XEXP (x, 0)) == POST_INC |
| || GET_CODE (addr) == PRE_DEC)) |
| b = gen_rtx_MEM (QImode, copy_rtx (addr)); |
| else |
| b = avr_byte (x, n); |
| |
| if (MEM_P (x)) |
| gcc_assert (MEM_P (b)); |
| |
| return b; |
| } |
| |
| |
| /* Split a multi-byte load / store into single-byte insns, provided the |
| access is non-volatile, in the generic address space, there is no |
| register overlap, and all resulting addressings are natively supported. |
| Returns true when the XOP[0] = XOP[1] insn has been split, and |
| false otherwise. */ |
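| |
| // For example (a sketch; registers are only illustrative), a non-volatile |
| // mem:SI (Y + 2) = R22:25  in the generic address space is split into four |
| // QImode stores  mem:QI (Y + 2 + i) = R<22 + i>  for i = 0...3. |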
| |
| bool |
| avr_split_ldst (rtx *xop) |
| { |
| rtx dest = xop[0]; |
| rtx src = xop[1]; |
| machine_mode mode = GET_MODE (dest); |
| int n_bytes = GET_MODE_SIZE (mode); |
| rtx mem, reg_or_0; |
| |
| if (MEM_P (dest) && reg_or_0_operand (src, mode)) |
| { |
| mem = dest; |
| reg_or_0 = src; |
| } |
| else if (register_operand (dest, mode) && MEM_P (src)) |
| { |
| reg_or_0 = dest; |
| mem = src; |
| } |
| else |
| return false; |
| |
| rtx addr = XEXP (mem, 0); |
| |
| if (MEM_VOLATILE_P (mem) |
| || ! ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem)) |
| || ! IN_RANGE (n_bytes, 2, 4) |
| || ! splittable_address_p (addr, n_bytes) |
| || reg_overlap_mentioned_p (reg_or_0, addr)) |
| return false; |
| |
| const int step = GET_CODE (addr) == PRE_DEC ? -1 : 1; |
| const int istart = step > 0 ? 0 : n_bytes - 1; |
| const int iend = istart + step * n_bytes; |
| |
| for (int i = istart; i != iend; i += step) |
| { |
| rtx di = avr_byte_maybe_mem (dest, i); |
| rtx si = avr_byte_maybe_mem (src, i); |
| emit_move_ccc (di, si); |
| } |
| |
| return true; |
| } |
| |
| |
| |
| // Functions make_<pass-name> (gcc::context*) where <pass-name> is |
| // according to the pass declaration in avr-passes.def. GCC's pass |
| // manager uses these functions to create the respective pass object. |
| |
| // Optimize results of the casesi expander for modes < SImode. |
| |
| rtl_opt_pass * |
| make_avr_pass_casesi (gcc::context *ctxt) |
| { |
| return new avr_pass_casesi (ctxt, "avr-casesi"); |
| } |
| |
| // Optimize 2 consecutive moves after combine. |
| |
| rtl_opt_pass * |
| make_avr_pass_2moves (gcc::context *ctxt) |
| { |
| return new avr_pass_2moves (ctxt, "avr-2moves"); |
| } |
| |
| rtl_opt_pass * |
| make_avr_pass_split_nzb (gcc::context *ctxt) |
| { |
| return new avr_pass_split_nzb (ctxt, "avr-split-nzb"); |
| } |
| |
| // Try to replace 2 cbranch insns with 1 comparison and 2 branches. |
| |
| rtl_opt_pass * |
| make_avr_pass_ifelse (gcc::context *ctxt) |
| { |
| return new avr_pass_ifelse (ctxt, "avr-ifelse"); |
| } |
| |
| // Determine whether an ISR may use the __gcc_isr pseudo-instruction. |
| |
| rtl_opt_pass * |
| make_avr_pass_pre_proep (gcc::context *ctxt) |
| { |
| return new avr_pass_pre_proep (ctxt, "avr-pre-proep"); |
| } |
| |
| // Find more POST_INC and PRE_DEC cases. |
| |
| rtl_opt_pass * |
| make_avr_pass_fuse_add (gcc::context *ctxt) |
| { |
| return new avr_pass_fuse_add (ctxt, "avr-fuse-add"); |
| } |
| |
| // Late recomputation of notes so we can use `reg_unused_after()' and friends. |
| |
| rtl_opt_pass * |
| make_avr_pass_recompute_notes (gcc::context *ctxt) |
| { |
| return new avr_pass_recompute_notes (ctxt, "avr-notes-free-cfg"); |
| } |
| |
| // Optimize moves after reload. |
| |
| rtl_opt_pass * |
| make_avr_pass_fuse_move (gcc::context *ctxt) |
| { |
| return new avr_pass_fuse_move (ctxt, "avr-fuse-move"); |
| } |
| |
| // Split insns after peephole2 / before avr-fuse-move. |
| |
| rtl_opt_pass * |
| make_avr_pass_split_after_peephole2 (gcc::context *ctxt) |
| { |
| return new avr_pass_split_after_peephole2 (ctxt, "avr-split-after-peephole2"); |
| } |