| // LoadPair fusion optimization pass for AArch64. |
| // Copyright (C) 2023-2024 Free Software Foundation, Inc. |
| // |
| // This file is part of GCC. |
| // |
| // GCC is free software; you can redistribute it and/or modify it |
| // under the terms of the GNU General Public License as published by |
| // the Free Software Foundation; either version 3, or (at your option) |
| // any later version. |
| // |
| // GCC is distributed in the hope that it will be useful, but |
| // WITHOUT ANY WARRANTY; without even the implied warranty of |
| // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| // General Public License for more details. |
| // |
| // You should have received a copy of the GNU General Public License |
| // along with GCC; see the file COPYING3. If not see |
| // <http://www.gnu.org/licenses/>. |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "rtl.h" |
| #include "memmodel.h" |
| #include "emit-rtl.h" |
| #include "tm_p.h" |
| #include "rtl-iter.h" |
| #include "tree-pass.h" |
| #include "insn-attr.h" |
| #include "pair-fusion.h" |
| |
| static constexpr HOST_WIDE_INT LDP_IMM_BITS = 7; |
| static constexpr HOST_WIDE_INT LDP_IMM_SIGN_BIT = (1 << (LDP_IMM_BITS - 1)); |
| static constexpr HOST_WIDE_INT LDP_MAX_IMM = LDP_IMM_SIGN_BIT - 1; |
| static constexpr HOST_WIDE_INT LDP_MIN_IMM = -LDP_MAX_IMM - 1; |
| |
| struct aarch64_pair_fusion : public pair_fusion |
| { |
| bool fpsimd_op_p (rtx reg_op, machine_mode mem_mode, |
| bool load_p) override final |
| { |
| // Before RA, we use the modes, noting that stores of constant zero |
| // operands use GPRs (even in non-integer modes). After RA, we use |
| // the hard register numbers. |
| return reload_completed |
| ? (REG_P (reg_op) && FP_REGNUM_P (REGNO (reg_op))) |
| : (GET_MODE_CLASS (mem_mode) != MODE_INT |
| && (load_p || !aarch64_const_zero_rtx_p (reg_op))); |
| } |
| |
| bool pair_mem_insn_p (rtx_insn *rti, bool &load_p) override final; |
| |
| bool pair_mem_ok_with_policy (rtx base_mem, bool load_p) override final |
| { |
| return aarch64_mem_ok_with_ldpstp_policy_model (base_mem, |
| load_p, |
| GET_MODE (base_mem)); |
| } |
| |
| bool pair_operand_mode_ok_p (machine_mode mode) override final; |
| |
| rtx gen_pair (rtx *pats, rtx writeback, bool load_p) override final; |
| |
| bool pair_reg_operand_ok_p (bool load_p, rtx reg_op, |
| machine_mode mode) override final |
| { |
| return (load_p |
| ? aarch64_ldp_reg_operand (reg_op, mode) |
| : aarch64_stp_reg_operand (reg_op, mode)); |
| } |
| |
| int pair_mem_alias_check_limit () override final |
| { |
| return aarch64_ldp_alias_check_limit; |
| } |
| |
| bool should_handle_writeback (writeback_type which) override final |
| { |
| if (which == writeback_type::ALL) |
| return aarch64_ldp_writeback > 1; |
| else |
| return aarch64_ldp_writeback; |
| } |
| |
| bool track_loads_p () override final |
| { |
| return aarch64_tune_params.ldp_policy_model |
| != AARCH64_LDP_STP_POLICY_NEVER; |
| } |
| |
| bool track_stores_p () override final |
| { |
| return aarch64_tune_params.stp_policy_model |
| != AARCH64_LDP_STP_POLICY_NEVER; |
| } |
| |
| bool pair_mem_in_range_p (HOST_WIDE_INT offset) override final |
| { |
| return (offset >= LDP_MIN_IMM && offset <= LDP_MAX_IMM); |
| } |
| |
| rtx gen_promote_writeback_pair (rtx wb_effect, rtx mem, rtx regs[2], |
| bool load_p) override final; |
| |
| rtx destructure_pair (rtx regs[2], rtx pattern, bool load_p) override final; |
| }; |
| |
| bool |
| aarch64_pair_fusion::pair_mem_insn_p (rtx_insn *rti, bool &load_p) |
| { |
| rtx pat = PATTERN (rti); |
| if (GET_CODE (pat) == PARALLEL |
| && XVECLEN (pat, 0) == 2) |
| { |
| const auto attr = get_attr_ldpstp (rti); |
| if (attr == LDPSTP_NONE) |
| return false; |
| |
| load_p = (attr == LDPSTP_LDP); |
| gcc_checking_assert (load_p || attr == LDPSTP_STP); |
| return true; |
| } |
| return false; |
| } |
| |
| rtx |
| aarch64_pair_fusion::gen_pair (rtx *pats, rtx writeback, bool load_p) |
| { |
| rtx pair_pat; |
| |
| if (writeback) |
| { |
| auto patvec = gen_rtvec (3, writeback, pats[0], pats[1]); |
| return gen_rtx_PARALLEL (VOIDmode, patvec); |
| } |
| else if (load_p) |
| return aarch64_gen_load_pair (XEXP (pats[0], 0), |
| XEXP (pats[1], 0), |
| XEXP (pats[0], 1)); |
| else |
| return aarch64_gen_store_pair (XEXP (pats[0], 0), |
| XEXP (pats[0], 1), |
| XEXP (pats[1], 1)); |
| return pair_pat; |
| } |
| |
| // Return true if we should consider forming ldp/stp insns from memory |
| // accesses with operand mode MODE at this stage in compilation. |
| bool |
| aarch64_pair_fusion::pair_operand_mode_ok_p (machine_mode mode) |
| { |
| if (!aarch64_ldpstp_operand_mode_p (mode)) |
| return false; |
| |
| // We don't pair up TImode accesses before RA because TImode is |
| // special in that it can be allocated to a pair of GPRs or a single |
| // FPR, and the RA is best placed to make that decision. |
| return reload_completed || mode != TImode; |
| } |
| |
| // Given a pair mode MODE, return a canonical mode to be used for a single |
| // operand of such a pair. Currently we only use this when promoting a |
| // non-writeback pair into a writeback pair, as it isn't otherwise clear |
| // which mode to use when storing a modeless CONST_INT. |
| static machine_mode |
| aarch64_operand_mode_for_pair_mode (machine_mode mode) |
| { |
| switch (mode) |
| { |
| case E_V2x4QImode: |
| return SImode; |
| case E_V2x8QImode: |
| return DImode; |
| case E_V2x16QImode: |
| return V16QImode; |
| default: |
| gcc_unreachable (); |
| } |
| } |
| |
| // Given a load pair insn in PATTERN, unpack the insn, storing |
| // the registers in REGS and returning the mem. |
| static rtx |
| aarch64_destructure_load_pair (rtx regs[2], rtx pattern) |
| { |
| rtx mem = NULL_RTX; |
| |
| for (int i = 0; i < 2; i++) |
| { |
| rtx pat = XVECEXP (pattern, 0, i); |
| regs[i] = XEXP (pat, 0); |
| rtx unspec = XEXP (pat, 1); |
| gcc_checking_assert (GET_CODE (unspec) == UNSPEC); |
| rtx this_mem = XVECEXP (unspec, 0, 0); |
| if (mem) |
| gcc_checking_assert (rtx_equal_p (mem, this_mem)); |
| else |
| { |
| gcc_checking_assert (MEM_P (this_mem)); |
| mem = this_mem; |
| } |
| } |
| |
| return mem; |
| } |
| |
| // Given a store pair insn in PATTERN, unpack the insn, storing |
| // the register operands in REGS, and returning the mem. |
| static rtx |
| aarch64_destructure_store_pair (rtx regs[2], rtx pattern) |
| { |
| rtx mem = XEXP (pattern, 0); |
| rtx unspec = XEXP (pattern, 1); |
| gcc_checking_assert (GET_CODE (unspec) == UNSPEC); |
| for (int i = 0; i < 2; i++) |
| regs[i] = XVECEXP (unspec, 0, i); |
| return mem; |
| } |
| |
| rtx |
| aarch64_pair_fusion::destructure_pair (rtx regs[2], rtx pattern, bool load_p) |
| { |
| if (load_p) |
| return aarch64_destructure_load_pair (regs, pattern); |
| else |
| return aarch64_destructure_store_pair (regs, pattern); |
| } |
| |
| rtx |
| aarch64_pair_fusion::gen_promote_writeback_pair (rtx wb_effect, rtx pair_mem, |
| rtx regs[2], |
| bool load_p) |
| { |
| auto op_mode = aarch64_operand_mode_for_pair_mode (GET_MODE (pair_mem)); |
| |
| machine_mode modes[2]; |
| for (int i = 0; i < 2; i++) |
| { |
| machine_mode mode = GET_MODE (regs[i]); |
| if (load_p) |
| gcc_checking_assert (mode != VOIDmode); |
| else if (mode == VOIDmode) |
| mode = op_mode; |
| |
| modes[i] = mode; |
| } |
| |
| const auto op_size = GET_MODE_SIZE (modes[0]); |
| gcc_checking_assert (known_eq (op_size, GET_MODE_SIZE (modes[1]))); |
| |
| rtx pats[2]; |
| for (int i = 0; i < 2; i++) |
| { |
| rtx mem = adjust_address_nv (pair_mem, modes[i], op_size * i); |
| pats[i] = load_p |
| ? gen_rtx_SET (regs[i], mem) |
| : gen_rtx_SET (mem, regs[i]); |
| } |
| |
| return gen_rtx_PARALLEL (VOIDmode, |
| gen_rtvec (3, wb_effect, pats[0], pats[1])); |
| } |
| |
| namespace { |
| |
| const pass_data pass_data_ldp_fusion = |
| { |
| RTL_PASS, /* type */ |
| "ldp_fusion", /* name */ |
| OPTGROUP_NONE, /* optinfo_flags */ |
| TV_NONE, /* tv_id */ |
| 0, /* properties_required */ |
| 0, /* properties_provided */ |
| 0, /* properties_destroyed */ |
| 0, /* todo_flags_start */ |
| TODO_df_finish, /* todo_flags_finish */ |
| }; |
| |
| class pass_ldp_fusion : public rtl_opt_pass |
| { |
| public: |
| pass_ldp_fusion (gcc::context *ctx) |
| : rtl_opt_pass (pass_data_ldp_fusion, ctx) |
| {} |
| |
| opt_pass *clone () override { return new pass_ldp_fusion (m_ctxt); } |
| |
| bool gate (function *) final override |
| { |
| if (!optimize || optimize_debug) |
| return false; |
| |
| if (reload_completed) |
| return flag_aarch64_late_ldp_fusion; |
| else |
| return flag_aarch64_early_ldp_fusion; |
| } |
| |
| unsigned execute (function *) final override |
| { |
| aarch64_pair_fusion pass; |
| pass.run (); |
| return 0; |
| } |
| }; |
| |
| } // anon namespace |
| |
| rtl_opt_pass * |
| make_pass_ldp_fusion (gcc::context *ctx) |
| { |
| return new pass_ldp_fusion (ctx); |
| } |