| /* Convert tree expression to rtl instructions, for GNU compiler. |
| Copyright (C) 1988-2021 Free Software Foundation, Inc. |
| |
| This file is part of GCC. |
| |
| GCC is free software; you can redistribute it and/or modify it under |
| the terms of the GNU General Public License as published by the Free |
| Software Foundation; either version 3, or (at your option) any later |
| version. |
| |
| GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
| WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| for more details. |
| |
| You should have received a copy of the GNU General Public License |
| along with GCC; see the file COPYING3. If not see |
| <http://www.gnu.org/licenses/>. */ |
| |
| #include "config.h" |
| #include "system.h" |
| #include "coretypes.h" |
| #include "backend.h" |
| #include "target.h" |
| #include "rtl.h" |
| #include "tree.h" |
| #include "gimple.h" |
| #include "predict.h" |
| #include "memmodel.h" |
| #include "tm_p.h" |
| #include "ssa.h" |
| #include "optabs.h" |
| #include "expmed.h" |
| #include "regs.h" |
| #include "emit-rtl.h" |
| #include "recog.h" |
| #include "cgraph.h" |
| #include "diagnostic.h" |
| #include "alias.h" |
| #include "fold-const.h" |
| #include "stor-layout.h" |
| #include "attribs.h" |
| #include "varasm.h" |
| #include "except.h" |
| #include "insn-attr.h" |
| #include "dojump.h" |
| #include "explow.h" |
| #include "calls.h" |
| #include "stmt.h" |
| /* Include expr.h after insn-config.h so we get HAVE_conditional_move. */ |
| #include "expr.h" |
| #include "optabs-tree.h" |
| #include "libfuncs.h" |
| #include "reload.h" |
| #include "langhooks.h" |
| #include "common/common-target.h" |
| #include "tree-dfa.h" |
| #include "tree-ssa-live.h" |
| #include "tree-outof-ssa.h" |
| #include "tree-ssa-address.h" |
| #include "builtins.h" |
| #include "ccmp.h" |
| #include "gimple-fold.h" |
| #include "rtx-vector-builder.h" |
| #include "tree-pretty-print.h" |
| #include "flags.h" |
| |
| |
| /* If this is nonzero, we do not bother generating VOLATILE |
| around volatile memory references, and we are willing to |
| output indirect addresses. If cse is to follow, we reject |
| indirect addresses so a useful potential cse is generated; |
| if it is used only once, instruction combination will produce |
| the same indirect address eventually. |
| |
| This is a file-scope object defined without an initializer, so it |
| starts out as zero; it is not declared static, so other translation |
| units can refer to it. */ |
| int cse_not_expected; |
| /* Forward declarations of functions with internal linkage (static). |
| Parameter names are omitted here. emit_single_push_insn is declared |
| only when the target defines PUSH_ROUNDING. */ |
| static bool block_move_libcall_safe_for_call_parm (void); |
| static bool emit_block_move_via_pattern (rtx, rtx, rtx, unsigned, unsigned, |
| HOST_WIDE_INT, unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, |
| unsigned HOST_WIDE_INT, bool); |
| static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned); |
| static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int); |
| static rtx_insn *compress_float_constant (rtx, rtx); |
| static rtx get_subtarget (rtx); |
| static void store_constructor (tree, rtx, int, poly_int64, bool); |
| static rtx store_field (rtx, poly_int64, poly_int64, poly_uint64, poly_uint64, |
| machine_mode, tree, alias_set_type, bool, bool); |
| |
| static unsigned HOST_WIDE_INT highest_pow2_factor_for_target (const_tree, const_tree); |
| |
| static int is_aligning_offset (const_tree, const_tree); |
| static rtx reduce_to_bit_field_precision (rtx, rtx, tree); |
| static rtx do_store_flag (sepops, rtx, machine_mode); |
| #ifdef PUSH_ROUNDING |
| static void emit_single_push_insn (machine_mode, rtx, tree); |
| #endif |
| static void do_tablejump (rtx, machine_mode, rtx, rtx, rtx, |
| profile_probability); |
| static rtx const_vector_from_tree (tree); |
| static tree tree_expr_size (const_tree); |
| static HOST_WIDE_INT int_expr_size (tree); |
| static void convert_mode_scalar (rtx, rtx, int); |
| |
| |
| /* This is run to set up which modes can be used |
| directly in memory and to initialize the block move optab. It is run |
| at the beginning of compilation and when the target is reinitialized. */ |
| |
| void |
| init_expr_target (void) |
| { |
| rtx pat; |
| int num_clobbers; |
| rtx mem, mem1; |
| rtx reg; |
| |
| /* Try indexing by frame ptr and try by stack ptr. |
| It is known that on the Convex the stack ptr isn't a valid index. |
| With luck, one or the other is valid on any machine. */ |
| mem = gen_rtx_MEM (word_mode, stack_pointer_rtx); |
| mem1 = gen_rtx_MEM (word_mode, frame_pointer_rtx); |
| |
| /* A scratch register we can modify in-place below to avoid |
| useless RTL allocations. */ |
| reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1); |
| |
| rtx_insn *insn = as_a<rtx_insn *> (rtx_alloc (INSN)); |
| pat = gen_rtx_SET (NULL_RTX, NULL_RTX); |
| PATTERN (insn) = pat; |
| |
| for (machine_mode mode = VOIDmode; (int) mode < NUM_MACHINE_MODES; |
| mode = (machine_mode) ((int) mode + 1)) |
| { |
| int regno; |
| |
| direct_load[(int) mode] = direct_store[(int) mode] = 0; |
| PUT_MODE (mem, mode); |
| PUT_MODE (mem1, mode); |
| |
| /* See if there is some register that can be used in this mode and |
| directly loaded or stored from memory. */ |
| |
| if (mode != VOIDmode && mode != BLKmode) |
| for (regno = 0; regno < FIRST_PSEUDO_REGISTER |
| && (direct_load[(int) mode] == 0 || direct_store[(int) mode] == 0); |
| regno++) |
| { |
| if (!targetm.hard_regno_mode_ok (regno, mode)) |
| continue; |
| |
| set_mode_and_regno (reg, mode, regno); |
| |
| SET_SRC (pat) = mem; |
| SET_DEST (pat) = reg; |
| if (recog (pat, insn, &num_clobbers) >= 0) |
| direct_load[(int) mode] = 1; |
| |
| SET_SRC (pat) = mem1; |
| SET_DEST (pat) = reg; |
| if (recog (pat, insn, &num_clobbers) >= 0) |
| direct_load[(int) mode] = 1; |
| |
| SET_SRC (pat) = reg; |
| SET_DEST (pat) = mem; |
| if (recog (pat, insn, &num_clobbers) >= 0) |
| direct_store[(int) mode] = 1; |
| |
| SET_SRC (pat) = reg; |
| SET_DEST (pat) = mem1; |
| if (recog (pat, insn, &num_clobbers) >= 0) |
| direct_store[(int) mode] = 1; |
| } |
| } |
| |
| mem = gen_rtx_MEM (VOIDmode, gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1)); |
| |
| opt_scalar_float_mode mode_iter; |
| FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT) |
| { |
| scalar_float_mode mode = mode_iter.require (); |
| scalar_float_mode srcmode; |
| FOR_EACH_MODE_UNTIL (srcmode, mode) |
| { |
| enum insn_code ic; |
| |
| ic = can_extend_p (mode, srcmode, 0); |
| if (ic == CODE_FOR_nothing) |
| continue; |
| |
| PUT_MODE (mem, srcmode); |
| |
| if (insn_operand_matches (ic, 1, mem)) |
| float_extend_from_mem[mode][srcmode] = true; |
| } |
| } |
| } |
| |
| /* This is run at the start of compiling a function. */ |
| |
| void |
| init_expr (void) |
| { |
| memset (&crtl->expr, 0, sizeof (crtl->expr)); |
| } |
| |
| /* Copy data from FROM to TO, where the machine modes are not the same. |
| Both modes may be integer, or both may be floating, or both may be |
| fixed-point. |
| UNSIGNEDP should be nonzero if FROM is an unsigned type. |
| This causes zero-extension instead of sign-extension. */ |
| |
| void |
| convert_move (rtx to, rtx from, int unsignedp) |
| { |
| machine_mode to_mode = GET_MODE (to); |
| machine_mode from_mode = GET_MODE (from); |
| |
| gcc_assert (to_mode != BLKmode); |
| gcc_assert (from_mode != BLKmode); |
| |
| /* If the source and destination are already the same, then there's |
| nothing to do. */ |
| if (to == from) |
| return; |
| |
| /* If FROM is a SUBREG that indicates that we have already done at least |
| the required extension, strip it. We don't handle such SUBREGs as |
| TO here. */ |
| |
| scalar_int_mode to_int_mode; |
| if (GET_CODE (from) == SUBREG |
| && SUBREG_PROMOTED_VAR_P (from) |
| && is_a <scalar_int_mode> (to_mode, &to_int_mode) |
| && (GET_MODE_PRECISION (subreg_promoted_mode (from)) |
| >= GET_MODE_PRECISION (to_int_mode)) |
| && SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp)) |
| { |
| scalar_int_mode int_orig_mode; |
| scalar_int_mode int_inner_mode; |
| machine_mode orig_mode = GET_MODE (from); |
| |
| from = gen_lowpart (to_int_mode, SUBREG_REG (from)); |
| from_mode = to_int_mode; |
| |
| /* Preserve SUBREG_PROMOTED_VAR_P if the new mode is wider than |
| the original mode, but narrower than the inner mode. */ |
| if (GET_CODE (from) == SUBREG |
| && is_a <scalar_int_mode> (orig_mode, &int_orig_mode) |
| && GET_MODE_PRECISION (to_int_mode) |
| > GET_MODE_PRECISION (int_orig_mode) |
| && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (from)), |
| &int_inner_mode) |
| && GET_MODE_PRECISION (int_inner_mode) |
| > GET_MODE_PRECISION (to_int_mode)) |
| { |
| SUBREG_PROMOTED_VAR_P (from) = 1; |
| SUBREG_PROMOTED_SET (from, unsignedp); |
| } |
| } |
| |
| gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to)); |
| |
| if (to_mode == from_mode |
| || (from_mode == VOIDmode && CONSTANT_P (from))) |
| { |
| emit_move_insn (to, from); |
| return; |
| } |
| |
| if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode)) |
| { |
| if (GET_MODE_UNIT_PRECISION (to_mode) |
| > GET_MODE_UNIT_PRECISION (from_mode)) |
| { |
| optab op = unsignedp ? zext_optab : sext_optab; |
| insn_code icode = convert_optab_handler (op, to_mode, from_mode); |
| if (icode != CODE_FOR_nothing) |
| { |
| emit_unop_insn (icode, to, from, |
| unsignedp ? ZERO_EXTEND : SIGN_EXTEND); |
| return; |
| } |
| } |
| |
| if (GET_MODE_UNIT_PRECISION (to_mode) |
| < GET_MODE_UNIT_PRECISION (from_mode)) |
| { |
| insn_code icode = convert_optab_handler (trunc_optab, |
| to_mode, from_mode); |
| if (icode != CODE_FOR_nothing) |
| { |
| emit_unop_insn (icode, to, from, TRUNCATE); |
| return; |
| } |
| } |
| |
| gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode), |
| GET_MODE_BITSIZE (to_mode))); |
| |
| if (VECTOR_MODE_P (to_mode)) |
| from = simplify_gen_subreg (to_mode, from, GET_MODE (from), 0); |
| else |
| to = simplify_gen_subreg (from_mode, to, GET_MODE (to), 0); |
| |
| emit_move_insn (to, from); |
| return; |
| } |
| |
| if (GET_CODE (to) == CONCAT && GET_CODE (from) == CONCAT) |
| { |
| convert_move (XEXP (to, 0), XEXP (from, 0), unsignedp); |
| convert_move (XEXP (to, 1), XEXP (from, 1), unsignedp); |
| return; |
| } |
| |
| convert_mode_scalar (to, from, unsignedp); |
| } |
| |
| /* Like convert_move, but deals only with scalar modes. |
| |
| TO is the destination and FROM the source; both are converted with |
| as_a <scalar_mode>, and they must be either both floating-point or |
| both not (asserted below). UNSIGNEDP nonzero selects zero-extension |
| rather than sign-extension when widening integers; a negative |
| UNSIGNEDP makes the equivalence code passed to emit_unop_insn UNKNOWN. |
| |
| The cases are tried in this order: floating-point conversions; a |
| direct target conversion insn; partial-integer modes; fixed-point |
| modes; widening beyond a word; truncating from multi-word to a word |
| or less; no-op truncation; extension within a word; special truncate |
| insns; and finally truncation through a register. Any combination |
| reaching the end of the function is a compiler bug (gcc_unreachable). */ |
| |
| static void |
| convert_mode_scalar (rtx to, rtx from, int unsignedp) |
| { |
| /* Both modes should be scalar types. */ |
| scalar_mode from_mode = as_a <scalar_mode> (GET_MODE (from)); |
| scalar_mode to_mode = as_a <scalar_mode> (GET_MODE (to)); |
| bool to_real = SCALAR_FLOAT_MODE_P (to_mode); |
| bool from_real = SCALAR_FLOAT_MODE_P (from_mode); |
| enum insn_code code; |
| rtx libcall; |
| |
| gcc_assert (to_real == from_real); |
| |
| /* rtx code for making an equivalent value. It is UNKNOWN when |
| UNSIGNEDP is negative. */ |
| enum rtx_code equiv_code = (unsignedp < 0 ? UNKNOWN |
| : (unsignedp ? ZERO_EXTEND : SIGN_EXTEND)); |
| /* Floating-point conversions, binary or decimal. This branch always |
| returns. */ |
| if (to_real) |
| { |
| rtx value; |
| rtx_insn *insns; |
| convert_optab tab; |
| |
| gcc_assert ((GET_MODE_PRECISION (from_mode) |
| != GET_MODE_PRECISION (to_mode)) |
| || (DECIMAL_FLOAT_MODE_P (from_mode) |
| != DECIMAL_FLOAT_MODE_P (to_mode))); |
| |
| if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode)) |
| /* Conversion between decimal float and binary float, same size. |
| Decimal-to-binary uses trunc_optab and binary-to-decimal uses |
| sext_optab. */ |
| tab = DECIMAL_FLOAT_MODE_P (from_mode) ? trunc_optab : sext_optab; |
| else if (GET_MODE_PRECISION (from_mode) < GET_MODE_PRECISION (to_mode)) |
| tab = sext_optab; |
| else |
| tab = trunc_optab; |
| |
| /* Try converting directly if the insn is supported. */ |
| |
| code = convert_optab_handler (tab, to_mode, from_mode); |
| if (code != CODE_FOR_nothing) |
| { |
| emit_unop_insn (code, to, from, |
| tab == sext_optab ? FLOAT_EXTEND : FLOAT_TRUNCATE); |
| return; |
| } |
| |
| /* Otherwise use a libcall. */ |
| libcall = convert_optab_libfunc (tab, to_mode, from_mode); |
| |
| /* Is this conversion implemented yet? */ |
| gcc_assert (libcall); |
| |
| start_sequence (); |
| value = emit_library_call_value (libcall, NULL_RTX, LCT_CONST, to_mode, |
| from, from_mode); |
| insns = get_insns (); |
| end_sequence (); |
| emit_libcall_block (insns, to, value, |
| tab == trunc_optab ? gen_rtx_FLOAT_TRUNCATE (to_mode, |
| from) |
| : gen_rtx_FLOAT_EXTEND (to_mode, from)); |
| return; |
| } |
| |
| /* Handle pointer conversion. */ /* SPEE 900220. */ |
| /* If the target has a converter from FROM_MODE to TO_MODE, use it. |
| Truncation is chosen when FROM_MODE has the greater precision; |
| otherwise UNSIGNEDP picks zero- or sign-extension. */ |
| { |
| convert_optab ctab; |
| |
| if (GET_MODE_PRECISION (from_mode) > GET_MODE_PRECISION (to_mode)) |
| ctab = trunc_optab; |
| else if (unsignedp) |
| ctab = zext_optab; |
| else |
| ctab = sext_optab; |
| |
| if (convert_optab_handler (ctab, to_mode, from_mode) |
| != CODE_FOR_nothing) |
| { |
| emit_unop_insn (convert_optab_handler (ctab, to_mode, from_mode), |
| to, from, UNKNOWN); |
| return; |
| } |
| } |
| |
| /* Targets are expected to provide conversion insns between PxImode and |
| xImode for all MODE_PARTIAL_INT modes they use, but no others. */ |
| if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT) |
| { |
| scalar_int_mode full_mode |
| = smallest_int_mode_for_size (GET_MODE_BITSIZE (to_mode)); |
| |
| gcc_assert (convert_optab_handler (trunc_optab, to_mode, full_mode) |
| != CODE_FOR_nothing); |
| |
| if (full_mode != from_mode) |
| from = convert_to_mode (full_mode, from, unsignedp); |
| emit_unop_insn (convert_optab_handler (trunc_optab, to_mode, full_mode), |
| to, from, UNKNOWN); |
| return; |
| } |
| if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT) |
| { |
| rtx new_from; |
| scalar_int_mode full_mode |
| = smallest_int_mode_for_size (GET_MODE_BITSIZE (from_mode)); |
| convert_optab ctab = unsignedp ? zext_optab : sext_optab; |
| enum insn_code icode; |
| |
| icode = convert_optab_handler (ctab, full_mode, from_mode); |
| gcc_assert (icode != CODE_FOR_nothing); |
| |
| if (to_mode == full_mode) |
| { |
| emit_unop_insn (icode, to, from, UNKNOWN); |
| return; |
| } |
| |
| new_from = gen_reg_rtx (full_mode); |
| emit_unop_insn (icode, new_from, from, UNKNOWN); |
| |
| /* else proceed to integer conversions below, now converting from |
| the full-mode temporary. */ |
| from_mode = full_mode; |
| from = new_from; |
| } |
| |
| /* Make sure both are fixed-point modes or both are not. */ |
| gcc_assert (ALL_SCALAR_FIXED_POINT_MODE_P (from_mode) == |
| ALL_SCALAR_FIXED_POINT_MODE_P (to_mode)); |
| if (ALL_SCALAR_FIXED_POINT_MODE_P (from_mode)) |
| { |
| /* If we widen from_mode to to_mode and they are in the same class, |
| we won't saturate the result. |
| Otherwise, always saturate the result to play safe. */ |
| if (GET_MODE_CLASS (from_mode) == GET_MODE_CLASS (to_mode) |
| && GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode)) |
| expand_fixed_convert (to, from, 0, 0); |
| else |
| expand_fixed_convert (to, from, 0, 1); |
| return; |
| } |
| |
| /* Now both modes are integers. */ |
| |
| /* Handle expanding beyond a word, i.e. widening to a mode whose |
| precision exceeds BITS_PER_WORD. */ |
| if (GET_MODE_PRECISION (from_mode) < GET_MODE_PRECISION (to_mode) |
| && GET_MODE_PRECISION (to_mode) > BITS_PER_WORD) |
| { |
| rtx_insn *insns; |
| rtx lowpart; |
| rtx fill_value; |
| rtx lowfrom; |
| int i; |
| scalar_mode lowpart_mode; |
| int nwords = CEIL (GET_MODE_SIZE (to_mode), UNITS_PER_WORD); |
| |
| /* Try converting directly if the insn is supported. */ |
| if ((code = can_extend_p (to_mode, from_mode, unsignedp)) |
| != CODE_FOR_nothing) |
| { |
| /* If FROM is a SUBREG, put it into a register. Do this |
| so that we always generate the same set of insns for |
| better cse'ing; if an intermediate assignment occurred, |
| we won't be doing the operation directly on the SUBREG. */ |
| if (optimize > 0 && GET_CODE (from) == SUBREG) |
| from = force_reg (from_mode, from); |
| emit_unop_insn (code, to, from, equiv_code); |
| return; |
| } |
| /* Next, try converting via full word. */ |
| else if (GET_MODE_PRECISION (from_mode) < BITS_PER_WORD |
| && ((code = can_extend_p (to_mode, word_mode, unsignedp)) |
| != CODE_FOR_nothing)) |
| { |
| rtx word_to = gen_reg_rtx (word_mode); |
| if (REG_P (to)) |
| { |
| if (reg_overlap_mentioned_p (to, from)) |
| from = force_reg (from_mode, from); |
| emit_clobber (to); |
| } |
| convert_move (word_to, from, unsignedp); |
| emit_unop_insn (code, to, word_to, equiv_code); |
| return; |
| } |
| |
| /* No special multiword conversion insn; do it by hand. The insns |
| are collected in a sequence and emitted together at the end. */ |
| start_sequence (); |
| |
| /* Since we will turn this into a no conflict block, we must ensure |
| the source does not overlap the target so force it into an isolated |
| register when maybe so. Likewise for any MEM input, since the |
| conversion sequence might require several references to it and we |
| must ensure we're getting the same value every time. */ |
| |
| if (MEM_P (from) || reg_overlap_mentioned_p (to, from)) |
| from = force_reg (from_mode, from); |
| |
| /* Get a copy of FROM widened to a word, if necessary. */ |
| if (GET_MODE_PRECISION (from_mode) < BITS_PER_WORD) |
| lowpart_mode = word_mode; |
| else |
| lowpart_mode = from_mode; |
| |
| lowfrom = convert_to_mode (lowpart_mode, from, unsignedp); |
| |
| lowpart = gen_lowpart (lowpart_mode, to); |
| emit_move_insn (lowpart, lowfrom); |
| |
| /* Compute the value to put in each remaining word: zero for an |
| unsigned source, otherwise the result of the LT comparison of |
| LOWFROM against zero. */ |
| if (unsignedp) |
| fill_value = const0_rtx; |
| else |
| fill_value = emit_store_flag_force (gen_reg_rtx (word_mode), |
| LT, lowfrom, const0_rtx, |
| lowpart_mode, 0, -1); |
| |
| /* Fill the remaining words, starting just past the words covered by |
| LOWPART_MODE. */ |
| for (i = GET_MODE_SIZE (lowpart_mode) / UNITS_PER_WORD; i < nwords; i++) |
| { |
| int index = (WORDS_BIG_ENDIAN ? nwords - i - 1 : i); |
| rtx subword = operand_subword (to, index, 1, to_mode); |
| |
| gcc_assert (subword); |
| |
| if (fill_value != subword) |
| emit_move_insn (subword, fill_value); |
| } |
| |
| insns = get_insns (); |
| end_sequence (); |
| |
| emit_insn (insns); |
| return; |
| } |
| |
| /* Truncating multi-word to a word or less. The low word of FROM is |
| taken with gen_lowpart and converted by a recursive convert_move |
| call with UNSIGNEDP of 0. */ |
| if (GET_MODE_PRECISION (from_mode) > BITS_PER_WORD |
| && GET_MODE_PRECISION (to_mode) <= BITS_PER_WORD) |
| { |
| if (!((MEM_P (from) |
| && ! MEM_VOLATILE_P (from) |
| && direct_load[(int) to_mode] |
| && ! mode_dependent_address_p (XEXP (from, 0), |
| MEM_ADDR_SPACE (from))) |
| || REG_P (from) |
| || GET_CODE (from) == SUBREG)) |
| from = force_reg (from_mode, from); |
| convert_move (to, gen_lowpart (word_mode, from), 0); |
| return; |
| } |
| |
| /* Now follow all the conversions between integers |
| no more than a word long. */ |
| |
| /* For truncation, usually we can just refer to FROM in a narrower mode. */ |
| if (GET_MODE_BITSIZE (to_mode) < GET_MODE_BITSIZE (from_mode) |
| && TRULY_NOOP_TRUNCATION_MODES_P (to_mode, from_mode)) |
| { |
| if (!((MEM_P (from) |
| && ! MEM_VOLATILE_P (from) |
| && direct_load[(int) to_mode] |
| && ! mode_dependent_address_p (XEXP (from, 0), |
| MEM_ADDR_SPACE (from))) |
| || REG_P (from) |
| || GET_CODE (from) == SUBREG)) |
| from = force_reg (from_mode, from); |
| if (REG_P (from) && REGNO (from) < FIRST_PSEUDO_REGISTER |
| && !targetm.hard_regno_mode_ok (REGNO (from), to_mode)) |
| from = copy_to_reg (from); |
| emit_move_insn (to, gen_lowpart (to_mode, from)); |
| return; |
| } |
| |
| /* Handle extension. */ |
| if (GET_MODE_PRECISION (to_mode) > GET_MODE_PRECISION (from_mode)) |
| { |
| /* Convert directly if that works. */ |
| if ((code = can_extend_p (to_mode, from_mode, unsignedp)) |
| != CODE_FOR_nothing) |
| { |
| emit_unop_insn (code, to, from, equiv_code); |
| return; |
| } |
| else |
| { |
| rtx tmp; |
| int shift_amount; |
| |
| /* Search for a mode to convert via. An intermediate mode is |
| usable when FROM_MODE can be extended to it and it can then |
| either be extended to TO_MODE or truncated to TO_MODE as a |
| no-op. */ |
| opt_scalar_mode intermediate_iter; |
| FOR_EACH_MODE_FROM (intermediate_iter, from_mode) |
| { |
| scalar_mode intermediate = intermediate_iter.require (); |
| if (((can_extend_p (to_mode, intermediate, unsignedp) |
| != CODE_FOR_nothing) |
| || (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (intermediate) |
| && TRULY_NOOP_TRUNCATION_MODES_P (to_mode, |
| intermediate))) |
| && (can_extend_p (intermediate, from_mode, unsignedp) |
| != CODE_FOR_nothing)) |
| { |
| convert_move (to, convert_to_mode (intermediate, from, |
| unsignedp), unsignedp); |
| return; |
| } |
| } |
| |
| /* No suitable intermediate mode. |
| Generate what we need with shifts: shift left by the difference |
| in precision, then shift right by the same amount. */ |
| shift_amount = (GET_MODE_PRECISION (to_mode) |
| - GET_MODE_PRECISION (from_mode)); |
| from = gen_lowpart (to_mode, force_reg (from_mode, from)); |
| tmp = expand_shift (LSHIFT_EXPR, to_mode, from, shift_amount, |
| to, unsignedp); |
| tmp = expand_shift (RSHIFT_EXPR, to_mode, tmp, shift_amount, |
| to, unsignedp); |
| if (tmp != to) |
| emit_move_insn (to, tmp); |
| return; |
| } |
| } |
| |
| /* Support special truncate insns for certain modes. */ |
| if (convert_optab_handler (trunc_optab, to_mode, |
| from_mode) != CODE_FOR_nothing) |
| { |
| emit_unop_insn (convert_optab_handler (trunc_optab, to_mode, from_mode), |
| to, from, UNKNOWN); |
| return; |
| } |
| |
| /* Handle truncation of volatile memrefs, and so on; |
| the things that couldn't be truncated directly, |
| and for which there was no special instruction. |
| |
| ??? Code above formerly short-circuited this, for most integer |
| mode pairs, with a force_reg in from_mode followed by a recursive |
| call to this routine. Appears always to have been wrong. */ |
| if (GET_MODE_PRECISION (to_mode) < GET_MODE_PRECISION (from_mode)) |
| { |
| rtx temp = force_reg (to_mode, gen_lowpart (to_mode, from)); |
| emit_move_insn (to, temp); |
| return; |
| } |
| |
| /* Mode combination is not recognized. */ |
| gcc_unreachable (); |
| } |
| |
| /* Return an rtx for a value that would result |
| from converting X to mode MODE. |
| Both X and MODE may be floating, or both integer. |
| UNSIGNEDP is nonzero if X is an unsigned value. |
| This can be done by referring to a part of X in place |
| or by copying to a new temporary with conversion. */ |
| |
| rtx |
| convert_to_mode (machine_mode mode, rtx x, int unsignedp) |
| { |
| return convert_modes (mode, VOIDmode, x, unsignedp); |
| } |
| |
| /* Return an rtx for a value that would result |
| from converting X from mode OLDMODE to mode MODE. |
| Both modes may be floating, or both integer. |
| UNSIGNEDP is nonzero if X is an unsigned value. |
| |
| This can be done by referring to a part of X in place |
| or by copying to a new temporary with conversion. |
| |
| You can give VOIDmode for OLDMODE, if you are sure X has a nonvoid mode. */ |
| |
| rtx |
| convert_modes (machine_mode mode, machine_mode oldmode, rtx x, int unsignedp) |
| { |
| rtx temp; |
| scalar_int_mode int_mode; |
| |
| /* If FROM is a SUBREG that indicates that we have already done at least |
| the required extension, strip it. */ |
| |
| if (GET_CODE (x) == SUBREG |
| && SUBREG_PROMOTED_VAR_P (x) |
| && is_a <scalar_int_mode> (mode, &int_mode) |
| && (GET_MODE_PRECISION (subreg_promoted_mode (x)) |
| >= GET_MODE_PRECISION (int_mode)) |
| && SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp)) |
| { |
| scalar_int_mode int_orig_mode; |
| scalar_int_mode int_inner_mode; |
| machine_mode orig_mode = GET_MODE (x); |
| x = gen_lowpart (int_mode, SUBREG_REG (x)); |
| |
| /* Preserve SUBREG_PROMOTED_VAR_P if the new mode is wider than |
| the original mode, but narrower than the inner mode. */ |
| if (GET_CODE (x) == SUBREG |
| && is_a <scalar_int_mode> (orig_mode, &int_orig_mode) |
| && GET_MODE_PRECISION (int_mode) |
| > GET_MODE_PRECISION (int_orig_mode) |
| && is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (x)), |
| &int_inner_mode) |
| && GET_MODE_PRECISION (int_inner_mode) |
| > GET_MODE_PRECISION (int_mode)) |
| { |
| SUBREG_PROMOTED_VAR_P (x) = 1; |
| SUBREG_PROMOTED_SET (x, unsignedp); |
| } |
| } |
| |
| if (GET_MODE (x) != VOIDmode) |
| oldmode = GET_MODE (x); |
| |
| if (mode == oldmode) |
| return x; |
| |
| if (CONST_SCALAR_INT_P (x) |
| && is_a <scalar_int_mode> (mode, &int_mode)) |
| { |
| /* If the caller did not tell us the old mode, then there is not |
| much to do with respect to canonicalization. We have to |
| assume that all the bits are significant. */ |
| if (!is_a <scalar_int_mode> (oldmode)) |
| oldmode = MAX_MODE_INT; |
| wide_int w = wide_int::from (rtx_mode_t (x, oldmode), |
| GET_MODE_PRECISION (int_mode), |
| unsignedp ? UNSIGNED : SIGNED); |
| return immed_wide_int_const (w, int_mode); |
| } |
| |
| /* We can do this with a gen_lowpart if both desired and current modes |
| are integer, and this is either a constant integer, a register, or a |
| non-volatile MEM. */ |
| scalar_int_mode int_oldmode; |
| if (is_int_mode (mode, &int_mode) |
| && is_int_mode (oldmode, &int_oldmode) |
| && GET_MODE_PRECISION (int_mode) <= GET_MODE_PRECISION (int_oldmode) |
| && ((MEM_P (x) && !MEM_VOLATILE_P (x) && direct_load[(int) int_mode]) |
| || CONST_POLY_INT_P (x) |
| || (REG_P (x) |
| && (!HARD_REGISTER_P (x) |
| || targetm.hard_regno_mode_ok (REGNO (x), int_mode)) |
| && TRULY_NOOP_TRUNCATION_MODES_P (int_mode, GET_MODE (x))))) |
| return gen_lowpart (int_mode, x); |
| |
| /* Converting from integer constant into mode is always equivalent to an |
| subreg operation. */ |
| if (VECTOR_MODE_P (mode) && GET_MODE (x) == VOIDmode) |
| { |
| gcc_assert (known_eq (GET_MODE_BITSIZE (mode), |
| GET_MODE_BITSIZE (oldmode))); |
| return simplify_gen_subreg (mode, x, oldmode, 0); |
| } |
| |
| temp = gen_reg_rtx (mode); |
| convert_move (temp, x, unsignedp); |
| return temp; |
| } |
| |
| /* Return the largest alignment we can use for doing a move (or store) |
| of MAX_PIECES. ALIGN is the largest alignment we could use. */ |
| |
| static unsigned int |
| alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align) |
| { |
| scalar_int_mode tmode |
| = int_mode_for_size (max_pieces * BITS_PER_UNIT, 0).require (); |
| |
| if (align >= GET_MODE_ALIGNMENT (tmode)) |
| align = GET_MODE_ALIGNMENT (tmode); |
| else |
| { |
| scalar_int_mode xmode = NARROWEST_INT_MODE; |
| opt_scalar_int_mode mode_iter; |
| FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) |
| { |
| tmode = mode_iter.require (); |
| if (GET_MODE_SIZE (tmode) > max_pieces |
| || targetm.slow_unaligned_access (tmode, align)) |
| break; |
| xmode = tmode; |
| } |
| |
| align = MAX (align, GET_MODE_ALIGNMENT (xmode)); |
| } |
| |
| return align; |
| } |
| |
| /* Return the widest QI vector, if QI_MODE is true, or integer mode |
| that is narrower than SIZE bytes. */ |
| |
| static fixed_size_mode |
| widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector) |
| { |
| fixed_size_mode result = NARROWEST_INT_MODE; |
| |
| gcc_checking_assert (size > 1); |
| |
| /* Use QI vector only if size is wider than a WORD. */ |
| if (qi_vector && size > UNITS_PER_WORD) |
| { |
| machine_mode mode; |
| fixed_size_mode candidate; |
| FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT) |
| if (is_a<fixed_size_mode> (mode, &candidate) |
| && GET_MODE_INNER (candidate) == QImode) |
| { |
| if (GET_MODE_SIZE (candidate) >= size) |
| break; |
| if (optab_handler (vec_duplicate_optab, candidate) |
| != CODE_FOR_nothing) |
| result = candidate; |
| } |
| |
| if (result != NARROWEST_INT_MODE) |
| return result; |
| } |
| |
| opt_scalar_int_mode tmode; |
| FOR_EACH_MODE_IN_CLASS (tmode, MODE_INT) |
| if (GET_MODE_SIZE (tmode.require ()) < size) |
| result = tmode.require (); |
| |
| return result; |
| } |
| |
| /* Determine whether an operation OP on LEN bytes with alignment ALIGN can |
| and should be performed piecewise. */ |
| |
| static bool |
| can_do_by_pieces (unsigned HOST_WIDE_INT len, unsigned int align, |
| enum by_pieces_operation op) |
| { |
| return targetm.use_by_pieces_infrastructure_p (len, align, op, |
| optimize_insn_for_speed_p ()); |
| } |
| |
| /* Determine whether the LEN bytes can be moved by using several move |
| instructions. Return nonzero if a call to move_by_pieces should |
| succeed. */ |
| |
| bool |
| can_move_by_pieces (unsigned HOST_WIDE_INT len, unsigned int align) |
| { |
| return can_do_by_pieces (len, align, MOVE_BY_PIECES); |
| } |
| |
| /* Return number of insns required to perform operation OP by pieces |
| for L bytes. ALIGN (in bits) is maximum alignment we can assume. */ |
| |
| unsigned HOST_WIDE_INT |
| by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align, |
| unsigned int max_size, by_pieces_operation op) |
| { |
| unsigned HOST_WIDE_INT n_insns = 0; |
| fixed_size_mode mode; |
| |
| if (targetm.overlap_op_by_pieces_p () && op != COMPARE_BY_PIECES) |
| { |
| /* NB: Round up L and ALIGN to the widest integer mode for |
| MAX_SIZE. */ |
| mode = widest_fixed_size_mode_for_size (max_size, |
| op == SET_BY_PIECES); |
| if (optab_handler (mov_optab, mode) != CODE_FOR_nothing) |
| { |
| unsigned HOST_WIDE_INT up = ROUND_UP (l, GET_MODE_SIZE (mode)); |
| if (up > l) |
| l = up; |
| align = GET_MODE_ALIGNMENT (mode); |
| } |
| } |
| |
| align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align); |
| |
| while (max_size > 1 && l > 0) |
| { |
| mode = widest_fixed_size_mode_for_size (max_size, |
| op == SET_BY_PIECES); |
| enum insn_code icode; |
| |
| unsigned int modesize = GET_MODE_SIZE (mode); |
| |
| icode = optab_handler (mov_optab, mode); |
| if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode)) |
| { |
| unsigned HOST_WIDE_INT n_pieces = l / modesize; |
| l %= modesize; |
| switch (op) |
| { |
| default: |
| n_insns += n_pieces; |
| break; |
| |
| case COMPARE_BY_PIECES: |
| int batch = targetm.compare_by_pieces_branch_ratio (mode); |
| int batch_ops = 4 * batch - 1; |
| unsigned HOST_WIDE_INT full = n_pieces / batch; |
| n_insns += full * batch_ops; |
| if (n_pieces % batch != 0) |
| n_insns++; |
| break; |
| |
| } |
| } |
| max_size = modesize; |
| } |
| |
| gcc_assert (!l); |
| return n_insns; |
| } |
| |
| /* Used when performing piecewise block operations, holds information |
| about one of the memory objects involved. The member functions |
| can be used to generate code for loading from the object and |
| updating the address when iterating. */ |
| |
| class pieces_addr |
| { |
| /* The object being referenced, a MEM. Can be NULL_RTX to indicate |
| stack pushes. */ |
| rtx m_obj; |
| /* The address of the object. Can differ from that seen in the |
| MEM rtx if we copied the address to a register. */ |
| rtx m_addr; |
| /* Nonzero if the address on the object has an autoincrement already, |
| signifies whether that was an increment or decrement. */ |
| signed char m_addr_inc; |
| /* Nonzero if we intend to use autoinc without the address already |
| having autoinc form. We will insert add insns around each memory |
| reference, expecting later passes to form autoinc addressing modes. |
| The only supported options are predecrement and postincrement. */ |
| signed char m_explicit_inc; |
| /* True if we have either of the two possible cases of using |
| autoincrement. */ |
| bool m_auto; |
| /* True if this is an address to be used for load operations rather |
| than stores. */ |
| bool m_is_load; |
| |
| /* Optionally, a function to obtain constants for any given offset into |
| the objects, and data associated with it. */ |
| by_pieces_constfn m_constfn; |
| void *m_cfndata; |
| public: |
| pieces_addr (rtx, bool, by_pieces_constfn, void *); |
| rtx adjust (fixed_size_mode, HOST_WIDE_INT, by_pieces_prev * = nullptr); |
| void increment_address (HOST_WIDE_INT); |
| void maybe_predec (HOST_WIDE_INT); |
| void maybe_postinc (HOST_WIDE_INT); |
| void decide_autoinc (machine_mode, bool, HOST_WIDE_INT); |
| int get_addr_inc () |
| { |
| return m_addr_inc; |
| } |
| }; |
| |
| /* Initialize a pieces_addr structure from an object OBJ. IS_LOAD is |
| true if the operation to be performed on this object is a load |
| rather than a store. For stores, OBJ can be NULL, in which case we |
| assume the operation is a stack push. For loads, the optional |
| CONSTFN and its associated CFNDATA can be used in place of the |
| memory load. */ |
| |
| pieces_addr::pieces_addr (rtx obj, bool is_load, by_pieces_constfn constfn, |
| void *cfndata) |
| : m_obj (obj), m_is_load (is_load), m_constfn (constfn), m_cfndata (cfndata) |
| { |
| m_addr_inc = 0; |
| m_auto = false; |
| if (obj) |
| { |
| rtx addr = XEXP (obj, 0); |
| rtx_code code = GET_CODE (addr); |
| m_addr = addr; |
| bool dec = code == PRE_DEC || code == POST_DEC; |
| bool inc = code == PRE_INC || code == POST_INC; |
| m_auto = inc || dec; |
| if (m_auto) |
| m_addr_inc = dec ? -1 : 1; |
| |
| /* While we have always looked for these codes here, the code |
| implementing the memory operation has never handled them. |
| Support could be added later if necessary or beneficial. */ |
| gcc_assert (code != PRE_INC && code != POST_DEC); |
| } |
| else |
| { |
| m_addr = NULL_RTX; |
| if (!is_load) |
| { |
| m_auto = true; |
| if (STACK_GROWS_DOWNWARD) |
| m_addr_inc = -1; |
| else |
| m_addr_inc = 1; |
| } |
| else |
| gcc_assert (constfn != NULL); |
| } |
| m_explicit_inc = 0; |
| if (constfn) |
| gcc_assert (is_load); |
| } |
| |
| /* Decide whether to use autoinc for an address involved in a memory op. |
| MODE is the mode of the accesses, REVERSE is true if we've decided to |
| perform the operation starting from the end, and LEN is the length of |
| the operation. Don't override an earlier decision to set m_auto. */ |
| |
| void |
| pieces_addr::decide_autoinc (machine_mode ARG_UNUSED (mode), bool reverse, |
| HOST_WIDE_INT len) |
| { |
| if (m_auto || m_obj == NULL_RTX) |
| return; |
| |
| bool use_predec = (m_is_load |
| ? USE_LOAD_PRE_DECREMENT (mode) |
| : USE_STORE_PRE_DECREMENT (mode)); |
| bool use_postinc = (m_is_load |
| ? USE_LOAD_POST_INCREMENT (mode) |
| : USE_STORE_POST_INCREMENT (mode)); |
| machine_mode addr_mode = get_address_mode (m_obj); |
| |
| if (use_predec && reverse) |
| { |
| m_addr = copy_to_mode_reg (addr_mode, |
| plus_constant (addr_mode, |
| m_addr, len)); |
| m_auto = true; |
| m_explicit_inc = -1; |
| } |
| else if (use_postinc && !reverse) |
| { |
| m_addr = copy_to_mode_reg (addr_mode, m_addr); |
| m_auto = true; |
| m_explicit_inc = 1; |
| } |
| else if (CONSTANT_P (m_addr)) |
| m_addr = copy_to_mode_reg (addr_mode, m_addr); |
| } |
| |
| /* Adjust the address to refer to the data at OFFSET in MODE. If we |
| are using autoincrement for this address, we don't add the offset, |
| but we still modify the MEM's properties. */ |
| |
| rtx |
| pieces_addr::adjust (fixed_size_mode mode, HOST_WIDE_INT offset, |
| by_pieces_prev *prev) |
| { |
| if (m_constfn) |
| /* Pass the previous data to m_constfn. */ |
| return m_constfn (m_cfndata, prev, offset, mode); |
| if (m_obj == NULL_RTX) |
| return NULL_RTX; |
| if (m_auto) |
| return adjust_automodify_address (m_obj, mode, m_addr, offset); |
| else |
| return adjust_address (m_obj, mode, offset); |
| } |
| |
| /* Emit an add instruction to increment the address by SIZE. */ |
| |
| void |
| pieces_addr::increment_address (HOST_WIDE_INT size) |
| { |
| rtx amount = gen_int_mode (size, GET_MODE (m_addr)); |
| emit_insn (gen_add2_insn (m_addr, amount)); |
| } |
| |
| /* If we are supposed to decrement the address after each access, emit code |
| to do so now. Increment by SIZE (which has should have the correct sign |
| already). */ |
| |
| void |
| pieces_addr::maybe_predec (HOST_WIDE_INT size) |
| { |
| if (m_explicit_inc >= 0) |
| return; |
| gcc_assert (HAVE_PRE_DECREMENT); |
| increment_address (size); |
| } |
| |
| /* If we are supposed to decrement the address after each access, emit code |
| to do so now. Increment by SIZE. */ |
| |
| void |
| pieces_addr::maybe_postinc (HOST_WIDE_INT size) |
| { |
| if (m_explicit_inc <= 0) |
| return; |
| gcc_assert (HAVE_POST_INCREMENT); |
| increment_address (size); |
| } |
| |
| /* This structure is used by do_op_by_pieces to describe the operation |
| to be performed. */ |
| |
| class op_by_pieces_d |
| { |
| private: |
| fixed_size_mode get_usable_mode (fixed_size_mode, unsigned int); |
| fixed_size_mode smallest_fixed_size_mode_for_size (unsigned int); |
| |
| protected: |
| pieces_addr m_to, m_from; |
| /* Make m_len read-only so that smallest_fixed_size_mode_for_size can |
| use it to check the valid mode size. */ |
| const unsigned HOST_WIDE_INT m_len; |
| HOST_WIDE_INT m_offset; |
| unsigned int m_align; |
| unsigned int m_max_size; |
| bool m_reverse; |
| /* True if this is a stack push. */ |
| bool m_push; |
| /* True if targetm.overlap_op_by_pieces_p () returns true. */ |
| bool m_overlap_op_by_pieces; |
| /* True if QI vector mode can be used. */ |
| bool m_qi_vector_mode; |
| |
| /* Virtual functions, overriden by derived classes for the specific |
| operation. */ |
| virtual void generate (rtx, rtx, machine_mode) = 0; |
| virtual bool prepare_mode (machine_mode, unsigned int) = 0; |
| virtual void finish_mode (machine_mode) |
| { |
| } |
| |
| public: |
| op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn, |
| void *, unsigned HOST_WIDE_INT, unsigned int, bool, |
| bool = false); |
| void run (); |
| }; |
| |
| /* The constructor for an op_by_pieces_d structure. We require two |
| objects named TO and FROM, which are identified as loads or stores |
| by TO_LOAD and FROM_LOAD. If FROM is a load, the optional FROM_CFN |
| and its associated FROM_CFN_DATA can be used to replace loads with |
| constant values. MAX_PIECES describes the maximum number of bytes |
| at a time which can be moved efficiently. LEN describes the length |
| of the operation. */ |
| |
| op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to, |
| bool to_load, rtx from, bool from_load, |
| by_pieces_constfn from_cfn, |
| void *from_cfn_data, |
| unsigned HOST_WIDE_INT len, |
| unsigned int align, bool push, |
| bool qi_vector_mode) |
| : m_to (to, to_load, NULL, NULL), |
| m_from (from, from_load, from_cfn, from_cfn_data), |
| m_len (len), m_max_size (max_pieces + 1), |
| m_push (push), m_qi_vector_mode (qi_vector_mode) |
| { |
| int toi = m_to.get_addr_inc (); |
| int fromi = m_from.get_addr_inc (); |
| if (toi >= 0 && fromi >= 0) |
| m_reverse = false; |
| else if (toi <= 0 && fromi <= 0) |
| m_reverse = true; |
| else |
| gcc_unreachable (); |
| |
| m_offset = m_reverse ? len : 0; |
| align = MIN (to ? MEM_ALIGN (to) : align, |
| from ? MEM_ALIGN (from) : align); |
| |
| /* If copying requires more than two move insns, |
| copy addresses to registers (to make displacements shorter) |
| and use post-increment if available. */ |
| if (by_pieces_ninsns (len, align, m_max_size, MOVE_BY_PIECES) > 2) |
| { |
| /* Find the mode of the largest comparison. */ |
| fixed_size_mode mode |
| = widest_fixed_size_mode_for_size (m_max_size, |
| m_qi_vector_mode); |
| |
| m_from.decide_autoinc (mode, m_reverse, len); |
| m_to.decide_autoinc (mode, m_reverse, len); |
| } |
| |
| align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align); |
| m_align = align; |
| |
| m_overlap_op_by_pieces = targetm.overlap_op_by_pieces_p (); |
| } |
| |
| /* This function returns the largest usable integer mode for LEN bytes |
| whose size is no bigger than size of MODE. */ |
| |
| fixed_size_mode |
| op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len) |
| { |
| unsigned int size; |
| do |
| { |
| size = GET_MODE_SIZE (mode); |
| if (len >= size && prepare_mode (mode, m_align)) |
| break; |
| /* widest_fixed_size_mode_for_size checks SIZE > 1. */ |
| mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode); |
| } |
| while (1); |
| return mode; |
| } |
| |
| /* Return the smallest integer or QI vector mode that is not narrower |
| than SIZE bytes. */ |
| |
| fixed_size_mode |
| op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size) |
| { |
| /* Use QI vector only for > size of WORD. */ |
| if (m_qi_vector_mode && size > UNITS_PER_WORD) |
| { |
| machine_mode mode; |
| fixed_size_mode candidate; |
| FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT) |
| if (is_a<fixed_size_mode> (mode, &candidate) |
| && GET_MODE_INNER (candidate) == QImode) |
| { |
| /* Don't return a mode wider than M_LEN. */ |
| if (GET_MODE_SIZE (candidate) > m_len) |
| break; |
| |
| if (GET_MODE_SIZE (candidate) >= size |
| && (optab_handler (vec_duplicate_optab, candidate) |
| != CODE_FOR_nothing)) |
| return candidate; |
| } |
| } |
| |
| return smallest_int_mode_for_size (size * BITS_PER_UNIT); |
| } |
| |
| /* This function contains the main loop used for expanding a block |
| operation. First move what we can in the largest integer mode, |
| then go to successively smaller modes. For every access, call |
| GENFUN with the two operands and the EXTRA_DATA. */ |
| |
| void |
| op_by_pieces_d::run () |
| { |
| if (m_len == 0) |
| return; |
| |
| unsigned HOST_WIDE_INT length = m_len; |
| |
| /* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */ |
| fixed_size_mode mode |
| = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode); |
| mode = get_usable_mode (mode, length); |
| |
| by_pieces_prev to_prev = { nullptr, mode }; |
| by_pieces_prev from_prev = { nullptr, mode }; |
| |
| do |
| { |
| unsigned int size = GET_MODE_SIZE (mode); |
| rtx to1 = NULL_RTX, from1; |
| |
| while (length >= size) |
| { |
| if (m_reverse) |
| m_offset -= size; |
| |
| to1 = m_to.adjust (mode, m_offset, &to_prev); |
| to_prev.data = to1; |
| to_prev.mode = mode; |
| from1 = m_from.adjust (mode, m_offset, &from_prev); |
| from_prev.data = from1; |
| from_prev.mode = mode; |
| |
| m_to.maybe_predec (-(HOST_WIDE_INT)size); |
| m_from.maybe_predec (-(HOST_WIDE_INT)size); |
| |
| generate (to1, from1, mode); |
| |
| m_to.maybe_postinc (size); |
| m_from.maybe_postinc (size); |
| |
| if (!m_reverse) |
| m_offset += size; |
| |
| length -= size; |
| } |
| |
| finish_mode (mode); |
| |
| if (length == 0) |
| return; |
| |
| if (!m_push && m_overlap_op_by_pieces) |
| { |
| /* NB: Generate overlapping operations if it is not a stack |
| push since stack push must not overlap. Get the smallest |
| fixed size mode for M_LEN bytes. */ |
| mode = smallest_fixed_size_mode_for_size (length); |
| mode = get_usable_mode (mode, GET_MODE_SIZE (mode)); |
| int gap = GET_MODE_SIZE (mode) - length; |
| if (gap > 0) |
| { |
| /* If size of MODE > M_LEN, generate the last operation |
| in MODE for the remaining bytes with ovelapping memory |
| from the previois operation. */ |
| if (m_reverse) |
| m_offset += gap; |
| else |
| m_offset -= gap; |
| length += gap; |
| } |
| } |
| else |
| { |
| /* widest_fixed_size_mode_for_size checks SIZE > 1. */ |
| mode = widest_fixed_size_mode_for_size (size, |
| m_qi_vector_mode); |
| mode = get_usable_mode (mode, length); |
| } |
| } |
| while (1); |
| |
| /* The code above should have handled everything. */ |
| gcc_assert (!length); |
| } |
| |
| /* Derived class from op_by_pieces_d, providing support for block move |
| operations. */ |
| |
| #ifdef PUSH_ROUNDING |
| #define PUSHG_P(to) ((to) == nullptr) |
| #else |
| #define PUSHG_P(to) false |
| #endif |
| |
| class move_by_pieces_d : public op_by_pieces_d |
| { |
| insn_gen_fn m_gen_fun; |
| void generate (rtx, rtx, machine_mode); |
| bool prepare_mode (machine_mode, unsigned int); |
| |
| public: |
| move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len, |
| unsigned int align) |
| : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL, |
| NULL, len, align, PUSHG_P (to)) |
| { |
| } |
| rtx finish_retmode (memop_ret); |
| }; |
| |
| /* Return true if MODE can be used for a set of copies, given an |
| alignment ALIGN. Prepare whatever data is necessary for later |
| calls to generate. */ |
| |
| bool |
| move_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align) |
| { |
| insn_code icode = optab_handler (mov_optab, mode); |
| m_gen_fun = GEN_FCN (icode); |
| return icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode); |
| } |
| |
| /* A callback used when iterating for a compare_by_pieces_operation. |
| OP0 and OP1 are the values that have been loaded and should be |
| compared in MODE. If OP0 is NULL, this means we should generate a |
| push; otherwise EXTRA_DATA holds a pointer to a pointer to the insn |
| gen function that should be used to generate the mode. */ |
| |
| void |
| move_by_pieces_d::generate (rtx op0, rtx op1, |
| machine_mode mode ATTRIBUTE_UNUSED) |
| { |
| #ifdef PUSH_ROUNDING |
| if (op0 == NULL_RTX) |
| { |
| emit_single_push_insn (mode, op1, NULL); |
| return; |
| } |
| #endif |
| emit_insn (m_gen_fun (op0, op1)); |
| } |
| |
| /* Perform the final adjustment at the end of a string to obtain the |
| correct return value for the block operation. |
| Return value is based on RETMODE argument. */ |
| |
| rtx |
| move_by_pieces_d::finish_retmode (memop_ret retmode) |
| { |
| gcc_assert (!m_reverse); |
| if (retmode == RETURN_END_MINUS_ONE) |
| { |
| m_to.maybe_postinc (-1); |
| --m_offset; |
| } |
| return m_to.adjust (QImode, m_offset); |
| } |
| |
| /* Generate several move instructions to copy LEN bytes from block FROM to |
| block TO. (These are MEM rtx's with BLKmode). |
| |
| If PUSH_ROUNDING is defined and TO is NULL, emit_single_push_insn is |
| used to push FROM to the stack. |
| |
| ALIGN is maximum stack alignment we can assume. |
| |
| Return value is based on RETMODE argument. */ |
| |
| rtx |
| move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len, |
| unsigned int align, memop_ret retmode) |
| { |
| #ifndef PUSH_ROUNDING |
| if (to == NULL) |
| gcc_unreachable (); |
| #endif |
| |
| move_by_pieces_d data (to, from, len, align); |
| |
| data.run (); |
| |
| if (retmode != RETURN_BEGIN) |
| return data.finish_retmode (retmode); |
| else |
| return to; |
| } |
| |
| /* Derived class from op_by_pieces_d, providing support for block move |
| operations. */ |
| |
| class store_by_pieces_d : public op_by_pieces_d |
| { |
| insn_gen_fn m_gen_fun; |
| void generate (rtx, rtx, machine_mode); |
| bool prepare_mode (machine_mode, unsigned int); |
| |
| public: |
| store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data, |
| unsigned HOST_WIDE_INT len, unsigned int align, |
| bool qi_vector_mode) |
| : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn, |
| cfn_data, len, align, false, qi_vector_mode) |
| { |
| } |
| rtx finish_retmode (memop_ret); |
| }; |
| |
| /* Return true if MODE can be used for a set of stores, given an |
| alignment ALIGN. Prepare whatever data is necessary for later |
| calls to generate. */ |
| |
| bool |
| store_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align) |
| { |
| insn_code icode = optab_handler (mov_optab, mode); |
| m_gen_fun = GEN_FCN (icode); |
| return icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode); |
| } |
| |
| /* A callback used when iterating for a store_by_pieces_operation. |
| OP0 and OP1 are the values that have been loaded and should be |
| compared in MODE. If OP0 is NULL, this means we should generate a |
| push; otherwise EXTRA_DATA holds a pointer to a pointer to the insn |
| gen function that should be used to generate the mode. */ |
| |
| void |
| store_by_pieces_d::generate (rtx op0, rtx op1, machine_mode) |
| { |
| emit_insn (m_gen_fun (op0, op1)); |
| } |
| |
| /* Perform the final adjustment at the end of a string to obtain the |
| correct return value for the block operation. |
| Return value is based on RETMODE argument. */ |
| |
| rtx |
| store_by_pieces_d::finish_retmode (memop_ret retmode) |
| { |
| gcc_assert (!m_reverse); |
| if (retmode == RETURN_END_MINUS_ONE) |
| { |
| m_to.maybe_postinc (-1); |
| --m_offset; |
| } |
| return m_to.adjust (QImode, m_offset); |
| } |
| |
| /* Determine whether the LEN bytes generated by CONSTFUN can be |
| stored to memory using several move instructions. CONSTFUNDATA is |
| a pointer which will be passed as argument in every CONSTFUN call. |
| ALIGN is maximum alignment we can assume. MEMSETP is true if this is |
| a memset operation and false if it's a copy of a constant string. |
| Return nonzero if a call to store_by_pieces should succeed. */ |
| |
| int |
| can_store_by_pieces (unsigned HOST_WIDE_INT len, |
| by_pieces_constfn constfun, |
| void *constfundata, unsigned int align, bool memsetp) |
| { |
| unsigned HOST_WIDE_INT l; |
| unsigned int max_size; |
| HOST_WIDE_INT offset = 0; |
| enum insn_code icode; |
| int reverse; |
| /* cst is set but not used if LEGITIMATE_CONSTANT doesn't use it. */ |
| rtx cst ATTRIBUTE_UNUSED; |
| |
| if (len == 0) |
| return 1; |
| |
| if (!targetm.use_by_pieces_infrastructure_p (len, align, |
| memsetp |
| ? SET_BY_PIECES |
| : STORE_BY_PIECES, |
| optimize_insn_for_speed_p ())) |
| return 0; |
| |
| align = alignment_for_piecewise_move (STORE_MAX_PIECES, align); |
| |
| /* We would first store what we can in the largest integer mode, then go to |
| successively smaller modes. */ |
| |
| for (reverse = 0; |
| reverse <= (HAVE_PRE_DECREMENT || HAVE_POST_DECREMENT); |
| reverse++) |
| { |
| l = len; |
| max_size = STORE_MAX_PIECES + 1; |
| while (max_size > 1 && l > 0) |
| { |
| fixed_size_mode mode |
| = widest_fixed_size_mode_for_size (max_size, memsetp); |
| |
| icode = optab_handler (mov_optab, mode); |
| if (icode != CODE_FOR_nothing |
| && align >= GET_MODE_ALIGNMENT (mode)) |
| { |
| unsigned int size = GET_MODE_SIZE (mode); |
| |
| while (l >= size) |
| { |
| if (reverse) |
| offset -= size; |
| |
| cst = (*constfun) (constfundata, nullptr, offset, mode); |
| /* All CONST_VECTORs can be loaded for memset since |
| vec_duplicate_optab is a precondition to pick a |
| vector mode for the memset expander. */ |
| if (!((memsetp && VECTOR_MODE_P (mode)) |
| || targetm.legitimate_constant_p (mode, cst))) |
| return 0; |
| |
| if (!reverse) |
| offset += size; |
| |
| l -= size; |
| } |
| } |
| |
| max_size = GET_MODE_SIZE (mode); |
| } |
| |
| /* The code above should have handled everything. */ |
| gcc_assert (!l); |
| } |
| |
| return 1; |
| } |
| |
| /* Generate several move instructions to store LEN bytes generated by |
| CONSTFUN to block TO. (A MEM rtx with BLKmode). CONSTFUNDATA is a |
| pointer which will be passed as argument in every CONSTFUN call. |
| ALIGN is maximum alignment we can assume. MEMSETP is true if this is |
| a memset operation and false if it's a copy of a constant string. |
| Return value is based on RETMODE argument. */ |
| |
| rtx |
| store_by_pieces (rtx to, unsigned HOST_WIDE_INT len, |
| by_pieces_constfn constfun, |
| void *constfundata, unsigned int align, bool memsetp, |
| memop_ret retmode) |
| { |
| if (len == 0) |
| { |
| gcc_assert (retmode != RETURN_END_MINUS_ONE); |
| return to; |
| } |
| |
| gcc_assert (targetm.use_by_pieces_infrastructure_p |
| (len, align, |
| memsetp ? SET_BY_PIECES : STORE_BY_PIECES, |
| optimize_insn_for_speed_p ())); |
| |
| store_by_pieces_d data (to, constfun, constfundata, len, align, |
| memsetp); |
| data.run (); |
| |
| if (retmode != RETURN_BEGIN) |
| return data.finish_retmode (retmode); |
| else |
| return to; |
| } |
| |
| /* Generate several move instructions to clear LEN bytes of block TO. (A MEM |
| rtx with BLKmode). ALIGN is maximum alignment we can assume. */ |
| |
| static void |
| clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align) |
| { |
| if (len == 0) |
| return; |
| |
| /* Use builtin_memset_read_str to support vector mode broadcast. */ |
| char c = 0; |
| store_by_pieces_d data (to, builtin_memset_read_str, &c, len, align, |
| true); |
| data.run (); |
| } |
| |
| /* Context used by compare_by_pieces_genfn. It stores the fail label |
| to jump to in case of miscomparison, and for branch ratios greater than 1, |
| it stores an accumulator and the current and maximum counts before |
| emitting another branch. */ |
| |
| class compare_by_pieces_d : public op_by_pieces_d |
| { |
| rtx_code_label *m_fail_label; |
| rtx m_accumulator; |
| int m_count, m_batch; |
| |
| void generate (rtx, rtx, machine_mode); |
| bool prepare_mode (machine_mode, unsigned int); |
| void finish_mode (machine_mode); |
| public: |
| compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn, |
| void *op1_cfn_data, HOST_WIDE_INT len, int align, |
| rtx_code_label *fail_label) |
| : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn, |
| op1_cfn_data, len, align, false) |
| { |
| m_fail_label = fail_label; |
| } |
| }; |
| |
| /* A callback used when iterating for a compare_by_pieces_operation. |
| OP0 and OP1 are the values that have been loaded and should be |
| compared in MODE. DATA holds a pointer to the compare_by_pieces_data |
| context structure. */ |
| |
| void |
| compare_by_pieces_d::generate (rtx op0, rtx op1, machine_mode mode) |
| { |
| if (m_batch > 1) |
| { |
| rtx temp = expand_binop (mode, sub_optab, op0, op1, NULL_RTX, |
| true, OPTAB_LIB_WIDEN); |
| if (m_count != 0) |
| temp = expand_binop (mode, ior_optab, m_accumulator, temp, temp, |
| true, OPTAB_LIB_WIDEN); |
| m_accumulator = temp; |
| |
| if (++m_count < m_batch) |
| return; |
| |
| m_count = 0; |
| op0 = m_accumulator; |
| op1 = const0_rtx; |
| m_accumulator = NULL_RTX; |
| } |
| do_compare_rtx_and_jump (op0, op1, NE, true, mode, NULL_RTX, NULL, |
| m_fail_label, profile_probability::uninitialized ()); |
| } |
| |
| /* Return true if MODE can be used for a set of moves and comparisons, |
| given an alignment ALIGN. Prepare whatever data is necessary for |
| later calls to generate. */ |
| |
| bool |
| compare_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align) |
| { |
| insn_code icode = optab_handler (mov_optab, mode); |
| if (icode == CODE_FOR_nothing |
| || align < GET_MODE_ALIGNMENT (mode) |
| || !can_compare_p (EQ, mode, ccp_jump)) |
| return false; |
| m_batch = targetm.compare_by_pieces_branch_ratio (mode); |
| if (m_batch < 0) |
| return false; |
| m_accumulator = NULL_RTX; |
| m_count = 0; |
| return true; |
| } |
| |
| /* Called after expanding a series of comparisons in MODE. If we have |
| accumulated results for which we haven't emitted a branch yet, do |
| so now. */ |
| |
| void |
| compare_by_pieces_d::finish_mode (machine_mode mode) |
| { |
| if (m_accumulator != NULL_RTX) |
| do_compare_rtx_and_jump (m_accumulator, const0_rtx, NE, true, mode, |
| NULL_RTX, NULL, m_fail_label, |
| profile_probability::uninitialized ()); |
| } |
| |
| /* Generate several move instructions to compare LEN bytes from blocks |
| ARG0 and ARG1. (These are MEM rtx's with BLKmode). |
| |
| If PUSH_ROUNDING is defined and TO is NULL, emit_single_push_insn is |
| used to push FROM to the stack. |
| |
| ALIGN is maximum stack alignment we can assume. |
| |
| Optionally, the caller can pass a constfn and associated data in A1_CFN |
| and A1_CFN_DATA. describing that the second operand being compared is a |
| known constant and how to obtain its data. */ |
| |
| static rtx |
| compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len, |
| rtx target, unsigned int align, |
| by_pieces_constfn a1_cfn, void *a1_cfn_data) |
| { |
| rtx_code_label *fail_label = gen_label_rtx (); |
| rtx_code_label *end_label = gen_label_rtx (); |
| |
| if (target == NULL_RTX |
| || !REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) |
| target = gen_reg_rtx (TYPE_MODE (integer_type_node)); |
| |
| compare_by_pieces_d data (arg0, arg1, a1_cfn, a1_cfn_data, len, align, |
| fail_label); |
| |
| data.run (); |
| |
| emit_move_insn (target, const0_rtx); |
| emit_jump (end_label); |
| emit_barrier (); |
| emit_label (fail_label); |
| emit_move_insn (target, const1_rtx); |
| emit_label (end_label); |
| |
| return target; |
| } |
| |
| /* Emit code to move a block Y to a block X. This may be done with |
| string-move instructions, with multiple scalar move instructions, |
| or with a library call. |
| |
| Both X and Y must be MEM rtx's (perhaps inside VOLATILE) with mode BLKmode. |
| SIZE is an rtx that says how long they are. |
| ALIGN is the maximum alignment we can assume they have. |
| METHOD describes what kind of copy this is, and what mechanisms may be used. |
| MIN_SIZE is the minimal size of block to move |
| MAX_SIZE is the maximal size of block to move, if it cannot be represented |
| in unsigned HOST_WIDE_INT, than it is mask of all ones. |
| |
| Return the address of the new block, if memcpy is called and returns it, |
| 0 otherwise. */ |
| |
| rtx |
| emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method, |
| unsigned int expected_align, HOST_WIDE_INT expected_size, |
| unsigned HOST_WIDE_INT min_size, |
| unsigned HOST_WIDE_INT max_size, |
| unsigned HOST_WIDE_INT probable_max_size, |
| bool bail_out_libcall, bool *is_move_done, |
| bool might_overlap) |
| { |
| int may_use_call; |
| rtx retval = 0; |
| unsigned int align; |
| |
| if (is_move_done) |
| *is_move_done = true; |
| |
| gcc_assert (size); |
| if (CONST_INT_P (size) && INTVAL (size) == 0) |
| return 0; |
| |
| switch (method) |
| { |
| case BLOCK_OP_NORMAL: |
| case BLOCK_OP_TAILCALL: |
| may_use_call = 1; |
| break; |
| |
| case BLOCK_OP_CALL_PARM: |
| may_use_call = block_move_libcall_safe_for_call_parm (); |
| |
| /* Make inhibit_defer_pop nonzero around the library call |
| to force it to pop the arguments right away. */ |
| NO_DEFER_POP; |
| break; |
| |
| case BLOCK_OP_NO_LIBCALL: |
| may_use_call = 0; |
| break; |
| |
| case BLOCK_OP_NO_LIBCALL_RET: |
| may_use_call = -1; |
| break; |
| |
| default: |
| gcc_unreachable (); |
| } |
| |
| gcc_assert (MEM_P (x) && MEM_P (y)); |
| align = MIN (MEM_ALIGN (x), MEM_ALIGN (y)); |
| gcc_assert (align >= BITS_PER_UNIT); |
| |
| /* Make sure we've got BLKmode addresses; store_one_arg can decide that |
| block copy is more efficient for other large modes, e.g. DCmode. */ |
| x = adjust_address (x, BLKmode, 0); |
| y = adjust_address (y, BLKmode, 0); |
| |
| /* If source and destination are the same, no need to copy anything. */ |
| if (rtx_equal_p (x, y) |
| && !MEM_VOLATILE_P (x) |
| && !MEM_VOLATILE_P (y)) |
| return 0; |
| |
| /* Set MEM_SIZE as appropriate for this block copy. The main place this |
| can be incorrect is coming from __builtin_memcpy. */ |
| poly_int64 const_size; |
| if (poly_int_rtx_p (size, &const_size)) |
| { |
| x = shallow_copy_rtx (x); |
| y = shallow_copy_rtx (y); |
| set_mem_size (x, const_size); |
| set_mem_size (y, const_size); |
| } |
| |
| bool pieces_ok = CONST_INT_P (size) |
| && can_move_by_pieces (INTVAL (size), align); |
| bool pattern_ok = false; |
| |
| if (!pieces_ok || might_overlap) |
| { |
| pattern_ok |
| = emit_block_move_via_pattern (x, y, size, align, |
| expected_align, expected_size, |
| min_size, max_size, probable_max_size, |
| might_overlap); |
| if (!pattern_ok && might_overlap) |
| { |
| /* Do not try any of the other methods below as they are not safe |
| for overlapping moves. */ |
| *is_move_done = false; |
| return retval; |
| } |
| } |
| |
| if (pattern_ok) |
| ; |
| else if (pieces_ok) |
| move_by_pieces (x, y, INTVAL (size), align, RETURN_BEGIN); |
| else if (may_use_call && !might_overlap |
| && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x)) |
| && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (y))) |
| { |
| if (bail_out_libcall) |
| { |
| if (is_move_done) |
| *is_move_done = false; |
| return retval; |
| } |
| |
| if (may_use_call < 0) |
| return pc_rtx; |
| |
| retval = emit_block_copy_via_libcall (x, y, size, |
| method == BLOCK_OP_TAILCALL); |
| } |
| else if (might_overlap) |
| *is_move_done = false; |
| else |
| emit_block_move_via_loop (x, y, size, align); |
| |
| if (method == BLOCK_OP_CALL_PARM) |
| OK_DEFER_POP; |
| |
| return retval; |
| } |
| |
| rtx |
| emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method) |
| { |
| unsigned HOST_WIDE_INT max, min = 0; |
| if (GET_CODE (size) == CONST_INT) |
| min = max = UINTVAL (size); |
| else |
| max = GET_MODE_MASK (GET_MODE (size)); |
| return emit_block_move_hints (x, y, size, method, 0, -1, |
| min, max, max); |
| } |
| |
| /* A subroutine of emit_block_move. Returns true if calling the |
| block move libcall will not clobber any parameters which may have |
| already been placed on the stack. */ |
| |
| static bool |
| block_move_libcall_safe_for_call_parm (void) |
| { |
| tree fn; |
| |
| /* If arguments are pushed on the stack, then they're safe. */ |
| if (targetm.calls.push_argument (0)) |
| return true; |
| |
| /* If registers go on the stack anyway, any argument is sure to clobber |
| an outgoing argument. */ |
| #if defined (REG_PARM_STACK_SPACE) |
| fn = builtin_decl_implicit (BUILT_IN_MEMCPY); |
| /* Avoid set but not used warning if *REG_PARM_STACK_SPACE doesn't |
| depend on its argument. */ |
| (void) fn; |
| if (OUTGOING_REG_PARM_STACK_SPACE ((!fn ? NULL_TREE : TREE_TYPE (fn))) |
| && REG_PARM_STACK_SPACE (fn) != 0) |
| return false; |
| #endif |
| |
| /* If any argument goes in memory, then it might clobber an outgoing |
| argument. */ |
| { |
| CUMULATIVE_ARGS args_so_far_v; |
| cumulative_args_t args_so_far; |
| tree arg; |
| |
| fn = builtin_decl_implicit (BUILT_IN_MEMCPY); |
| INIT_CUMULATIVE_ARGS (args_so_far_v, TREE_TYPE (fn), NULL_RTX, 0, 3); |
| args_so_far = pack_cumulative_args (&args_so_far_v); |
| |
| arg = TYPE_ARG_TYPES (TREE_TYPE (fn)); |
| for ( ; arg != void_list_node ; arg = TREE_CHAIN (arg)) |
| { |
| machine_mode mode = TYPE_MODE (TREE_VALUE (arg)); |
| function_arg_info arg_info (mode, /*named=*/true); |
| rtx tmp = targetm.calls.function_arg (args_so_far, arg_info); |
| if (!tmp || !REG_P (tmp)) |
| return false; |
| if (targetm.calls.arg_partial_bytes (args_so_far, arg_info)) |
| return false; |
| targetm.calls.function_arg_advance (args_so_far, arg_info); |
| } |
| } |
| return true; |
| } |
| |
| /* A subroutine of emit_block_move. Expand a cpymem or movmem pattern; |
| return true if successful. |
| |
| X is the destination of the copy or move. |
| Y is the source of the copy or move. |
| SIZE is the size of the block to be moved. |
| |
| MIGHT_OVERLAP indicates this originated with expansion of a |
| builtin_memmove() and the source and destination blocks may |
| overlap. |
| */ |
| |
| static bool |
| emit_block_move_via_pattern (rtx x, rtx y, rtx size, unsigned int align, |
| unsigned int expected_align, |
| HOST_WIDE_INT expected_size, |
| unsigned HOST_WIDE_INT min_size, |
| unsigned HOST_WIDE_INT max_size, |
| unsigned HOST_WIDE_INT probable_max_size, |
| bool might_overlap) |
| { |
| if (expected_align < align) |
| expected_align = align; |
| if (expected_size != -1) |
| { |
| if ((unsigned HOST_WIDE_INT)expected_size > probable_max_size) |
| expected_size = probable_max_size; |
| if ((unsigned HOST_WIDE_INT)expected_size < min_size) |
| expected_size = min_size; |
| } |
| |
| /* Since this is a move insn, we don't care about volatility. */ |
| temporary_volatile_ok v (true); |
| |
| /* Try the most limited insn first, because there's no point |
| including more than one in the machine description unless |
| the more limited one has some advantage. */ |
| |
| opt_scalar_int_mode mode_iter; |
| FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) |
| { |
| scalar_int_mode mode = mode_iter.require (); |
| enum insn_code code; |
| if (might_overlap) |
| code = direct_optab_handler (movmem_optab, mode); |
| else |
| code = direct_optab_handler (cpymem_optab, mode); |
| |
| if (code != CODE_FOR_nothing |
| /* We don't need MODE to be narrower than BITS_PER_HOST_WIDE_INT |
| here because if SIZE is less than the mode mask, as it is |
| returned by the macro, it will definitely be less than the |
| actual mode mask. Since SIZE is within the Pmode address |
| space, we limit MODE to Pmode. */ |
| && ((CONST_INT_P (size) |
| && ((unsigned HOST_WIDE_INT) INTVAL (size) |
| <= (GET_MODE_MASK (mode) >> 1))) |
| || max_size <= (GET_MODE_MASK (mode) >> 1) |
| || GET_MODE_BITSIZE (mode) >= GET_MODE_BITSIZE (Pmode))) |
| { |
| class expand_operand ops[9]; |
| unsigned int nops; |
| |
| /* ??? When called via emit_block_move_for_call, it'd be |
| nice if there were some way to inform the backend, so |
| that it doesn't fail the expansion because it thinks |
| emitting the libcall would be more efficient. */ |
| nops = insn_data[(int) code].n_generator_args; |
| gcc_assert (nops == 4 || nops == 6 || nops == 8 || nops == 9); |
| |
| create_fixed_operand (&ops[0], x); |
| create_fixed_operand (&ops[1], y); |
| /* The check above guarantees that this size conversion is valid. */ |
| create_convert_operand_to (&ops[2], size, mode, true); |
| create_integer_operand (&ops[3], align / BITS_PER_UNIT); |
| if (nops >= 6) |
| { |
| create_integer_operand (&ops[4], expected_align / BITS_PER_UNIT); |
| create_integer_operand (&ops[5], expected_size); |
| } |
| if (nops >= 8) |
| { |
| create_integer_operand (&ops[6], min_size); |
| /* If we cannot represent the maximal size, |
| make parameter NULL. */ |
| if ((HOST_WIDE_INT) max_size != -1) |
| create_integer_operand (&ops[7], max_size); |
| else |
| create_fixed_operand (&ops[7], NULL); |
| } |
| if (nops == 9) |
| { |
| /* If we cannot represent the maximal size, |
| make parameter NULL. */ |
| if ((HOST_WIDE_INT) probable_max_size != -1) |
| create_integer_operand (&ops[8], probable_max_size); |
| else |
| create_fixed_operand (&ops[8], NULL); |
| } |
| if (maybe_expand_insn (code, nops, ops)) |
| return true; |
| } |
| } |
| |
| return false; |
| } |
| |
| /* A subroutine of emit_block_move. Copy the data via an explicit |
| loop. This is used only when libcalls are forbidden. */ |
| /* ??? It'd be nice to copy in hunks larger than QImode. */ |
| |
| static void |
| emit_block_move_via_loop (rtx x, rtx y, rtx size, |
| unsigned int align ATTRIBUTE_UNUSED) |
| { |
| rtx_code_label *cmp_label, *top_label; |
| rtx iter, x_addr, y_addr, tmp; |
| machine_mode x_addr_mode = get_address_mode (x); |
| machine_mode y_addr_mode = get_address_mode (y); |
| machine_mode iter_mode; |
| |
| iter_mode = GET_MODE (size); |
| if (iter_mode == VOIDmode) |
| iter_mode = word_mode; |
| |
| top_label = gen_label_rtx (); |
| cmp_label = gen_label_rtx (); |
| iter = gen_reg_rtx (iter_mode); |
| |
| emit_move_insn (iter, const0_rtx); |
| |
| x_addr = force_operand (XEXP (x, 0), NULL_RTX); |
| y_addr = force_operand (XEXP (y, 0), NULL_RTX); |
| do_pending_stack_adjust (); |
| |
| emit_jump (cmp_label); |
| emit_label (top_label); |
| |
| tmp = convert_modes (x_addr_mode, iter_mode, iter, true); |
| x_addr = simplify_gen_binary (PLUS, x_addr_mode, x_addr, tmp); |
| |
| if (x_addr_mode != y_addr_mode) |
| tmp = convert_modes (y_addr_mode, iter_mode, iter, true); |
| y_addr = simplify_gen_binary (PLUS, y_addr_mode, y_addr, tmp); |
| |
| x = change_address (x, QImode, x_addr); |
| y = change_address (y, QImode, y_addr); |
| |
| emit_move_insn (x, y); |
| |
| tmp = expand_simple_binop (iter_mode, PLUS, iter, const1_rtx, iter, |
| true, OPTAB_LIB_WIDEN); |
| if (tmp != iter) |
| emit_move_insn (iter, tmp); |
| |
| emit_label (cmp_label); |
| |
| emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, |
| true, top_label, |
| profile_probability::guessed_always () |
| .apply_scale (9, 10)); |
| } |
| |
| /* Expand a call to memcpy or memmove or memcmp, and return the result. |
| TAILCALL is true if this is a tail call. */ |
| |
| rtx |
| emit_block_op_via_libcall (enum built_in_function fncode, rtx dst, rtx src, |
| rtx size, bool tailcall) |
| { |
| rtx dst_addr, src_addr; |
| tree call_expr, dst_tree, src_tree, size_tree; |
| machine_mode size_mode; |
| |
| /* Since dst and src are passed to a libcall, mark the corresponding |
| tree EXPR as addressable. */ |
| tree dst_expr = MEM_EXPR (dst); |
| tree src_expr = MEM_EXPR (src); |
| if (dst_expr) |
| mark_addressable (dst_expr); |
| if (src_expr) |
| mark_addressable (src_expr); |
| |
| dst_addr = copy_addr_to_reg (XEXP (dst, 0)); |
| dst_addr = convert_memory_address (ptr_mode, dst_addr); |
| dst_tree = make_tree (ptr_type_node, dst_addr); |
| |
| src_addr = copy_addr_to_reg (XEXP (src, 0)); |
| src_addr = convert_memory_address (ptr_mode, src_addr); |
| src_tree = make_tree (ptr_type_node, src_addr); |
| |
| size_mode = TYPE_MODE (sizetype); |
| size = convert_to_mode (size_mode, size, 1); |
| size = copy_to_mode_reg (size_mode, size); |
| size_tree = make_tree (sizetype, size); |
| |
| /* It is incorrect to use the libcall calling conventions for calls to |
| memcpy/memmove/memcmp because they can be provided by the user. */ |
| tree fn = builtin_decl_implicit (fncode); |
| call_expr = build_call_expr (fn, 3, dst_tree, src_tree, size_tree); |
| CALL_EXPR_TAILCALL (call_expr) = tailcall; |
| |
| return expand_call (call_expr, NULL_RTX, false); |
| } |
| |
| /* Try to expand cmpstrn or cmpmem operation ICODE with the given operands. |
| ARG3_TYPE is the type of ARG3_RTX. Return the result rtx on success, |
| otherwise return null. */ |
| |
| rtx |
| expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx, |
| rtx arg2_rtx, tree arg3_type, rtx arg3_rtx, |
| HOST_WIDE_INT align) |
| { |
| machine_mode insn_mode = insn_data[icode].operand[0].mode; |
| |
| if (target && (!REG_P (target) || HARD_REGISTER_P (target))) |
| target = NULL_RTX; |
| |
| class expand_operand ops[5]; |
| create_output_operand (&ops[0], target, insn_mode); |
| create_fixed_operand (&ops[1], arg1_rtx); |
| create_fixed_operand (&ops[2], arg2_rtx); |
| create_convert_operand_from (&ops[3], arg3_rtx, TYPE_MODE (arg3_type), |
| TYPE_UNSIGNED (arg3_type)); |
| create_integer_operand (&ops[4], align); |
| if (maybe_expand_insn (icode, 5, ops)) |
| return ops[0].value; |
| return NULL_RTX; |
| } |
| |
| /* Expand a block compare between X and Y with length LEN using the |
| cmpmem optab, placing the result in TARGET. LEN_TYPE is the type |
| of the expression that was used to calculate the length. ALIGN |
| gives the known minimum common alignment. */ |
| |
| static rtx |
| emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target, |
| unsigned align) |
| { |
| /* Note: The cmpstrnsi pattern, if it exists, is not suitable for |
| implementing memcmp because it will stop if it encounters two |
| zero bytes. */ |
| insn_code icode = direct_optab_handler (cmpmem_optab, SImode); |
| |
| if (icode == CODE_FOR_nothing) |
| return NULL_RTX; |
| |
| return expand_cmpstrn_or_cmpmem (icode, target, x, y, len_type, len, align); |
| } |
| |
| /* Emit code to compare a block Y to a block X. This may be done with |
| string-compare instructions, with multiple scalar instructions, |
| or with a library call. |
| |
| Both X and Y must be MEM rtx's. LEN is an rtx that says how long |
| they are. LEN_TYPE is the type of the expression that was used to |
| calculate it. |
| |
| If EQUALITY_ONLY is true, it means we don't have to return the tri-state |
| value of a normal memcmp call, instead we can just compare for equality. |
| If FORCE_LIBCALL is true, we should emit a call to memcmp rather than |
| returning NULL_RTX. |
| |
| Optionally, the caller can pass a constfn and associated data in Y_CFN |
| and Y_CFN_DATA. describing that the second operand being compared is a |
| known constant and how to obtain its data. |
| Return the result of the comparison, or NULL_RTX if we failed to |
| perform the operation. */ |
| |
| rtx |
| emit_block_cmp_hints (rtx x, rtx y, rtx len, tree len_type, rtx target, |
| bool equality_only, by_pieces_constfn y_cfn, |
| void *y_cfndata) |
| { |
| rtx result = 0; |
| |
| if (CONST_INT_P (len) && INTVAL (len) == 0) |
| return const0_rtx; |
| |
| gcc_assert (MEM_P (x) && MEM_P (y)); |
| unsigned int align = MIN (MEM_ALIGN (x), MEM_ALIGN (y)); |
| gcc_assert (align >= BITS_PER_UNIT); |
| |
| x = adjust_address (x, BLKmode, 0); |
| y = adjust_address (y, BLKmode, 0); |
| |
| if (equality_only |
| && CONST_INT_P (len) |
| && can_do_by_pieces (INTVAL (len), align, COMPARE_BY_PIECES)) |
| result = compare_by_pieces (x, y, INTVAL (len), target, align, |
| y_cfn, y_cfndata); |
| else |
| result = emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align); |
| |
| return result; |
| } |
| |
| /* Copy all or part of a value X into registers starting at REGNO. |
| The number of registers to be filled is NREGS. */ |
| |
| void |
| move_block_to_reg (int regno, rtx x, int nregs, machine_mode mode) |
| { |
| if (nregs == 0) |
| return; |
| |
| if (CONSTANT_P (x) && !targetm.legitimate_constant_p (mode, x)) |
| x = validize_mem (force_const_mem (mode, x)); |
| |
| /* See if the machine can do this with a load multiple insn. */ |
| if (targetm.have_load_multiple ()) |
| { |
| rtx_insn *last = get_last_insn (); |
| rtx first = gen_rtx_REG (word_mode, regno); |
| if (rtx_insn *pat = targetm.gen_load_multiple (first, x, |
| GEN_INT (nregs))) |
| { |
| emit_insn (pat); |
| return; |
| } |
| else |
| delete_insns_since (last); |
| } |
| |
| for (int i = 0; i < nregs; i++) |
| emit_move_insn (gen_rtx_REG (word_mode, regno + i), |
| operand_subword_force (x, i, mode)); |
| } |
| |
| /* Copy all or part of a BLKmode value X out of registers starting at REGNO. |
| The number of registers to be filled is NREGS. */ |
| |
| void |
| move_block_from_reg (int regno, rtx x, int nregs) |
| { |
| if (nregs == 0) |
| return; |
| |
| /* See if the machine can do this with a store multiple insn. */ |
| if (targetm.have_store_multiple ()) |
| { |
| rtx_insn *last = get_last_insn (); |
| rtx first = gen_rtx_REG (word_mode, regno); |
| if (rtx_insn *pat = targetm.gen_store_multiple (x, first, |
| GEN_INT (nregs))) |
| { |
| emit_insn (pat); |
| return; |
| } |
| else |
| delete_insns_since (last); |
| } |
| |
| for (int i = 0; i < nregs; i++) |
| { |
| rtx tem = operand_subword (x, i, 1, BLKmode); |
| |
| gcc_assert (tem); |
| |
| emit_move_insn (tem, gen_rtx_REG (word_mode, regno + i)); |
| } |
| } |
| |
| /* Generate a PARALLEL rtx for a new non-consecutive group of registers from |
| ORIG, where ORIG is a non-consecutive group of registers represented by |
| a PARALLEL. The clone is identical to the original except in that the |
| original set of registers is replaced by a new set of pseudo registers. |
| The new set has the same modes as the original set. */ |
| |
| rtx |
| gen_group_rtx (rtx orig) |
| { |
| int i, length; |
| rtx *tmps; |
| |
| gcc_assert (GET_CODE (orig) == PARALLEL); |
| |
| length = XVECLEN (orig, 0); |
| tmps = XALLOCAVEC (rtx, length); |
| |
| /* Skip a NULL entry in first slot. */ |
| i = XEXP (XVECEXP (orig, 0, 0), 0) ? 0 : 1; |
| |
| if (i) |
| tmps[0] = 0; |
| |
| for (; i < length; i++) |
| { |
| machine_mode mode = GET_MODE (XEXP (XVECEXP (orig, 0, i), 0)); |
| rtx offset = XEXP (XVECEXP (orig, 0, i), 1); |
| |
| tmps[i] = gen_rtx_EXPR_LIST (VOIDmode, gen_reg_rtx (mode), offset); |
| } |
| |
| return gen_rtx_PARALLEL (GET_MODE (orig), gen_rtvec_v (length, tmps)); |
| } |
| |
| /* A subroutine of emit_group_load. Arguments as for emit_group_load, |
| except that values are placed in TMPS[i], and must later be moved |
| into corresponding XEXP (XVECEXP (DST, 0, i), 0) element. |
| |
| DST must be a PARALLEL; each of its elements pairs a destination |
| register with the byte offset within the source at which that piece |
| starts. TMPS must have room for XVECLEN (DST, 0) entries; if the |
| first element of DST has a null register, TMPS[0] is not written. |
| ORIG_SRC is the value being split up, TYPE is consulted only through |
| BLOCK_REG_PADDING, and SSIZE is the size of the source in bytes when |
| known. |
| |
| A source that has a mode other than a scalar integer mode and that is |
| neither a MEM nor a CONCAT is first copied into an integer-mode |
| pseudo, or into a stack temporary when no integer mode of that size |
| exists, and the function then recurses on that copy. */ |
| |
| static void |
| emit_group_load_1 (rtx *tmps, rtx dst, rtx orig_src, tree type, |
| poly_int64 ssize) |
| { |
| rtx src; |
| int start, i; |
| machine_mode m = GET_MODE (orig_src); |
| |
| gcc_assert (GET_CODE (dst) == PARALLEL); |
| |
| if (m != VOIDmode |
| && !SCALAR_INT_MODE_P (m) |
| && !MEM_P (orig_src) |
| && GET_CODE (orig_src) != CONCAT) |
| { |
| scalar_int_mode imode; |
| if (int_mode_for_mode (GET_MODE (orig_src)).exists (&imode)) |
| { |
| src = gen_reg_rtx (imode); |
| emit_move_insn (gen_lowpart (GET_MODE (orig_src), src), orig_src); |
| } |
| else |
| { |
| src = assign_stack_temp (GET_MODE (orig_src), ssize); |
| emit_move_insn (src, orig_src); |
| } |
| emit_group_load_1 (tmps, dst, src, type, ssize); |
| return; |
| } |
| |
| /* Check for a NULL entry, used to indicate that the parameter goes |
| both on the stack and in registers. Such an entry can only be the |
| first one, and it is skipped. */ |
| if (XEXP (XVECEXP (dst, 0, 0), 0)) |
| start = 0; |
| else |
| start = 1; |
| |
| /* Process the pieces. For each element, MODE is the mode of the |
| destination register, BYTEPOS is the byte offset of the piece within |
| the source and BYTELEN is the number of bytes to fetch. SHIFT is |
| the left shift, in bits, to apply to the fetched value afterwards. */ |
| for (i = start; i < XVECLEN (dst, 0); i++) |
| { |
| machine_mode mode = GET_MODE (XEXP (XVECEXP (dst, 0, i), 0)); |
| poly_int64 bytepos = rtx_to_poly_int64 (XEXP (XVECEXP (dst, 0, i), 1)); |
| poly_int64 bytelen = GET_MODE_SIZE (mode); |
| poly_int64 shift = 0; |
| |
| /* Handle trailing fragments that run over the size of the struct. |
| It's the target's responsibility to make sure that the fragment |
| cannot be strictly smaller in some cases and strictly larger |
| in others. */ |
| gcc_checking_assert (ordered_p (bytepos + bytelen, ssize)); |
| if (known_size_p (ssize) && maybe_gt (bytepos + bytelen, ssize)) |
| { |
| /* Arrange to shift the fragment to where it belongs. |
| extract_bit_field loads to the lsb of the reg. BYTELEN is then |
| trimmed to the bytes that actually remain in the source. */ |
| if ( |
| #ifdef BLOCK_REG_PADDING |
| BLOCK_REG_PADDING (GET_MODE (orig_src), type, i == start) |
| == (BYTES_BIG_ENDIAN ? PAD_UPWARD : PAD_DOWNWARD) |
| #else |
| BYTES_BIG_ENDIAN |
| #endif |
| ) |
| shift = (bytelen - (ssize - bytepos)) * BITS_PER_UNIT; |
| bytelen = ssize - bytepos; |
| gcc_assert (maybe_gt (bytelen, 0)); |
| } |
| |
| /* If we won't be loading directly from memory, protect the real source |
| from strange tricks we might play; but make sure that the source can |
| be loaded directly into the destination. The copy goes into a new |
| pseudo, of mode MODE when the source itself has no mode. */ |
| src = orig_src; |
| if (!MEM_P (orig_src) |
| && (!CONSTANT_P (orig_src) |
| || (GET_MODE (orig_src) != mode |
| && GET_MODE (orig_src) != VOIDmode))) |
| { |
| if (GET_MODE (orig_src) == VOIDmode) |
| src = gen_reg_rtx (mode); |
| else |
| src = gen_reg_rtx (GET_MODE (orig_src)); |
| |
| emit_move_insn (src, orig_src); |
| } |
| |
| /* Optimize the access just a bit. When SRC is memory, the whole of |
| MODE is wanted, BYTEPOS is a multiple of MODE's alignment, and either |
| the memory is sufficiently aligned or unaligned access is not slow, |
| load the piece with a plain move into a new pseudo. */ |
| if (MEM_P (src) |
| && (! targetm.slow_unaligned_access (mode, MEM_ALIGN (src)) |
| || MEM_ALIGN (src) >= GET_MODE_ALIGNMENT (mode)) |
| && multiple_p (bytepos * BITS_PER_UNIT, GET_MODE_ALIGNMENT (mode)) |
| && known_eq (bytelen, GET_MODE_SIZE (mode))) |
| { |
| tmps[i] = gen_reg_rtx (mode); |
| emit_move_insn (tmps[i], adjust_address (src, mode, bytepos)); |
| } |
| else if (COMPLEX_MODE_P (mode) |
| && GET_MODE (src) == mode |
| && known_eq (bytelen, GET_MODE_SIZE (mode))) |
| /* Let emit_move_complex do the bulk of the work. */ |
| tmps[i] = src; |
| else if (GET_CODE (src) == CONCAT) |
| { |
| poly_int64 slen = GET_MODE_SIZE (GET_MODE (src)); |
| poly_int64 slen0 = GET_MODE_SIZE (GET_MODE (XEXP (src, 0))); |
| unsigned int elt; |
| poly_int64 subpos; |
| |
| if (can_div_trunc_p (bytepos, slen0, &elt, &subpos) |
| && known_le (subpos + bytelen, slen0)) |
| { |
| /* The following assumes that the concatenated objects all |
| have the same size. In this case, a simple calculation |
| can be used to determine the object and the bit field |
| to be extracted. ELT selects the half of the CONCAT and |
| SUBPOS is the byte offset within that half. */ |
| tmps[i] = XEXP (src, elt); |
| if (maybe_ne (subpos, 0) |
| || maybe_ne (subpos + bytelen, slen0) |
| || (!CONSTANT_P (tmps[i]) |
| && (!REG_P (tmps[i]) || GET_MODE (tmps[i]) != mode))) |
| tmps[i] = extract_bit_field (tmps[i], bytelen * BITS_PER_UNIT, |
| subpos * BITS_PER_UNIT, |
| 1, NULL_RTX, mode, mode, false, |
| NULL); |
| } |
| else |
| { |
| rtx mem; |
| |
| gcc_assert (known_eq (bytepos, 0)); |
| mem = assign_stack_temp (GET_MODE (src), slen); |
| emit_move_insn (mem, src); |
| tmps[i] = extract_bit_field (mem, bytelen * BITS_PER_UNIT, |
| 0, 1, NULL_RTX, mode, mode, false, |
| NULL); |
| } |
| } |
| /* FIXME: A SIMD parallel will eventually lead to a subreg of a |
| SIMD register, which is currently broken. While we get GCC |
| to emit proper RTL for these cases, let's dump to memory. */ |
| else if (VECTOR_MODE_P (GET_MODE (dst)) |
| && REG_P (src)) |
| { |
| poly_uint64 slen = GET_MODE_SIZE (GET_MODE (src)); |
| rtx mem; |
| |
| mem = assign_stack_temp (GET_MODE (src), slen); |
| emit_move_insn (mem, src); |
| tmps[i] = adjust_address (mem, mode, bytepos); |
| } |
| else if (CONSTANT_P (src) && GET_MODE (dst) != BLKmode |
| && XVECLEN (dst, 0) > 1) |
| tmps[i] = simplify_gen_subreg (mode, src, GET_MODE (dst), bytepos); |
| else if (CONSTANT_P (src)) |
| { |
| if (known_eq (bytelen, ssize)) |
| tmps[i] = src; |
| else |
| { |
| rtx first, second; |
| |
| /* TODO: const_wide_int can have sizes other than this... |
| For now the constant must be exactly two pieces wide: |
| element 0 takes the first value returned by split_double |
| and any other element takes the second. */ |
| gcc_assert (known_eq (2 * bytelen, ssize)); |
| split_double (src, &first, &second); |
| if (i) |
| tmps[i] = second; |
| else |
| tmps[i] = first; |
| } |
| } |
| else if (REG_P (src) && GET_MODE (src) == mode) |
| tmps[i] = src; |
| else |
| tmps[i] = extract_bit_field (src, bytelen * BITS_PER_UNIT, |
| bytepos * BITS_PER_UNIT, 1, NULL_RTX, |
| mode, mode, false, NULL); |
| |
| if (maybe_ne (shift, 0)) |
| tmps[i] = expand_shift (LSHIFT_EXPR, mode, tmps[i], |
| shift, tmps[i], 0); |
| } |
| } |
| |
| /* Emit code to move a block SRC of type TYPE to a block DST, |
| where DST is non-consecutive registers represented by a PARALLEL. |
| SSIZE represents the total size of block ORIG_SRC in bytes, or -1 |
| if not known. */ |
| |
| void |
| emit_group_load (rtx dst, rtx src, tree type, poly_int64 ssize) |
| { |
| rtx *tmps; |
| int i; |
| |
| tmps = XALLOCAVEC (rtx, XVECLEN (dst, 0)); |
| emit_group_load_1 (tmps, dst, src, type, ssize); |
| |
| /* Copy the extracted pieces into the proper (probable) hard regs. */ |
| for (i = 0; i < XVECLEN (dst, 0); i++) |
| { |
| rtx d = XEXP (XVECEXP (dst, 0, i), 0); |
| if (d == NULL) |
| continue; |
| emit_move_insn (d, tmps[i]); |
| } |
| } |
| |
| /* Similar, but load SRC into new pseudos in a format that looks like |
| PARALLEL. This can later be fed to emit_group_move to get things |
| in the right place. */ |
| |
| rtx |
| emit_group_load_into_temps (rtx parallel, rtx src, tree type, poly_int64 ssize) |
| { |
| rtvec vec; |
| int i; |
| |
| vec = rtvec_alloc (XVECLEN (parallel, 0)); |
| emit_group_load_1 (&RTVEC_ELT (vec, 0), parallel, src, type, ssize); |
| |
| /* Convert the vector to look just like the original PARALLEL, except |
| with the computed values. */ |
| for (i = 0; i < XVECLEN (parallel, 0); i++) |
| { |
| rtx e = XVECEXP (parallel, 0, i); |
| rtx d = XEXP (e, 0); |
| |
| if (d) |
| { |
| d = force_reg (GET_MODE (d), RTVEC_ELT (vec, i)); |
| e = alloc_EXPR_LIST (REG_NOTE_KIND (e), d, XEXP (e, 1)); |
| } |
| RTVEC_ELT (vec, i) = e; |
| } |
| |
| return gen_rtx_PARALLEL (GET_MODE (parallel), vec); |
| } |
| |
| /* Emit code to move a block SRC to block DST, where SRC and DST are |
| non-consecutive groups of registers, each represented by a PARALLEL. */ |
| |
| void |
| emit_group_move (rtx dst, rtx src) |
| { |
| int i; |
| |
| gcc_assert (GET_CODE (src) == PARALLEL |
| && GET_CODE (dst) == PARALLEL |
| && XVECLEN (src, 0) == XVECLEN (dst, 0)); |
| |
| /* Skip first entry if NULL. */ |
| for (i = XEXP (XVECEXP (src, 0, 0), 0) ? 0 : 1; i < XVECLEN (src, 0); i++) |
| emit_move_insn (XEXP (XVECEXP (dst, 0, i), 0), |
| XEXP (XVECEXP (src, 0, i), 0)); |
| } |
| |
| /* Move a group of registers represented by a PARALLEL into pseudos. */ |
| |
| rtx |
| emit_group_move_into_temps (rtx src) |
| { |
| rtvec vec = rtvec_alloc (XVECLEN (src, 0)); |
| int i; |
| |
| for (i = 0; i < XVECLEN (src, 0); i++) |
| { |
| rtx e = XVECEXP (src, 0, i); |
| rtx d = XEXP (e, 0); |
| |
| if (d) |
| e = alloc_EXPR_LIST (REG_NOTE_KIND (e), copy_to_reg (d), XEXP (e, 1)); |
| RTVEC_ELT (vec, i) = e; |
| } |
| |
| return gen_rtx_PARALLEL (GET_MODE (src), vec); |
| } |
| |
| /* Emit code to move a block SRC to a block ORIG_DST of type TYPE, |
| where SRC is non-consecutive registers represented by a PARALLEL. |
| SSIZE represents the total size of block ORIG_DST, or -1 if not |
| known. |
| |
| Each element of SRC pairs a source register with the byte offset |
| within ORIG_DST at which that register's value is to be stored; a |
| null register in the first element is skipped. TYPE is consulted |
| only through BLOCK_REG_PADDING. |
| |
| A destination whose mode is not a scalar integer mode and that is |
| neither a MEM nor a CONCAT is handled by recursing on an integer-mode |
| pseudo, or on a stack temporary when no integer mode of that size |
| exists, and then moving the result into ORIG_DST. */ |
| |
| void |
| emit_group_store (rtx orig_dst, rtx src, tree type ATTRIBUTE_UNUSED, |
| poly_int64 ssize) |
| { |
| rtx *tmps, dst; |
| int start, finish, i; |
| machine_mode m = GET_MODE (orig_dst); |
| |
| gcc_assert (GET_CODE (src) == PARALLEL); |
| |
| if (!SCALAR_INT_MODE_P (m) |
| && !MEM_P (orig_dst) && GET_CODE (orig_dst) != CONCAT) |
| { |
| scalar_int_mode imode; |
| if (int_mode_for_mode (GET_MODE (orig_dst)).exists (&imode)) |
| { |
| dst = gen_reg_rtx (imode); |
| emit_group_store (dst, src, type, ssize); |
| dst = gen_lowpart (GET_MODE (orig_dst), dst); |
| } |
| else |
| { |
| dst = assign_stack_temp (GET_MODE (orig_dst), ssize); |
| emit_group_store (dst, src, type, ssize); |
| } |
| emit_move_insn (orig_dst, dst); |
| return; |
| } |
| |
| /* Check for a NULL entry, used to indicate that the parameter goes |
| both on the stack and in registers. START and FINISH delimit the |
| elements of SRC that still have to be stored. */ |
| if (XEXP (XVECEXP (src, 0, 0), 0)) |
| start = 0; |
| else |
| start = 1; |
| finish = XVECLEN (src, 0); |
| |
| tmps = XALLOCAVEC (rtx, finish); |
| |
| /* Copy the (probable) hard regs into pseudos. Anything that is not |
| already a pseudo register is copied into a new pseudo of the same |
| mode; pseudos are used as they are. */ |
| for (i = start; i < finish; i++) |
| { |
| rtx reg = XEXP (XVECEXP (src, 0, i), 0); |
| if (!REG_P (reg) || REGNO (reg) < FIRST_PSEUDO_REGISTER) |
| { |
| tmps[i] = gen_reg_rtx (GET_MODE (reg)); |
| emit_move_insn (tmps[i], reg); |
| } |
| else |
| tmps[i] = reg; |
| } |
| |
| /* If we won't be storing directly into memory, protect the real destination |
| from strange tricks we might play. */ |
| dst = orig_dst; |
| if (GET_CODE (dst) == PARALLEL) |
| { |
| rtx temp; |
| |
| /* We can get a PARALLEL dst if there is a conditional expression in |
| a return statement. In that case, the dst and src are the same, |
| so no action is necessary. */ |
| if (rtx_equal_p (dst, src)) |
| return; |
| |
| /* It is unclear if we can ever reach here, but we may as well handle |
| it. Allocate a temporary, and split this into a store/load to/from |
| the temporary. */ |
| temp = assign_stack_temp (GET_MODE (dst), ssize); |
| emit_group_store (temp, src, type, ssize); |
| emit_group_load (dst, temp, type, ssize); |
| return; |
| } |
| else if (!MEM_P (dst) && GET_CODE (dst) != CONCAT) |
| { |
| machine_mode outer = GET_MODE (dst); |
| machine_mode inner; |
| poly_int64 bytepos; |
| bool done = false; |
| rtx temp; |
| |
| if (!REG_P (dst) || REGNO (dst) < FIRST_PSEUDO_REGISTER) |
| dst = gen_reg_rtx (outer); |
| |
| /* Make life a bit easier for combine. */ |
| /* If the first element of the vector is the low part |
| of the destination mode, use a paradoxical subreg to |
| initialize the destination. That element is then dropped |
| from the range of pieces still to be stored. */ |
| if (start < finish) |
| { |
| inner = GET_MODE (tmps[start]); |
| bytepos = subreg_lowpart_offset (inner, outer); |
| if (known_eq (rtx_to_poly_int64 (XEXP (XVECEXP (src, 0, start), 1)), |
| bytepos)) |
| { |
| temp = simplify_gen_subreg (outer, tmps[start], |
| inner, 0); |
| if (temp) |
| { |
| emit_move_insn (dst, temp); |
| done = true; |
| start++; |
| } |
| } |
| } |
| |
| /* If the first element wasn't the low part, try the last. */ |
| if (!done |
| && start < finish - 1) |
| { |
| inner = GET_MODE (tmps[finish - 1]); |
| bytepos = subreg_lowpart_offset (inner, outer); |
| if (known_eq (rtx_to_poly_int64 (XEXP (XVECEXP (src, 0, |
| finish - 1), 1)), |
| bytepos)) |
| { |
| temp = simplify_gen_subreg (outer, tmps[finish - 1], |
| inner, 0); |
| if (temp) |
| { |
| emit_move_insn (dst, temp); |
| done = true; |
| finish--; |
| } |
| } |
| } |
| |
| /* Otherwise, simply initialize the result to zero. */ |
| if (!done) |
| emit_move_insn (dst, CONST0_RTX (outer)); |
| } |
| |
| /* Process the pieces. For each one, BYTEPOS is its byte offset in the |
| destination, MODE and BYTELEN describe the register holding it, and |
| ADJ_BYTELEN is BYTELEN trimmed to what fits within SSIZE. |
| |
| When DST is a CONCAT, a piece that lies entirely within one half is |
| stored into that half. A piece that straddles both halves must |
| start at byte 0; it is stored through a stack temporary, which then |
| replaces DST, and the loop ends. */ |
| for (i = start; i < finish; i++) |
| { |
| poly_int64 bytepos = rtx_to_poly_int64 (XEXP (XVECEXP (src, 0, i), 1)); |
| machine_mode mode = GET_MODE (tmps[i]); |
| poly_int64 bytelen = GET_MODE_SIZE (mode); |
| poly_uint64 adj_bytelen; |
| rtx dest = dst; |
| |
| /* Handle trailing fragments that run over the size of the struct. |
| It's the target's responsibility to make sure that the fragment |
| cannot be strictly smaller in some cases and strictly larger |
| in others. */ |
| gcc_checking_assert (ordered_p (bytepos + bytelen, ssize)); |
| if (known_size_p (ssize) && maybe_gt (bytepos + bytelen, ssize)) |
| adj_bytelen = ssize - bytepos; |
| else |
| adj_bytelen = bytelen; |
| |
| if (GET_CODE (dst) == CONCAT) |
| { |
| if (known_le (bytepos + adj_bytelen, |
| GET_MODE_SIZE (GET_MODE (XEXP (dst, 0))))) |
| dest = XEXP (dst, 0); |
| else if (known_ge (bytepos, GET_MODE_SIZE (GET_MODE (XEXP (dst, 0))))) |
| { |
| bytepos -= GET_MODE_SIZE (GET_MODE (XEXP (dst, 0))); |
| dest = XEXP (dst, 1); |
| } |
| else |
| { |
| machine_mode dest_mode = GET_MODE (dest); |
| machine_mode tmp_mode = GET_MODE (tmps[i]); |
| |
| gcc_assert (known_eq (bytepos, 0) && XVECLEN (src, 0)); |
| |
| if (GET_MODE_ALIGNMENT (dest_mode) |
| >= GET_MODE_ALIGNMENT (tmp_mode)) |
| { |
| dest = assign_stack_temp (dest_mode, |
| GET_MODE_SIZE (dest_mode)); |
| emit_move_insn (adjust_address (dest, |
| tmp_mode, |
| bytepos), |
| tmps[i]); |
| dst = dest; |
| } |
| else |
| { |
| dest = assign_stack_temp (tmp_mode, |
| GET_MODE_SIZE (tmp_mode)); |
| emit_move_insn (dest, tmps[i]); |
| dst = adjust_address (dest, dest_mode, bytepos); |
| } |
| break; |
| } |
| } |
| |
| /* Handle trailing fragments that run over the size of the struct. */ |
| if (known_size_p (ssize) && maybe_gt (bytepos + bytelen, ssize)) |
| { |
| /* store_bit_field always takes its value from the lsb. |
| Move the fragment to the lsb if it's not already there. */ |
| if ( |
| #ifdef BLOCK_REG_PADDING |
| BLOCK_REG_PADDING (GET_MODE (orig_dst), type, i == start) |
| == (BYTES_BIG_ENDIAN ? PAD_UPWARD : PAD_DOWNWARD) |
| #else |
| BYTES_BIG_ENDIAN |
| #endif |
| ) |
| { |
| poly_int64 shift = (bytelen - (ssize - bytepos)) * BITS_PER_UNIT; |
| tmps[i] = expand_shift (RSHIFT_EXPR, mode, tmps[i], |
| shift, tmps[i], 0); |
| } |
| |
| /* Make sure not to write past the end of the struct. Only |
| ADJ_BYTELEN bytes are stored, and the bit region passed to |
| store_bit_field ends at the last bit of the struct. */ |
| store_bit_field (dest, |
| adj_bytelen * BITS_PER_UNIT, bytepos * BITS_PER_UNIT, |
| bytepos * BITS_PER_UNIT, ssize * BITS_PER_UNIT - 1, |
| VOIDmode, tmps[i], false); |
| } |
| |
| /* Optimize the access just a bit. When DEST is memory, BYTEPOS is a |
| multiple of MODE's alignment, and either the memory is sufficiently |
| aligned or unaligned access is not slow, store the piece with a |
| plain move. */ |
| else if (MEM_P (dest) |
| && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (dest)) |
| || MEM_ALIGN (dest) >= GET_MODE_ALIGNMENT (mode)) |
| && multiple_p (bytepos * BITS_PER_UNIT, |
| GET_MODE_ALIGNMENT (mode)) |
| && known_eq (bytelen, GET_MODE_SIZE (mode))) |
| emit_move_insn (adjust_address (dest, mode, bytepos), tmps[i]); |
| |
| else |
| store_bit_field (dest, bytelen * BITS_PER_UNIT, bytepos * BITS_PER_UNIT, |
| 0, 0, mode, tmps[i], false); |
| } |
| |
| /* Copy from the pseudo into the (probable) hard reg. This is needed |
| whenever the pieces were assembled somewhere other than ORIG_DST. */ |
| if (orig_dst != dst) |
| emit_move_insn (orig_dst, dst); |
| } |
| |
| /* Return a form of X that does not use a PARALLEL. TYPE is the type |
| of the value stored in X. */ |
| |
| rtx |
| maybe_emit_group_store (rtx x, tree type) |
| { |
| machine_mode mode = TYPE_MODE (type); |
| gcc_checking_assert (GET_MODE (x) == VOIDmode || GET_MODE (x) == mode); |
| if (GET_CODE (x) == PARALLEL) |
| { |
| rtx result = gen_reg_rtx (mode); |
| emit_group_store (result, x, type, int_size_in_bytes (type)); |
| return result; |
| } |
| return x; |
| } |
| |
| /* Copy a BLKmode object of TYPE out of a register SRCREG into TARGET. |
| |
| This is used on targets that return BLKmode values in registers. |
| |
| TARGET is tested for being a MEM or a REG. When no padding |
| correction is needed and the size of TYPE equals the size of SRCREG's |
| mode, a single move may be emitted. Otherwise the object is copied |
| in chunks of MIN (TYPE_ALIGN (TYPE), BITS_PER_WORD) bits, using |
| extract_bit_field on the source and store_bit_field on the |
| destination. */ |
| |
| static void |
| copy_blkmode_from_reg (rtx target, rtx srcreg, tree type) |
| { |
| unsigned HOST_WIDE_INT bytes = int_size_in_bytes (type); |
| rtx src = NULL, dst = NULL; |
| unsigned HOST_WIDE_INT bitsize = MIN (TYPE_ALIGN (type), BITS_PER_WORD); |
| unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0; |
| /* No current ABI uses variable-sized modes to pass a BLKmode type. */ |
| fixed_size_mode mode = as_a <fixed_size_mode> (GET_MODE (srcreg)); |
| fixed_size_mode tmode = as_a <fixed_size_mode> (GET_MODE (target)); |
| fixed_size_mode copy_mode; |
| |
| /* BLKmode registers created in the back-end shouldn't have survived. */ |
| gcc_assert (mode != BLKmode); |
| |
| /* If the structure doesn't take up a whole number of words, see whether |
| SRCREG is padded on the left or on the right. If it's on the left, |
| set PADDING_CORRECTION to the number of bits to skip. |
| |
| In most ABIs, the structure will be returned at the least significant |
| end of the register, which translates to right padding on little-endian |
| targets and left padding on big-endian targets. The opposite |
| holds if the structure is returned at the most significant |
| end of the register. */ |
| if (bytes % UNITS_PER_WORD != 0 |
| && (targetm.calls.return_in_msb (type) |
| ? !BYTES_BIG_ENDIAN |
| : BYTES_BIG_ENDIAN)) |
| padding_correction |
| = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD) * BITS_PER_UNIT)); |
| |
| /* We can use a single move if we have an exact mode for the size. |
| This is only tried when no padding correction was set above. */ |
| else if (MEM_P (target) |
| && (!targetm.slow_unaligned_access (mode, MEM_ALIGN (target)) |
| || MEM_ALIGN (target) >= GET_MODE_ALIGNMENT (mode)) |
| && bytes == GET_MODE_SIZE (mode)) |
| { |
| emit_move_insn (adjust_address (target, mode, 0), srcreg); |
| return; |
| } |
| |
| /* And if we additionally have the same mode for a register. */ |
| else if (REG_P (target) |
| && GET_MODE (target) == mode |
| && bytes == GET_MODE_SIZE (mode)) |
| { |
| emit_move_insn (target, srcreg); |
| return; |
| } |
| |
| /* This code assumes srcreg is at least a full word. If it isn't, copy it |
| into a new pseudo which is a full word. */ |
| if (GET_MODE_SIZE (mode) < UNITS_PER_WORD) |
| { |
| srcreg = convert_to_mode (word_mode, srcreg, TYPE_UNSIGNED (type)); |
| mode = word_mode; |
| } |
| |
| /* Copy the structure BITSIZE bits at a time. If the target lives in |
| memory, take care of not reading/writing past its end by selecting |
| a copy mode suited to BITSIZE. This should always be possible given |
| how it is computed. |
| |
| If the target lives in register, make sure not to select a copy mode |
| larger than the mode of the register. |
| |
| We could probably emit more efficient code for machines which do not use |
| strict alignment, but it doesn't seem worth the effort at the current |
| time. */ |
| |
| copy_mode = word_mode; |
| if (MEM_P (target)) |
| { |
| opt_scalar_int_mode mem_mode = int_mode_for_size (bitsize, 1); |
| if (mem_mode.exists ()) |
| copy_mode = mem_mode.require (); |
| } |
| else if (REG_P (target) && GET_MODE_BITSIZE (tmode) < BITS_PER_WORD) |
| copy_mode = tmode; |
| |
| for (bitpos = 0, xbitpos = padding_correction; |
| bitpos < bytes * BITS_PER_UNIT; |
| bitpos += bitsize, xbitpos += bitsize) |
| { |
| /* We need a new source operand each time xbitpos is on a |
| word boundary and when xbitpos == padding_correction |
| (the first time through). */ |
| if (xbitpos % BITS_PER_WORD == 0 || xbitpos == padding_correction) |
| src = operand_subword_force (srcreg, xbitpos / BITS_PER_WORD, mode); |
| |
| /* We need a new destination operand each time bitpos is on |
| a word boundary. A register target narrower than a word is |
| used directly instead. */ |
| if (REG_P (target) && GET_MODE_BITSIZE (tmode) < BITS_PER_WORD) |
| dst = target; |
| else if (bitpos % BITS_PER_WORD == 0) |
| dst = operand_subword (target, bitpos / BITS_PER_WORD, 1, tmode); |
| |
| /* Use xbitpos for the source extraction (right justified) and |
| bitpos for the destination store (left justified). */ |
| store_bit_field (dst, bitsize, bitpos % BITS_PER_WORD, 0, 0, copy_mode, |
| extract_bit_field (src, bitsize, |
| xbitpos % BITS_PER_WORD, 1, |
| NULL_RTX, copy_mode, copy_mode, |
| false, NULL), |
| false); |
| } |
| } |
| |
| /* Copy BLKmode value SRC into a register of mode MODE_IN. Return the |
| register if it contains any data, otherwise return null. |
| |
| This is used on targets that return BLKmode values in registers. */ |
| |
| rtx |
| copy_blkmode_to_reg (machine_mode mode_in, tree src) |
| { |
| int i, n_regs; |
| unsigned HOST_WIDE_INT bitpos, xbitpos, padding_correction = 0, bytes; |
| unsigned int bitsize; |
| rtx *dst_words, dst, x, src_word = NULL_RTX, dst_word = NULL_RTX; |
| /* No current ABI uses variable-sized modes to pass a BLKmnode type. */ |
| fixed_size_mode mode = as_a <fixed_size_mode> (mode_in); |
| fixed_size_mode dst_mode; |
| scalar_int_mode min_mode; |
| |
| gcc_assert (TYPE_MODE (TREE_TYPE (src)) == BLKmode); |
| |
| x = expand_normal (src); |
| |
| bytes = arg_int_size_in_bytes (TREE_TYPE (src)); |
| if (bytes == 0) |
| return NULL_RTX; |
| |
| /* If the structure doesn't take up a whole number of words, see |
| whether the register value should be padded on the left or on |
| the right. Set PADDING_CORRECTION to the number of padding |
| bits needed on the left side. |
| |
| In most ABIs, the structure will be returned at the least end of |
| the register, which translates to right padding on little-endian |
| targets and left padding on big-endian targets. The opposite |
| holds if the structure is returned at the most significant |
| end of the register. */ |
| if (bytes % UNITS_PER_WORD != 0 |
| && (targetm.calls.return_in_msb (TREE_TYPE (src)) |
| ? !BYTES_BIG_ENDIAN |
| : BYTES_BIG_ENDIAN)) |
| padding_correction = (BITS_PER_WORD - ((bytes % UNITS_PER_WORD) |
| * BITS_PER_UNIT)); |
| |
| n_regs = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
| dst_words = XALLOCAVEC (rtx, n_regs); |
| bitsize = MIN (TYPE_ALIGN (TREE_TYPE (src)), BITS_PER_WORD); |
| min_mode = smallest_int_mode_for_size (bitsize); |
| |
| /* Copy the structure BITSIZE bits at a time. */ |
| for (bitpos = 0, xbitpos = padding_correction; |
| bitpos < bytes * BITS_PER_UNIT; |
| bitpos += bitsize, xbitpos += bitsize) |
| { |
| /* We need a new destination pseudo each time xbitpos is |
| on a word boundary and when xbitpos == padding_correction |
| (the first time through). */ |
| if (xbitpos % BITS_PER_WORD == 0 |
| || xbitpos == padding_correction) |
| { |
| /* Generate an appropriate register. */ |
| dst_word = gen_reg_rtx (word_mode); |
| dst_words[xbitpos / BITS_PER_WORD] = dst_word; |
| |
| /* Clear the destination before we move anything into it. */ |
| emit_move_insn (dst_word, CONST0_RTX (word_mode)); |
| } |
| |
| /* Find the largest integer mode that can be used to copy all or as |
| many bits as possible of the structure if the target supports larger |
| copies. There are too many corner cases here w.r.t to alignments on |
| the read/writes. So if there is any padding just use single byte |
| operations. */ |
| opt_scalar_int_mode mode_iter; |
| if (padding_correction == 0 && !STRICT_ALIGNMENT) |
| { |
| FOR_EACH_MODE_FROM (mode_iter, min_mode) |
| { |
| unsigned int msize = GET_MODE_BITSIZE (mode_iter.require ()); |
| if (msize <= ((bytes * BITS_PER_UNIT) - bitpos) |
| && msize <= BITS_PER_WORD) |
| bitsize = msize; |
| else |
| break; |
| } |
| } |
| |
| /* We need a new source operand each time bitpos is on a word |
| boundary. */ |
| if (bitpos % BITS_PER_WORD == 0) |
| src_word = operand_subword_force (x, bitpos / BITS_PER_WORD, BLKmode); |
| |
| /* Use bitpos for the source extraction (left justified) and |
| xbitpos for the destination store (right justified). */ |
| store_bit_field (dst_word, bitsize, xbitpos % BITS_PER_WORD, |
| 0, 0, word_mode, |
| extract_bit_field (src_word, bitsize, |
| bitpos % BITS_PER_WORD, 1, |
| NULL_RTX, word_mode, word_mode, |
| false, NULL), |
| false); |
| } |
| |
| if (mode == BLKmode) |
| { |
| /* Find the smallest integer mode large enough to hold the |
| entire structure. */ |
| opt_scalar_int_mode mode_iter; |
| FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT) |
| if (GET_MODE_SIZE (mode_iter.require ()) >= bytes) |
| break; |
| |
| /* A suitable mode should have been found. */ |
| mode = mode_iter.require (); |
| } |
| |
| if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (word_mode)) |
| dst_mode = word_mode; |
| else |
| dst_mode = mode; |
| dst = gen_reg_rtx (dst_mode); |
| |
| for (i = 0; i < n_regs; i++) |
| emit_move_insn (operand_subword (dst, i, 0, dst_mode), dst_words[i]); |
| |
| if (mode != dst_mode) |
| dst = gen_lowpart (mode, dst); |
| |
| return dst; |
| } |
| |
| /* Add a USE expression for REG to the (possibly empty) list pointed
|