blob: eb33643bd77062f2238d983ba38e5a363d43d906 [file] [log] [blame]
/* Convert tree expression to rtl instructions, for GNU compiler.
Copyright (C) 1988-2021 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "predict.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "optabs.h"
#include "expmed.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "attribs.h"
#include "varasm.h"
#include "except.h"
#include "insn-attr.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "stmt.h"
/* Include expr.h after insn-config.h so we get HAVE_conditional_move. */
#include "expr.h"
#include "optabs-tree.h"
#include "libfuncs.h"
#include "reload.h"
#include "langhooks.h"
#include "common/common-target.h"
#include "tree-dfa.h"
#include "tree-ssa-live.h"
#include "tree-outof-ssa.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "ccmp.h"
#include "gimple-fold.h"
#include "rtx-vector-builder.h"
#include "tree-pretty-print.h"
#include "flags.h"
/* If this is nonzero, we do not bother generating VOLATILE
around volatile memory references, and we are willing to
output indirect addresses. If cse is to follow, we reject
indirect addresses so a useful potential cse is generated;
if it is used only once, instruction combination will produce
the same indirect address eventually. */
int cse_not_expected;
/* Prototypes for the file-local (static) helpers, so that they can be
called ahead of their definitions. */
static bool block_move_libcall_safe_for_call_parm (void);
static bool emit_block_move_via_pattern (rtx, rtx, rtx, unsigned, unsigned,
HOST_WIDE_INT, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT, bool);
static void emit_block_move_via_loop (rtx, rtx, rtx, unsigned);
static void clear_by_pieces (rtx, unsigned HOST_WIDE_INT, unsigned int);
static rtx_insn *compress_float_constant (rtx, rtx);
static rtx get_subtarget (rtx);
static void store_constructor (tree, rtx, int, poly_int64, bool);
static rtx store_field (rtx, poly_int64, poly_int64, poly_uint64, poly_uint64,
machine_mode, tree, alias_set_type, bool, bool);
static unsigned HOST_WIDE_INT highest_pow2_factor_for_target (const_tree, const_tree);
static int is_aligning_offset (const_tree, const_tree);
static rtx reduce_to_bit_field_precision (rtx, rtx, tree);
static rtx do_store_flag (sepops, rtx, machine_mode);
/* emit_single_push_insn is only declared when the target defines
PUSH_ROUNDING. */
#ifdef PUSH_ROUNDING
static void emit_single_push_insn (machine_mode, rtx, tree);
#endif
static void do_tablejump (rtx, machine_mode, rtx, rtx, rtx,
profile_probability);
static rtx const_vector_from_tree (tree);
static tree tree_expr_size (const_tree);
static HOST_WIDE_INT int_expr_size (tree);
static void convert_mode_scalar (rtx, rtx, int);
/* Set up the per-mode tables used during expansion:
- direct_load[] and direct_store[] record, for each machine mode, whether
a SET between some hard register of that mode and a stack- or
frame-pointer-addressed MEM is recognized as an insn;
- float_extend_from_mem[][] records, for each pair of float modes with a
usable extension insn, whether operand 1 of that insn accepts a MEM.
It is run at the beginning of compilation and when the target is
reinitialized. */
void
init_expr_target (void)
{
rtx pat;
int num_clobbers;
rtx mem, mem1;
rtx reg;
/* Try indexing by frame ptr and try by stack ptr.
It is known that on the Convex the stack ptr isn't a valid index.
With luck, one or the other is valid on any machine. */
mem = gen_rtx_MEM (word_mode, stack_pointer_rtx);
mem1 = gen_rtx_MEM (word_mode, frame_pointer_rtx);
/* A scratch register we can modify in-place below to avoid
useless RTL allocations. */
reg = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1);
/* One skeleton insn whose SET pattern has its source and destination
rewritten in place for every probe handed to recog. */
rtx_insn *insn = as_a<rtx_insn *> (rtx_alloc (INSN));
pat = gen_rtx_SET (NULL_RTX, NULL_RTX);
PATTERN (insn) = pat;
for (machine_mode mode = VOIDmode; (int) mode < NUM_MACHINE_MODES;
mode = (machine_mode) ((int) mode + 1))
{
int regno;
/* Start from "not possible" and retarget both probe MEMs to MODE. */
direct_load[(int) mode] = direct_store[(int) mode] = 0;
PUT_MODE (mem, mode);
PUT_MODE (mem1, mode);
/* See if there is some register that can be used in this mode and
directly loaded or stored from memory. The scan over hard registers
stops as soon as both a load and a store have been found. */
if (mode != VOIDmode && mode != BLKmode)
for (regno = 0; regno < FIRST_PSEUDO_REGISTER
&& (direct_load[(int) mode] == 0 || direct_store[(int) mode] == 0);
regno++)
{
if (!targetm.hard_regno_mode_ok (regno, mode))
continue;
set_mode_and_regno (reg, mode, regno);
/* Load from the stack-pointer-addressed MEM. */
SET_SRC (pat) = mem;
SET_DEST (pat) = reg;
if (recog (pat, insn, &num_clobbers) >= 0)
direct_load[(int) mode] = 1;
/* Load from the frame-pointer-addressed MEM. */
SET_SRC (pat) = mem1;
SET_DEST (pat) = reg;
if (recog (pat, insn, &num_clobbers) >= 0)
direct_load[(int) mode] = 1;
/* Store to the stack-pointer-addressed MEM. */
SET_SRC (pat) = reg;
SET_DEST (pat) = mem;
if (recog (pat, insn, &num_clobbers) >= 0)
direct_store[(int) mode] = 1;
/* Store to the frame-pointer-addressed MEM. */
SET_SRC (pat) = reg;
SET_DEST (pat) = mem1;
if (recog (pat, insn, &num_clobbers) >= 0)
direct_store[(int) mode] = 1;
}
}
/* A MEM addressed by a raw Pmode register; its mode is rewritten for each
source mode probed below. */
mem = gen_rtx_MEM (VOIDmode, gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1));
opt_scalar_float_mode mode_iter;
FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
{
scalar_float_mode mode = mode_iter.require ();
scalar_float_mode srcmode;
/* NOTE(review): FOR_EACH_MODE_UNTIL presumably visits the float modes
narrower than MODE -- confirm against machmode.h. */
FOR_EACH_MODE_UNTIL (srcmode, mode)
{
enum insn_code ic;
ic = can_extend_p (mode, srcmode, 0);
if (ic == CODE_FOR_nothing)
continue;
/* Record the pair when operand 1 of the extension insn accepts the
MEM in SRCMODE. */
PUT_MODE (mem, srcmode);
if (insn_operand_matches (ic, 1, mem))
float_extend_from_mem[mode][srcmode] = true;
}
}
}
/* Clear the expression-expansion state kept in crtl->expr.  This is run
   at the start of compiling a function.  */
void
init_expr (void)
{
  auto *expr_state = &crtl->expr;
  memset (expr_state, 0, sizeof (*expr_state));
}
/* Copy data from FROM to TO, where the machine modes are not the same.
Both modes may be integer, or both may be floating, or both may be
fixed-point.
UNSIGNEDP should be nonzero if FROM is an unsigned type.
This causes zero-extension instead of sign-extension.
Neither TO nor FROM may have BLKmode, and TO must not be a SUBREG with
SUBREG_PROMOTED_VAR_P set (both are asserted). Vector modes and CONCAT
pairs are handled here; every other case is passed on to
convert_mode_scalar. */
void
convert_move (rtx to, rtx from, int unsignedp)
{
machine_mode to_mode = GET_MODE (to);
machine_mode from_mode = GET_MODE (from);
gcc_assert (to_mode != BLKmode);
gcc_assert (from_mode != BLKmode);
/* If the source and destination are already the same, then there's
nothing to do. */
if (to == from)
return;
/* If FROM is a SUBREG that indicates that we have already done at least
the required extension, strip it. We don't handle such SUBREGs as
TO here. */
scalar_int_mode to_int_mode;
if (GET_CODE (from) == SUBREG
&& SUBREG_PROMOTED_VAR_P (from)
&& is_a <scalar_int_mode> (to_mode, &to_int_mode)
&& (GET_MODE_PRECISION (subreg_promoted_mode (from))
>= GET_MODE_PRECISION (to_int_mode))
&& SUBREG_CHECK_PROMOTED_SIGN (from, unsignedp))
{
scalar_int_mode int_orig_mode;
scalar_int_mode int_inner_mode;
machine_mode orig_mode = GET_MODE (from);
/* From here on FROM is the low part of the promoted inner value, in
TO's mode. */
from = gen_lowpart (to_int_mode, SUBREG_REG (from));
from_mode = to_int_mode;
/* Preserve SUBREG_PROMOTED_VAR_P if the new mode is wider than
the original mode, but narrower than the inner mode. */
if (GET_CODE (from) == SUBREG
&& is_a <scalar_int_mode> (orig_mode, &int_orig_mode)
&& GET_MODE_PRECISION (to_int_mode)
> GET_MODE_PRECISION (int_orig_mode)
&& is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (from)),
&int_inner_mode)
&& GET_MODE_PRECISION (int_inner_mode)
> GET_MODE_PRECISION (to_int_mode))
{
SUBREG_PROMOTED_VAR_P (from) = 1;
SUBREG_PROMOTED_SET (from, unsignedp);
}
}
gcc_assert (GET_CODE (to) != SUBREG || !SUBREG_PROMOTED_VAR_P (to));
/* Same mode, or a modeless constant source: a plain move is enough. */
if (to_mode == from_mode
|| (from_mode == VOIDmode && CONSTANT_P (from)))
{
emit_move_insn (to, from);
return;
}
/* At least one side is a vector. Use a vector extension or truncation
insn when the element precision changes and the target provides one;
otherwise both modes must have the same bit size and the value is moved
through a subreg. */
if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode))
{
if (GET_MODE_UNIT_PRECISION (to_mode)
> GET_MODE_UNIT_PRECISION (from_mode))
{
optab op = unsignedp ? zext_optab : sext_optab;
insn_code icode = convert_optab_handler (op, to_mode, from_mode);
if (icode != CODE_FOR_nothing)
{
emit_unop_insn (icode, to, from,
unsignedp ? ZERO_EXTEND : SIGN_EXTEND);
return;
}
}
if (GET_MODE_UNIT_PRECISION (to_mode)
< GET_MODE_UNIT_PRECISION (from_mode))
{
insn_code icode = convert_optab_handler (trunc_optab,
to_mode, from_mode);
if (icode != CODE_FOR_nothing)
{
emit_unop_insn (icode, to, from, TRUNCATE);
return;
}
}
gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode),
GET_MODE_BITSIZE (to_mode)));
/* Reinterpret whichever side is not in the vector mode: FROM when TO
is the vector, otherwise TO. */
if (VECTOR_MODE_P (to_mode))
from = simplify_gen_subreg (to_mode, from, GET_MODE (from), 0);
else
to = simplify_gen_subreg (from_mode, to, GET_MODE (to), 0);
emit_move_insn (to, from);
return;
}
/* Convert the two halves of a CONCAT pair independently. */
if (GET_CODE (to) == CONCAT && GET_CODE (from) == CONCAT)
{
convert_move (XEXP (to, 0), XEXP (from, 0), unsignedp);
convert_move (XEXP (to, 1), XEXP (from, 1), unsignedp);
return;
}
/* Everything else is treated as a scalar-to-scalar conversion. */
convert_mode_scalar (to, from, unsignedp);
}
/* Like convert_move, but deals only with scalar modes.
TO and FROM must both have scalar modes, and either both or neither may
be floating-point (asserted). A nonzero UNSIGNEDP selects zero-extension
rather than sign-extension; when UNSIGNEDP is negative, no equivalent rtx
code (UNKNOWN) is attached to the extension insns emitted with
EQUIV_CODE. */
static void
convert_mode_scalar (rtx to, rtx from, int unsignedp)
{
/* Both modes should be scalar types. */
scalar_mode from_mode = as_a <scalar_mode> (GET_MODE (from));
scalar_mode to_mode = as_a <scalar_mode> (GET_MODE (to));
bool to_real = SCALAR_FLOAT_MODE_P (to_mode);
bool from_real = SCALAR_FLOAT_MODE_P (from_mode);
enum insn_code code;
rtx libcall;
gcc_assert (to_real == from_real);
/* rtx code for making an equivalent value. */
enum rtx_code equiv_code = (unsignedp < 0 ? UNKNOWN
: (unsignedp ? ZERO_EXTEND : SIGN_EXTEND));
/* Floating-point to floating-point: use an extension or truncation insn
if the target has one, otherwise a library call. */
if (to_real)
{
rtx value;
rtx_insn *insns;
convert_optab tab;
gcc_assert ((GET_MODE_PRECISION (from_mode)
!= GET_MODE_PRECISION (to_mode))
|| (DECIMAL_FLOAT_MODE_P (from_mode)
!= DECIMAL_FLOAT_MODE_P (to_mode)));
if (GET_MODE_PRECISION (from_mode) == GET_MODE_PRECISION (to_mode))
/* Conversion between decimal float and binary float, same size. */
tab = DECIMAL_FLOAT_MODE_P (from_mode) ? trunc_optab : sext_optab;
else if (GET_MODE_PRECISION (from_mode) < GET_MODE_PRECISION (to_mode))
tab = sext_optab;
else
tab = trunc_optab;
/* Try converting directly if the insn is supported. */
code = convert_optab_handler (tab, to_mode, from_mode);
if (code != CODE_FOR_nothing)
{
emit_unop_insn (code, to, from,
tab == sext_optab ? FLOAT_EXTEND : FLOAT_TRUNCATE);
return;
}
/* Otherwise use a libcall. */
libcall = convert_optab_libfunc (tab, to_mode, from_mode);
/* Is this conversion implemented yet? */
gcc_assert (libcall);
/* Emit the call into its own sequence, then wrap it as a libcall block
whose result is described by the equivalent FLOAT_TRUNCATE or
FLOAT_EXTEND expression. */
start_sequence ();
value = emit_library_call_value (libcall, NULL_RTX, LCT_CONST, to_mode,
from, from_mode);
insns = get_insns ();
end_sequence ();
emit_libcall_block (insns, to, value,
tab == trunc_optab ? gen_rtx_FLOAT_TRUNCATE (to_mode,
from)
: gen_rtx_FLOAT_EXTEND (to_mode, from));
return;
}
/* Handle pointer conversion. */ /* SPEE 900220. */
/* If the target has a converter from FROM_MODE to TO_MODE, use it. */
{
convert_optab ctab;
if (GET_MODE_PRECISION (from_mode) > GET_MODE_PRECISION (to_mode))
ctab = trunc_optab;
else if (unsignedp)
ctab = zext_optab;
else
ctab = sext_optab;
if (convert_optab_handler (ctab, to_mode, from_mode)
!= CODE_FOR_nothing)
{
emit_unop_insn (convert_optab_handler (ctab, to_mode, from_mode),
to, from, UNKNOWN);
return;
}
}
/* Targets are expected to provide conversion insns between PxImode and
xImode for all MODE_PARTIAL_INT modes they use, but no others. */
if (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT)
{
/* Convert FROM to the smallest full integer mode that holds TO_MODE,
then truncate that to the partial mode. */
scalar_int_mode full_mode
= smallest_int_mode_for_size (GET_MODE_BITSIZE (to_mode));
gcc_assert (convert_optab_handler (trunc_optab, to_mode, full_mode)
!= CODE_FOR_nothing);
if (full_mode != from_mode)
from = convert_to_mode (full_mode, from, unsignedp);
emit_unop_insn (convert_optab_handler (trunc_optab, to_mode, full_mode),
to, from, UNKNOWN);
return;
}
if (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT)
{
/* Extend the partial-mode source to its full integer mode first. If
that is already TO_MODE we are done; otherwise continue below with
the widened value as the new FROM. */
rtx new_from;
scalar_int_mode full_mode
= smallest_int_mode_for_size (GET_MODE_BITSIZE (from_mode));
convert_optab ctab = unsignedp ? zext_optab : sext_optab;
enum insn_code icode;
icode = convert_optab_handler (ctab, full_mode, from_mode);
gcc_assert (icode != CODE_FOR_nothing);
if (to_mode == full_mode)
{
emit_unop_insn (icode, to, from, UNKNOWN);
return;
}
new_from = gen_reg_rtx (full_mode);
emit_unop_insn (icode, new_from, from, UNKNOWN);
/* else proceed to integer conversions below. */
from_mode = full_mode;
from = new_from;
}
/* Make sure both are fixed-point modes or both are not. */
gcc_assert (ALL_SCALAR_FIXED_POINT_MODE_P (from_mode) ==
ALL_SCALAR_FIXED_POINT_MODE_P (to_mode));
if (ALL_SCALAR_FIXED_POINT_MODE_P (from_mode))
{
/* If we widen from_mode to to_mode and they are in the same class,
we won't saturate the result.
Otherwise, always saturate the result to play safe. */
if (GET_MODE_CLASS (from_mode) == GET_MODE_CLASS (to_mode)
&& GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
expand_fixed_convert (to, from, 0, 0);
else
expand_fixed_convert (to, from, 0, 1);
return;
}
/* Now both modes are integers. */
/* Handle expanding beyond a word. */
if (GET_MODE_PRECISION (from_mode) < GET_MODE_PRECISION (to_mode)
&& GET_MODE_PRECISION (to_mode) > BITS_PER_WORD)
{
rtx_insn *insns;
rtx lowpart;
rtx fill_value;
rtx lowfrom;
int i;
scalar_mode lowpart_mode;
int nwords = CEIL (GET_MODE_SIZE (to_mode), UNITS_PER_WORD);
/* Try converting directly if the insn is supported. */
if ((code = can_extend_p (to_mode, from_mode, unsignedp))
!= CODE_FOR_nothing)
{
/* If FROM is a SUBREG, put it into a register. Do this
so that we always generate the same set of insns for
better cse'ing; if an intermediate assignment occurred,
we won't be doing the operation directly on the SUBREG. */
if (optimize > 0 && GET_CODE (from) == SUBREG)
from = force_reg (from_mode, from);
emit_unop_insn (code, to, from, equiv_code);
return;
}
/* Next, try converting via full word. */
else if (GET_MODE_PRECISION (from_mode) < BITS_PER_WORD
&& ((code = can_extend_p (to_mode, word_mode, unsignedp))
!= CODE_FOR_nothing))
{
rtx word_to = gen_reg_rtx (word_mode);
if (REG_P (to))
{
/* TO is clobbered below, so FROM is first copied to a register of
its own if it mentions TO. */
if (reg_overlap_mentioned_p (to, from))
from = force_reg (from_mode, from);
emit_clobber (to);
}
convert_move (word_to, from, unsignedp);
emit_unop_insn (code, to, word_to, equiv_code);
return;
}
/* No special multiword conversion insn; do it by hand. */
start_sequence ();
/* Since we will turn this into a no conflict block, we must ensure
the source does not overlap the target so force it into an isolated
register when maybe so. Likewise for any MEM input, since the
conversion sequence might require several references to it and we
must ensure we're getting the same value every time. */
if (MEM_P (from) || reg_overlap_mentioned_p (to, from))
from = force_reg (from_mode, from);
/* Get a copy of FROM widened to a word, if necessary. */
if (GET_MODE_PRECISION (from_mode) < BITS_PER_WORD)
lowpart_mode = word_mode;
else
lowpart_mode = from_mode;
lowfrom = convert_to_mode (lowpart_mode, from, unsignedp);
lowpart = gen_lowpart (lowpart_mode, to);
emit_move_insn (lowpart, lowfrom);
/* Compute the value to put in each remaining word: zero for an unsigned
source, otherwise the result of a store-flag for LOWFROM < 0. */
if (unsignedp)
fill_value = const0_rtx;
else
fill_value = emit_store_flag_force (gen_reg_rtx (word_mode),
LT, lowfrom, const0_rtx,
lowpart_mode, 0, -1);
/* Fill the remaining words. */
for (i = GET_MODE_SIZE (lowpart_mode) / UNITS_PER_WORD; i < nwords; i++)
{
/* Word I counts from the least significant end; map it to the
subword index according to WORDS_BIG_ENDIAN. */
int index = (WORDS_BIG_ENDIAN ? nwords - i - 1 : i);
rtx subword = operand_subword (to, index, 1, to_mode);
gcc_assert (subword);
if (fill_value != subword)
emit_move_insn (subword, fill_value);
}
insns = get_insns ();
end_sequence ();
emit_insn (insns);
return;
}
/* Truncating multi-word to a word or less. */
if (GET_MODE_PRECISION (from_mode) > BITS_PER_WORD
&& GET_MODE_PRECISION (to_mode) <= BITS_PER_WORD)
{
/* FROM is used as is only when it is a REG, a SUBREG, or a
non-volatile MEM that can be loaded directly in TO_MODE and whose
address is not mode-dependent; anything else goes into a register. */
if (!((MEM_P (from)
&& ! MEM_VOLATILE_P (from)
&& direct_load[(int) to_mode]
&& ! mode_dependent_address_p (XEXP (from, 0),
MEM_ADDR_SPACE (from)))
|| REG_P (from)
|| GET_CODE (from) == SUBREG))
from = force_reg (from_mode, from);
/* Take the low word of FROM and let convert_move do any further
narrowing. */
convert_move (to, gen_lowpart (word_mode, from), 0);
return;
}
/* Now follow all the conversions between integers
no more than a word long. */
/* For truncation, usually we can just refer to FROM in a narrower mode. */
if (GET_MODE_BITSIZE (to_mode) < GET_MODE_BITSIZE (from_mode)
&& TRULY_NOOP_TRUNCATION_MODES_P (to_mode, from_mode))
{
if (!((MEM_P (from)
&& ! MEM_VOLATILE_P (from)
&& direct_load[(int) to_mode]
&& ! mode_dependent_address_p (XEXP (from, 0),
MEM_ADDR_SPACE (from)))
|| REG_P (from)
|| GET_CODE (from) == SUBREG))
from = force_reg (from_mode, from);
/* A hard register that cannot hold TO_MODE is copied to a fresh
register before its low part is taken. */
if (REG_P (from) && REGNO (from) < FIRST_PSEUDO_REGISTER
&& !targetm.hard_regno_mode_ok (REGNO (from), to_mode))
from = copy_to_reg (from);
emit_move_insn (to, gen_lowpart (to_mode, from));
return;
}
/* Handle extension. */
if (GET_MODE_PRECISION (to_mode) > GET_MODE_PRECISION (from_mode))
{
/* Convert directly if that works. */
if ((code = can_extend_p (to_mode, from_mode, unsignedp))
!= CODE_FOR_nothing)
{
emit_unop_insn (code, to, from, equiv_code);
return;
}
else
{
rtx tmp;
int shift_amount;
/* Search for a mode to convert via. An intermediate mode is usable
when FROM_MODE can be extended to it and it can then either be
extended to TO_MODE or truncated to TO_MODE as a no-op. */
opt_scalar_mode intermediate_iter;
FOR_EACH_MODE_FROM (intermediate_iter, from_mode)
{
scalar_mode intermediate = intermediate_iter.require ();
if (((can_extend_p (to_mode, intermediate, unsignedp)
!= CODE_FOR_nothing)
|| (GET_MODE_SIZE (to_mode) < GET_MODE_SIZE (intermediate)
&& TRULY_NOOP_TRUNCATION_MODES_P (to_mode,
intermediate)))
&& (can_extend_p (intermediate, from_mode, unsignedp)
!= CODE_FOR_nothing))
{
convert_move (to, convert_to_mode (intermediate, from,
unsignedp), unsignedp);
return;
}
}
/* No suitable intermediate mode.
Generate what we need with shifts: view FROM in TO_MODE, shift left
by the difference in precision, then shift right by the same
amount, passing UNSIGNEDP to expand_shift. */
shift_amount = (GET_MODE_PRECISION (to_mode)
- GET_MODE_PRECISION (from_mode));
from = gen_lowpart (to_mode, force_reg (from_mode, from));
tmp = expand_shift (LSHIFT_EXPR, to_mode, from, shift_amount,
to, unsignedp);
tmp = expand_shift (RSHIFT_EXPR, to_mode, tmp, shift_amount,
to, unsignedp);
if (tmp != to)
emit_move_insn (to, tmp);
return;
}
}
/* Support special truncate insns for certain modes. */
if (convert_optab_handler (trunc_optab, to_mode,
from_mode) != CODE_FOR_nothing)
{
emit_unop_insn (convert_optab_handler (trunc_optab, to_mode, from_mode),
to, from, UNKNOWN);
return;
}
/* Handle truncation of volatile memrefs, and so on;
the things that couldn't be truncated directly,
and for which there was no special instruction.
??? Code above formerly short-circuited this, for most integer
mode pairs, with a force_reg in from_mode followed by a recursive
call to this routine. Appears always to have been wrong. */
if (GET_MODE_PRECISION (to_mode) < GET_MODE_PRECISION (from_mode))
{
rtx temp = force_reg (to_mode, gen_lowpart (to_mode, from));
emit_move_insn (to, temp);
return;
}
/* Mode combination is not recognized. */
gcc_unreachable ();
}
/* Return an rtx holding the value of X converted to mode MODE.
   Both X and MODE may be floating, or both integer.
   UNSIGNEDP is nonzero if X is an unsigned value.
   The result may refer to a part of X in place, or be a new temporary
   that X has been converted into.  */
rtx
convert_to_mode (machine_mode mode, rtx x, int unsignedp)
{
  /* No old mode is supplied: convert_modes takes it from X whenever X has
     a mode of its own.  */
  const machine_mode oldmode = VOIDmode;
  return convert_modes (mode, oldmode, x, unsignedp);
}
/* Return an rtx for a value that would result
from converting X from mode OLDMODE to mode MODE.
Both modes may be floating, or both integer.
UNSIGNEDP is nonzero if X is an unsigned value.
This can be done by referring to a part of X in place
or by copying to a new temporary with conversion.
You can give VOIDmode for OLDMODE, if you are sure X has a nonvoid mode.
When X has a mode of its own, that mode is used and OLDMODE is ignored. */
rtx
convert_modes (machine_mode mode, machine_mode oldmode, rtx x, int unsignedp)
{
rtx temp;
scalar_int_mode int_mode;
/* If X is a SUBREG that indicates that we have already done at least
the required extension, strip it. */
if (GET_CODE (x) == SUBREG
&& SUBREG_PROMOTED_VAR_P (x)
&& is_a <scalar_int_mode> (mode, &int_mode)
&& (GET_MODE_PRECISION (subreg_promoted_mode (x))
>= GET_MODE_PRECISION (int_mode))
&& SUBREG_CHECK_PROMOTED_SIGN (x, unsignedp))
{
scalar_int_mode int_orig_mode;
scalar_int_mode int_inner_mode;
machine_mode orig_mode = GET_MODE (x);
x = gen_lowpart (int_mode, SUBREG_REG (x));
/* Preserve SUBREG_PROMOTED_VAR_P if the new mode is wider than
the original mode, but narrower than the inner mode. */
if (GET_CODE (x) == SUBREG
&& is_a <scalar_int_mode> (orig_mode, &int_orig_mode)
&& GET_MODE_PRECISION (int_mode)
> GET_MODE_PRECISION (int_orig_mode)
&& is_a <scalar_int_mode> (GET_MODE (SUBREG_REG (x)),
&int_inner_mode)
&& GET_MODE_PRECISION (int_inner_mode)
> GET_MODE_PRECISION (int_mode))
{
SUBREG_PROMOTED_VAR_P (x) = 1;
SUBREG_PROMOTED_SET (x, unsignedp);
}
}
/* X's own mode, when it has one, overrides the caller's OLDMODE. */
if (GET_MODE (x) != VOIDmode)
oldmode = GET_MODE (x);
/* Already in the requested mode: nothing to convert. */
if (mode == oldmode)
return x;
/* A constant scalar integer converted to an integer mode: build the
converted constant directly, extending or truncating from OLDMODE's
precision according to UNSIGNEDP. */
if (CONST_SCALAR_INT_P (x)
&& is_a <scalar_int_mode> (mode, &int_mode))
{
/* If the caller did not tell us the old mode, then there is not
much to do with respect to canonicalization. We have to
assume that all the bits are significant. */
if (!is_a <scalar_int_mode> (oldmode))
oldmode = MAX_MODE_INT;
wide_int w = wide_int::from (rtx_mode_t (x, oldmode),
GET_MODE_PRECISION (int_mode),
unsignedp ? UNSIGNED : SIGNED);
return immed_wide_int_const (w, int_mode);
}
/* We can do this with a gen_lowpart if both desired and current modes
are integer, the desired mode is no wider than the current one, and X
is a non-volatile MEM that can be loaded directly in the desired mode,
a CONST_POLY_INT, or a register for which the truncation is a no-op
(and, for a hard register, which can hold the desired mode). */
scalar_int_mode int_oldmode;
if (is_int_mode (mode, &int_mode)
&& is_int_mode (oldmode, &int_oldmode)
&& GET_MODE_PRECISION (int_mode) <= GET_MODE_PRECISION (int_oldmode)
&& ((MEM_P (x) && !MEM_VOLATILE_P (x) && direct_load[(int) int_mode])
|| CONST_POLY_INT_P (x)
|| (REG_P (x)
&& (!HARD_REGISTER_P (x)
|| targetm.hard_regno_mode_ok (REGNO (x), int_mode))
&& TRULY_NOOP_TRUNCATION_MODES_P (int_mode, GET_MODE (x)))))
return gen_lowpart (int_mode, x);
/* Converting from an integer constant into a vector mode is always
equivalent to a subreg operation. */
if (VECTOR_MODE_P (mode) && GET_MODE (x) == VOIDmode)
{
gcc_assert (known_eq (GET_MODE_BITSIZE (mode),
GET_MODE_BITSIZE (oldmode)));
return simplify_gen_subreg (mode, x, oldmode, 0);
}
/* General case: convert into a fresh pseudo register of MODE. */
temp = gen_reg_rtx (mode);
convert_move (temp, x, unsignedp);
return temp;
}
/* Return the largest alignment we can use for doing a move (or store)
of MAX_PIECES. ALIGN is the largest alignment we could use. */
static unsigned int
alignment_for_piecewise_move (unsigned int max_pieces, unsigned int align)
{
scalar_int_mode tmode
= int_mode_for_size (max_pieces * BITS_PER_UNIT, 0).require ();
if (align >= GET_MODE_ALIGNMENT (tmode))
align = GET_MODE_ALIGNMENT (tmode);
else
{
scalar_int_mode xmode = NARROWEST_INT_MODE;
opt_scalar_int_mode mode_iter;
FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
{
tmode = mode_iter.require ();
if (GET_MODE_SIZE (tmode) > max_pieces
|| targetm.slow_unaligned_access (tmode, align))
break;
xmode = tmode;
}
align = MAX (align, GET_MODE_ALIGNMENT (xmode));
}
return align;
}
/* Return the widest QI vector mode, if QI_VECTOR is true, or otherwise
   the widest integer mode, that is narrower than SIZE bytes.  SIZE must
   be greater than 1.  */
static fixed_size_mode
widest_fixed_size_mode_for_size (unsigned int size, bool qi_vector)
{
  gcc_checking_assert (size > 1);

  fixed_size_mode best = NARROWEST_INT_MODE;

  /* Use QI vector only if size is wider than a WORD.  */
  if (qi_vector && size > UNITS_PER_WORD)
    {
      machine_mode vmode;
      fixed_size_mode candidate;
      FOR_EACH_MODE_IN_CLASS (vmode, MODE_VECTOR_INT)
	{
	  /* Only fixed-size vectors of QImode elements are of interest.  */
	  if (!is_a<fixed_size_mode> (vmode, &candidate)
	      || GET_MODE_INNER (candidate) != QImode)
	    continue;

	  if (GET_MODE_SIZE (candidate) >= size)
	    break;

	  /* Accept the candidate only if the target can duplicate into
	     it.  */
	  if (optab_handler (vec_duplicate_optab, candidate)
	      != CODE_FOR_nothing)
	    best = candidate;
	}

      if (best != NARROWEST_INT_MODE)
	return best;
    }

  /* No usable vector mode: fall back to the last integer mode narrower
     than SIZE.  */
  opt_scalar_int_mode iter;
  FOR_EACH_MODE_IN_CLASS (iter, MODE_INT)
    {
      if (GET_MODE_SIZE (iter.require ()) < size)
	best = iter.require ();
    }

  return best;
}
/* Ask the target whether operation OP on LEN bytes with alignment ALIGN
   can and should be performed piecewise, given whether the current insn
   is being optimized for speed.  */
static bool
can_do_by_pieces (unsigned HOST_WIDE_INT len, unsigned int align,
		  enum by_pieces_operation op)
{
  const bool speed_p = optimize_insn_for_speed_p ();
  return targetm.use_by_pieces_infrastructure_p (len, align, op, speed_p);
}
/* Determine whether LEN bytes with alignment ALIGN can be moved by using
   several move instructions.  Return true if a call to move_by_pieces
   should succeed.  */
bool
can_move_by_pieces (unsigned HOST_WIDE_INT len, unsigned int align)
{
  const bool piecewise_ok = can_do_by_pieces (len, align, MOVE_BY_PIECES);
  return piecewise_ok;
}
/* Return the number of insns required to perform operation OP by pieces
   for L bytes.  ALIGN (in bits) is the maximum alignment we can assume.
   MAX_SIZE bounds the piece size: only modes narrower than MAX_SIZE bytes
   are considered.  */
unsigned HOST_WIDE_INT
by_pieces_ninsns (unsigned HOST_WIDE_INT l, unsigned int align,
		  unsigned int max_size, by_pieces_operation op)
{
  /* QI vector modes are considered only for SET_BY_PIECES.  */
  const bool use_qi_vector = (op == SET_BY_PIECES);
  unsigned HOST_WIDE_INT count = 0;

  if (targetm.overlap_op_by_pieces_p () && op != COMPARE_BY_PIECES)
    {
      /* NB: Round up L and ALIGN to the widest integer mode for
	 MAX_SIZE.  */
      fixed_size_mode widest
	= widest_fixed_size_mode_for_size (max_size, use_qi_vector);
      if (optab_handler (mov_optab, widest) != CODE_FOR_nothing)
	{
	  unsigned HOST_WIDE_INT rounded
	    = ROUND_UP (l, GET_MODE_SIZE (widest));
	  if (rounded > l)
	    l = rounded;
	  align = GET_MODE_ALIGNMENT (widest);
	}
    }

  align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);

  /* Work down from the widest mode, consuming as many whole pieces of each
     usable mode as fit in what is left of L.  */
  while (max_size > 1 && l > 0)
    {
      fixed_size_mode mode
	= widest_fixed_size_mode_for_size (max_size, use_qi_vector);
      unsigned int modesize = GET_MODE_SIZE (mode);
      enum insn_code icode = optab_handler (mov_optab, mode);

      if (icode != CODE_FOR_nothing && align >= GET_MODE_ALIGNMENT (mode))
	{
	  unsigned HOST_WIDE_INT n_pieces = l / modesize;
	  l %= modesize;

	  if (op == COMPARE_BY_PIECES)
	    {
	      /* Comparisons are counted per batch of pieces, with the
		 batch size given by the target; a partial batch adds one
		 more insn.  */
	      int batch = targetm.compare_by_pieces_branch_ratio (mode);
	      int batch_ops = 4 * batch - 1;
	      unsigned HOST_WIDE_INT full = n_pieces / batch;
	      count += full * batch_ops;
	      if (n_pieces % batch != 0)
		count++;
	    }
	  else
	    count += n_pieces;
	}

      max_size = modesize;
    }

  gcc_assert (!l);
  return count;
}
/* Used when performing piecewise block operations, holds information
about one of the memory objects involved. The member functions
can be used to generate code for loading from the object and
updating the address when iterating. */
class pieces_addr
{
/* The object being referenced, a MEM. Can be NULL_RTX to indicate
stack pushes. */
rtx m_obj;
/* The address of the object. Can differ from that seen in the
MEM rtx if we copied the address to a register. */
rtx m_addr;
/* Nonzero if the address on the object has an autoincrement already,
signifies whether that was an increment or decrement: the constructor
stores -1 for a decrement and 1 for an increment. */
signed char m_addr_inc;
/* Nonzero if we intend to use autoinc without the address already
having autoinc form. We will insert add insns around each memory
reference, expecting later passes to form autoinc addressing modes.
The only supported options are predecrement and postincrement;
decide_autoinc stores -1 for the former and 1 for the latter. */
signed char m_explicit_inc;
/* True if we have either of the two possible cases of using
autoincrement. */
bool m_auto;
/* True if this is an address to be used for load operations rather
than stores. */
bool m_is_load;
/* Optionally, a function to obtain constants for any given offset into
the objects, and data associated with it. */
by_pieces_constfn m_constfn;
void *m_cfndata;
public:
/* Initialize from an object, whether it is loaded from, and an optional
constant function with its data. */
pieces_addr (rtx, bool, by_pieces_constfn, void *);
/* Return the reference (or the constant from m_constfn) for a piece of
the given mode at the given offset. */
rtx adjust (fixed_size_mode, HOST_WIDE_INT, by_pieces_prev * = nullptr);
/* Emit an add insn that changes the address by the given amount. */
void increment_address (HOST_WIDE_INT);
/* Emit the address adjustment only when explicit predecrement (resp.
postincrement) was selected by decide_autoinc. */
void maybe_predec (HOST_WIDE_INT);
void maybe_postinc (HOST_WIDE_INT);
/* Decide whether to use autoinc for this address; never overrides an
earlier decision to set m_auto. */
void decide_autoinc (machine_mode, bool, HOST_WIDE_INT);
/* Return m_addr_inc. */
int get_addr_inc ()
{
return m_addr_inc;
}
};
/* Initialize a pieces_addr structure from an object OBJ.  IS_LOAD is
   true if the operation to be performed on this object is a load
   rather than a store.  For stores, OBJ can be NULL, in which case we
   assume the operation is a stack push.  For loads, the optional
   CONSTFN and its associated CFNDATA can be used in place of the
   memory load; a load with a NULL OBJ must supply CONSTFN, and CONSTFN
   may only be given for loads (both asserted).  */
pieces_addr::pieces_addr (rtx obj, bool is_load, by_pieces_constfn constfn,
			  void *cfndata)
  : m_obj (obj), m_is_load (is_load), m_constfn (constfn), m_cfndata (cfndata)
{
  m_addr_inc = 0;
  m_auto = false;
  m_explicit_inc = 0;

  if (obj != NULL_RTX)
    {
      m_addr = XEXP (obj, 0);

      const rtx_code code = GET_CODE (m_addr);
      const bool is_dec = (code == PRE_DEC || code == POST_DEC);
      const bool is_inc = (code == PRE_INC || code == POST_INC);
      if (is_inc || is_dec)
	{
	  /* The address already has autoinc form; record its direction.  */
	  m_auto = true;
	  m_addr_inc = is_dec ? -1 : 1;
	}

      /* While we have always looked for these codes here, the code
	 implementing the memory operation has never handled them.
	 Support could be added later if necessary or beneficial.  */
      gcc_assert (code != PRE_INC && code != POST_DEC);
    }
  else
    {
      m_addr = NULL_RTX;
      if (is_load)
	gcc_assert (constfn != NULL);
      else
	{
	  /* A store with no object is a stack push: the address moves in
	     the direction the stack grows.  */
	  m_auto = true;
	  m_addr_inc = STACK_GROWS_DOWNWARD ? -1 : 1;
	}
    }

  if (constfn != NULL)
    gcc_assert (is_load);
}
/* Decide whether to use autoinc for an address involved in a memory op.
   MODE is the mode of the accesses, REVERSE is true if we've decided to
   perform the operation starting from the end, and LEN is the length of
   the operation.  Don't override an earlier decision to set m_auto, and
   do nothing when there is no object.  */
void
pieces_addr::decide_autoinc (machine_mode ARG_UNUSED (mode), bool reverse,
			     HOST_WIDE_INT len)
{
  if (m_auto || m_obj == NULL_RTX)
    return;

  bool use_predec;
  bool use_postinc;
  if (m_is_load)
    {
      use_predec = USE_LOAD_PRE_DECREMENT (mode);
      use_postinc = USE_LOAD_POST_INCREMENT (mode);
    }
  else
    {
      use_predec = USE_STORE_PRE_DECREMENT (mode);
      use_postinc = USE_STORE_POST_INCREMENT (mode);
    }

  machine_mode addr_mode = get_address_mode (m_obj);

  /* Going backwards only predecrement is usable; going forwards only
     postincrement.  */
  if (reverse ? use_predec : use_postinc)
    {
      /* For predecrement the register starts at the address plus LEN;
	 for postincrement it starts at the address itself.  */
      rtx start = (reverse
		   ? plus_constant (addr_mode, m_addr, len)
		   : m_addr);
      m_addr = copy_to_mode_reg (addr_mode, start);
      m_auto = true;
      m_explicit_inc = reverse ? -1 : 1;
    }
  else if (CONSTANT_P (m_addr))
    /* No autoinc, but a constant address is still copied to a
       register.  */
    m_addr = copy_to_mode_reg (addr_mode, m_addr);
}
/* Adjust the address to refer to the data at OFFSET in MODE.  If we
   are using autoincrement for this address, we don't add the offset,
   but we still modify the MEM's properties.  When a constant function
   is present its result is returned instead, and NULL_RTX is returned
   when there is no object.  */
rtx
pieces_addr::adjust (fixed_size_mode mode, HOST_WIDE_INT offset,
		     by_pieces_prev *prev)
{
  /* Pass the previous data to m_constfn.  */
  if (m_constfn != NULL)
    return m_constfn (m_cfndata, prev, offset, mode);

  if (m_obj == NULL_RTX)
    return NULL_RTX;

  return (m_auto
	  ? adjust_automodify_address (m_obj, mode, m_addr, offset)
	  : adjust_address (m_obj, mode, offset));
}
/* Emit an add instruction to increment the address by SIZE.  The
   constant is generated in the mode of the address.  */
void
pieces_addr::increment_address (HOST_WIDE_INT size)
{
  machine_mode addr_mode = GET_MODE (m_addr);
  emit_insn (gen_add2_insn (m_addr, gen_int_mode (size, addr_mode)));
}
/* If we are supposed to decrement the address before each access, emit
   code to do so now.  Increment by SIZE (which should have the correct
   sign already).  */
void
pieces_addr::maybe_predec (HOST_WIDE_INT size)
{
  /* Only explicit predecrement (a negative m_explicit_inc) needs an add
     insn here.  */
  if (m_explicit_inc < 0)
    {
      gcc_assert (HAVE_PRE_DECREMENT);
      increment_address (size);
    }
}
/* If we are supposed to increment the address after each access, emit
   code to do so now.  Increment by SIZE.  */
void
pieces_addr::maybe_postinc (HOST_WIDE_INT size)
{
  /* Only explicit postincrement (a positive m_explicit_inc) needs an add
     insn here.  */
  if (m_explicit_inc > 0)
    {
      gcc_assert (HAVE_POST_INCREMENT);
      increment_address (size);
    }
}
/* This structure is used by do_op_by_pieces to describe the operation
to be performed. It is an abstract base: derived classes supply
generate and prepare_mode for the specific operation. */
class op_by_pieces_d
{
private:
fixed_size_mode get_usable_mode (fixed_size_mode, unsigned int);
fixed_size_mode smallest_fixed_size_mode_for_size (unsigned int);
protected:
/* Address descriptors for the two objects involved, named TO and FROM
by the constructor. */
pieces_addr m_to, m_from;
/* Make m_len read-only so that smallest_fixed_size_mode_for_size can
use it to check the valid mode size. */
const unsigned HOST_WIDE_INT m_len;
/* Current offset; the constructor starts it at the length when working
in reverse and at 0 otherwise. */
HOST_WIDE_INT m_offset;
/* NOTE(review): presumably the alignment the operation may assume --
confirm against the constructor. */
unsigned int m_align;
/* One more than the MAX_PIECES value passed to the constructor. */
unsigned int m_max_size;
/* True if the operation is performed starting from the end. */
bool m_reverse;
/* True if this is a stack push. */
bool m_push;
/* True if targetm.overlap_op_by_pieces_p () returns true. */
bool m_overlap_op_by_pieces;
/* True if QI vector mode can be used. */
bool m_qi_vector_mode;
/* Virtual functions, overridden by derived classes for the specific
operation. generate and prepare_mode are pure virtual; finish_mode
defaults to doing nothing. */
virtual void generate (rtx, rtx, machine_mode) = 0;
virtual bool prepare_mode (machine_mode, unsigned int) = 0;
virtual void finish_mode (machine_mode)
{
}
public:
/* The trailing bool (QI vector mode) defaults to false. */
op_by_pieces_d (unsigned int, rtx, bool, rtx, bool, by_pieces_constfn,
void *, unsigned HOST_WIDE_INT, unsigned int, bool,
bool = false);
void run ();
};
/* Construct an op_by_pieces_d.  The two objects TO and FROM are
   identified as loads or stores by TO_LOAD and FROM_LOAD.  If FROM is a
   load, the optional FROM_CFN and its associated FROM_CFN_DATA can be
   used to replace loads with constant values.  MAX_PIECES describes the
   maximum number of bytes at a time which can be moved efficiently.
   LEN describes the length of the operation.  ALIGN is the alignment
   used for an operand that is null.  PUSH is true for a stack push and
   QI_VECTOR_MODE is true if QI vector modes may be used.  */
op_by_pieces_d::op_by_pieces_d (unsigned int max_pieces, rtx to,
                                bool to_load, rtx from, bool from_load,
                                by_pieces_constfn from_cfn,
                                void *from_cfn_data,
                                unsigned HOST_WIDE_INT len,
                                unsigned int align, bool push,
                                bool qi_vector_mode)
  : m_to (to, to_load, NULL, NULL),
    m_from (from, from_load, from_cfn, from_cfn_data),
    m_len (len), m_max_size (max_pieces + 1),
    m_push (push), m_qi_vector_mode (qi_vector_mode)
{
  const int to_inc = m_to.get_addr_inc ();
  const int from_inc = m_from.get_addr_inc ();

  /* The two addresses must agree on a direction; forward wins when
     neither has a preference.  */
  const bool forward_ok = to_inc >= 0 && from_inc >= 0;
  const bool backward_ok = to_inc <= 0 && from_inc <= 0;
  if (forward_ok)
    m_reverse = false;
  else if (backward_ok)
    m_reverse = true;
  else
    gcc_unreachable ();

  /* A reverse operation starts at the end of the block.  */
  if (m_reverse)
    m_offset = len;
  else
    m_offset = 0;

  /* Use the smaller of the two operand alignments, falling back to
     ALIGN for an operand that is not present.  */
  unsigned int to_align = to ? MEM_ALIGN (to) : align;
  unsigned int from_align = from ? MEM_ALIGN (from) : align;
  align = MIN (to_align, from_align);

  /* If copying requires more than two move insns,
     copy addresses to registers (to make displacements shorter)
     and use post-increment if available.  */
  if (by_pieces_ninsns (len, align, m_max_size, MOVE_BY_PIECES) > 2)
    {
      /* The widest mode that a single piece may use.  */
      fixed_size_mode widest
        = widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode);

      m_from.decide_autoinc (widest, m_reverse, len);
      m_to.decide_autoinc (widest, m_reverse, len);
    }

  m_align = alignment_for_piecewise_move (MOVE_MAX_PIECES, align);
  m_overlap_op_by_pieces = targetm.overlap_op_by_pieces_p ();
}
/* Starting from MODE and moving to successively narrower modes, return
   the first mode whose size does not exceed LEN bytes and which
   prepare_mode accepts at alignment m_align.  */
fixed_size_mode
op_by_pieces_d::get_usable_mode (fixed_size_mode mode, unsigned int len)
{
  for (;;)
    {
      unsigned int size = GET_MODE_SIZE (mode);
      if (len >= size && prepare_mode (mode, m_align))
        return mode;

      /* widest_fixed_size_mode_for_size checks SIZE > 1.  */
      mode = widest_fixed_size_mode_for_size (size, m_qi_vector_mode);
    }
}
/* Return the smallest integer or QI vector mode that is not narrower
   than SIZE bytes.  A QI vector mode is only considered when
   m_qi_vector_mode is set and SIZE exceeds UNITS_PER_WORD; it must not
   be wider than M_LEN and must have a vec_duplicate handler.  */
fixed_size_mode
op_by_pieces_d::smallest_fixed_size_mode_for_size (unsigned int size)
{
  /* Use QI vector only for > size of WORD.  */
  if (m_qi_vector_mode && size > UNITS_PER_WORD)
    {
      machine_mode mode;
      fixed_size_mode candidate;
      FOR_EACH_MODE_IN_CLASS (mode, MODE_VECTOR_INT)
        {
          /* Only fixed-size vectors of QImode elements qualify.  */
          if (!(is_a<fixed_size_mode> (mode, &candidate)
                && GET_MODE_INNER (candidate) == QImode))
            continue;

          /* Don't return a mode wider than M_LEN.  */
          if (GET_MODE_SIZE (candidate) > m_len)
            break;

          if (GET_MODE_SIZE (candidate) < size)
            continue;

          if (optab_handler (vec_duplicate_optab, candidate)
              != CODE_FOR_nothing)
            return candidate;
        }
    }

  return smallest_int_mode_for_size (size * BITS_PER_UNIT);
}
/* This function contains the main loop used for expanding a block
   operation.  First handle what we can in the widest usable mode,
   then go to successively narrower modes.  For every piece, call
   generate with the two operands and the mode, and call finish_mode
   once all pieces of a given mode have been generated.  */
void
op_by_pieces_d::run ()
{
if (m_len == 0)
return;
unsigned HOST_WIDE_INT length = m_len;
/* widest_fixed_size_mode_for_size checks M_MAX_SIZE > 1. */
fixed_size_mode mode
= widest_fixed_size_mode_for_size (m_max_size, m_qi_vector_mode);
mode = get_usable_mode (mode, length);
/* The previous piece of each operand is handed to pieces_addr::adjust,
   which forwards it to a constant function if one is installed.  */
by_pieces_prev to_prev = { nullptr, mode };
by_pieces_prev from_prev = { nullptr, mode };
do
{
unsigned int size = GET_MODE_SIZE (mode);
rtx to1 = NULL_RTX, from1;
while (length >= size)
{
/* When going backwards, step the offset before the access.  */
if (m_reverse)
m_offset -= size;
to1 = m_to.adjust (mode, m_offset, &to_prev);
to_prev.data = to1;
to_prev.mode = mode;
from1 = m_from.adjust (mode, m_offset, &from_prev);
from_prev.data = from1;
from_prev.mode = mode;
m_to.maybe_predec (-(HOST_WIDE_INT)size);
m_from.maybe_predec (-(HOST_WIDE_INT)size);
generate (to1, from1, mode);
m_to.maybe_postinc (size);
m_from.maybe_postinc (size);
/* When going forwards, step the offset after the access.  */
if (!m_reverse)
m_offset += size;
length -= size;
}
finish_mode (mode);
/* This is the only exit from the enclosing loop.  */
if (length == 0)
return;
if (!m_push && m_overlap_op_by_pieces)
{
/* NB: Generate overlapping operations if it is not a stack
   push since stack push must not overlap.  Get the smallest
   fixed size mode for the remaining LENGTH bytes.  */
mode = smallest_fixed_size_mode_for_size (length);
mode = get_usable_mode (mode, GET_MODE_SIZE (mode));
int gap = GET_MODE_SIZE (mode) - length;
if (gap > 0)
{
/* If size of MODE > LENGTH, generate the last operation
   in MODE for the remaining bytes with overlapping memory
   from the previous operation.  */
if (m_reverse)
m_offset += gap;
else
m_offset -= gap;
length += gap;
}
}
else
{
/* widest_fixed_size_mode_for_size checks SIZE > 1. */
mode = widest_fixed_size_mode_for_size (size,
m_qi_vector_mode);
mode = get_usable_mode (mode, length);
}
}
while (1);
/* The code above should have handled everything.  The loop above only
   leaves through the return, so this is not reached.  */
gcc_assert (!length);
}
/* PUSHG_P (TO) is true when TO is null and PUSH_ROUNDING is defined;
   it is always false otherwise.  */
#ifdef PUSH_ROUNDING
#define PUSHG_P(to) ((to) == nullptr)
#else
#define PUSHG_P(to) false
#endif

/* Derived class from op_by_pieces_d, providing support for block move
   operations.  */
class move_by_pieces_d : public op_by_pieces_d
{
private:
  /* Generator for the move insn of the mode most recently passed to
     prepare_mode.  */
  insn_gen_fn m_gen_fun;

  void generate (rtx op0, rtx op1, machine_mode mode);
  bool prepare_mode (machine_mode mode, unsigned int align);

public:
  /* Set up a move of LEN bytes from FROM to TO; TO is a store and FROM
     is a load, and no constant function is used.  */
  move_by_pieces_d (rtx to, rtx from, unsigned HOST_WIDE_INT len,
                    unsigned int align)
    : op_by_pieces_d (MOVE_MAX_PIECES, to, false, from, true, NULL,
                      NULL, len, align, PUSHG_P (to))
  {
  }

  rtx finish_retmode (memop_ret retmode);
};
/* Return true if MODE can be used for a set of copies, given an
   alignment ALIGN.  The move generator for MODE is recorded in
   m_gen_fun for later calls to generate.  */
bool
move_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align)
{
  const insn_code icode = optab_handler (mov_optab, mode);
  m_gen_fun = GEN_FCN (icode);

  if (icode == CODE_FOR_nothing)
    return false;
  return align >= GET_MODE_ALIGNMENT (mode);
}
/* A callback used when iterating for a move_by_pieces operation.
   Emit a move of OP1 into OP0 using the generator recorded by
   prepare_mode.  If PUSH_ROUNDING is defined and OP0 is NULL, push OP1
   in MODE onto the stack instead.  */
void
move_by_pieces_d::generate (rtx op0, rtx op1,
                            machine_mode mode ATTRIBUTE_UNUSED)
{
#ifdef PUSH_ROUNDING
  if (!op0)
    emit_single_push_insn (mode, op1, NULL);
  else
#endif
    emit_insn (m_gen_fun (op0, op1));
}
/* Perform the final adjustment at the end of a block move to obtain
   the correct return value.  For RETURN_END_MINUS_ONE the destination
   is stepped back by one byte first.  The result is a QImode reference
   to the destination at the resulting offset.  Only valid for forward
   operations.  */
rtx
move_by_pieces_d::finish_retmode (memop_ret retmode)
{
  gcc_assert (!m_reverse);

  if (retmode == RETURN_END_MINUS_ONE)
    {
      m_to.maybe_postinc (-1);
      m_offset -= 1;
    }

  return m_to.adjust (QImode, m_offset);
}
/* Generate several move instructions to copy LEN bytes from block FROM to
   block TO.  (These are MEM rtx's with BLKmode).

   If PUSH_ROUNDING is defined and TO is NULL, emit_single_push_insn is
   used to push FROM to the stack.

   ALIGN is maximum stack alignment we can assume.

   Return value is based on RETMODE argument: TO itself for RETURN_BEGIN,
   otherwise whatever move_by_pieces_d::finish_retmode produces.  */
rtx
move_by_pieces (rtx to, rtx from, unsigned HOST_WIDE_INT len,
                unsigned int align, memop_ret retmode)
{
#ifndef PUSH_ROUNDING
  /* Without PUSH_ROUNDING a null destination is never valid.  */
  if (to == NULL)
    gcc_unreachable ();
#endif

  move_by_pieces_d data (to, from, len, align);
  data.run ();

  if (retmode == RETURN_BEGIN)
    return to;
  return data.finish_retmode (retmode);
}
/* Derived class from op_by_pieces_d, providing support for block store
   operations whose data comes from a constant function.  */
class store_by_pieces_d : public op_by_pieces_d
{
private:
  /* Generator for the move insn of the mode most recently passed to
     prepare_mode.  */
  insn_gen_fn m_gen_fun;

  void generate (rtx op0, rtx op1, machine_mode mode);
  bool prepare_mode (machine_mode mode, unsigned int align);

public:
  /* Set up a store of LEN bytes to TO, with the data supplied by CFN
     called with CFN_DATA.  There is no source object.  */
  store_by_pieces_d (rtx to, by_pieces_constfn cfn, void *cfn_data,
                     unsigned HOST_WIDE_INT len, unsigned int align,
                     bool qi_vector_mode)
    : op_by_pieces_d (STORE_MAX_PIECES, to, false, NULL_RTX, true, cfn,
                      cfn_data, len, align, false, qi_vector_mode)
  {
  }

  rtx finish_retmode (memop_ret retmode);
};
/* Return true if MODE can be used for a set of stores, given an
   alignment ALIGN.  The move generator for MODE is recorded in
   m_gen_fun for later calls to generate.  */
bool
store_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align)
{
  const insn_code icode = optab_handler (mov_optab, mode);
  m_gen_fun = GEN_FCN (icode);

  if (icode == CODE_FOR_nothing)
    return false;
  return align >= GET_MODE_ALIGNMENT (mode);
}
/* A callback used when iterating for a store_by_pieces operation.
   Emit a move of OP1 into OP0 using the generator recorded by
   prepare_mode.  The mode argument is unused.  */
void
store_by_pieces_d::generate (rtx op0, rtx op1, machine_mode)
{
  rtx_insn *store = m_gen_fun (op0, op1);
  emit_insn (store);
}
/* Perform the final adjustment at the end of a block store to obtain
   the correct return value.  For RETURN_END_MINUS_ONE the destination
   is stepped back by one byte first.  The result is a QImode reference
   to the destination at the resulting offset.  Only valid for forward
   operations.  */
rtx
store_by_pieces_d::finish_retmode (memop_ret retmode)
{
  gcc_assert (!m_reverse);

  if (retmode == RETURN_END_MINUS_ONE)
    {
      m_to.maybe_postinc (-1);
      m_offset -= 1;
    }

  return m_to.adjust (QImode, m_offset);
}
/* Determine whether the LEN bytes generated by CONSTFUN can be
   stored to memory using several move instructions.  CONSTFUNDATA is
   a pointer which will be passed as argument in every CONSTFUN call.
   ALIGN is maximum alignment we can assume.  MEMSETP is true if this is
   a memset operation and false if it's a copy of a constant string.
   Return nonzero if a call to store_by_pieces should succeed.  A zero
   LEN always succeeds.  */
int
can_store_by_pieces (unsigned HOST_WIDE_INT len,
by_pieces_constfn constfun,
void *constfundata, unsigned int align, bool memsetp)
{
unsigned HOST_WIDE_INT l;
unsigned int max_size;
HOST_WIDE_INT offset = 0;
enum insn_code icode;
int reverse;
/* cst is set but not used if LEGITIMATE_CONSTANT doesn't use it. */
rtx cst ATTRIBUTE_UNUSED;
if (len == 0)
return 1;
if (!targetm.use_by_pieces_infrastructure_p (len, align,
memsetp
? SET_BY_PIECES
: STORE_BY_PIECES,
optimize_insn_for_speed_p ()))
return 0;
align = alignment_for_piecewise_move (STORE_MAX_PIECES, align);
/* We would first store what we can in the largest integer mode, then go to
   successively smaller modes.  The forward direction is always checked;
   the reverse direction is checked as well when the target has pre- or
   post-decrement addressing.  OFFSET is not reset between the two
   passes, so the reverse pass starts from the value the forward pass
   ended at.  */
for (reverse = 0;
reverse <= (HAVE_PRE_DECREMENT || HAVE_POST_DECREMENT);
reverse++)
{
l = len;
max_size = STORE_MAX_PIECES + 1;
while (max_size > 1 && l > 0)
{
fixed_size_mode mode
= widest_fixed_size_mode_for_size (max_size, memsetp);
icode = optab_handler (mov_optab, mode);
if (icode != CODE_FOR_nothing
&& align >= GET_MODE_ALIGNMENT (mode))
{
unsigned int size = GET_MODE_SIZE (mode);
while (l >= size)
{
if (reverse)
offset -= size;
cst = (*constfun) (constfundata, nullptr, offset, mode);
/* All CONST_VECTORs can be loaded for memset since
   vec_duplicate_optab is a precondition to pick a
   vector mode for the memset expander.  */
if (!((memsetp && VECTOR_MODE_P (mode))
|| targetm.legitimate_constant_p (mode, cst)))
return 0;
if (!reverse)
offset += size;
l -= size;
}
}
/* Try the next narrower mode, whether or not this one was usable.  */
max_size = GET_MODE_SIZE (mode);
}
/* The code above should have handled everything. */
gcc_assert (!l);
}
return 1;
}
/* Generate several move instructions to store LEN bytes generated by
   CONSTFUN to block TO.  (A MEM rtx with BLKmode).  CONSTFUNDATA is a
   pointer which will be passed as argument in every CONSTFUN call.
   ALIGN is maximum alignment we can assume.  MEMSETP is true if this is
   a memset operation and false if it's a copy of a constant string.
   Return value is based on RETMODE argument: TO itself for RETURN_BEGIN
   or when LEN is zero, otherwise whatever
   store_by_pieces_d::finish_retmode produces.  */
rtx
store_by_pieces (rtx to, unsigned HOST_WIDE_INT len,
                 by_pieces_constfn constfun,
                 void *constfundata, unsigned int align, bool memsetp,
                 memop_ret retmode)
{
  /* Nothing to store; RETURN_END_MINUS_ONE is not allowed here.  */
  if (len == 0)
    {
      gcc_assert (retmode != RETURN_END_MINUS_ONE);
      return to;
    }

  /* The target must have agreed to the by-pieces expansion.  */
  gcc_assert (targetm.use_by_pieces_infrastructure_p
                (len, align,
                 memsetp ? SET_BY_PIECES : STORE_BY_PIECES,
                 optimize_insn_for_speed_p ()));

  store_by_pieces_d data (to, constfun, constfundata, len, align,
                          memsetp);
  data.run ();

  if (retmode == RETURN_BEGIN)
    return to;
  return data.finish_retmode (retmode);
}
/* Generate several move instructions to clear LEN bytes of block TO.  (A MEM
   rtx with BLKmode).  ALIGN is maximum alignment we can assume.  Nothing
   is emitted when LEN is zero.  */
static void
clear_by_pieces (rtx to, unsigned HOST_WIDE_INT len, unsigned int align)
{
  if (len != 0)
    {
      /* Use builtin_memset_read_str to support vector mode broadcast.  */
      char zero = 0;
      store_by_pieces_d data (to, builtin_memset_read_str, &zero, len,
                              align, true);
      data.run ();
    }
}
/* Derived class from op_by_pieces_d, providing support for block
   compare operations.  It stores the fail label to jump to in case of
   miscomparison, and for branch ratios greater than 1, it stores an
   accumulator and the current and maximum counts before emitting
   another branch.  */
class compare_by_pieces_d : public op_by_pieces_d
{
private:
  rtx_code_label *m_fail_label;
  rtx m_accumulator;
  int m_count, m_batch;

  void generate (rtx op0, rtx op1, machine_mode mode);
  bool prepare_mode (machine_mode mode, unsigned int align);
  void finish_mode (machine_mode mode);

public:
  /* Set up a comparison of LEN bytes of OP0 against OP1; both are
     loads.  OP1_CFN and OP1_CFN_DATA optionally supply OP1 as constant
     data.  FAIL_LABEL is the label to jump to on a mismatch.  */
  compare_by_pieces_d (rtx op0, rtx op1, by_pieces_constfn op1_cfn,
                       void *op1_cfn_data, HOST_WIDE_INT len, int align,
                       rtx_code_label *fail_label)
    : op_by_pieces_d (COMPARE_MAX_PIECES, op0, true, op1, true, op1_cfn,
                      op1_cfn_data, len, align, false),
      m_fail_label (fail_label)
  {
  }
};
/* A callback used when iterating for a compare_by_pieces operation.
   OP0 and OP1 are the values that have been loaded and should be
   compared in MODE.  When the batch size is greater than one, the
   difference of each pair is ORed into an accumulator and a branch to
   the fail label is only emitted once per batch.  */
void
compare_by_pieces_d::generate (rtx op0, rtx op1, machine_mode mode)
{
  if (m_batch > 1)
    {
      rtx diff = expand_binop (mode, sub_optab, op0, op1, NULL_RTX,
                               true, OPTAB_LIB_WIDEN);
      /* Fold this difference into what the batch has gathered so far.  */
      if (m_count != 0)
        diff = expand_binop (mode, ior_optab, m_accumulator, diff, diff,
                             true, OPTAB_LIB_WIDEN);
      m_accumulator = diff;

      m_count++;
      if (m_count < m_batch)
        return;

      /* The batch is full: test the accumulated value against zero.  */
      m_count = 0;
      op0 = m_accumulator;
      op1 = const0_rtx;
      m_accumulator = NULL_RTX;
    }

  do_compare_rtx_and_jump (op0, op1, NE, true, mode, NULL_RTX, NULL,
                           m_fail_label, profile_probability::uninitialized ());
}
/* Return true if MODE can be used for a set of moves and comparisons,
   given an alignment ALIGN.  On success the batch size for MODE is
   recorded and the accumulator state is reset for later calls to
   generate.  */
bool
compare_by_pieces_d::prepare_mode (machine_mode mode, unsigned int align)
{
  const insn_code icode = optab_handler (mov_optab, mode);

  if (icode == CODE_FOR_nothing)
    return false;
  if (align < GET_MODE_ALIGNMENT (mode))
    return false;
  if (!can_compare_p (EQ, mode, ccp_jump))
    return false;

  m_batch = targetm.compare_by_pieces_branch_ratio (mode);
  if (m_batch < 0)
    return false;

  m_accumulator = NULL_RTX;
  m_count = 0;
  return true;
}
/* Called after expanding a series of comparisons in MODE.  If we have
   accumulated results for which we haven't emitted a branch yet, do
   so now.  */
void
compare_by_pieces_d::finish_mode (machine_mode mode)
{
  if (m_accumulator == NULL_RTX)
    return;

  do_compare_rtx_and_jump (m_accumulator, const0_rtx, NE, true, mode,
                           NULL_RTX, NULL, m_fail_label,
                           profile_probability::uninitialized ());
}
/* Generate several move instructions to compare LEN bytes from blocks
   ARG0 and ARG1.  (These are MEM rtx's with BLKmode).

   ALIGN is the maximum alignment we can assume.

   Optionally, the caller can pass a constfn and associated data in A1_CFN
   and A1_CFN_DATA, describing that the second operand being compared is a
   known constant and how to obtain its data.

   The result is stored in TARGET if that is a pseudo register, otherwise
   in a fresh pseudo in the mode of integer_type_node; it is zero if the
   blocks compare equal and one otherwise.  The register is returned.  */
static rtx
compare_by_pieces (rtx arg0, rtx arg1, unsigned HOST_WIDE_INT len,
                   rtx target, unsigned int align,
                   by_pieces_constfn a1_cfn, void *a1_cfn_data)
{
  rtx_code_label *fail_label = gen_label_rtx ();
  rtx_code_label *end_label = gen_label_rtx ();

  /* Only a pseudo register is acceptable as the result location.  */
  const bool target_is_pseudo
    = (target != NULL_RTX
       && REG_P (target)
       && REGNO (target) >= FIRST_PSEUDO_REGISTER);
  if (!target_is_pseudo)
    target = gen_reg_rtx (TYPE_MODE (integer_type_node));

  compare_by_pieces_d data (arg0, arg1, a1_cfn, a1_cfn_data, len, align,
                            fail_label);
  data.run ();

  /* Fall through here when every piece matched.  */
  emit_move_insn (target, const0_rtx);
  emit_jump (end_label);
  emit_barrier ();

  /* A mismatching piece jumps here.  */
  emit_label (fail_label);
  emit_move_insn (target, const1_rtx);

  emit_label (end_label);
  return target;
}
/* Emit code to move a block Y to a block X.  This may be done with
   string-move instructions, with multiple scalar move instructions,
   or with a library call.

   Both X and Y must be MEM rtx's (perhaps inside VOLATILE) with mode BLKmode.
   SIZE is an rtx that says how long they are.
   The alignment used is the smaller of the alignments of X and Y.
   METHOD describes what kind of copy this is, and what mechanisms may be used.
   EXPECTED_ALIGN and EXPECTED_SIZE are hints passed on to the block move
   pattern.
   MIN_SIZE is the minimal size of block to move
   MAX_SIZE is the maximal size of block to move, if it cannot be represented
   in unsigned HOST_WIDE_INT, then it is mask of all ones.
   PROBABLE_MAX_SIZE is likewise passed on to the block move pattern.
   If BAIL_OUT_LIBCALL is true, return without emitting anything when a
   library call would be the method used.
   IS_MOVE_DONE, if nonnull, is set to true on entry and to false when the
   function returns without having emitted the move.  It must be nonnull
   when MIGHT_OVERLAP is true and no block move pattern handles the move.
   MIGHT_OVERLAP is true if X and Y may overlap; only a block move pattern
   is used in that case.

   Return the address of the new block, if memcpy is called and returns it,
   pc_rtx if METHOD is BLOCK_OP_NO_LIBCALL_RET and a library call would
   have been needed, 0 otherwise.  */
rtx
emit_block_move_hints (rtx x, rtx y, rtx size, enum block_op_methods method,
                       unsigned int expected_align, HOST_WIDE_INT expected_size,
                       unsigned HOST_WIDE_INT min_size,
                       unsigned HOST_WIDE_INT max_size,
                       unsigned HOST_WIDE_INT probable_max_size,
                       bool bail_out_libcall, bool *is_move_done,
                       bool might_overlap)
{
  int may_use_call;
  rtx retval = 0;
  unsigned int align;

  if (is_move_done)
    *is_move_done = true;

  gcc_assert (size);
  if (CONST_INT_P (size) && INTVAL (size) == 0)
    return 0;

  switch (method)
    {
    case BLOCK_OP_NORMAL:
    case BLOCK_OP_TAILCALL:
      may_use_call = 1;
      break;

    case BLOCK_OP_CALL_PARM:
      may_use_call = block_move_libcall_safe_for_call_parm ();

      /* Make inhibit_defer_pop nonzero around the library call
         to force it to pop the arguments right away.  Every return
         below this point must undo this with OK_DEFER_POP.  */
      NO_DEFER_POP;
      break;

    case BLOCK_OP_NO_LIBCALL:
      may_use_call = 0;
      break;

    case BLOCK_OP_NO_LIBCALL_RET:
      may_use_call = -1;
      break;

    default:
      gcc_unreachable ();
    }

  gcc_assert (MEM_P (x) && MEM_P (y));
  align = MIN (MEM_ALIGN (x), MEM_ALIGN (y));
  gcc_assert (align >= BITS_PER_UNIT);

  /* Make sure we've got BLKmode addresses; store_one_arg can decide that
     block copy is more efficient for other large modes, e.g. DCmode.  */
  x = adjust_address (x, BLKmode, 0);
  y = adjust_address (y, BLKmode, 0);

  /* If source and destination are the same, no need to copy anything.  */
  if (rtx_equal_p (x, y)
      && !MEM_VOLATILE_P (x)
      && !MEM_VOLATILE_P (y))
    {
      /* Keep NO_DEFER_POP / OK_DEFER_POP balanced on this early exit.  */
      if (method == BLOCK_OP_CALL_PARM)
        OK_DEFER_POP;
      return 0;
    }

  /* Set MEM_SIZE as appropriate for this block copy.  The main place this
     can be incorrect is coming from __builtin_memcpy.  */
  poly_int64 const_size;
  if (poly_int_rtx_p (size, &const_size))
    {
      x = shallow_copy_rtx (x);
      y = shallow_copy_rtx (y);
      set_mem_size (x, const_size);
      set_mem_size (y, const_size);
    }

  bool pieces_ok = CONST_INT_P (size)
    && can_move_by_pieces (INTVAL (size), align);
  bool pattern_ok = false;

  if (!pieces_ok || might_overlap)
    {
      pattern_ok
        = emit_block_move_via_pattern (x, y, size, align,
                                       expected_align, expected_size,
                                       min_size, max_size, probable_max_size,
                                       might_overlap);
      if (!pattern_ok && might_overlap)
        {
          /* Do not try any of the other methods below as they are not safe
             for overlapping moves.  The caller has to be told that nothing
             was emitted, so a null IS_MOVE_DONE is a caller bug.  */
          gcc_assert (is_move_done);
          *is_move_done = false;
          if (method == BLOCK_OP_CALL_PARM)
            OK_DEFER_POP;
          return retval;
        }
    }

  if (pattern_ok)
    ;
  else if (pieces_ok)
    move_by_pieces (x, y, INTVAL (size), align, RETURN_BEGIN);
  else if (may_use_call && !might_overlap
           && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (x))
           && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (y)))
    {
      if (bail_out_libcall)
        {
          if (is_move_done)
            *is_move_done = false;
          if (method == BLOCK_OP_CALL_PARM)
            OK_DEFER_POP;
          return retval;
        }

      if (may_use_call < 0)
        {
          /* Only BLOCK_OP_NO_LIBCALL_RET gives a negative value, but
             keep the defer-pop bookkeeping uniform across all exits.  */
          if (method == BLOCK_OP_CALL_PARM)
            OK_DEFER_POP;
          return pc_rtx;
        }

      retval = emit_block_copy_via_libcall (x, y, size,
                                            method == BLOCK_OP_TAILCALL);
    }
  else if (might_overlap)
    {
      gcc_assert (is_move_done);
      *is_move_done = false;
    }
  else
    emit_block_move_via_loop (x, y, size, align);

  if (method == BLOCK_OP_CALL_PARM)
    OK_DEFER_POP;

  return retval;
}
/* Emit code to move a block Y to a block X of SIZE bytes using METHOD.
   This is emit_block_move_hints with no alignment or size hints: the
   size bounds are SIZE itself when it is a CONST_INT, otherwise zero and
   the mask of SIZE's mode.  */
rtx
emit_block_move (rtx x, rtx y, rtx size, enum block_op_methods method)
{
  unsigned HOST_WIDE_INT min_size = 0;
  unsigned HOST_WIDE_INT max_size;

  if (CONST_INT_P (size))
    min_size = max_size = UINTVAL (size);
  else
    max_size = GET_MODE_MASK (GET_MODE (size));

  return emit_block_move_hints (x, y, size, method, 0, -1,
                                min_size, max_size, max_size);
}
/* A subroutine of emit_block_move. Returns true if calling the
block move libcall will not clobber any parameters which may have
already been placed on the stack. */
static bool
block_move_libcall_safe_for_call_parm (void)
{
tree fn;
/* If arguments are pushed on the stack, then they're safe. */
if (targetm.calls.push_argument (0))
return true;
/* If registers go on the stack anyway, any argument is sure to clobber
an outgoing argument. */
#if defined (REG_PARM_STACK_SPACE)
fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
/* Avoid set but not used warning if *REG_PARM_STACK_SPACE doesn't
depend on its argument. */
(void) fn;
if (OUTGOING_REG_PARM_STACK_SPACE ((!fn ? NULL_TREE : TREE_TYPE (fn)))
&& REG_PARM_STACK_SPACE (fn) != 0)
return false;
#endif
/* If any argument goes in memory, then it might clobber an outgoing
argument. */
{
CUMULATIVE_ARGS args_so_far_v;
cumulative_args_t args_so_far;
tree arg;
fn = builtin_decl_implicit (BUILT_IN_MEMCPY);
INIT_CUMULATIVE_ARGS (args_so_far_v, TREE_TYPE (fn), NULL_RTX, 0, 3);
args_so_far = pack_cumulative_args (&args_so_far_v);
arg = TYPE_ARG_TYPES (TREE_TYPE (fn));
for ( ; arg != void_list_node ; arg = TREE_CHAIN (arg))
{
machine_mode mode = TYPE_MODE (TREE_VALUE (arg));
function_arg_info arg_info (mode, /*named=*/true);
rtx tmp = targetm.calls.function_arg (args_so_far, arg_info);
if (!tmp || !REG_P (tmp))
return false;
if (targetm.calls.arg_partial_bytes (args_so_far, arg_info))
return false;
targetm.calls.function_arg_advance (args_so_far, arg_info);
}
}
return true;
}
/* A subroutine of emit_block_move.  Expand a cpymem or movmem pattern;
   return true if successful.

   X is the destination of the copy or move.
   Y is the source of the copy or move.
   SIZE is the size of the block to be moved.
   ALIGN is the alignment of the blocks; EXPECTED_ALIGN is raised to it
   if smaller.
   EXPECTED_SIZE, unless -1, is clamped to lie between MIN_SIZE and
   PROBABLE_MAX_SIZE.  These values and MAX_SIZE are passed to patterns
   that accept the corresponding operands.

   MIGHT_OVERLAP indicates this originated with expansion of a
   builtin_memmove() and the source and destination blocks may
   overlap; the movmem pattern is used then, and cpymem otherwise.  */
static bool
emit_block_move_via_pattern (rtx x, rtx y, rtx size, unsigned int align,
                             unsigned int expected_align,
                             HOST_WIDE_INT expected_size,
                             unsigned HOST_WIDE_INT min_size,
                             unsigned HOST_WIDE_INT max_size,
                             unsigned HOST_WIDE_INT probable_max_size,
                             bool might_overlap)
{
  expected_align = MAX (expected_align, align);

  if (expected_size != -1)
    {
      if ((unsigned HOST_WIDE_INT) expected_size > probable_max_size)
        expected_size = probable_max_size;
      if ((unsigned HOST_WIDE_INT) expected_size < min_size)
        expected_size = min_size;
    }

  /* Since this is a move insn, we don't care about volatility.  */
  temporary_volatile_ok v (true);

  const direct_optab pattern = might_overlap ? movmem_optab : cpymem_optab;

  /* Try the most limited insn first, because there's no point
     including more than one in the machine description unless
     the more limited one has some advantage.  */
  opt_scalar_int_mode mode_iter;
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_INT)
    {
      scalar_int_mode mode = mode_iter.require ();

      const insn_code code = direct_optab_handler (pattern, mode);
      if (code == CODE_FOR_nothing)
        continue;

      /* We don't need MODE to be narrower than BITS_PER_HOST_WIDE_INT
         here because if SIZE is less than the mode mask, as it is
         returned by the macro, it will definitely be less than the
         actual mode mask.  Since SIZE is within the Pmode address
         space, we limit MODE to Pmode.  */
      const unsigned HOST_WIDE_INT half_mask = GET_MODE_MASK (mode) >> 1;
      const bool size_fits
        = ((CONST_INT_P (size)
            && (unsigned HOST_WIDE_INT) INTVAL (size) <= half_mask)
           || max_size <= half_mask
           || GET_MODE_BITSIZE (mode) >= GET_MODE_BITSIZE (Pmode));
      if (!size_fits)
        continue;

      class expand_operand ops[9];

      /* ??? When called via emit_block_move_for_call, it'd be
         nice if there were some way to inform the backend, so
         that it doesn't fail the expansion because it thinks
         emitting the libcall would be more efficient.  */
      const unsigned int nops = insn_data[(int) code].n_generator_args;
      gcc_assert (nops == 4 || nops == 6 || nops == 8 || nops == 9);

      create_fixed_operand (&ops[0], x);
      create_fixed_operand (&ops[1], y);
      /* The check above guarantees that this size conversion is valid.  */
      create_convert_operand_to (&ops[2], size, mode, true);
      create_integer_operand (&ops[3], align / BITS_PER_UNIT);

      if (nops >= 6)
        {
          create_integer_operand (&ops[4], expected_align / BITS_PER_UNIT);
          create_integer_operand (&ops[5], expected_size);
        }

      if (nops >= 8)
        {
          create_integer_operand (&ops[6], min_size);
          /* If we cannot represent the maximal size,
             make parameter NULL.  */
          if ((HOST_WIDE_INT) max_size != -1)
            create_integer_operand (&ops[7], max_size);
          else
            create_fixed_operand (&ops[7], NULL);
        }

      if (nops == 9)
        {
          /* If we cannot represent the maximal size,
             make parameter NULL.  */
          if ((HOST_WIDE_INT) probable_max_size != -1)
            create_integer_operand (&ops[8], probable_max_size);
          else
            create_fixed_operand (&ops[8], NULL);
        }

      if (maybe_expand_insn (code, nops, ops))
        return true;
    }

  return false;
}
/* A subroutine of emit_block_move.  Copy the data via an explicit
   loop.  This is used only when libcalls are forbidden.  The emitted
   loop copies one QImode element per iteration and tests the counter
   against SIZE at the bottom, so nothing is copied when SIZE is zero.  */
/* ??? It'd be nice to copy in hunks larger than QImode.  */
static void
emit_block_move_via_loop (rtx x, rtx y, rtx size,
                          unsigned int align ATTRIBUTE_UNUSED)
{
  const machine_mode x_addr_mode = get_address_mode (x);
  const machine_mode y_addr_mode = get_address_mode (y);

  /* The counter uses the mode of SIZE, or word_mode if SIZE has none.  */
  machine_mode iter_mode = GET_MODE (size);
  if (iter_mode == VOIDmode)
    iter_mode = word_mode;

  rtx_code_label *top_label = gen_label_rtx ();
  rtx_code_label *cmp_label = gen_label_rtx ();

  rtx iter = gen_reg_rtx (iter_mode);
  emit_move_insn (iter, const0_rtx);

  rtx x_addr = force_operand (XEXP (x, 0), NULL_RTX);
  rtx y_addr = force_operand (XEXP (y, 0), NULL_RTX);
  do_pending_stack_adjust ();

  /* Enter the loop at the comparison.  */
  emit_jump (cmp_label);
  emit_label (top_label);

  /* Form the two element addresses from the base addresses and the
     counter, converting the counter to each address mode.  */
  rtx x_index = convert_modes (x_addr_mode, iter_mode, iter, true);
  x_addr = simplify_gen_binary (PLUS, x_addr_mode, x_addr, x_index);

  rtx y_index = x_index;
  if (x_addr_mode != y_addr_mode)
    y_index = convert_modes (y_addr_mode, iter_mode, iter, true);
  y_addr = simplify_gen_binary (PLUS, y_addr_mode, y_addr, y_index);

  x = change_address (x, QImode, x_addr);
  y = change_address (y, QImode, y_addr);
  emit_move_insn (x, y);

  /* Advance the counter by one.  */
  rtx next = expand_simple_binop (iter_mode, PLUS, iter, const1_rtx, iter,
                                  true, OPTAB_LIB_WIDEN);
  if (next != iter)
    emit_move_insn (iter, next);

  emit_label (cmp_label);
  emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
                           true, top_label,
                           profile_probability::guessed_always ()
                             .apply_scale (9, 10));
}
/* Expand a call to memcpy or memmove or memcmp, and return the result.
   FNCODE selects the built-in to call; DST, SRC and SIZE become its three
   arguments.  TAILCALL is true if this is a tail call.  */
rtx
emit_block_op_via_libcall (enum built_in_function fncode, rtx dst, rtx src,
                           rtx size, bool tailcall)
{
  /* Since dst and src are passed to a libcall, mark the corresponding
     tree EXPR as addressable.  */
  if (tree dst_expr = MEM_EXPR (dst))
    mark_addressable (dst_expr);
  if (tree src_expr = MEM_EXPR (src))
    mark_addressable (src_expr);

  /* Turn both addresses into ptr_mode trees.  */
  rtx dst_addr = copy_addr_to_reg (XEXP (dst, 0));
  dst_addr = convert_memory_address (ptr_mode, dst_addr);
  tree dst_tree = make_tree (ptr_type_node, dst_addr);

  rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
  src_addr = convert_memory_address (ptr_mode, src_addr);
  tree src_tree = make_tree (ptr_type_node, src_addr);

  /* Convert the size to the mode of sizetype, treating it as unsigned.  */
  const machine_mode size_mode = TYPE_MODE (sizetype);
  size = convert_to_mode (size_mode, size, 1);
  size = copy_to_mode_reg (size_mode, size);
  tree size_tree = make_tree (sizetype, size);

  /* It is incorrect to use the libcall calling conventions for calls to
     memcpy/memmove/memcmp because they can be provided by the user.  */
  tree fn = builtin_decl_implicit (fncode);
  tree call_expr = build_call_expr (fn, 3, dst_tree, src_tree, size_tree);
  CALL_EXPR_TAILCALL (call_expr) = tailcall;

  return expand_call (call_expr, NULL_RTX, false);
}
/* Try to expand cmpstrn or cmpmem operation ICODE with the given operands.
   ARG1_RTX and ARG2_RTX are the blocks to compare, ARG3_RTX is the length
   and ARG3_TYPE is its type, and ALIGN is passed as the alignment operand.
   TARGET is only used as the output if it is a pseudo register.  Return
   the result rtx on success, otherwise return null.  */
rtx
expand_cmpstrn_or_cmpmem (insn_code icode, rtx target, rtx arg1_rtx,
                          rtx arg2_rtx, tree arg3_type, rtx arg3_rtx,
                          HOST_WIDE_INT align)
{
  const machine_mode insn_mode = insn_data[icode].operand[0].mode;

  /* Drop a suggested target that is not a pseudo register.  */
  if (target && !(REG_P (target) && !HARD_REGISTER_P (target)))
    target = NULL_RTX;

  class expand_operand ops[5];
  create_output_operand (&ops[0], target, insn_mode);
  create_fixed_operand (&ops[1], arg1_rtx);
  create_fixed_operand (&ops[2], arg2_rtx);
  create_convert_operand_from (&ops[3], arg3_rtx, TYPE_MODE (arg3_type),
                               TYPE_UNSIGNED (arg3_type));
  create_integer_operand (&ops[4], align);

  if (!maybe_expand_insn (icode, 5, ops))
    return NULL_RTX;
  return ops[0].value;
}
/* Expand a block compare between X and Y with length LEN using the
   cmpmem optab, placing the result in TARGET.  LEN_TYPE is the type
   of the expression that was used to calculate the length.  ALIGN
   gives the known minimum common alignment.  Return NULL_RTX if there
   is no SImode cmpmem pattern or it could not be expanded.  */
static rtx
emit_block_cmp_via_cmpmem (rtx x, rtx y, rtx len, tree len_type, rtx target,
                           unsigned align)
{
  /* Note: The cmpstrnsi pattern, if it exists, is not suitable for
     implementing memcmp because it will stop if it encounters two
     zero bytes.  */
  const insn_code icode = direct_optab_handler (cmpmem_optab, SImode);

  if (icode != CODE_FOR_nothing)
    return expand_cmpstrn_or_cmpmem (icode, target, x, y, len_type, len,
                                     align);
  return NULL_RTX;
}
/* Emit code to compare a block Y to a block X.  This may be done with
   multiple scalar instructions or with a cmpmem pattern.

   Both X and Y must be MEM rtx's.  LEN is an rtx that says how long
   they are.  LEN_TYPE is the type of the expression that was used to
   calculate it.  TARGET is a suggestion for where to place the result.

   If EQUALITY_ONLY is true, it means we don't have to return the tri-state
   value of a normal memcmp call, instead we can just compare for equality.

   Optionally, the caller can pass a constfn and associated data in Y_CFN
   and Y_CFNDATA, describing that the second operand being compared is a
   known constant and how to obtain its data.

   Return the result of the comparison (const0_rtx for a zero-length
   compare), or NULL_RTX if we failed to perform the operation.  */
rtx
emit_block_cmp_hints (rtx x, rtx y, rtx len, tree len_type, rtx target,
                      bool equality_only, by_pieces_constfn y_cfn,
                      void *y_cfndata)
{
  if (CONST_INT_P (len) && INTVAL (len) == 0)
    return const0_rtx;

  gcc_assert (MEM_P (x) && MEM_P (y));
  unsigned int align = MIN (MEM_ALIGN (x), MEM_ALIGN (y));
  gcc_assert (align >= BITS_PER_UNIT);

  x = adjust_address (x, BLKmode, 0);
  y = adjust_address (y, BLKmode, 0);

  /* A constant-length equality test can be expanded piece by piece.  */
  const bool by_pieces
    = (equality_only
       && CONST_INT_P (len)
       && can_do_by_pieces (INTVAL (len), align, COMPARE_BY_PIECES));
  if (by_pieces)
    return compare_by_pieces (x, y, INTVAL (len), target, align,
                              y_cfn, y_cfndata);

  return emit_block_cmp_via_cmpmem (x, y, len, len_type, target, align);
}
/* Copy all or part of a value X into registers starting at REGNO.
   The number of registers to be filled is NREGS.  MODE is the mode of X,
   used when X is a constant that has to be forced into memory and when
   extracting its words.  */
void
move_block_to_reg (int regno, rtx x, int nregs, machine_mode mode)
{
  if (nregs == 0)
    return;

  if (CONSTANT_P (x) && !targetm.legitimate_constant_p (mode, x))
    x = validize_mem (force_const_mem (mode, x));

  /* See if the machine can do this with a load multiple insn.  */
  if (targetm.have_load_multiple ())
    {
      rtx_insn *last = get_last_insn ();
      rtx first = gen_rtx_REG (word_mode, regno);
      rtx_insn *pat = targetm.gen_load_multiple (first, x, GEN_INT (nregs));
      if (pat)
        {
          emit_insn (pat);
          return;
        }
      /* Discard anything emitted by the failed attempt.  */
      delete_insns_since (last);
    }

  /* Otherwise move one word at a time.  */
  for (int word = 0; word < nregs; word++)
    emit_move_insn (gen_rtx_REG (word_mode, regno + word),
                    operand_subword_force (x, word, mode));
}
/* Copy all or part of a BLKmode value X out of registers starting at REGNO.
The number of registers to be filled is NREGS. */
void
move_block_from_reg (int regno, rtx x, int nregs)
{
if (nregs == 0)
return;
/* See if the machine can do this with a store multiple insn. */
if (targetm.have_store_multiple ())
{
rtx_insn *last = get_last_insn ();
rtx first = gen_rtx_REG (word_mode, regno);
if (rtx_insn *pat = targetm.gen_store_multiple (x, first,
GEN_INT (nregs)))
{
emit_insn (pat);
return;
}
else
delete_insns_since (last);
}
for (int i = 0; i < nregs; i++)
{
rtx tem = operand_subword (x, i, 1, BLKmode);
gcc_assert (tem);
emit_move_insn (tem, gen_rtx_REG (word_mode, regno + i));
}
}
/* Generate a PARALLEL rtx for a new non-consecutive group of registers from
   ORIG, where ORIG is a non-consecutive group of registers represented by
   a PARALLEL.  The clone is identical to the original except in that the
   original set of registers is replaced by a new set of pseudo registers.
   The new set has the same modes as the original set.  */
rtx
gen_group_rtx (rtx orig)
{
  gcc_assert (GET_CODE (orig) == PARALLEL);

  const int length = XVECLEN (orig, 0);
  rtx *tmps = XALLOCAVEC (rtx, length);

  /* Skip a NULL entry in first slot.  */
  int i = 0;
  if (!XEXP (XVECEXP (orig, 0, 0), 0))
    {
      tmps[0] = 0;
      i = 1;
    }

  for (; i < length; i++)
    {
      rtx elt = XVECEXP (orig, 0, i);
      machine_mode mode = GET_MODE (XEXP (elt, 0));
      rtx offset = XEXP (elt, 1);

      tmps[i] = gen_rtx_EXPR_LIST (VOIDmode, gen_reg_rtx (mode), offset);
    }

  return gen_rtx_PARALLEL (GET_MODE (orig), gen_rtvec_v (length, tmps));
}
/* A subroutine of emit_group_load. Arguments as for emit_group_load,
except that values are placed in TMPS[i], and must later be moved
into corresponding XEXP (XVECEXP (DST, 0, i), 0) element.
TMPS is indexed by the element number of DST, so it needs room for
XVECLEN (DST, 0) entries. When the first element of DST has a NULL
register, TMPS[0] is not written. */
static void
emit_group_load_1 (rtx *tmps, rtx dst, rtx orig_src, tree type,
poly_int64 ssize)
{
rtx src;
int start, i;
machine_mode m = GET_MODE (orig_src);
gcc_assert (GET_CODE (dst) == PARALLEL);
/* A source that has a mode which is not a scalar integer mode, and that
is neither a MEM nor a CONCAT, is first copied into something else: a
pseudo in the integer mode that int_mode_for_mode finds for it, or
failing that a stack temporary. The load is then redone from that
copy. */
if (m != VOIDmode
&& !SCALAR_INT_MODE_P (m)
&& !MEM_P (orig_src)
&& GET_CODE (orig_src) != CONCAT)
{
scalar_int_mode imode;
if (int_mode_for_mode (GET_MODE (orig_src)).exists (&imode))
{
src = gen_reg_rtx (imode);
emit_move_insn (gen_lowpart (GET_MODE (orig_src), src), orig_src);
}
else
{
src = assign_stack_temp (GET_MODE (orig_src), ssize);
emit_move_insn (src, orig_src);
}
emit_group_load_1 (tmps, dst, src, type, ssize);
return;
}
/* Check for a NULL entry, used to indicate that the parameter goes
both on the stack and in registers. */
if (XEXP (XVECEXP (dst, 0, 0), 0))
start = 0;
else
start = 1;
/* Process the pieces. */
for (i = start; i < XVECLEN (dst, 0); i++)
{
/* MODE is the mode of the destination register of piece I, BYTEPOS the
byte offset recorded for it in DST, BYTELEN the number of bytes to
load (initially the full size of MODE) and SHIFT a left shift, in
bits, to apply to the loaded value. */
machine_mode mode = GET_MODE (XEXP (XVECEXP (dst, 0, i), 0));
poly_int64 bytepos = rtx_to_poly_int64 (XEXP (XVECEXP (dst, 0, i), 1));
poly_int64 bytelen = GET_MODE_SIZE (mode);
poly_int64 shift = 0;
/* Handle trailing fragments that run over the size of the struct.
It's the target's responsibility to make sure that the fragment
cannot be strictly smaller in some cases and strictly larger
in others. */
gcc_checking_assert (ordered_p (bytepos + bytelen, ssize));
if (known_size_p (ssize) && maybe_gt (bytepos + bytelen, ssize))
{
/* Arrange to shift the fragment to where it belongs.
extract_bit_field loads to the lsb of the reg. */
if (
#ifdef BLOCK_REG_PADDING
BLOCK_REG_PADDING (GET_MODE (orig_src), type, i == start)
== (BYTES_BIG_ENDIAN ? PAD_UPWARD : PAD_DOWNWARD)
#else
BYTES_BIG_ENDIAN
#endif
)
shift = (bytelen - (ssize - bytepos)) * BITS_PER_UNIT;
/* Only the bytes that lie inside the block are loaded. */
bytelen = ssize - bytepos;
gcc_assert (maybe_gt (bytelen, 0));
}
/* If we won't be loading directly from memory, protect the real source
from strange tricks we might play; but make sure that the source can
be loaded directly into the destination. */
src = orig_src;
if (!MEM_P (orig_src)
&& (!CONSTANT_P (orig_src)
|| (GET_MODE (orig_src) != mode
&& GET_MODE (orig_src) != VOIDmode)))
{
/* Copy into a fresh pseudo; a VOIDmode source takes the mode of the
piece, anything else keeps its own mode. */
if (GET_MODE (orig_src) == VOIDmode)
src = gen_reg_rtx (mode);
else
src = gen_reg_rtx (GET_MODE (orig_src));
emit_move_insn (src, orig_src);
}
/* Optimize the access just a bit. */
/* A whole-mode piece of a MEM at a position that is a multiple of the
mode's alignment is loaded with a plain move, provided the MEM is
aligned enough or the target does not report unaligned access in
MODE as slow. */
if (MEM_P (src)
&& (! targetm.slow_unaligned_access (mode, MEM_ALIGN (src))
|| MEM_ALIGN (src) >= GET_MODE_ALIGNMENT (mode))
&& multiple_p (bytepos * BITS_PER_UNIT, GET_MODE_ALIGNMENT (mode))
&& known_eq (bytelen, GET_MODE_SIZE (mode)))
{
tmps[i] = gen_reg_rtx (mode);
emit_move_insn (tmps[i], adjust_address (src, mode, bytepos));
}
else if (COMPLEX_MODE_P (mode)
&& GET_MODE (src) == mode
&& known_eq (bytelen, GET_MODE_SIZE (mode)))
/* Let emit_move_complex do the bulk of the work. */
tmps[i] = src;
else if (GET_CODE (src) == CONCAT)
{
/* SLEN is the size of the whole CONCAT, SLEN0 the size of its first
operand. ELT and SUBPOS receive the quotient and remainder of
BYTEPOS divided by SLEN0. */
poly_int64 slen = GET_MODE_SIZE (GET_MODE (src));
poly_int64 slen0 = GET_MODE_SIZE (GET_MODE (XEXP (src, 0)));
unsigned int elt;
poly_int64 subpos;
if (can_div_trunc_p (bytepos, slen0, &elt, &subpos)
&& known_le (subpos + bytelen, slen0))
{
/* The following assumes that the concatenated objects all
have the same size. In this case, a simple calculation
can be used to determine the object and the bit field
to be extracted. */
tmps[i] = XEXP (src, elt);
/* The operand is used directly only when the piece covers all of
it and it is a constant or a register already in MODE. */
if (maybe_ne (subpos, 0)
|| maybe_ne (subpos + bytelen, slen0)
|| (!CONSTANT_P (tmps[i])
&& (!REG_P (tmps[i]) || GET_MODE (tmps[i]) != mode)))
tmps[i] = extract_bit_field (tmps[i], bytelen * BITS_PER_UNIT,
subpos * BITS_PER_UNIT,
1, NULL_RTX, mode, mode, false,
NULL);
}
else
{
/* The piece is not known to lie within a single operand of the
CONCAT. It must start at byte 0; copy the whole CONCAT to a
stack temporary and extract from there. */
rtx mem;
gcc_assert (known_eq (bytepos, 0));
mem = assign_stack_temp (GET_MODE (src), slen);
emit_move_insn (mem, src);
tmps[i] = extract_bit_field (mem, bytelen * BITS_PER_UNIT,
0, 1, NULL_RTX, mode, mode, false,
NULL);
}
}
/* FIXME: A SIMD parallel will eventually lead to a subreg of a
SIMD register, which is currently broken. While we get GCC
to emit proper RTL for these cases, let's dump to memory. */
else if (VECTOR_MODE_P (GET_MODE (dst))
&& REG_P (src))
{
poly_uint64 slen = GET_MODE_SIZE (GET_MODE (src));
rtx mem;
mem = assign_stack_temp (GET_MODE (src), slen);
emit_move_insn (mem, src);
tmps[i] = adjust_address (mem, mode, bytepos);
}
/* A constant loaded into a non-BLKmode DST with more than one element:
take the MODE piece at BYTEPOS, treating the constant as having
DST's mode. */
else if (CONSTANT_P (src) && GET_MODE (dst) != BLKmode
&& XVECLEN (dst, 0) > 1)
tmps[i] = simplify_gen_subreg (mode, src, GET_MODE (dst), bytepos);
/* Any other constant is either used whole or split into two halves. */
else if (CONSTANT_P (src))
{
if (known_eq (bytelen, ssize))
tmps[i] = src;
else
{
rtx first, second;
/* TODO: const_wide_int can have sizes other than this... */
gcc_assert (known_eq (2 * bytelen, ssize));
split_double (src, &first, &second);
/* Piece 0 receives FIRST; any other piece receives SECOND. */
if (i)
tmps[i] = second;
else
tmps[i] = first;
}
}
/* A register that is already in MODE is used as it is. */
else if (REG_P (src) && GET_MODE (src) == mode)
tmps[i] = src;
/* Otherwise extract BYTELEN bytes at BYTEPOS with extract_bit_field. */
else
tmps[i] = extract_bit_field (src, bytelen * BITS_PER_UNIT,
bytepos * BITS_PER_UNIT, 1, NULL_RTX,
mode, mode, false, NULL);
/* Move a trailing fragment into place using the shift computed
above. */
if (maybe_ne (shift, 0))
tmps[i] = expand_shift (LSHIFT_EXPR, mode, tmps[i],
shift, tmps[i], 0);
}
}
/* Emit code that loads the block SRC, of type TYPE, into DST, a PARALLEL
   describing a group of non-consecutive registers.  SSIZE is the total
   size of SRC in bytes, or -1 if not known.  */
void
emit_group_load (rtx dst, rtx src, tree type, poly_int64 ssize)
{
  const int count = XVECLEN (dst, 0);
  rtx *pieces = XALLOCAVEC (rtx, count);

  /* Extract every piece first; the registers of DST are written only
     afterwards.  */
  emit_group_load_1 (pieces, dst, src, type, ssize);

  /* Copy the extracted pieces into the proper (probable) hard regs,
     skipping an element whose register is NULL.  */
  for (int idx = 0; idx < count; idx++)
    if (rtx target_reg = XEXP (XVECEXP (dst, 0, idx), 0))
      emit_move_insn (target_reg, pieces[idx]);
}
/* Like emit_group_load, but rather than writing the registers named by
   PARALLEL, load SRC into values forced into registers and return a new
   PARALLEL shaped like the original that refers to them.  The result can
   later be handed to emit_group_move to put things in the right place.
   An element of PARALLEL whose register is NULL is reused unchanged.  */
rtx
emit_group_load_into_temps (rtx parallel, rtx src, tree type, poly_int64 ssize)
{
  const int count = XVECLEN (parallel, 0);
  rtvec result = rtvec_alloc (count);

  /* Use the vector's own storage as the array of loaded pieces.  */
  emit_group_load_1 (&RTVEC_ELT (result, 0), parallel, src, type, ssize);

  /* Replace each loaded value by an EXPR_LIST that looks like the
     matching element of PARALLEL.  */
  for (int idx = 0; idx < count; idx++)
    {
      rtx elem = XVECEXP (parallel, 0, idx);
      rtx reg = XEXP (elem, 0);

      if (reg != NULL_RTX)
        {
          rtx value = force_reg (GET_MODE (reg), RTVEC_ELT (result, idx));
          elem = alloc_EXPR_LIST (REG_NOTE_KIND (elem), value,
                                  XEXP (elem, 1));
        }

      RTVEC_ELT (result, idx) = elem;
    }

  return gen_rtx_PARALLEL (GET_MODE (parallel), result);
}
/* Emit one move per element from the register group SRC to the register
   group DST.  Both must be PARALLELs with the same number of elements.
   Element 0 is skipped when its register in SRC is NULL.  */
void
emit_group_move (rtx dst, rtx src)
{
  gcc_assert (GET_CODE (src) == PARALLEL
              && GET_CODE (dst) == PARALLEL
              && XVECLEN (src, 0) == XVECLEN (dst, 0));

  const int count = XVECLEN (src, 0);
  int idx = (XEXP (XVECEXP (src, 0, 0), 0) == NULL_RTX) ? 1 : 0;

  while (idx < count)
    {
      rtx to = XEXP (XVECEXP (dst, 0, idx), 0);
      rtx from = XEXP (XVECEXP (src, 0, idx), 0);
      emit_move_insn (to, from);
      idx++;
    }
}
/* Copy every register of the group SRC, a PARALLEL, with copy_to_reg and
   return a new PARALLEL of the same mode whose elements pair those copies
   with the original offsets.  An element whose register is NULL is reused
   unchanged.  */
rtx
emit_group_move_into_temps (rtx src)
{
  const int count = XVECLEN (src, 0);
  rtvec copies = rtvec_alloc (count);

  for (int idx = 0; idx < count; idx++)
    {
      rtx elem = XVECEXP (src, 0, idx);

      if (rtx reg = XEXP (elem, 0))
        {
          rtx pseudo = copy_to_reg (reg);
          elem = alloc_EXPR_LIST (REG_NOTE_KIND (elem), pseudo,
                                  XEXP (elem, 1));
        }

      RTVEC_ELT (copies, idx) = elem;
    }

  return gen_rtx_PARALLEL (GET_MODE (src), copies);
}
/* Emit code to move a block SRC to a block ORIG_DST of type TYPE,
where SRC is non-consecutive registers represented by a PARALLEL.
SSIZE represents the total size of block ORIG_DST, or -1 if not
known. */
void
emit_group_store (rtx orig_dst, rtx src, tree type ATTRIBUTE_UNUSED,
poly_int64 ssize)
{
rtx *tmps, dst;
int start, finish, i;
machine_mode m = GET_MODE (orig_dst);
gcc_assert (GET_CODE (src) == PARALLEL);
if (!SCALAR_INT_MODE_P (m)
&& !MEM_P (orig_dst) && GET_CODE (orig_dst) != CONCAT)
{
scalar_int_mode imode;
if (int_mode_for_mode (GET_MODE (orig_dst)).exists (&imode))
{
dst = gen_reg_rtx (imode);
emit_group_store (dst, src, type, ssize);
dst = gen_lowpart (GET_MODE (orig_dst), dst);
}
else
{
dst = assign_stack_temp (GET_MODE (orig_dst), ssize);
emit_group_store (dst, src, type, ssize);
}
emit_move_insn (orig_dst, dst);
return;
}
/* Check for a NULL entry, used to indicate that the parameter goes
both on the stack and in registers. */
if (XEXP (XVECEXP (src, 0, 0), 0))
start = 0;
else
start = 1;
finish = XVECLEN (src, 0);
tmps = XALLOCAVEC (rtx, finish);
/* Copy the (probable) hard regs into pseudos. */
for (i = start; i < finish; i++)
{
rtx reg = XEXP (XVECEXP (src, 0, i), 0);
if (!REG_P (reg) || REGNO (reg) < FIRST_PSEUDO_REGISTER)
{
tmps[i] = gen_reg_rtx (GET_MODE (reg));
emit_move_insn (tmps[i], reg);
}
else
tmps[i] = reg;
}
/* If we won't be storing directly into memory, protect the real destination
from strange tricks we might play. */
dst = orig_dst;
if (GET_CODE (dst) == PARALLEL)
{
rtx temp;
/* We can get a PARALLEL dst if there is a conditional expression in
a return statement. In that case, the dst and src are the same,
so no action is necessary. */
if (rtx_equal_p (dst, src))
return;
/* It is unclear if we can ever reach here, but we may as well handle
it. Allocate a temporary, and split this into a store/load to/from
the temporary. */
temp = assign_stack_temp (GET_MODE (dst), ssize);
emit_group_store (temp, src, type, ssize);
emit_group_load (dst, temp, type, ssize);
return;
}
else if (!MEM_P (dst) && GET_CODE (dst) != CONCAT)
{
machine_mode outer = GET_MODE (dst);
machine_mode inner;
poly_int64 bytepos;
bool done = false;
rtx temp;
if (!REG_P (dst) || REGNO (dst) < FIRST_PSEUDO_REGISTER)
dst = gen_reg_rtx (outer);
/* Make life a bit easier for combine. */
/* If the first element of the vector is the low part
of the destination mode, use a paradoxical subreg to
initialize the destination. */
if (start < finish)
{
inner = GET_MODE (tmps[start]);
bytepos = subreg_lowpart_offset (inner, outer);
if (known_eq (rtx_to_poly_int64 (XEXP (XVECEXP (src, 0, start), 1)),
bytepos))
{
temp = simplify_gen_subreg (outer, tmps[start],
inner, 0);
if (temp)
{
emit_move_insn (dst, temp);
done = true;
start++;
}
}
}
/* If the first element wasn't the low part, try the last. */
if (!done
&& start < finish - 1)
{
inner = GET_MODE (tmps[finish - 1]);
bytepos = subreg_lowpart_offset (inner, outer);
if (known_eq (rtx_to_poly_int64 (XEXP (XVECEXP (src, 0,
finish - 1), 1)),
bytepos))
{
temp = simplify_gen_subreg (outer, tmps[finish - 1],
inner, 0);
if (temp)
{
emit_move_insn (dst, temp);
done = true;
finish--;
}
}
}
/* Otherwise, simply initialize the result to zero. */
if (!done)
emit_move_insn (dst, CONST0_RTX (outer));
}
/* Process the pieces. */
for (i = start; i < finish; i++)
{
poly_int64 bytepos = rtx_to_poly_int64 (XEXP (XVECEXP (src, 0, i), 1));
machine_mode mode = GET_MODE (tmps[i]);
poly_int64 bytelen = GET_MODE_SIZE (mode);
poly_uint64 adj_bytelen;
rtx dest = dst;
/* Handle trailing fragments that run over the size of the struct.
It's the target's responsibility to make sure that the fragment
cannot be strictly smaller in some cases and strictly larger
in others. */
gcc_checking_assert (ordered_p (bytepos + bytelen, ssize));
if (known_size_p (ssize) && maybe_gt (bytepos + bytelen, ssize))
adj_bytelen = ssize - bytepos;
else
adj_bytelen = bytelen;
if (GET_CODE (dst) == CONCAT)
{
if (known_le (bytepos + adj_bytelen,
GET_MODE_SIZE (GET_MODE (XEXP (dst, 0)))))
dest = XEXP (dst, 0);
else if (known_ge (bytepos, GET_MODE_SIZE (GET_MODE (XEXP (dst, 0)))))
{
bytepos -= GET_MODE_SIZE (GET_MODE (XEXP (dst, 0)));
dest = XEXP (dst, 1);
}
else
{
machine_mode dest_mode = GET_MODE (dest);
machine_mode tmp_mode = GET_MODE (tmps[i]);
gcc_assert (known_eq (bytepos, 0) && XVECLEN (src, 0));
if (GET_MODE_ALIGNMENT (dest_mode)
>= GET_MODE_ALIGNMENT (tmp_mode))
{
dest = assign_stack_temp (dest_mode,
GET_MODE_SIZE (dest_mode));
emit_move_insn (adjust_address