blob: 6bc6ccf9411c0176ef015613800b491e7e913a2f [file] [log] [blame]
/* Dependency checks for instruction scheduling, shared between ARM and
AARCH64.
Copyright (C) 1991-2020 Free Software Foundation, Inc.
Contributed by ARM Ltd.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published
by the Free Software Foundation; either version 3, or (at your
option) any later version.
GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
License for more details.
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "insn-modes.h"
#include "tm.h"
#include "rtl.h"
#include "rtl-iter.h"
#include "memmodel.h"
#include "diagnostic.h"
#include "tree.h"
#include "expr.h"
#include "function.h"
#include "emit-rtl.h"
/* Return TRUE if X represents a left shift: either an explicit ASHIFT,
   or a MULT whose second operand is a CONST_INT with a strictly positive
   exact_log2 (the shape a left shift can take after canonicalization).  */

bool
arm_rtx_shift_left_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case ASHIFT:
      return true;

    case MULT:
      {
        /* Only a constant multiplier can stand for a shift amount.  */
        rtx factor = XEXP (x, 1);
        return CONST_INT_P (factor) && exact_log2 (INTVAL (factor)) > 0;
      }

    default:
      return false;
    }
}
/* RTX codes that arm_find_sub_rtx_with_code treats as "a shift" when it
   is asked to find any shift.  Besides the shift and rotate codes the
   list also contains ZERO_EXTEND and SIGN_EXTEND.  The table is only
   ever read, hence const.  */
static const rtx_code shift_rtx_codes[] =
  { ASHIFT, ROTATE, ASHIFTRT, LSHIFTRT,
    ROTATERT, ZERO_EXTEND, SIGN_EXTEND };
/* Walk the sub-rtxes of PATTERN and return the first one, in iteration
   order, that is acceptable; return NULL_RTX if none is.

   A sub-rtx is acceptable when its RTX code is CODE.  If FIND_ANY_SHIFT
   is true it is also acceptable when it looks like a shift, i.e. when
   arm_rtx_shift_left_p holds for it or its code is listed in
   shift_rtx_codes.  */

static rtx
arm_find_sub_rtx_with_code (rtx pattern, rtx_code code, bool find_any_shift)
{
  subrtx_var_iterator::array_type array;
  FOR_EACH_SUBRTX_VAR (iter, array, pattern, NONCONST)
    {
      rtx sub = *iter;
      const rtx_code sub_code = GET_CODE (sub);
      bool wanted = (sub_code == code);

      if (!wanted && find_any_shift)
        {
          /* Left shifts might have been canonicalized to a MULT by some
             power of two, so test for that form explicitly.  */
          wanted = arm_rtx_shift_left_p (sub);

          for (unsigned int idx = 0;
               !wanted && idx < ARRAY_SIZE (shift_rtx_codes);
               idx++)
            wanted = (sub_code == shift_rtx_codes[idx]);
        }

      if (wanted)
        return sub;
    }

  return NULL_RTX;
}
/* Return the first sub-rtx of PATTERN that looks like a shift, or
   NULL_RTX if PATTERN contains none.  */

static rtx
arm_find_shift_sub_rtx (rtx pattern)
{
  /* ASHIFT is passed as the explicit code; asking for any shift widens
     the search to every shift-like RTX.  */
  const bool any_shift = true;
  return arm_find_sub_rtx_with_code (pattern, ASHIFT, any_shift);
}
/* PRODUCER and CONSUMER are two potentially dependent insns.  Locate the
   first SET found in the pattern of each.

   If both have one, store the destination of PRODUCER's SET (what it
   writes) in *SET_SOURCE, store the source of CONSUMER's SET (what it
   reads) in *SET_DESTINATION, and return 1.  Note the naming:
   *SET_SOURCE receives a SET_DEST and *SET_DESTINATION receives a
   SET_SRC.

   Return 0 without writing either output if one of the patterns
   contains no SET.  */

static int
arm_get_set_operands (rtx producer, rtx consumer,
                      rtx *set_source, rtx *set_destination)
{
  rtx producer_set
    = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
  rtx consumer_set
    = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);

  if (producer_set == NULL_RTX || consumer_set == NULL_RTX)
    return 0;

  *set_source = SET_DEST (producer_set);
  *set_destination = SET_SRC (consumer_set);
  return 1;
}
/* Return true if VAL is a CONST_INT whose value equals the constant
   0x00ff00ff00ff00ff truncated to MODE.  aarch_rev16_p_1 uses this to
   validate the mask applied to the right-shifted operand.  */

bool
aarch_rev16_shright_mask_imm_p (rtx val, machine_mode mode)
{
  if (!CONST_INT_P (val))
    return false;

  const HOST_WIDE_INT wanted
    = trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff), mode);
  return INTVAL (val) == wanted;
}
/* Return true if VAL is a CONST_INT whose value equals the constant
   0xff00ff00ff00ff00 truncated to MODE.  aarch_rev16_p_1 uses this to
   validate the mask applied to the left-shifted operand.  */

bool
aarch_rev16_shleft_mask_imm_p (rtx val, machine_mode mode)
{
  if (!CONST_INT_P (val))
    return false;

  const HOST_WIDE_INT wanted
    = trunc_int_for_mode (HOST_WIDE_INT_C (0xff00ff00ff00ff00), mode);
  return INTVAL (val) == wanted;
}
/* Helper for aarch_rev16_p.  Return true if LHS has the form
     (and (ashift (reg R) (const_int 8)) (const_int LMASK))
   and RHS has the form
     (and (lshiftrt (reg R) (const_int 8)) (const_int RMASK))
   with the same register number R on both sides, where LMASK satisfies
   aarch_rev16_shleft_mask_imm_p and RMASK satisfies
   aarch_rev16_shright_mask_imm_p for MODE.  */

static bool
aarch_rev16_p_1 (rtx lhs, rtx rhs, machine_mode mode)
{
  /* The left-shift half.  */
  if (GET_CODE (lhs) != AND)
    return false;

  rtx shl = XEXP (lhs, 0);
  rtx shl_mask = XEXP (lhs, 1);
  if (GET_CODE (shl) != ASHIFT
      || !CONST_INT_P (XEXP (shl, 1))
      || INTVAL (XEXP (shl, 1)) != 8
      || !REG_P (XEXP (shl, 0))
      || !CONST_INT_P (shl_mask))
    return false;

  /* The right-shift half.  */
  if (GET_CODE (rhs) != AND)
    return false;

  rtx shr = XEXP (rhs, 0);
  rtx shr_mask = XEXP (rhs, 1);
  if (GET_CODE (shr) != LSHIFTRT
      || !REG_P (XEXP (shr, 0))
      || !CONST_INT_P (XEXP (shr, 1))
      || INTVAL (XEXP (shr, 1)) != 8
      || !CONST_INT_P (shr_mask))
    return false;

  /* Both halves must shift the same register.  */
  if (REGNO (XEXP (shr, 0)) != REGNO (XEXP (shl, 0)))
    return false;

  return (aarch_rev16_shright_mask_imm_p (shr_mask, mode)
          && aarch_rev16_shleft_mask_imm_p (shl_mask, mode));
}
/* Recognise a sequence of bitwise operations corresponding to a rev16
   operation.  These will be of the form:
     ((x >> 8) & 0x00ff00ff)
     | ((x << 8) & 0xff00ff00)
   for SImode and with similar but wider bitmasks for DImode.

   Return false straight away if X is not an IOR.  Otherwise the two
   operands of the IOR may come in either order, so both arrangements
   are offered to aarch_rev16_p_1, using the mode of X for the mask
   checks.  */

bool
aarch_rev16_p (rtx x)
{
  if (GET_CODE (x) != IOR)
    return false;

  const machine_mode mode = GET_MODE (x);
  rtx first = XEXP (x, 0);
  rtx second = XEXP (x, 1);

  /* There are no canonicalisation rules for the position of the two
     shifts involved in a rev, so try both permutations.  */
  return (aarch_rev16_p_1 (first, second, mode)
          || aarch_rev16_p_1 (second, first, mode));
}
/* CONST_INT is an RTX holding a memory model as an integer.  Return true
   if that memory model needs acquire semantics, which is the case for
   every model other than relaxed, consume and release.  */

bool
aarch_mm_needs_acquire (rtx const_int)
{
  const enum memmodel model = memmodel_from_int (INTVAL (const_int));

  if (is_mm_relaxed (model))
    return false;
  if (is_mm_consume (model))
    return false;
  return !is_mm_release (model);
}
/* CONST_INT is an RTX holding a memory model as an integer.  Return true
   if that memory model needs release semantics, which is the case for
   every model other than relaxed, consume and acquire.  */

bool
aarch_mm_needs_release (rtx const_int)
{
  const enum memmodel model = memmodel_from_int (INTVAL (const_int));

  if (is_mm_relaxed (model))
    return false;
  if (is_mm_consume (model))
    return false;
  return !is_mm_acquire (model);
}
/* Return nonzero if the CONSUMER instruction (a load) needs PRODUCER's
   value to calculate the address, i.e. if the destination of PRODUCER's
   SET overlaps something mentioned in the source of CONSUMER's SET.
   Return 0 if either insn has no SET.  */

int
arm_early_load_addr_dep (rtx producer, rtx consumer)
{
  rtx written = NULL_RTX;
  rtx read = NULL_RTX;

  if (arm_get_set_operands (producer, consumer, &written, &read) == 0)
    return 0;

  return reg_overlap_mentioned_p (written, read);
}
/* Return nonzero if the CONSUMER instruction (a load) needs a Pmode
   PRODUCER's value to calculate the address.

   This holds only when both insns contain a SET, the source of
   PRODUCER's SET is a MEM, its destination has mode Pmode, and that
   destination overlaps the source of CONSUMER's SET.  */

int
arm_early_load_addr_dep_ptr (rtx producer, rtx consumer)
{
  rtx producer_set
    = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
  rtx consumer_set
    = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);

  if (producer_set == NULL_RTX || consumer_set == NULL_RTX)
    return 0;

  /* Only a producer whose SET reads from memory is of interest.  */
  if (!MEM_P (SET_SRC (producer_set)))
    return 0;

  rtx loaded = SET_DEST (producer_set);
  if (GET_MODE (loaded) != Pmode)
    return 0;

  return reg_overlap_mentioned_p (loaded, SET_SRC (consumer_set)) ? 1 : 0;
}
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value or amount dependency on the
   result of PRODUCER.

   Return 0 if either insn has no SET, or if the source of CONSUMER's
   SET contains no shift-like sub-rtx at all.  */

int
arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
{
  rtx written = NULL_RTX;
  rtx read = NULL_RTX;

  if (arm_get_set_operands (producer, consumer, &written, &read) == 0)
    return 0;

  rtx shift = arm_find_shift_sub_rtx (read);
  if (shift == NULL_RTX)
    return 0;

  /* The whole shift expression is examined, covering both the shifted
     value and the shift amount.  */
  return reg_overlap_mentioned_p (written, shift) ? 0 : 1;
}
/* Return nonzero if the CONSUMER instruction (an ALU op) does not
   have an early register shift value dependency on the result of
   PRODUCER.

   Return 0 if either insn has no SET, or if the source of CONSUMER's
   SET contains no shift-like sub-rtx at all.  */

int
arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
{
  rtx written = NULL_RTX;
  rtx read = NULL_RTX;

  if (arm_get_set_operands (producer, consumer, &written, &read) == 0)
    return 0;

  rtx shift = arm_find_shift_sub_rtx (read);
  if (shift == NULL_RTX)
    return 0;

  /* Only operand 0 of the shift, the value being shifted, matters
     here; the shift amount is deliberately ignored.  */
  rtx shifted_value = XEXP (shift, 0);
  return reg_overlap_mentioned_p (written, shifted_value) ? 0 : 1;
}
/* Return nonzero if the CONSUMER (a mul or mac op) does not
   have an early register mult dependency on the result of
   PRODUCER.

   Return 0 if either insn has no SET, or if the source of CONSUMER's
   SET is neither a PLUS nor a MINUS.  */

int
arm_no_early_mul_dep (rtx producer, rtx consumer)
{
  rtx written = NULL_RTX;
  rtx read = NULL_RTX;

  if (arm_get_set_operands (producer, consumer, &written, &read) == 0)
    return 0;

  const rtx_code outer = GET_CODE (read);
  if (outer != PLUS && outer != MINUS)
    return 0;

  /* Examine operand 0 when it is the MULT, and operand 1 otherwise.  */
  rtx first = XEXP (read, 0);
  rtx examined = (GET_CODE (first) == MULT) ? first : XEXP (read, 1);

  return !reg_overlap_mentioned_p (written, examined);
}
/* Return nonzero if the CONSUMER instruction (a store) does not need
   PRODUCER's value to calculate the address, i.e. if the destination of
   PRODUCER's SET does not overlap the destination of CONSUMER's SET.
   Return 0 if either insn has no SET.  */

int
arm_no_early_store_addr_dep (rtx producer, rtx consumer)
{
  rtx producer_set
    = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
  rtx consumer_set
    = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);

  if (producer_set == NULL_RTX || consumer_set == NULL_RTX)
    return 0;

  rtx written = SET_DEST (producer_set);
  rtx store_dest = SET_DEST (consumer_set);
  return !reg_overlap_mentioned_p (written, store_dest);
}
/* Return nonzero if the CONSUMER instruction (a store) does need
   PRODUCER's value to calculate the address.

   This is the exact negation of arm_no_early_store_addr_dep, so it also
   returns nonzero whenever that function returns 0 because one of the
   insns has no SET.  */

int
arm_early_store_addr_dep (rtx producer, rtx consumer)
{
  const int independent = arm_no_early_store_addr_dep (producer, consumer);
  return independent == 0;
}
/* Return nonzero if the CONSUMER instruction (a store) needs a Pmode
   PRODUCER's value to calculate the address.

   This holds only when both insns contain a SET, the source of
   PRODUCER's SET is a MEM, its destination has mode Pmode, and that
   destination overlaps the destination of CONSUMER's SET.  */

int
arm_early_store_addr_dep_ptr (rtx producer, rtx consumer)
{
  rtx producer_set
    = arm_find_sub_rtx_with_code (PATTERN (producer), SET, false);
  rtx consumer_set
    = arm_find_sub_rtx_with_code (PATTERN (consumer), SET, false);

  if (producer_set == NULL_RTX || consumer_set == NULL_RTX)
    return 0;

  /* Only a producer whose SET reads from memory is of interest.  */
  if (!MEM_P (SET_SRC (producer_set)))
    return 0;

  rtx loaded = SET_DEST (producer_set);
  if (GET_MODE (loaded) != Pmode)
    return 0;

  return reg_overlap_mentioned_p (loaded, SET_DEST (consumer_set)) ? 1 : 0;
}
/* Return non-zero iff the consumer (a multiply-accumulate or a
   multiply-subtract instruction) has an accumulator dependency on the
   result of the producer and no other dependency on that result.  It
   does not check if the producer is a multiply-accumulate instruction.

   Both patterns are looked through a COND_EXEC wrapper first and must
   then be plain SETs; otherwise the answer is 0.  */

int
arm_mac_accumulator_is_result (rtx producer, rtx consumer)
{
  rtx producer_pat = PATTERN (producer);
  rtx consumer_pat = PATTERN (consumer);

  if (GET_CODE (producer_pat) == COND_EXEC)
    producer_pat = COND_EXEC_CODE (producer_pat);
  if (GET_CODE (consumer_pat) == COND_EXEC)
    consumer_pat = COND_EXEC_CODE (consumer_pat);

  if (GET_CODE (producer_pat) != SET || GET_CODE (consumer_pat) != SET)
    return 0;

  rtx result = SET_DEST (producer_pat);
  rtx src = SET_SRC (consumer_pat);

  /* The consumer must be of the form
       (set (...) (plus (mult ...) (...)))
     or
       (set (...) (minus (...) (mult ...))).
     Pick out which operand should be the MULT and which the
     accumulator.  */
  rtx mult, acc;
  switch (GET_CODE (src))
    {
    case PLUS:
      mult = XEXP (src, 0);
      acc = XEXP (src, 1);
      break;

    case MINUS:
      mult = XEXP (src, 1);
      acc = XEXP (src, 0);
      break;

    default:
      return 0;
    }

  if (GET_CODE (mult) != MULT)
    return 0;

  /* The result must feed the accumulator and neither multiplicand.  */
  return (reg_overlap_mentioned_p (result, acc)
          && !reg_overlap_mentioned_p (result, XEXP (mult, 0))
          && !reg_overlap_mentioned_p (result, XEXP (mult, 1)));
}
/* Return non-zero if the destination of PRODUCER feeds the accumulator
   operand of an MLA-like operation in CONSUMER.

   Both insns must be single sets and PRODUCER must set a register.
   The source of CONSUMER, after stripping one ZERO_EXTEND, must be a
   PLUS or MINUS involving a MULT, or an FMA; the accumulator operand,
   after stripping a SUBREG, must be a register with the same register
   number as PRODUCER's destination.  */

int
aarch_accumulator_forwarding (rtx_insn *producer, rtx_insn *consumer)
{
  /* We are looking for a SET feeding a SET.  */
  rtx producer_set = single_set (producer);
  rtx consumer_set = single_set (consumer);
  if (producer_set == NULL_RTX || consumer_set == NULL_RTX)
    return 0;

  /* We're looking for a register SET.  */
  rtx dest = SET_DEST (producer_set);
  if (!REG_P (dest))
    return 0;

  /* Strip a zero_extend.  */
  rtx mla = SET_SRC (consumer_set);
  if (GET_CODE (mla) == ZERO_EXTEND)
    mla = XEXP (mla, 0);

  rtx accumulator = NULL_RTX;
  const rtx_code op = GET_CODE (mla);

  if (op == PLUS)
    {
      /* Possibly an MADD: the MULT has to be operand 0.  */
      if (GET_CODE (XEXP (mla, 0)) != MULT)
        return 0;
      accumulator = XEXP (mla, 1);
    }
  else if (op == MINUS)
    {
      /* Possibly an MSUB: the MULT has to be operand 1.  */
      if (GET_CODE (XEXP (mla, 1)) != MULT)
        return 0;
      accumulator = XEXP (mla, 0);
    }
  else if (op == FMA)
    {
      /* Possibly an FMADD/FMSUB/FNMADD/FNMSUB.  In every accepted form
         operand 1 is a register and operand 0 is a register or a NEG.  */
      rtx op0 = XEXP (mla, 0);
      rtx op1 = XEXP (mla, 1);
      rtx op2 = XEXP (mla, 2);

      if (!REG_P (op1))
        return 0;
      if (!REG_P (op0) && GET_CODE (op0) != NEG)
        return 0;

      if (REG_P (op2))
        /* FMADD/FMSUB.  */
        accumulator = op2;
      else if (GET_CODE (op2) == NEG)
        /* FNMADD/FNMSUB.  */
        accumulator = XEXP (op2, 0);
      else
        return 0;
    }
  else
    /* Not an MLA-like operation.  */
    return 0;

  if (GET_CODE (accumulator) == SUBREG)
    accumulator = SUBREG_REG (accumulator);

  if (!REG_P (accumulator))
    return 0;

  return (REGNO (dest) == REGNO (accumulator));
}
/* Return non-zero if the consumer (a multiply-accumulate instruction)
   has an accumulator dependency on the result of the producer (a
   multiplication instruction) and no other dependency on that result.

   Both patterns are looked through a COND_EXEC wrapper first.  The
   producer must then be (set (...) (mult ...)) and the consumer
   (set (...) (plus (mult ...) (...))); otherwise the answer is 0.  */

int
arm_mac_accumulator_is_mul_result (rtx producer, rtx consumer)
{
  rtx mul = PATTERN (producer);
  rtx mac = PATTERN (consumer);

  if (GET_CODE (mul) == COND_EXEC)
    mul = COND_EXEC_CODE (mul);
  if (GET_CODE (mac) == COND_EXEC)
    mac = COND_EXEC_CODE (mac);

  /* The producer has to be a plain multiplication.  */
  if (GET_CODE (mul) != SET || GET_CODE (SET_SRC (mul)) != MULT)
    return 0;

  /* The consumer has to add something to a multiplication.  */
  if (GET_CODE (mac) != SET)
    return 0;
  rtx mac_src = SET_SRC (mac);
  if (GET_CODE (mac_src) != PLUS)
    return 0;
  rtx mac_mult = XEXP (mac_src, 0);
  if (GET_CODE (mac_mult) != MULT)
    return 0;

  rtx mul_result = SET_DEST (mul);
  rtx mac_acc = XEXP (mac_src, 1);

  /* The product must feed the accumulator and neither multiplicand.  */
  return (reg_overlap_mentioned_p (mul_result, mac_acc)
          && !reg_overlap_mentioned_p (mul_result, XEXP (mac_mult, 0))
          && !reg_overlap_mentioned_p (mul_result, XEXP (mac_mult, 1)));
}
/* Worker function for TARGET_MD_ASM_ADJUST.
   We implement asm flag outputs.

   Every output whose constraint starts with "=@cc" is a flag output;
   the two characters that follow name the condition.  For each one:

     - the first flag output is replaced by the flags register
       (constraint "=c"); later ones are replaced by a SCRATCH
       (constraint "=X"), since the flags register only needs to be an
       output once;

     - an insn is emitted into a new sequence that stores the result of
       comparing the flags register with zero, using the requested
       condition, into the original output.

   OUTPUTS and CONSTRAINTS are modified in place; the other arguments
   are unused.  Malformed flag outputs are diagnosed with error () and
   skipped.  Return the emitted sequence if at least one flag output was
   accepted, and NULL otherwise.  */

rtx_insn *
arm_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &/*inputs*/,
                   vec<const char *> &constraints,
                   vec<rtx> &/*clobbers*/, HARD_REG_SET &/*clobbered_regs*/)
{
  bool saw_asm_flag = false;

  start_sequence ();
  for (unsigned i = 0, n = outputs.length (); i < n; ++i)
    {
      const char *con = constraints[i];
      if (strncmp (con, "=@cc", 4) != 0)
        continue;
      con += 4;
      if (strchr (con, ',') != NULL)
        {
          error ("alternatives not allowed in %<asm%> flag output");
          continue;
        }

      machine_mode mode;
      rtx_code code;
      int con01 = 0;

/* Pack a two-character condition name into one integer so that it can
   be used as a case label.  */
#define C(X, Y)  ((unsigned char)(X) * 256 + (unsigned char)(Y))

      /* All of the condition codes are two characters.  Anything else
         leaves CON01 at zero, which falls into the default case.  */
      if (con[0] != 0 && con[1] != 0 && con[2] == 0)
        con01 = C(con[0], con[1]);

      switch (con01)
        {
        case C('c', 'c'):
        case C('l', 'o'):
          mode = CC_Cmode, code = GEU;
          break;
        case C('c', 's'):
        case C('h', 's'):
          mode = CC_Cmode, code = LTU;
          break;
        case C('e', 'q'):
          mode = CC_NZmode, code = EQ;
          break;
        case C('g', 'e'):
          mode = CCmode, code = GE;
          break;
        case C('g', 't'):
          mode = CCmode, code = GT;
          break;
        case C('h', 'i'):
          mode = CCmode, code = GTU;
          break;
        case C('l', 'e'):
          mode = CCmode, code = LE;
          break;
        case C('l', 's'):
          mode = CCmode, code = LEU;
          break;
        case C('l', 't'):
          mode = CCmode, code = LT;
          break;
        case C('m', 'i'):
          mode = CC_NZmode, code = LT;
          break;
        case C('n', 'e'):
          mode = CC_NZmode, code = NE;
          break;
        case C('p', 'l'):
          mode = CC_NZmode, code = GE;
          break;
        case C('v', 'c'):
          mode = CC_Vmode, code = EQ;
          break;
        case C('v', 's'):
          mode = CC_Vmode, code = NE;
          break;
        default:
          error ("unknown %<asm%> flag output %qs", constraints[i]);
          continue;
        }

#undef C

      rtx dest = outputs[i];
      machine_mode dest_mode = GET_MODE (dest);
      if (!SCALAR_INT_MODE_P (dest_mode))
        {
          error ("invalid type for %<asm%> flag output");
          continue;
        }

      if (!saw_asm_flag)
        {
          /* This is the first asm flag output.  Here we put the flags
             register in as the real output and adjust the condition to
             allow it.  */
          constraints[i] = "=c";
          outputs[i] = gen_rtx_REG (CCmode, CC_REGNUM);
          saw_asm_flag = true;
        }
      else
        {
          /* We don't need the flags register as output twice.  */
          constraints[i] = "=X";
          outputs[i] = gen_rtx_SCRATCH (word_mode);
        }

      rtx x = gen_rtx_REG (mode, CC_REGNUM);
      x = gen_rtx_fmt_ee (code, word_mode, x, const0_rtx);

      /* The comparison can only be stored directly into a word_mode
         register.  Any other destination (a different integer mode, or
         a word_mode destination that is not a REG, such as memory) gets
         the value through a fresh pseudo so that emit_move_insn can
         legitimize the final store.  */
      if (dest_mode == word_mode && REG_P (dest))
        emit_insn (gen_rtx_SET (dest, x));
      else
        {
          rtx tmp = gen_reg_rtx (word_mode);
          emit_insn (gen_rtx_SET (tmp, x));

          tmp = convert_modes (dest_mode, word_mode, tmp, true);
          emit_move_insn (dest, tmp);
        }
    }
  rtx_insn *seq = get_insns ();
  end_sequence ();

  return saw_asm_flag ? seq : NULL;
}