[APX NDD] Support APX NDD for left shift insns
For left shift, there is an optimization TARGET_DOUBLE_WITH_ADD that shl
1 can be optimized to add. As NDD form of add requires src operand to
be register since NDD cannot take 2 memory src, we currently just keep
using NDD form shift instead of add.
The optimization TARGET_SHIFT1 will try to remove constant 1 to use shorter
opcode, but under NDD assembler will automatically use it whether $1 exist
or not, so do not involve NDD with it.
The doubleword insns for left shift calls ix86_expand_ashl, which assume
all shift related pattern has same operand[0] and operand[1]. For these pattern
we will support them in a standalone patch.
gcc/ChangeLog:
* config/i386/i386.md (*ashl<mode>3_1): Extend with new
alternatives to support NDD, limit the new alternative to
generate sal only, and adjust output template for NDD.
(*ashlsi3_1_zext): Likewise.
(*ashlhi3_1): Likewise.
(*ashlqi3_1): Likewise.
(*ashl<mode>3_cmp): Likewise.
(*ashlsi3_cmp_zext): Likewise, and use nonimmediate_operand for
operands[1] to accept memory input for NDD alternative.
(*ashl<mode>3_cconly): Likewise.
(*ashl<dwi>3_doubleword_highpart): Adjust codegen for NDD.
gcc/testsuite/ChangeLog:
* gcc.target/i386/apx-ndd.c: Add tests for sal.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ad4c958..c67896c 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -14472,10 +14472,19 @@
{
split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[3]);
int bits = INTVAL (operands[2]) - (<MODE_SIZE> * BITS_PER_UNIT);
- if (!rtx_equal_p (operands[3], operands[1]))
- emit_move_insn (operands[3], operands[1]);
- if (bits > 0)
- emit_insn (gen_ashl<mode>3 (operands[3], operands[3], GEN_INT (bits)));
+ bool op_equal_p = rtx_equal_p (operands[3], operands[1]);
+ if (bits == 0)
+ {
+ if (!op_equal_p)
+ emit_move_insn (operands[3], operands[1]);
+ }
+ else
+ {
+ if (!op_equal_p && !TARGET_APX_NDD)
+ emit_move_insn (operands[3], operands[1]);
+ rtx op_tmp = TARGET_APX_NDD ? operands[1] : operands[3];
+ emit_insn (gen_ashl<mode>3 (operands[3], op_tmp, GEN_INT (bits)));
+ }
ix86_expand_clear (operands[0]);
DONE;
})
@@ -14782,12 +14791,14 @@
(set_attr "mode" "<MODE>")])
(define_insn "*ashl<mode>3_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")
- (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k,r")
+ (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k,rm")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>,c<S>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands,
+ TARGET_APX_NDD)"
{
+ bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_LEA:
@@ -14802,18 +14813,25 @@
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ /* For NDD form instructions related to TARGET_SHIFT1, the $1
+ immediate do not need to be omitted as assembler will map it
+ to use shorter encoding. */
+ && !use_ndd)
return "sal{<imodesuffix>}\t%0";
else
- return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,*,bmi2,<kmov_isa>")
+ [(set_attr "isa" "*,*,bmi2,<kmov_isa>,apx_ndd")
(set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
(eq_attr "alternative" "2")
(const_string "ishiftx")
+ (eq_attr "alternative" "4")
+ (const_string "ishift")
(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
@@ -14855,13 +14873,15 @@
(set_attr "mode" "SI")])
(define_insn "*ashlsi3_1_zext"
- [(set (match_operand:DI 0 "register_operand" "=r,r,r")
+ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r")
(zero_extend:DI
- (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
- (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm,rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M,r,cI"))))
(clobber (reg:CC FLAGS_REG))]
- "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+ "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands,
+ TARGET_APX_NDD)"
{
+ bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_LEA:
@@ -14874,18 +14894,22 @@
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !use_ndd)
return "sal{l}\t%k0";
else
- return "sal{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "sal{l}\t{%2, %k0|%k0, %2}";
}
}
- [(set_attr "isa" "*,*,bmi2")
+ [(set_attr "isa" "*,*,bmi2,apx_ndd")
(set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
(eq_attr "alternative" "2")
(const_string "ishiftx")
+ (eq_attr "alternative" "3")
+ (const_string "ishift")
(and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 2 "const1_operand"))
(const_string "alu")
@@ -14915,12 +14939,14 @@
"operands[2] = gen_lowpart (SImode, operands[2]);")
(define_insn "*ashlhi3_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")
- (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")
- (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k,r")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k,rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww,cI")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFT, HImode, operands)"
+ "ix86_binary_operator_ok (ASHIFT, HImode, operands,
+ TARGET_APX_NDD)"
{
+ bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_LEA:
@@ -14933,18 +14959,22 @@
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !use_ndd)
return "sal{w}\t%0";
else
- return "sal{w}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sal{w}\t{%2, %1, %0|%0, %1, %2}"
+ : "sal{w}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,*,avx512f")
+ [(set_attr "isa" "*,*,avx512f,apx_ndd")
(set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
(eq_attr "alternative" "2")
(const_string "msklog")
+ (eq_attr "alternative" "3")
+ (const_string "ishift")
(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
@@ -14960,15 +14990,17 @@
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
- (set_attr "mode" "HI,SI,HI")])
+ (set_attr "mode" "HI,SI,HI,HI")])
(define_insn "*ashlqi3_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")
- (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")
- (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k,r")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k,rm")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb,cI")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (ASHIFT, QImode, operands)"
+ "ix86_binary_operator_ok (ASHIFT, QImode, operands,
+ TARGET_APX_NDD)"
{
+ bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_LEA:
@@ -14984,7 +15016,8 @@
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !use_ndd)
{
if (get_attr_mode (insn) == MODE_SI)
return "sal{l}\t%k0";
@@ -14996,16 +15029,19 @@
if (get_attr_mode (insn) == MODE_SI)
return "sal{l}\t{%2, %k0|%k0, %2}";
else
- return "sal{b}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sal{b}\t{%2, %1, %0|%0, %1, %2}"
+ : "sal{b}\t{%2, %0|%0, %2}";
}
}
}
- [(set_attr "isa" "*,*,*,avx512dq")
+ [(set_attr "isa" "*,*,*,avx512dq,apx_ndd")
(set (attr "type")
(cond [(eq_attr "alternative" "2")
(const_string "lea")
(eq_attr "alternative" "3")
(const_string "msklog")
+ (eq_attr "alternative" "4")
+ (const_string "ishift")
(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
@@ -15021,10 +15057,10 @@
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
- (set_attr "mode" "QI,SI,SI,QI")
+ (set_attr "mode" "QI,SI,SI,QI,QI")
;; Potential partial reg stall on alternative 1.
(set (attr "preferred_for_speed")
- (cond [(eq_attr "alternative" "1")
+ (cond [(eq_attr "alternative" "1,4")
(symbol_ref "!TARGET_PARTIAL_REG_STALL")]
(symbol_ref "true")))])
@@ -15119,10 +15155,10 @@
(define_insn "*ashl<mode>3_cmp"
[(set (reg FLAGS_REG)
(compare
- (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
- (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+ (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
(const_int 0)))
- (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
+ (set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,r")
(ashift:SWI (match_dup 1) (match_dup 2)))]
"(optimize_function_for_size_p (cfun)
|| !TARGET_PARTIAL_FLAG_REG_STALL
@@ -15130,8 +15166,10 @@
&& (TARGET_SHIFT1
|| (TARGET_DOUBLE_WITH_ADD && REG_P (operands[0])))))
&& ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
+ && ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands,
+ TARGET_APX_NDD)"
{
+ bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_ALU:
@@ -15140,14 +15178,19 @@
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !use_ndd)
return "sal{<imodesuffix>}\t%0";
else
- return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
- (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+ [(set_attr "isa" "*,apx_ndd")
+ (set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "ishift")
+ (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
(const_string "alu")
@@ -15167,10 +15210,10 @@
(define_insn "*ashlsi3_cmp_zext"
[(set (reg FLAGS_REG)
(compare
- (ashift:SI (match_operand:SI 1 "register_operand" "0")
+ (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
(match_operand:QI 2 "const_1_to_31_operand"))
(const_int 0)))
- (set (match_operand:DI 0 "register_operand" "=r")
+ (set (match_operand:DI 0 "register_operand" "=r,r")
(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
"TARGET_64BIT
&& (optimize_function_for_size_p (cfun)
@@ -15179,8 +15222,10 @@
&& (TARGET_SHIFT1
|| TARGET_DOUBLE_WITH_ADD)))
&& ix86_match_ccmode (insn, CCGOCmode)
- && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
+ && ix86_binary_operator_ok (ASHIFT, SImode, operands,
+ TARGET_APX_NDD)"
{
+ bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_ALU:
@@ -15189,14 +15234,19 @@
default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !use_ndd)
return "sal{l}\t%k0";
else
- return "sal{l}\t{%2, %k0|%k0, %2}";
+ return use_ndd ? "sal{l}\t{%2, %1, %k0|%k0, %1, %2}"
+ : "sal{l}\t{%2, %k0|%k0, %2}";
}
}
- [(set (attr "type")
- (cond [(and (match_test "TARGET_DOUBLE_WITH_ADD")
+ [(set_attr "isa" "*,apx_ndd")
+ (set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "ishift")
+ (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 2 "const1_operand"))
(const_string "alu")
]
@@ -15215,10 +15265,10 @@
(define_insn "*ashl<mode>3_cconly"
[(set (reg FLAGS_REG)
(compare
- (ashift:SWI (match_operand:SWI 1 "register_operand" "0")
- (match_operand:QI 2 "<shift_immediate_operand>" "<S>"))
+ (ashift:SWI (match_operand:SWI 1 "nonimmediate_operand" "0,rm")
+ (match_operand:QI 2 "<shift_immediate_operand>" "<S>,<S>"))
(const_int 0)))
- (clobber (match_scratch:SWI 0 "=<r>"))]
+ (clobber (match_scratch:SWI 0 "=<r>,r"))]
"(optimize_function_for_size_p (cfun)
|| !TARGET_PARTIAL_FLAG_REG_STALL
|| (operands[2] == const1_rtx
@@ -15226,22 +15276,28 @@
|| TARGET_DOUBLE_WITH_ADD)))
&& ix86_match_ccmode (insn, CCGOCmode)"
{
+ bool use_ndd = get_attr_isa (insn) == ISA_APX_NDD;
switch (get_attr_type (insn))
{
case TYPE_ALU:
gcc_assert (operands[2] == const1_rtx);
return "add{<imodesuffix>}\t%0, %0";
- default:
+ default:
if (operands[2] == const1_rtx
- && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun))
+ && !use_ndd)
return "sal{<imodesuffix>}\t%0";
else
- return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return use_ndd ? "sal{<imodesuffix>}\t{%2, %1, %0|%0, %1, %2}"
+ : "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
- (cond [(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
+ [(set_attr "isa" "*,apx_ndd")
+ (set (attr "type")
+ (cond [(eq_attr "alternative" "1")
+ (const_string "ishift")
+ (and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
(const_string "alu")
diff --git a/gcc/testsuite/gcc.target/i386/apx-ndd.c b/gcc/testsuite/gcc.target/i386/apx-ndd.c
index d97648c..9951fb0 100644
--- a/gcc/testsuite/gcc.target/i386/apx-ndd.c
+++ b/gcc/testsuite/gcc.target/i386/apx-ndd.c
@@ -29,6 +29,16 @@
return c; \
}
+#define FOO3(TYPE, OP_NAME, OP, IMM) \
+TYPE \
+__attribute__ ((noipa)) \
+foo3_##OP_NAME##_##TYPE (TYPE a) \
+{ \
+ TYPE b = a OP IMM; \
+ return b; \
+}
+
+
#define F(TYPE, OP_NAME, OP) \
TYPE \
__attribute__ ((noipa)) \
@@ -112,6 +122,16 @@
FOO1 (int, xor, ^)
FOO (long, xor, ^)
FOO1 (long, xor, ^)
+
+FOO (char, shl, <<)
+FOO3 (char, shl, <<, 7)
+FOO (short, shl, <<)
+FOO3 (short, shl, <<, 7)
+FOO (int, shl, <<)
+FOO3 (int, shl, <<, 7)
+FOO (long, shl, <<)
+FOO3 (long, shl, <<, 7)
+
/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
/* { dg-final { scan-assembler-times "lea(?:l|q)\[^\n\r]\\(%r(?:d|s)i,%r(?:d|s)i\\), %(?:|r|e)ax" 4 } } */
/* { dg-final { scan-assembler-times "add(?:b|l|w|q)\[^\n\r]%(?:|r|e)si(?:|l), \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
@@ -134,3 +154,5 @@
/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)ax" 3 } } */
/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)di, %(?:|r|e)si, %(?:|r|e)ax" 2 } } */
/* { dg-final { scan-assembler-times "xor(?:l|w|q)\[^\n\r]%(?:|r|e)si, %(?:|r|e)di, %(?:|r|e)ax" 2 } } */
+/* { dg-final { scan-assembler-times "sal(?:b|l|w|q)\[^\n\r]*1, \\(%rdi\\), %(?:|r|e)a(?:x|l)" 4 } } */
+/* { dg-final { scan-assembler-times "sal(?:l|w|q)\[^\n\r]*7, %(?:|r|e)di, %(?:|r|e)ax" 4 } } */