Support logic shift left/right for avx512 mask type.
gcc/ChangeLog:
* config/i386/constraints.md (Wb): New constraint.
(Ww): Ditto.
* config/i386/i386.md (*ashlhi3_1): Extend to avx512 mask
shift.
(*ashlqi3_1): Ditto.
(*<insn><mode>3_1): Split to ..
(*ashr<mode>3_1): this, ...
(*lshr<mode>3_1): and this, also extend this pattern to avx512
mask registers.
(*<insn><mode>3_1): Split to ..
(*ashr<mode>3_1): this, ...
(*lshrqi3_1): and this, also extend this pattern to avx512
mask registers.
(*lshrhi3_1): And this, also extend this pattern to avx512
mask registers.
* config/i386/sse.md (k<code><mode>): New define_split after
it to convert generic shift pattern to mask shift ones.
gcc/testsuite/ChangeLog:
* gcc.target/i386/mask-shift.c: New test.
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 485e3f5..4aa28a5 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -222,6 +222,16 @@
(match_operand 0 "vector_all_ones_operand"))))
;; Integer constant constraints.
+(define_constraint "Wb"
+ "Integer constant in the range 0 @dots{} 7, for 8-bit shifts."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 7)")))
+
+(define_constraint "Ww"
+ "Integer constant in the range 0 @dots{} 15, for 16-bit shifts."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, 0, 15)")))
+
(define_constraint "I"
"Integer constant in the range 0 @dots{} 31, for 32-bit shifts."
(and (match_code "const_int")
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8b809c4..44ae18e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1136,6 +1136,7 @@
;; Immediate operand constraint for shifts.
(define_mode_attr S [(QI "I") (HI "I") (SI "I") (DI "J") (TI "O")])
+(define_mode_attr KS [(QI "Wb") (HI "Ww") (SI "I") (DI "J")])
;; Print register name in the specified mode.
(define_mode_attr k [(QI "b") (HI "w") (SI "k") (DI "q")])
@@ -11088,9 +11089,9 @@
(set_attr "mode" "<MODE>")])
(define_insn "*ashl<mode>3_1"
- [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
- (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
- (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r,?k")
+ (ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm,k")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r,<KS>")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
{
@@ -11098,6 +11099,7 @@
{
case TYPE_LEA:
case TYPE_ISHIFTX:
+ case TYPE_MSKLOG:
return "#";
case TYPE_ALU:
@@ -11113,7 +11115,7 @@
return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
- [(set_attr "isa" "*,*,bmi2")
+ [(set_attr "isa" "*,*,bmi2,avx512bw")
(set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
@@ -11123,6 +11125,8 @@
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
(const_string "alu")
+ (eq_attr "alternative" "3")
+ (const_string "msklog")
]
(const_string "ishift")))
(set (attr "length_immediate")
@@ -11218,15 +11222,16 @@
"operands[2] = gen_lowpart (SImode, operands[2]);")
(define_insn "*ashlhi3_1"
- [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp")
- (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l")
- (match_operand:QI 2 "nonmemory_operand" "cI,M")))
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm,Yp,?k")
+ (ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0,l,k")
+ (match_operand:QI 2 "nonmemory_operand" "cI,M,Ww")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (ASHIFT, HImode, operands)"
{
switch (get_attr_type (insn))
{
case TYPE_LEA:
+ case TYPE_MSKLOG:
return "#";
case TYPE_ALU:
@@ -11241,9 +11246,12 @@
return "sal{w}\t{%2, %0|%0, %2}";
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,avx512f")
+ (set (attr "type")
(cond [(eq_attr "alternative" "1")
(const_string "lea")
+ (eq_attr "alternative" "2")
+ (const_string "msklog")
(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
@@ -11259,18 +11267,19 @@
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
- (set_attr "mode" "HI,SI")])
+ (set_attr "mode" "HI,SI,HI")])
(define_insn "*ashlqi3_1"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp")
- (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l")
- (match_operand:QI 2 "nonmemory_operand" "cI,cI,M")))
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,r,Yp,?k")
+ (ashift:QI (match_operand:QI 1 "nonimmediate_operand" "0,0,l,k")
+ (match_operand:QI 2 "nonmemory_operand" "cI,cI,M,Wb")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (ASHIFT, QImode, operands)"
{
switch (get_attr_type (insn))
{
case TYPE_LEA:
+ case TYPE_MSKLOG:
return "#";
case TYPE_ALU:
@@ -11298,9 +11307,12 @@
}
}
}
- [(set (attr "type")
+ [(set_attr "isa" "*,*,*,avx512dq")
+ (set (attr "type")
(cond [(eq_attr "alternative" "2")
(const_string "lea")
+ (eq_attr "alternative" "3")
+ (const_string "msklog")
(and (and (match_test "TARGET_DOUBLE_WITH_ADD")
(match_operand 0 "register_operand"))
(match_operand 2 "const1_operand"))
@@ -11316,7 +11328,7 @@
(match_test "optimize_function_for_size_p (cfun)")))))
(const_string "0")
(const_string "*")))
- (set_attr "mode" "QI,SI,SI")
+ (set_attr "mode" "QI,SI,SI,QI")
;; Potential partial reg stall on alternative 1.
(set (attr "preferred_for_speed")
(cond [(eq_attr "alternative" "1")
@@ -11818,13 +11830,13 @@
[(set_attr "type" "ishiftx")
(set_attr "mode" "<MODE>")])
-(define_insn "*<insn><mode>3_1"
+(define_insn "*ashr<mode>3_1"
[(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
- (any_shiftrt:SWI48
+ (ashiftrt:SWI48
(match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
(match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
{
switch (get_attr_type (insn))
{
@@ -11834,9 +11846,9 @@
default:
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<shift>{<imodesuffix>}\t%0";
+ return "sar{<imodesuffix>}\t%0";
else
- return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
}
[(set_attr "isa" "*,bmi2")
@@ -11850,6 +11862,40 @@
(const_string "*")))
(set_attr "mode" "<MODE>")])
+(define_insn "*lshr<mode>3_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,?k")
+ (lshiftrt:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand" "0,rm,k")
+ (match_operand:QI 2 "nonmemory_operand" "c<S>,r,<KS>")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, <MODE>mode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ISHIFTX:
+ case TYPE_MSKLOG:
+ return "#";
+
+ default:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "shr{<imodesuffix>}\t%0";
+ else
+ return "shr{<imodesuffix>}\t{%2, %0|%0, %2}";
+ }
+}
+ [(set_attr "isa" "*,bmi2,avx512bw")
+ (set_attr "type" "ishift,ishiftx,msklog")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (and (match_operand 2 "const1_operand")
+ (eq_attr "alternative" "0"))
+ (ior (match_test "TARGET_SHIFT1")
+ (match_test "optimize_function_for_size_p (cfun)")))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "<MODE>")])
+
;; Convert shift to the shiftx pattern to avoid flags dependency.
(define_split
[(set (match_operand:SWI48 0 "register_operand")
@@ -11915,19 +11961,19 @@
(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
"operands[2] = gen_lowpart (SImode, operands[2]);")
-(define_insn "*<insn><mode>3_1"
+(define_insn "*ashr<mode>3_1"
[(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
- (any_shiftrt:SWI12
+ (ashiftrt:SWI12
(match_operand:SWI12 1 "nonimmediate_operand" "0")
(match_operand:QI 2 "nonmemory_operand" "c<S>")))
(clobber (reg:CC FLAGS_REG))]
- "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+ "ix86_binary_operator_ok (ASHIFTRT, <MODE>mode, operands)"
{
if (operands[2] == const1_rtx
&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
- return "<shift>{<imodesuffix>}\t%0";
+ return "sar{<imodesuffix>}\t%0";
else
- return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
+ return "sar{<imodesuffix>}\t{%2, %0|%0, %2}";
}
[(set_attr "type" "ishift")
(set (attr "length_immediate")
@@ -11939,6 +11985,74 @@
(const_string "*")))
(set_attr "mode" "<MODE>")])
+(define_insn "*lshrqi3_1"
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=qm,?k")
+ (lshiftrt:QI
+ (match_operand:QI 1 "nonimmediate_operand" "0, k")
+ (match_operand:QI 2 "nonmemory_operand" "cI,Wb")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, QImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ISHIFT:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "shr{b}\t%0";
+ else
+ return "shr{b}\t{%2, %0|%0, %2}";
+ case TYPE_MSKLOG:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "*,avx512dq")
+ (set_attr "type" "ishift,msklog")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (and (match_operand 2 "const1_operand")
+ (eq_attr "alternative" "0"))
+ (ior (match_test "TARGET_SHIFT1")
+ (match_test "optimize_function_for_size_p (cfun)")))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "QI")])
+
+(define_insn "*lshrhi3_1"
+ [(set (match_operand:HI 0 "nonimmediate_operand" "=rm, ?k")
+ (lshiftrt:HI
+ (match_operand:HI 1 "nonimmediate_operand" "0, k")
+ (match_operand:QI 2 "nonmemory_operand" "cI, Ww")))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (LSHIFTRT, HImode, operands)"
+{
+ switch (get_attr_type (insn))
+ {
+ case TYPE_ISHIFT:
+ if (operands[2] == const1_rtx
+ && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+ return "shr{w}\t%0";
+ else
+ return "shr{w}\t{%2, %0|%0, %2}";
+ case TYPE_MSKLOG:
+ return "#";
+ default:
+ gcc_unreachable ();
+ }
+}
+ [(set_attr "isa" "*, avx512f")
+ (set_attr "type" "ishift,msklog")
+ (set (attr "length_immediate")
+ (if_then_else
+ (and (and (match_operand 2 "const1_operand")
+ (eq_attr "alternative" "0"))
+ (ior (match_test "TARGET_SHIFT1")
+ (match_test "optimize_function_for_size_p (cfun)")))
+ (const_string "0")
+ (const_string "*")))
+ (set_attr "mode" "HI")])
+
(define_insn "*<insn><mode>3_1_slp"
[(set (strict_low_part (match_operand:SWI12 0 "register_operand" "+<r>"))
(any_shiftrt:SWI12 (match_operand:SWI12 1 "register_operand" "0")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ab29999..f8759e4 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1755,6 +1755,20 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
+(define_split
+ [(set (match_operand:SWI1248_AVX512BW 0 "mask_reg_operand")
+ (any_lshift:SWI1248_AVX512BW
+ (match_operand:SWI1248_AVX512BW 1 "mask_reg_operand")
+ (match_operand 2 "const_int_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512F && reload_completed"
+ [(parallel
+ [(set (match_dup 0)
+ (any_lshift:SWI1248_AVX512BW
+ (match_dup 1)
+ (match_dup 2)))
+ (unspec [(const_int 0)] UNSPEC_MASKOP)])])
+
(define_insn "ktest<mode>"
[(set (reg:CC FLAGS_REG)
(unspec:CC
diff --git a/gcc/testsuite/gcc.target/i386/mask-shift.c b/gcc/testsuite/gcc.target/i386/mask-shift.c
new file mode 100644
index 0000000..4cb6ef3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mask-shift.c
@@ -0,0 +1,83 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512dq -O2" } */
+
+#include<immintrin.h>
+void
+fooq (__m512i a, __m512i b, void* p)
+{
+ __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+ m1 >>= 4;
+ _mm512_mask_storeu_epi64 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrb} "1" } } */
+
+void
+food (__m512i a, __m512i b, void* p)
+{
+ __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+ m1 >>= 8;
+ _mm512_mask_storeu_epi32 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrw} "1" } } */
+
+void
+foow (__m512i a, __m512i b, void* p)
+{
+ __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+ m1 >>= 16;
+ _mm512_mask_storeu_epi16 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrd} "1" } } */
+
+void
+foob (__m512i a, __m512i b, void* p)
+{
+ __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+ m1 >>= 32;
+ _mm512_mask_storeu_epi8 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftrq} "1" { target { ! ia32 } } } } */
+
+void
+fooq1 (__m512i a, __m512i b, void* p)
+{
+ __mmask8 m1 = _mm512_cmpeq_epi64_mask (a, b);
+ m1 <<= 4;
+ _mm512_mask_storeu_epi64 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlb} "1" } } */
+
+void
+food1 (__m512i a, __m512i b, void* p)
+{
+ __mmask16 m1 = _mm512_cmpeq_epi32_mask (a, b);
+ m1 <<= 8;
+ _mm512_mask_storeu_epi32 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlw} "1" } } */
+
+void
+foow1 (__m512i a, __m512i b, void* p)
+{
+ __mmask32 m1 = _mm512_cmpeq_epi16_mask (a, b);
+ m1 <<= 16;
+ _mm512_mask_storeu_epi16 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftld} "1" } } */
+
+void
+foob1 (__m512i a, __m512i b, void* p)
+{
+ __mmask64 m1 = _mm512_cmpeq_epi8_mask (a, b);
+ m1 <<= 32;
+ _mm512_mask_storeu_epi8 (p, m1, a);
+}
+
+/* { dg-final { scan-assembler-times {(?n)kshiftlq} "1" { target { ! ia32 } } } } */