Refine splitters related to "combine vpcmpuw + zero_extend to vpcmpuw"
r12-6103-g1a7ce8570997eb combines vpcmpuw + zero_extend to vpcmpuw
with the pre_reload splitter, but the splitter transforms the
zero_extend into a subreg which make reload think the upper part is
garbage, it's not correct.
The patch adjusts the zero_extend define_insn_and_split to
define_insn to keep zero_extend.
gcc/ChangeLog:
PR target/117159
* config/i386/sse.md
(*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Change from define_insn_and_split to define_insn.
(*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Ditto.
(*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Ditto.
(*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>):
Ditto.
(*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Split to the zero_extend pattern.
(*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
(*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
(*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>_2):
Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/pr117159.c: New test.
* gcc.target/i386/avx512bw-pr103750-1.c: Remove xfail.
* gcc.target/i386/avx512bw-pr103750-2.c: Remove xfail.
(cherry picked from commit 5259d3927c1c8e3a15b4b844adef59b48c241233)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index c91a7e0..e186b6a 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4006,32 +4006,19 @@
;; Since vpcmpd implicitly clear the upper bits of dest, transform
;; vpcmpd + zero_extend to vpcmpd since the instruction
-(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
- [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
(zero_extend:SWI248x
(unspec:<V48H_AVX512VL:avx512fmaskmode>
- [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
- (match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v")
+ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_PCMP)))]
"TARGET_AVX512F
&& (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
- && ix86_pre_reload_split ()
&& (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
< GET_MODE_PRECISION (<SWI248x:MODE>mode))"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (unspec:<V48H_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_PCMP))]
-{
- operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
- operands[0], <SWI248x:MODE>mode);
-}
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -4059,21 +4046,22 @@
"#"
"&& 1"
[(set (match_dup 0)
- (unspec:<V48H_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_PCMP))
- (set (match_dup 4) (match_dup 0))]
+ (zero_extend:SWI248x
+ (unspec:<V48H_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP)))
+ (set (match_dup 4) (match_dup 5))]
{
- operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
+ operands[5] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
operands[0], <SWI248x:MODE>mode);
-}
- [(set_attr "type" "ssecmp")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
+ if (SUBREG_P (operands[5]))
+ {
+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+ SUBREG_PROMOTED_SET (operands[5], 1);
+ }
+})
(define_insn_and_split "*<avx512>_cmp<mode>3"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
@@ -4108,31 +4096,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
- [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
(zero_extend:SWI248x
(unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_PCMP)))]
"TARGET_AVX512BW
- && ix86_pre_reload_split ()
- && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
- < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_PCMP))]
-{
- operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
- operands[0], <SWI248x:MODE>mode);
-}
+ && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+ "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -4159,16 +4134,21 @@
"#"
"&& 1"
[(set (match_dup 0)
- (unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_PCMP))
- (set (match_dup 4) (match_dup 0))]
+ (zero_extend:SWI248x
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP)))
+ (set (match_dup 4) (match_dup 5))]
{
- operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+ operands[5] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
operands[0], <SWI248x:MODE>mode);
+ if (SUBREG_P (operands[5]))
+ {
+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+ SUBREG_PROMOTED_SET (operands[5], 1);
+ }
}
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
@@ -4226,31 +4206,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
- [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
(zero_extend:SWI248x
(unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_UNSIGNED_PCMP)))]
"TARGET_AVX512BW
- && ix86_pre_reload_split ()
&& (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
< GET_MODE_PRECISION (<SWI248x:MODE>mode))"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_UNSIGNED_PCMP))]
-{
- operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
- operands[0], <SWI248x:MODE>mode);
-}
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -4278,16 +4245,21 @@
"#"
"&& 1"
[(set (match_dup 0)
- (unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_UNSIGNED_PCMP))
- (set (match_dup 4) (match_dup 0))]
+ (zero_extend:SWI248x
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP)))
+ (set (match_dup 4) (match_dup 5))]
{
- operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+ operands[5] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
operands[0], <SWI248x:MODE>mode);
+ if (SUBREG_P (operands[5]))
+ {
+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+ SUBREG_PROMOTED_SET (operands[5], 1);
+ }
}
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
@@ -4323,32 +4295,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
- [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
(zero_extend:SWI248x
(unspec:<VI48_AVX512VL:avx512fmaskmode>
- [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
- (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_UNSIGNED_PCMP)))]
"TARGET_AVX512F
&& (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
- && ix86_pre_reload_split ()
&& (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
< GET_MODE_PRECISION (<SWI248x:MODE>mode))"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (unspec:<VI48_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_UNSIGNED_PCMP))]
-{
- operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
- operands[0], <SWI248x:MODE>mode);
-}
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -4376,16 +4335,21 @@
"#"
"&& 1"
[(set (match_dup 0)
- (unspec:<VI48_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_UNSIGNED_PCMP))
- (set (match_dup 4) (match_dup 0))]
+ (zero_extend:SWI248x
+ (unspec:<VI48_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP)))
+ (set (match_dup 4) (match_dup 5))]
{
- operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
+ operands[5] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
operands[0], <SWI248x:MODE>mode);
+ if (SUBREG_P (operands[5]))
+ {
+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+ SUBREG_PROMOTED_SET (operands[5], 1);
+ }
}
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
index b1165f0..e7d6183 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-1.c
@@ -1,8 +1,7 @@
/* PR target/103750 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512bw -mavx512vl" } */
-/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
-/* xfail need to be fixed. */
+/* { dg-final { scan-assembler-not "kmov" } } */
#include <immintrin.h>
extern __m128i* pi128;
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
index 7303f54..3392e19 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr103750-2.c
@@ -1,8 +1,7 @@
/* PR target/103750 */
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512dq -mavx512bw -mavx512vl" } */
-/* { dg-final { scan-assembler-not "kmov" { xfail ia32 } } } */
-/* xfail need to be fixed. */
+/* { dg-final { scan-assembler-not "kmov" } } */
#include <immintrin.h>
extern __m128i* pi128;
diff --git a/gcc/testsuite/gcc.target/i386/pr117159.c b/gcc/testsuite/gcc.target/i386/pr117159.c
new file mode 100644
index 0000000..b67d682
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr117159.c
@@ -0,0 +1,42 @@
+/* { dg-do run } */
+/* { dg-options "-Os -mavx512bw" } */
+/* { dg-require-effective-target avx512bw } */
+
+typedef __attribute__((__vector_size__ (4))) unsigned char W;
+typedef __attribute__((__vector_size__ (64))) int V;
+typedef __attribute__((__vector_size__ (64))) long long Vq;
+
+W w;
+V v;
+Vq vq;
+
+static inline W
+foo (short m)
+{
+ unsigned k = __builtin_ia32_pcmpgtq512_mask ((Vq) { }, vq, m);
+ W r = (W) k + w;
+ return r;
+}
+
+static inline W
+foo1 (short m)
+{
+ unsigned k = __builtin_ia32_pcmpgtd512_mask ((V) {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, v, m);
+ W r = (W) k + w;
+ return r;
+}
+
+int
+main ()
+{
+ if (!__builtin_cpu_supports ("avx512bw"))
+ return 0;
+ W y = foo1 (65535);
+ if (!y[0] || !y[1] || y[2] || y[3])
+ __builtin_abort();
+ W x = foo (65535);
+ if (x[0] || x[1] || x[2] || x[3])
+ __builtin_abort();
+
+ return 0;
+}