x86: Cpu64 handling improvements

First of all we want to also accumulate Cpu64's reverse dependencies, such
that we can use them in cpu_flags_match(). This is in particular in
preparation for the APX additions, such that e.g. BMI VEX-encoding templates
can become combined VEX/EVEX ones.

Once we have the reverse dependencies, we can further leverage them to
omit the explicit "|x64" from any insn templates dealing with 64-bit-mode-
only ISA extensions. Besides helping readability of several insn
templates we already have, this will also help with what is going to be
added for APX (as all of the new templates would otherwise need to have
"|x64").

Note that rather than leaving a meaningless CPU_64_FLAGS (which is
unused anyway), emitting of that macro is now suppressed as well.
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index c7b9a95..c6a1521 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -804,6 +804,9 @@
 /* CPU feature flags.  */
 i386_cpu_flags cpu_arch_flags = CPU_UNKNOWN_FLAGS;
 
+/* ISA extensions available in 64-bit mode only.  */
+static const i386_cpu_flags cpu_64_flags = CPU_ANY_64_FLAGS;
+
 /* If we have selected a cpu we are generating instructions for.  */
 static int cpu_arch_tune_set = 0;
 
@@ -1874,7 +1877,12 @@
   else
     {
       /* This instruction is available only on some archs.  */
-      i386_cpu_flags cpu = cpu_arch_flags;
+      i386_cpu_flags active, cpu;
+
+      if (flag_code != CODE_64BIT)
+	active = cpu_flags_and_not (cpu_arch_flags, cpu_64_flags);
+      else
+	active = cpu_arch_flags;
 
       /* Dual VEX/EVEX templates may need stripping of one of the flags.  */
       if (t->opcode_modifier.vex && t->opcode_modifier.evex)
@@ -1895,14 +1903,14 @@
 		{
 		  x.bitfield.cpuavx512f = 0;
 		  x.bitfield.cpuavx512vl = 0;
-		  if (x.bitfield.cpufma && !cpu.bitfield.cpufma)
+		  if (x.bitfield.cpufma && !active.bitfield.cpufma)
 		    x.bitfield.cpuavx = 0;
 		}
 	    }
 	}
 
       /* AVX512VL is no standalone feature - match it and then strip it.  */
-      if (x.bitfield.cpuavx512vl && !cpu.bitfield.cpuavx512vl)
+      if (x.bitfield.cpuavx512vl && !active.bitfield.cpuavx512vl)
 	return match;
       x.bitfield.cpuavx512vl = 0;
 
@@ -1912,7 +1920,7 @@
       if (x.bitfield.cpuavx && x.bitfield.cpuavx2)
 	x.bitfield.cpuavx2 = 0;
 
-      cpu = cpu_flags_and (x, cpu);
+      cpu = cpu_flags_and (x, active);
       if (!cpu_flags_all_zero (&cpu))
 	{
 	  if (t->cpu.bitfield.cpuavx && t->cpu.bitfield.cpuavx512f)
@@ -1921,7 +1929,7 @@
 		   ? cpu.bitfield.cpuavx512f
 		   : cpu.bitfield.cpuavx)
 		  && (!x.bitfield.cpufma || cpu.bitfield.cpufma
-		      || cpu_arch_flags.bitfield.cpuavx512f)
+		      || active.bitfield.cpuavx512f)
 		  && (!x.bitfield.cpugfni || cpu.bitfield.cpugfni)
 		  && (!x.bitfield.cpuvaes || cpu.bitfield.cpuvaes)
 		  && (!x.bitfield.cpuvpclmulqdq || cpu.bitfield.cpuvpclmulqdq))
diff --git a/opcodes/i386-gen.c b/opcodes/i386-gen.c
index b2ddda3..c118f01 100644
--- a/opcodes/i386-gen.c
+++ b/opcodes/i386-gen.c
@@ -166,6 +166,10 @@
     "AVX2" },
   { "AVX_NE_CONVERT",
     "AVX2" },
+  { "CX16",
+    "64" },
+  { "LKGS",
+    "64" },
   { "FRED",
     "LKGS" },
   { "AVX512F",
@@ -240,13 +244,13 @@
   { "SNP",
     "SEV_ES" },
   { "RMPQUERY",
-    "SNP" },
+    "SNP|64" },
   { "TSX",
     "RTM|HLE" },
   { "TSXLDTRK",
     "RTM" },
   { "AMX_TILE",
-    "XSAVE" },
+    "XSAVE|64" },
   { "AMX_INT8",
     "AMX_TILE" },
   { "AMX_BF16",
@@ -259,6 +263,18 @@
     "SSE2" },
   { "WIDEKL",
     "KL" },
+  { "PBNDKB",
+    "64" },
+  { "UINTR",
+    "64" },
+  { "PREFETCHI",
+    "64" },
+  { "CMPCCXADD",
+    "64" },
+  { "MSRLIST",
+    "64" },
+  { "USER_MSR",
+    "64" },
 };
 
 /* This array is populated as process_i386_initializers() walks cpu_flags[].  */
@@ -772,8 +788,10 @@
 	  }
 	free (deps);
 
-	/* ISA extensions with dependencies need CPU_ANY_*_FLAGS emitted.  */
-	if (reverse < ARRAY_SIZE (isa_reverse_deps[0]))
+	/* ISA extensions with dependencies need CPU_ANY_*_FLAGS emitted,
+	   unless the sole dependency is the "64-bit mode only" one.  */
+	if (reverse < ARRAY_SIZE (isa_reverse_deps[0])
+	    && strcmp (isa_dependencies[i].deps, "64"))
 	  isa_reverse_deps[reverse][reverse] = 1;
 
 	is_avx = orig_is_avx;
@@ -919,6 +937,15 @@
       size_t len = strlen (name);
       char *upper = xmalloc (len + 1);
 
+      /* Cpu64 is special: It specifies a mode dependency, not an ISA one.  Zap
+	 the flag from ISA initializer macros (and from CPU_ANY_64_FLAGS
+	 itself, where we only care about tracking its dependents).  Also don't
+	 emit the (otherwise all zero) CPU_64_FLAGS.  */
+      if (flag != NULL && reverse == Cpu64)
+	return;
+      if (is_isa || flag == NULL)
+	flags[Cpu64].value = 0;
+
       for (i = 0; i < len; ++i)
 	{
 	  /* Don't emit #define-s for auxiliary entries.  */
@@ -931,6 +958,14 @@
 	       flag != NULL ? "": "ANY_", upper);
       free (upper);
     }
+  else
+    {
+      /* Synthesize "64-bit mode only" dependencies from the dependencies we
+	 have accumulated.  */
+      for (i = 0; i < ARRAY_SIZE (isa_reverse_deps[0]); ++i)
+	if (flags[i].value && isa_reverse_deps[Cpu64][i])
+	  flags[Cpu64].value = 1;
+    }
 
   output_cpu_flags (table, flags, ARRAY_SIZE (flags), name != NULL,
 		    comma, indent, lineno);
@@ -2142,6 +2177,8 @@
   qsort (operand_types, ARRAY_SIZE (operand_types),
 	 sizeof (operand_types [0]), compare);
 
+  process_i386_initializers ();
+
   table = fopen ("i386-tbl.h", "w");
   if (table == NULL)
     fail ("can't create i386-tbl.h, errno = %s\n",
@@ -2151,7 +2188,6 @@
 
   process_i386_opcodes (table);
   process_i386_registers (table);
-  process_i386_initializers ();
 
   fclose (table);
 
diff --git a/opcodes/i386-init.h b/opcodes/i386-init.h
index 475db31..5aecf77 100644
--- a/opcodes/i386-init.h
+++ b/opcodes/i386-init.h
@@ -1398,16 +1398,6 @@
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, \
       0, 0, 0, 0, 0 } }
 
-#define CPU_64_FLAGS \
-  { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
-      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, \
-      0, 0, 0, 0, 0 } }
-
 #define CPU_AVX_FLAGS \
   { { 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, \
       0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, \
@@ -2518,6 +2508,16 @@
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, \
       0, 0, 0, 0, 0 } }
 
+#define CPU_ANY_64_FLAGS \
+  { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+      0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
+      0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, \
+      0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, \
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, \
+      0, 0, 0, 0, 0 } }
+
 #define CPU_ANY_AVX_FLAGS \
   { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \
       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, \
diff --git a/opcodes/i386-opc.tbl b/opcodes/i386-opc.tbl
index a3426298..d9540e3 100644
--- a/opcodes/i386-opc.tbl
+++ b/opcodes/i386-opc.tbl
@@ -158,6 +158,8 @@
 #define i287 287
 #define i387 387
 #define i687 687
+// Note: Don't add this one to any templates already specifying a 64-bit-mode-
+// only ISA extension: i386-gen takes care of adding such dependencies.
 #define x64 64
 
 ### MARKER ###
@@ -1273,7 +1275,7 @@
 
 // CMPXCHG16B instruction.
 
-cmpxchg16b, 0xfc7/1, CX16|x64, Modrm|NoSuf|Size64|LockPrefixOk, { Oword|Unspecified|BaseIndex }
+cmpxchg16b, 0xfc7/1, CX16, Modrm|NoSuf|Size64|LockPrefixOk, { Oword|Unspecified|BaseIndex }
 
 // MONITOR instructions.
 
@@ -3013,7 +3015,7 @@
 
 // PBNDKB instruction.
 
-pbndkb, 0x0f01c7, PBNDKB|x64, NoSuf, {}
+pbndkb, 0x0f01c7, PBNDKB, NoSuf, {}
 
 // PBNDKB instruction end.
 
@@ -3100,8 +3102,8 @@
 
 // RMPQUERY instruction
 
-rmpquery, 0xf30f01fd, RMPQUERY|x64, NoSuf, {}
-rmpquery, 0xf30f01fd, RMPQUERY|x64, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
+rmpquery, 0xf30f01fd, RMPQUERY, NoSuf, {}
+rmpquery, 0xf30f01fd, RMPQUERY, AddrPrefixOpReg|NoSuf, { Acc|Dword|Qword, RegC|Qword, RegD|Qword }
 
 // RMPQUERY instruction end
 
@@ -3126,26 +3128,26 @@
 
 // AMX instructions.
 
-ldtilecfg, 0x49/0, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
-sttilecfg, 0x6649/0, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
+ldtilecfg, 0x49/0, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
+sttilecfg, 0x6649/0, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex }
 
-tcmmimfp16ps, 0x666c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tcmmrlfp16ps, 0x6c, AMX_COMPLEX|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tcmmimfp16ps, 0x666c, AMX_COMPLEX, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tcmmrlfp16ps, 0x6c, AMX_COMPLEX, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
 
-tdpbf16ps, 0xf35c, AMX_BF16|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpfp16ps, 0xf25c, AMX_FP16|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbssd, 0xf25e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbuud, 0x5e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbusd, 0x665e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
-tdpbsud, 0xf35e, AMX_INT8|x64, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbf16ps, 0xf35c, AMX_BF16, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpfp16ps, 0xf25c, AMX_FP16, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbssd, 0xf25e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbuud, 0x5e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbusd, 0x665e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
+tdpbsud, 0xf35e, AMX_INT8, Modrm|Vex128|Space0F38|VexVVVV|VexW0|SwapSources|NoSuf, { RegTMM, RegTMM, RegTMM }
 
-tileloadd, 0xf24b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
-tileloaddt1, 0x664b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
-tilestored, 0xf34b, AMX_TILE|x64, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { RegTMM, Unspecified|BaseIndex }
+tileloadd, 0xf24b, AMX_TILE, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
+tileloaddt1, 0x664b, AMX_TILE, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { Unspecified|BaseIndex, RegTMM }
+tilestored, 0xf34b, AMX_TILE, Sibmem|Vex128|Space0F38|VexW0|NoSuf, { RegTMM, Unspecified|BaseIndex }
 
-tilerelease, 0x49c0, AMX_TILE|x64, Vex128|Space0F38|VexW0|NoSuf, {}
+tilerelease, 0x49c0, AMX_TILE, Vex128|Space0F38|VexW0|NoSuf, {}
 
-tilezero, 0xf249, AMX_TILE|x64, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM }
+tilezero, 0xf249, AMX_TILE, Modrm|Vex128|Space0F38|VexW0|NoSuf, { RegTMM }
 
 // AMX instructions end.
 
@@ -3176,11 +3178,11 @@
 
 // UINTR instructions.
 
-uiret, 0xf30f01ec, UINTR|x64, NoSuf, {}
-clui, 0xf30f01ee, UINTR|x64, NoSuf, {}
-stui, 0xf30f01ef, UINTR|x64, NoSuf, {}
-testui, 0xf30f01ed, UINTR|x64, NoSuf, {}
-senduipi, 0xf30fc7/6, UINTR|x64, Modrm|NoSuf|NoRex64, { Reg64 }
+uiret, 0xf30f01ec, UINTR, NoSuf, {}
+clui, 0xf30f01ee, UINTR, NoSuf, {}
+stui, 0xf30f01ef, UINTR, NoSuf, {}
+testui, 0xf30f01ed, UINTR, NoSuf, {}
+senduipi, 0xf30fc7/6, UINTR, Modrm|NoSuf|NoRex64, { Reg64 }
 
 // UINTR instructions end.
 
@@ -3302,14 +3304,14 @@
 
 // PREFETCHI instructions.
 
-prefetchit0, 0xf18/7, PREFETCHI|x64, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
-prefetchit1, 0xf18/6, PREFETCHI|x64, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
+prefetchit0, 0xf18/7, PREFETCHI, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
+prefetchit1, 0xf18/6, PREFETCHI, Modrm|Anysize|IgnoreSize|NoSuf, { BaseIndex }
 
 // PREFETCHI instructions end.
 
 // CMPCCXADD instructions.
 
-cmp<cc>xadd, 0x66e<cc:opc>, CMPCCXADD|x64, Modrm|Vex|Space0F38|VexVVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
+cmp<cc>xadd, 0x66e<cc:opc>, CMPCCXADD, Modrm|Vex|Space0F38|VexVVVV|SwapSources|CheckOperandSize|NoSuf, { Reg32|Reg64, Reg32|Reg64, Dword|Qword|Unspecified|BaseIndex }
 
 // CMPCCXADD instructions end.
 
@@ -3321,8 +3323,8 @@
 
 // MSRLIST instructions.
 
-rdmsrlist, 0xf20f01c6, MSRLIST|x64, NoSuf, {}
-wrmsrlist, 0xf30f01c6, MSRLIST|x64, NoSuf, {}
+rdmsrlist, 0xf20f01c6, MSRLIST, NoSuf, {}
+wrmsrlist, 0xf30f01c6, MSRLIST, NoSuf, {}
 
 // MSRLIST instructions end.
 
@@ -3337,23 +3339,23 @@
 
 // LKGS instruction.
 
-lkgs, 0xf20f00/6, LKGS|x64, Modrm|IgnoreSize|No_bSuf|No_sSuf|NoRex64, { Reg16|Reg32|Reg64 }
-lkgs, 0xf20f00/6, LKGS|x64, Modrm|IgnoreSize|No_bSuf|No_lSuf|No_sSuf|No_qSuf, { Word|Unspecified|BaseIndex }
+lkgs, 0xf20f00/6, LKGS, Modrm|IgnoreSize|No_bSuf|No_sSuf|NoRex64, { Reg16|Reg32|Reg64 }
+lkgs, 0xf20f00/6, LKGS, Modrm|IgnoreSize|No_bSuf|No_lSuf|No_sSuf|No_qSuf, { Word|Unspecified|BaseIndex }
 
 // LKGS instruction end.
 
 // FRED instructions.
 
-erets, 0xf20f01ca, FRED|x64, NoSuf, {}
-eretu, 0xf30f01ca, FRED|x64, NoSuf, {}
+erets, 0xf20f01ca, FRED, NoSuf, {}
+eretu, 0xf30f01ca, FRED, NoSuf, {}
 
 // FRED instructions end.
 
 // USER_MSR instructions.
 
-urdmsr, 0xf20f38f8, USER_MSR|x64, RegMem|NoSuf|NoRex64, { Reg64, Reg64 }
-urdmsr, 0xf2f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Imm32, Reg64 }
-uwrmsr, 0xf30f38f8, USER_MSR|x64, Modrm|NoSuf|NoRex64, { Reg64, Reg64 }
-uwrmsr, 0xf3f8/0, USER_MSR|x64, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Reg64, Imm32 }
+urdmsr, 0xf20f38f8, USER_MSR, RegMem|NoSuf|NoRex64, { Reg64, Reg64 }
+urdmsr, 0xf2f8/0, USER_MSR, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Imm32, Reg64 }
+uwrmsr, 0xf30f38f8, USER_MSR, Modrm|NoSuf|NoRex64, { Reg64, Reg64 }
+uwrmsr, 0xf3f8/0, USER_MSR, Modrm|Vex128|VexMap7|VexW0|NoSuf, { Reg64, Imm32 }
 
 // USER_MSR instructions end.