x86: widen @got{,pcrel} support to PUSH and APX IMUL

Since we already transform GOT loads to immediate operand forms for MOV
and various ALU insns, there is little reason not to support the same
conversion for the two other insns which have corresponding immediate
operand forms. Unfortunately, for IMUL (due to the 0F opcode prefix)
there is no suitable relocation, so the pre-APX forms cannot be marked
for relaxation in the assembler.
diff --git a/bfd/elf32-i386.c b/bfd/elf32-i386.c
index ca82016..77ae135 100644
--- a/bfd/elf32-i386.c
+++ b/bfd/elf32-i386.c
@@ -1209,6 +1209,10 @@
    to
    test $foo, %reg1
    and convert
+   push foo@GOT[(%reg)]
+   to
+   push $foo
+   and convert
    binop foo@GOT[(%reg1)], %reg2
    to
    binop $foo, %reg2
@@ -1233,7 +1237,7 @@
   unsigned int addend;
   unsigned int nop;
   bfd_vma nop_offset;
-  bool is_pic;
+  bool is_pic, is_branch = false;
   bool to_reloc_32;
   bool abs_symbol;
   unsigned int r_type;
@@ -1301,6 +1305,23 @@
 
   opcode = bfd_get_8 (abfd, contents + roff - 2);
 
+  if (opcode == 0xff)
+    {
+      switch (modrm & 0x38)
+	{
+	case 0x10: /* CALL */
+	case 0x20: /* JMP */
+	  is_branch = true;
+	  break;
+
+	case 0x30: /* PUSH */
+	  break;
+
+	default:
+	  return true;
+	}
+    }
+
   /* Convert to R_386_32 if PIC is false or there is no base
      register.  */
   to_reloc_32 = !is_pic || baseless;
@@ -1311,7 +1332,7 @@
      reloc.  */
   if (h == NULL)
     {
-      if (opcode == 0x0ff)
+      if (is_branch)
 	/* Convert "call/jmp *foo@GOT[(%reg)]".  */
 	goto convert_branch;
       else
@@ -1327,7 +1348,7 @@
       && !eh->linker_def
       && local_ref)
     {
-      if (opcode == 0xff)
+      if (is_branch)
 	{
 	  /* No direct branch to 0 for PIC.  */
 	  if (is_pic)
@@ -1343,7 +1364,7 @@
 	}
     }
 
-  if (opcode == 0xff)
+  if (is_branch)
     {
       /* We have "call/jmp *foo@GOT[(%reg)]".  */
       if ((h->root.type == bfd_link_hash_defined
@@ -1399,7 +1420,8 @@
   else
     {
-      /* We have "mov foo@GOT[(%re1g)], %reg2",
-	 "test %reg1, foo@GOT(%reg2)" and
+      /* We have "mov foo@GOT[(%reg1)], %reg2",
+	 "test %reg1, foo@GOT(%reg2)",
+	 "push foo@GOT[(%reg)]", or
 	 "binop foo@GOT[(%reg1)], %reg2".
 
 	 Avoid optimizing _DYNAMIC since ld.so may use its
@@ -1460,6 +1482,13 @@
 		  modrm = 0xc0 | ((modrm & 0x38) >> 3) | (opcode & 0x38);
 		  opcode = 0x81;
 		}
+	      else if (opcode == 0xff)
+		{
+		  /* Convert "push foo@GOT(%reg)" to
+		     "push $foo".  */
+		  modrm = 0x68; /* Really the opcode.  */
+		  opcode = 0x2e; /* Really a meaningless %cs: prefix.  */
+		}
 	      else
 		return true;
 
diff --git a/bfd/elf64-x86-64.c b/bfd/elf64-x86-64.c
index 5accb75..3d68f97 100644
--- a/bfd/elf64-x86-64.c
+++ b/bfd/elf64-x86-64.c
@@ -1739,13 +1739,16 @@
 }
 
-/* Move the R bits to the B bits in EVEX payload byte 1.  */
-static unsigned int evex_move_r_to_b (unsigned int byte1)
+/* Move (if COPY, copy) the R bits to the B bits in EVEX payload byte 1.  */
+static unsigned int evex_move_r_to_b (unsigned int byte1, bool copy)
 {
   byte1 = (byte1 & ~(1 << 5)) | ((byte1 & (1 << 7)) >> 2); /* R3 -> B3 */
   byte1 = (byte1 & ~(1 << 3)) | ((~byte1 & (1 << 4)) >> 1); /* R4 -> B4 */
 
   /* Set both R bits, as they're inverted.  */
-  return byte1 | (1 << 4) | (1 << 7);
+  if (!copy)
+    byte1 |= (1 << 4) | (1 << 7);
+
+  return byte1;
 }
 
 /* With the local symbol, foo, we convert
@@ -1762,10 +1765,14 @@
    to
    test $foo, %reg
    and convert
+   push foo@GOTPCREL(%rip)
+   to
+   push $foo
+   and convert
    binop foo@GOTPCREL(%rip), %reg
    to
    binop $foo, %reg
-   where binop is one of adc, add, and, cmp, or, sbb, sub, xor
+   where binop is one of adc, add, and, cmp, imul, or, sbb, sub, xor
    instructions.  */
 
 static bool
@@ -1782,6 +1789,7 @@
   bool is_pic;
   bool no_overflow;
   bool relocx;
+  bool is_branch = false;
   bool to_reloc_pc32;
   bool abs_symbol;
   bool local_ref;
@@ -1878,6 +1886,23 @@
   r_symndx = htab->r_sym (irel->r_info);
 
   opcode = bfd_get_8 (abfd, contents + roff - 2);
+  modrm = bfd_get_8 (abfd, contents + roff - 1);
+  if (opcode == 0xff)
+    {
+      switch (modrm & 0x38)
+	{
+	case 0x10: /* CALL */
+	case 0x20: /* JMP */
+	  is_branch = true;
+	  break;
+
+	case 0x30: /* PUSH */
+	  break;
+
+	default:
+	  return true;
+	}
+    }
 
   /* Convert mov to lea since it has been done for a while.  */
   if (opcode != 0x8b)
@@ -1895,7 +1920,7 @@
      3. no_overflow is true.
      4. PIC.
      */
-  to_reloc_pc32 = (opcode == 0xff
+  to_reloc_pc32 = (is_branch
 		   || !relocx
 		   || no_overflow
 		   || is_pic);
@@ -1950,7 +1975,7 @@
 	      && !eh->linker_def
 	      && local_ref))
 	{
-	  if (opcode == 0xff)
+	  if (is_branch)
 	    {
 	      /* Skip for branch instructions since R_X86_64_PC32
 		 may overflow.  */
@@ -2055,7 +2080,7 @@
   else
     relocation = 0;
 
-  if (opcode == 0xff)
+  if (is_branch)
     {
       /* We have "call/jmp *foo@GOTPCREL(%rip)".  */
       unsigned int nop;
@@ -2074,7 +2099,6 @@
 
       /* Convert R_X86_64_GOTPCRELX and R_X86_64_REX_GOTPCRELX to
 	 R_X86_64_PC32.  */
-      modrm = bfd_get_8 (abfd, contents + roff - 1);
       if (modrm == 0x25)
 	{
 	  /* Convert to "jmp foo nop".  */
@@ -2119,11 +2143,12 @@
     }
   else if (r_type == R_X86_64_CODE_6_GOTPCRELX && opcode != 0x8b)
     {
+      bool move_v_r = false;
+
       /* R_X86_64_PC32 isn't supported.  */
       if (to_reloc_pc32)
 	return true;
 
-      modrm = bfd_get_8 (abfd, contents + roff - 1);
       if (opcode == 0x85)
 	{
 	  /* Convert "ctest<cc> %reg, foo@GOTPCREL(%rip)" to
@@ -2149,6 +2174,23 @@
 	  modrm = 0xc0 | ((modrm & 0x38) >> 3) | (opcode & 0x38);
 	  opcode = 0x81;
 	}
+      else if (opcode == 0xaf)
+	{
+	  if (!(evex[2] & 0x10))
+	    {
+	      /* Convert "imul foo@GOTPCREL(%rip), %reg" to
+	         "imul $foo, %reg, %reg".  */
+	      modrm = 0xc0 | ((modrm & 0x38) >> 3) | (modrm & 0x38);
+	    }
+	  else
+	    {
+	      /* Convert "imul foo@GOTPCREL(%rip), %reg1, %reg2" to
+	         "imul $foo, %reg1, %reg2".  */
+	      modrm = 0xc0 | ((modrm & 0x38) >> 3) | (~evex[1] & 0x38);
+	      move_v_r = true;
+	    }
+	  opcode = 0x69;
+	}
       else
 	return true;
 
@@ -2182,7 +2224,23 @@
       bfd_put_8 (abfd, opcode, contents + roff - 2);
       bfd_put_8 (abfd, modrm, contents + roff - 1);
 
-      evex[0] = evex_move_r_to_b (evex[0]);
+      evex[0] = evex_move_r_to_b (evex[0], opcode == 0x69 && !move_v_r);
+      if (move_v_r)
+	{
+	  /* Move the top two V bits to the R bits in EVEX payload byte 1.
+	     Note that evex_move_r_to_b() set both R bits.  */
+	  if (!(evex[1] & (1 << 6)))
+	    evex[0] &= ~(1 << 7); /* V3 -> R3 */
+	  if (!(evex[2] & (1 << 3)))
+	    evex[0] &= ~(1 << 4); /* V4 -> R4 */
+	  /* Set all V bits, as they're inverted.  */
+	  evex[1] |= 0xf << 3;
+	  evex[2] |= 1 << 3;
+	  /* Clear the ND (ZU) bit (it ought to be ignored anyway).  */
+	  evex[2] &= ~(1 << 4);
+	  bfd_put_8 (abfd, evex[2], contents + roff - 3);
+	  bfd_put_8 (abfd, evex[1], contents + roff - 4);
+	}
       bfd_put_8 (abfd, evex[0], contents + roff - 5);
 
       /* No addend for R_X86_64_32/R_X86_64_32S relocations.  */
@@ -2225,7 +2283,10 @@
 	{
 	  if (bfd_get_8 (abfd, contents + roff - 4) == 0xd5)
 	    {
-	      rex2 = bfd_get_8 (abfd, contents + roff - 3);
+	      /* Make sure even an all-zero payload leaves a non-zero value
+		 in the variable.  */
+	      rex2 = bfd_get_8 (abfd, contents + roff - 3) | 0x100;
+	      rex2_mask |= 0x100;
 	      rex_w = (rex2 & REX_W) != 0;
 	    }
 	  else if (bfd_get_8 (abfd, contents + roff - 4) == 0x0f)
@@ -2267,7 +2328,6 @@
 	      /* Convert "mov foo@GOTPCREL(%rip), %reg" to
 		 "mov $foo, %reg".  */
 	      opcode = 0xc7;
-	      modrm = bfd_get_8 (abfd, contents + roff - 1);
 	      modrm = 0xc0 | (modrm & 0x38) >> 3;
 	      if (rex_w && ABI_64_P (link_info->output_bfd))
 		{
@@ -2294,7 +2354,6 @@
 	  if (to_reloc_pc32)
 	    return true;
 
-	  modrm = bfd_get_8 (abfd, contents + roff - 1);
 	  if (opcode == 0x85)
 	    {
 	      /* Convert "test %reg, foo@GOTPCREL(%rip)" to
@@ -2309,6 +2368,39 @@
 	      modrm = 0xc0 | ((modrm & 0x38) >> 3) | (opcode & 0x38);
 	      opcode = 0x81;
 	    }
+	  else if (opcode == 0xaf && (rex2 & (REX2_M << 4)))
+	    {
+	      /* Convert "imul foo@GOTPCREL(%rip), %reg" to
+		 "imul $foo, %reg, %reg".  */
+	      modrm = 0xc0 | ((modrm & 0x38) >> 3) | (modrm & 0x38);
+	      rex_mask = 0;
+	      rex2_mask = REX2_M << 4;
+	      opcode = 0x69;
+	    }
+	  else if (opcode == 0xff && !(rex2 & (REX2_M << 4)))
+	    {
+	      /* Convert "push foo@GOTPCREL(%rip)" to
+		 "push $foo".  */
+	      bfd_put_8 (abfd, 0x68, contents + roff - 1);
+	      if (rex)
+		{
+		  bfd_put_8 (abfd, 0x2e, contents + roff - 3);
+		  bfd_put_8 (abfd, rex, contents + roff - 2);
+		}
+	      else if (rex2)
+		{
+		  bfd_put_8 (abfd, 0x2e, contents + roff - 4);
+		  bfd_put_8 (abfd, 0xd5, contents + roff - 3);
+		  bfd_put_8 (abfd, rex2, contents + roff - 2);
+		}
+	      else
+		bfd_put_8 (abfd, 0x2e, contents + roff - 2);
+
+	      r_type = R_X86_64_32S;
+	      /* No addend for R_X86_64_32S relocations.  */
+	      irel->r_addend = 0;
+	      goto finish;
+	    }
 	  else
 	    return true;
 
@@ -2377,6 +2469,7 @@
 	}
     }
 
+ finish:
   *r_type_p = r_type;
   irel->r_info = htab->r_info (r_symndx,
 			       r_type | R_X86_64_converted_reloc_bit);
@@ -4467,7 +4560,7 @@
 		      continue;
 		    }
 
-		  byte1 = evex_move_r_to_b (byte1);
+		  byte1 = evex_move_r_to_b (byte1, false);
 		  bfd_put_8 (output_bfd, byte1, contents + roff - 5);
 		  bfd_put_8 (output_bfd, 0x81, contents + roff - 2);
 		  bfd_put_8 (output_bfd, 0xc0 | reg, contents + roff - 1);
diff --git a/gas/config/tc-i386.c b/gas/config/tc-i386.c
index 53743fb..d44efe8 100644
--- a/gas/config/tc-i386.c
+++ b/gas/config/tc-i386.c
@@ -12932,9 +12932,9 @@
 	      else if (object_64bit)
 		continue;
 
-	      /* Check for "call/jmp *mem", "mov mem, %reg", "movrs mem, %reg",
-		 "test %reg, mem" and "binop mem, %reg" where binop
-		 is one of adc, add, and, cmp, or, sbb, sub, xor
+	      /* Check for "call/jmp *mem", "push mem", "mov mem, %reg",
+		 "movrs mem, %reg", "test %reg, mem" and "binop mem, %reg" where
+		 binop is one of adc, add, and, cmp, or, sbb, sub, xor, or imul
 		 instructions without data prefix.  Always generate
 		 R_386_GOT32X for "sym*GOT" operand in 32-bit mode.  */
 	      unsigned int space = dot_insn () ? i.insn_opcode_space
@@ -12944,7 +12944,7 @@
 		      || (i.rm.mode == 0 && i.rm.regmem == 5))
 		  && ((space == SPACE_BASE
 		       && i.tm.base_opcode == 0xff
-		       && (i.rm.reg == 2 || i.rm.reg == 4))
+		       && (i.rm.reg == 2 || i.rm.reg == 4 || i.rm.reg == 6))
 		      || ((space == SPACE_BASE
 			   || space == SPACE_0F38
 			   || space == SPACE_MAP4)
@@ -12953,7 +12953,13 @@
 			   || space == SPACE_MAP4)
 			  && (i.tm.base_opcode == 0x85
 			      || (i.tm.base_opcode
-				  | (i.operands > 2 ? 0x3a : 0x38)) == 0x3b))))
+				  | (i.operands > 2 ? 0x3a : 0x38)) == 0x3b))
+		      || (((space == SPACE_0F
+			    /* Because of the 0F prefix, no suitable relocation
+			       exists for this unless it's REX2-encoded.  */
+			    && is_apx_rex2_encoding ())
+			   || space == SPACE_MAP4)
+			  && i.tm.base_opcode == 0xaf)))
 		{
 		  if (object_64bit)
 		    {
diff --git a/ld/testsuite/ld-i386/i386.exp b/ld/testsuite/ld-i386/i386.exp
index f84b4d4..dc91f6b 100644
--- a/ld/testsuite/ld-i386/i386.exp
+++ b/ld/testsuite/ld-i386/i386.exp
@@ -374,6 +374,8 @@
 run_dump_test "load6"
 run_dump_test "load7"
 run_dump_test "load8"
+run_dump_test "load9a"
+run_dump_test "load9b"
 run_dump_test "pr19175"
 run_dump_test "pr19615"
 run_dump_test "pr19636-1a"
diff --git a/ld/testsuite/ld-i386/load9.s b/ld/testsuite/ld-i386/load9.s
new file mode 100644
index 0000000..0f42268
--- /dev/null
+++ b/ld/testsuite/ld-i386/load9.s
@@ -0,0 +1,20 @@
+	.data
+	.type	bar, @object
+bar:
+	.byte	1
+	.size	bar, .-bar
+	.globl	foo
+	.type	foo, @object
+foo:
+	.byte	1
+	.size	foo, .-foo
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	push	bar@GOT(%ecx)
+	push	foo@GOT(%edx)
+	.ifndef PIC
+	push	foo@GOT
+	.endif
+	.size	_start, .-_start
diff --git a/ld/testsuite/ld-i386/load9a.d b/ld/testsuite/ld-i386/load9a.d
new file mode 100644
index 0000000..77a27b4
--- /dev/null
+++ b/ld/testsuite/ld-i386/load9a.d
@@ -0,0 +1,14 @@
+#source: load9.s
+#as: --32 -mrelax-relocations=yes
+#ld: -melf_i386 -z noseparate-code
+#objdump: -dw
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+8048074 <_start>:
+[ 	]*[a-f0-9]+:	2e 68 86 90 04 08    	cs push \$0x8049086
+[ 	]*[a-f0-9]+:	2e 68 87 90 04 08    	cs push \$0x8049087
+[ 	]*[a-f0-9]+:	2e 68 87 90 04 08    	cs push \$0x8049087
+#pass
diff --git a/ld/testsuite/ld-i386/load9b.d b/ld/testsuite/ld-i386/load9b.d
new file mode 100644
index 0000000..f3b1c71
--- /dev/null
+++ b/ld/testsuite/ld-i386/load9b.d
@@ -0,0 +1,13 @@
+#source: load9.s
+#as: --32 -mshared -mrelax-relocations=yes --defsym PIC=1
+#ld: -melf_i386 -shared -z noseparate-code
+#objdump: -dw
+
+.*: +file format .*
+
+Disassembly of section .text:
+
+0+[0-9a-f]+ <_start>:
+[ 	]*[0-9a-f]+:	ff b1 f8 ff ff ff    	push   -0x8\(%ecx\)
+[ 	]*[0-9a-f]+:	ff b2 fc ff ff ff    	push   -0x4\(%edx\)
+#pass
diff --git a/ld/testsuite/ld-x86-64/apx-load1.s b/ld/testsuite/ld-x86-64/apx-load1.s
index 7cd39f6..5877702 100644
--- a/ld/testsuite/ld-x86-64/apx-load1.s
+++ b/ld/testsuite/ld-x86-64/apx-load1.s
@@ -118,5 +118,11 @@
 	sub	%rbp, bar@GOTPCREL(%rip), %r21
 	xor	%rsi, bar@GOTPCREL(%rip), %r22
 
+	imul	bar@GOTPCREL(%rip), %r17
+	{nf} imul bar@GOTPCREL(%rip), %r17
+	imul	bar@GOTPCREL(%rip), %r17, %rdx
+	imul	bar@GOTPCREL(%rip), %rcx, %r18
+	{rex2} pushq bar@GOTPCREL(%rip)
+
 	.size	_start, .-_start
 	.p2align 12, 0x90
diff --git a/ld/testsuite/ld-x86-64/apx-load1a.d b/ld/testsuite/ld-x86-64/apx-load1a.d
index 83d4cd2..87ec931 100644
--- a/ld/testsuite/ld-x86-64/apx-load1a.d
+++ b/ld/testsuite/ld-x86-64/apx-load1a.d
@@ -115,4 +115,9 @@
  +[a-f0-9]+:	62 f4 dc 10 19 25 74 0c 20 00 	sbb    %rsp,0x200c74\(%rip\),%r20        # 602000 <.*>
  +[a-f0-9]+:	62 f4 d4 10 29 2d 6a 0c 20 00 	sub    %rbp,0x200c6a\(%rip\),%r21        # 602000 <.*>
  +[a-f0-9]+:	62 f4 cc 10 81 f6 20 20 60 00 	xor    \$0x602020,%rsi,%r22
+ +[a-f0-9]+:	d5 58 69 c9 20 20 60 00 	imul   \$0x602020,%r17,%r17
+ +[a-f0-9]+:	62 ec fc 0c 69 c9 20 20 60 00 	\{nf\} imul \$0x602020,%r17,%r17
+ +[a-f0-9]+:	62 fc fc 08 69 d1 20 20 60 00 	imul   \$0x602020,%r17,%rdx
+ +[a-f0-9]+:	62 e4 fc 08 69 d1 20 20 60 00 	imul   \$0x602020,%rcx,%r18
+ +[a-f0-9]+:	2e d5 00 68 20 20 60 00 	cs \{rex2 0x0\} push \$0x602020
 #pass
diff --git a/ld/testsuite/ld-x86-64/apx-load1c.d b/ld/testsuite/ld-x86-64/apx-load1c.d
index 2f80337..68cda54 100644
--- a/ld/testsuite/ld-x86-64/apx-load1c.d
+++ b/ld/testsuite/ld-x86-64/apx-load1c.d
@@ -108,4 +108,9 @@
  +[a-f0-9]+:	62 f4 dc 10 19 25 54 0d 20 00 	sbb    %rsp,0x200d54\(%rip\),%r20        # 2020e0 <.*>
  +[a-f0-9]+:	62 f4 d4 10 29 2d 4a 0d 20 00 	sub    %rbp,0x200d4a\(%rip\),%r21        # 2020e0 <.*>
  +[a-f0-9]+:	62 f4 cc 10 31 35 40 0d 20 00 	xor    %rsi,0x200d40\(%rip\),%r22        # 2020e0 <.*>
+ +[a-f0-9]+:	d5 c8 af 0d 38 0d 20 00 	imul   0x200d38\(%rip\),%r17        # 2020e0 <.*>
+ +[a-f0-9]+:	62 e4 fc 0c af 0d 2e 0d 20 00 	\{nf\} imul 0x200d2e\(%rip\),%r17        # 2020e0 <.*>
+ +[a-f0-9]+:	62 e4 ec 18 af 0d 24 0d 20 00 	imul   0x200d24\(%rip\),%r17,%rdx        # 2020e0 <.*>
+ +[a-f0-9]+:	62 f4 ec 10 af 0d 1a 0d 20 00 	imul   0x200d1a\(%rip\),%rcx,%r18        # 2020e0 <.*>
+ +[a-f0-9]+:	d5 00 ff 35 12 0d 20 00 	\{rex2 0x0\} push 0x200d12\(%rip\)        # 2020e0 <.*>
 #pass
diff --git a/ld/testsuite/ld-x86-64/apx-load1d.d b/ld/testsuite/ld-x86-64/apx-load1d.d
index 648368f..cc0cf92 100644
--- a/ld/testsuite/ld-x86-64/apx-load1d.d
+++ b/ld/testsuite/ld-x86-64/apx-load1d.d
@@ -108,4 +108,9 @@
  +[a-f0-9]+:	62 f4 dc 10 19 25 e4 0c 20 00 	sbb    %rsp,0x200ce4\(%rip\),%r20        # 202070 <.*>
  +[a-f0-9]+:	62 f4 d4 10 29 2d da 0c 20 00 	sub    %rbp,0x200cda\(%rip\),%r21        # 202070 <.*>
  +[a-f0-9]+:	62 f4 cc 10 31 35 d0 0c 20 00 	xor    %rsi,0x200cd0\(%rip\),%r22        # 202070 <.*>
+ +[a-f0-9]+:	d5 c8 af 0d c8 0c 20 00 	imul   0x200cc8\(%rip\),%r17        # 202070 <.*>
+ +[a-f0-9]+:	62 e4 fc 0c af 0d be 0c 20 00 	\{nf\} imul 0x200cbe\(%rip\),%r17        # 202070 <.*>
+ +[a-f0-9]+:	62 e4 ec 18 af 0d b4 0c 20 00 	imul   0x200cb4\(%rip\),%r17,%rdx        # 202070 <.*>
+ +[a-f0-9]+:	62 f4 ec 10 af 0d aa 0c 20 00 	imul   0x200caa\(%rip\),%rcx,%r18        # 202070 <.*>
+ +[a-f0-9]+:	d5 00 ff 35 a2 0c 20 00 	\{rex2 0x0\} push 0x200ca2\(%rip\)        # 202070 <.*>
 #pass
diff --git a/ld/testsuite/ld-x86-64/load5.s b/ld/testsuite/ld-x86-64/load5.s
new file mode 100644
index 0000000..61f06e9
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/load5.s
@@ -0,0 +1,17 @@
+	.data
+	.type	bar, @object
+bar:
+	.byte	1
+	.size	bar, .-bar
+	.globl	foo
+	.type	foo, @object
+foo:
+	.byte	1
+	.size	foo, .-foo
+	.text
+	.globl	_start
+	.type	_start, @function
+_start:
+	      pushq	bar@GOTPCREL(%rip)
+	{rex} pushq	foo@GOTPCREL(%rip)
+	.size	_start, .-_start
diff --git a/ld/testsuite/ld-x86-64/load5a.d b/ld/testsuite/ld-x86-64/load5a.d
new file mode 100644
index 0000000..ae58af7
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/load5a.d
@@ -0,0 +1,15 @@
+#source: load5.s
+#as: --64 -mrelax-relocations=yes
+#ld: -melf_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+#...
+[a-f0-9]+ <_start>:
+[ 	]*[a-f0-9]+:	2e 68 ([0-9a-f]{2} ){4} *	cs push \$0x[a-f0-9]+
+[ 	]*[a-f0-9]+:	2e 40 68 ([0-9a-f]{2} ){4} *	cs rex push \$0x[a-f0-9]+
+#pass
diff --git a/ld/testsuite/ld-x86-64/load5b.d b/ld/testsuite/ld-x86-64/load5b.d
new file mode 100644
index 0000000..3dd2d75
--- /dev/null
+++ b/ld/testsuite/ld-x86-64/load5b.d
@@ -0,0 +1,15 @@
+#source: load5.s
+#as: --64 -mrelax-relocations=yes
+#ld: -shared -melf_x86_64
+#objdump: -dw
+
+.*: +file format .*
+
+
+Disassembly of section .text:
+
+#...
+[a-f0-9]+ <_start>:
+[ 	]*[a-f0-9]+:	ff 35 ([0-9a-f]{2} ){4} *	push +0x[a-f0-9]+\(%rip\)        # [a-f0-9]+ <.*>
+[ 	]*[a-f0-9]+:	40 ff 35 ([0-9a-f]{2} ){4} *	rex push 0x[a-f0-9]+\(%rip\)        # [a-f0-9]+ <.*>
+#pass
diff --git a/ld/testsuite/ld-x86-64/x86-64.exp b/ld/testsuite/ld-x86-64/x86-64.exp
index e4b9ebb..01d6459 100644
--- a/ld/testsuite/ld-x86-64/x86-64.exp
+++ b/ld/testsuite/ld-x86-64/x86-64.exp
@@ -673,6 +673,8 @@
 run_dump_test "load3a"
 run_dump_test "load3b"
 run_dump_test "load4"
+run_dump_test "load5a"
+run_dump_test "load5b"
 run_dump_test "call1a"
 run_dump_test "call1b"
 run_dump_test "call1c"