/* gcc/testsuite/gcc.target/mips/mips-3d-9.c - gcc - Git at Google */

 /* { dg-do run } */
 /* { dg-options "-mips3d forbid_cpu=octeon.* (REQUIRES_STDLIB)" } */

 /* Matrix Multiplications */
 #include <stdlib.h>
 #include <stdio.h>

 /* Vector type of floats, 8 bytes wide (vector_size(8)).  The functions in
    this file always initialize it with two float elements.  */
 typedef float v2sf __attribute__((vector_size(8)));

 /* Inputs shared by every variant: output element i is computed as the
    sum over j of a[j] * b[j][i].  */
 float a[4] = {1.1, 2.2, 3.3, 4.4};
 float b[4][4] = {{1, 2, 3, 4},
                  {5, 6, 7, 8},
                  {9, 10, 11, 12},
                  {13, 14, 15, 16}};

 /* One output array per variant.  main() uses c as the reference and
    requires d, e and f to compare exactly equal to it.  */
 float c[4]; /* Result for matrix_multiply1() */
 float d[4]; /* Result for matrix_multiply2() */
 float e[4]; /* Result for matrix_multiply3() */
 float f[4]; /* Result for matrix_multiply4() */

 /* Forward declarations of the four variants.
    NOTE(review): NOMIPS16 is not defined in this file; presumably the test
    harness supplies it -- confirm.  It marks the three variants that call
    the __builtin_mips_* paired-single builtins.  */
 void matrix_multiply1();
 NOMIPS16 void matrix_multiply2();
 NOMIPS16 void matrix_multiply3();
 NOMIPS16 void matrix_multiply4();

 /* Test driver: compute the scalar reference into c[], then run each
    paired-single variant and abort on the first element that does not
    compare exactly equal to the reference.  */
 int main ()
 {
   int k;

   /* Scalar version: fills c[], the reference for all later checks.  */
   matrix_multiply1();

   /* Paired-single arithmetic with a rolled inner loop: d[] against c[].  */
   matrix_multiply2();
   for (k = 0; k < 4; k++)
     {
       if (d[k] != c[k])
         abort();
     }

   /* Paired-single arithmetic with the inner loop unrolled: e[] against c[].  */
   matrix_multiply3();
   for (k = 0; k < 4; k++)
     {
       if (e[k] != c[k])
         abort();
     }

   /* Paired-single arithmetic with every loop unrolled: f[] against c[].  */
   matrix_multiply4();
   for (k = 0; k < 4; k++)
     {
       if (f[k] != c[k])
         abort();
     }

   printf ("Test Passes\n");
   exit (0);
 }

 /* Scalar reference: for each col, c[col] is cleared and then accumulates
    a[row] * b[row][col] for row = 0..3, in increasing row order.  */
 void matrix_multiply1()
 {
   int col, row;

   for (col = 0; col < 4; col++)
     {
       c[col] = 0.0;

       for (row = 0; row < 4; row++)
         {
           c[col] += a[row] * b[row][col];
         }
     }
 }

 /* Variant 2: same products as the scalar version, but formed two at a
    time with v2sf arithmetic inside a rolled inner loop.  Writes d[].  */
 NOMIPS16 void matrix_multiply2()
 {
   int col, row;
   v2sf a_pair, b_pair;
   v2sf acc, folded;

   for (col = 0; col < 4; col++)
     {
       /* Start each output element from a zeroed two-element accumulator.  */
       acc = (v2sf) {0.0, 0.0};

       for (row = 0; row < 4; row += 2)
         {
           /* Pack two consecutive elements of a[] and the matching two
              elements of column col of b[].  */
           a_pair = (v2sf) {a[row], a[row + 1]};
           b_pair = (v2sf) {b[row][col], b[row + 1][col]};

           /* Element-wise multiply, then accumulate.  */
           acc += a_pair * b_pair;
         }

       /* Reduction add of the accumulator with itself, then convert the
          lower half of the pair to the scalar result.  */
       folded = __builtin_mips_addr_ps (acc, acc);
       d[col] = __builtin_mips_cvt_s_pl (folded);
     }
 }

 /* Variant 3: the inner loop is unrolled by hand.  a[] is packed into two
    pairs once, outside the loop; each iteration packs one column of b[]
    and produces one element of e[].  */
 NOMIPS16 void matrix_multiply3()
 {
   int col;
   v2sf a01, a23, b01, b23;
   v2sf sum, folded;

   /* Loop-invariant: the four elements of a[] as two pairs.  */
   a01 = (v2sf) {a[0], a[1]};
   a23 = (v2sf) {a[2], a[3]};

   for (col = 0; col < 4; col++)
     {
       /* Column col of b[], split into rows 0-1 and rows 2-3.  */
       b01 = (v2sf) {b[0][col], b[1][col]};
       b23 = (v2sf) {b[2][col], b[3][col]};

       /* Two element-wise products added together.  */
       sum = a01 * b01 + a23 * b23;

       /* Reduction add of the sum with itself, then convert the lower
          half of the pair to the scalar result.  */
       folded = __builtin_mips_addr_ps (sum, sum);
       e[col] = __builtin_mips_cvt_s_pl (folded);
     }
 }

 /* Variant 4: no loops at all.  Every column of b[] is packed into two
    pairs, multiplied against the two pairs of a[], and the four partial
    sums are reduced two at a time.  Writes f[].  */
 NOMIPS16 void matrix_multiply4()
 {
   v2sf a01, a23;
   v2sf col0_01, col0_23, col1_01, col1_23;
   v2sf col2_01, col2_23, col3_01, col3_23;
   v2sf sum0, sum1, sum2, sum3;
   v2sf fold01, fold23;

   /* a[0], a[1] go into a01; a[2], a[3] go into a23.  */
   a01 = (v2sf) {a[0], a[1]};
   a23 = (v2sf) {a[2], a[3]};

   /* Column 0 of b[]: rows 0-1 and rows 2-3, then its partial sums.  */
   col0_01 = (v2sf) {b[0][0], b[1][0]};
   col0_23 = (v2sf) {b[2][0], b[3][0]};
   sum0 = a01 * col0_01 + a23 * col0_23;

   /* Column 1.  */
   col1_01 = (v2sf) {b[0][1], b[1][1]};
   col1_23 = (v2sf) {b[2][1], b[3][1]};
   sum1 = a01 * col1_01 + a23 * col1_23;

   /* Column 2.  */
   col2_01 = (v2sf) {b[0][2], b[1][2]};
   col2_23 = (v2sf) {b[2][2], b[3][2]};
   sum2 = a01 * col2_01 + a23 * col2_23;

   /* Column 3.  */
   col3_01 = (v2sf) {b[0][3], b[1][3]};
   col3_23 = (v2sf) {b[2][3], b[3][3]};
   sum3 = a01 * col3_01 + a23 * col3_23;

   /* One reduction add per pair of columns.  */
   fold01 = __builtin_mips_addr_ps (sum0, sum1);
   fold23 = __builtin_mips_addr_ps (sum2, sum3);

   /* The upper-half conversion supplies the even-indexed outputs and the
      lower-half conversion the odd-indexed ones.  */
   f[0] = __builtin_mips_cvt_s_pu (fold01);
   f[1] = __builtin_mips_cvt_s_pl (fold01);
   f[2] = __builtin_mips_cvt_s_pu (fold23);
   f[3] = __builtin_mips_cvt_s_pl (fold23);
 }
	/* { dg-do run } */
	/* { dg-options "-mips3d forbid_cpu=octeon.* (REQUIRES_STDLIB)" } */

	/* Matrix Multiplications */
	#include <stdlib.h>
	#include <stdio.h>

	/* Vector type of floats, 8 bytes wide (vector_size(8)).  The functions in
	   this file always initialize it with two float elements.  */
	typedef float v2sf __attribute__((vector_size(8)));

	/* Inputs shared by every variant: output element i is computed as the
	   sum over j of a[j] * b[j][i].  */
	float a[4] = {1.1, 2.2, 3.3, 4.4};
	float b[4][4] = {{1, 2, 3, 4},
	{5, 6, 7, 8},
	{9, 10, 11, 12},
	{13, 14, 15, 16}};

	/* One output array per variant.  main() uses c as the reference and
	   requires d, e and f to compare exactly equal to it.  */
	float c[4]; /* Result for matrix_multiply1() */
	float d[4]; /* Result for matrix_multiply2() */
	float e[4]; /* Result for matrix_multiply3() */
	float f[4]; /* Result for matrix_multiply4() */

	/* Forward declarations of the four variants.
	   NOTE(review): NOMIPS16 is not defined in this file; presumably the test
	   harness supplies it -- confirm.  It marks the three variants that call
	   the __builtin_mips_* paired-single builtins.  */
	void matrix_multiply1();
	NOMIPS16 void matrix_multiply2();
	NOMIPS16 void matrix_multiply3();
	NOMIPS16 void matrix_multiply4();

	/* Test driver: compute the scalar reference into c[], then run each
	   paired-single variant and abort on the first element that does not
	   compare exactly equal to the reference.  */
	int main ()
	{
	  int k;

	  /* Scalar version: fills c[], the reference for all later checks.  */
	  matrix_multiply1();

	  /* Paired-single arithmetic with a rolled inner loop: d[] against c[].  */
	  matrix_multiply2();
	  for (k = 0; k < 4; k++)
	    {
	      if (d[k] != c[k])
	        abort();
	    }

	  /* Paired-single arithmetic with the inner loop unrolled: e[] against c[].  */
	  matrix_multiply3();
	  for (k = 0; k < 4; k++)
	    {
	      if (e[k] != c[k])
	        abort();
	    }

	  /* Paired-single arithmetic with every loop unrolled: f[] against c[].  */
	  matrix_multiply4();
	  for (k = 0; k < 4; k++)
	    {
	      if (f[k] != c[k])
	        abort();
	    }

	  printf ("Test Passes\n");
	  exit (0);
	}

	/* Scalar reference: for each col, c[col] is cleared and then accumulates
	   a[row] * b[row][col] for row = 0..3, in increasing row order.  */
	void matrix_multiply1()
	{
	  int col, row;

	  for (col = 0; col < 4; col++)
	    {
	      c[col] = 0.0;

	      for (row = 0; row < 4; row++)
	        {
	          c[col] += a[row] * b[row][col];
	        }
	    }
	}

	/* Variant 2: same products as the scalar version, but formed two at a
	   time with v2sf arithmetic inside a rolled inner loop.  Writes d[].  */
	NOMIPS16 void matrix_multiply2()
	{
	  int col, row;
	  v2sf a_pair, b_pair;
	  v2sf acc, folded;

	  for (col = 0; col < 4; col++)
	    {
	      /* Start each output element from a zeroed two-element accumulator.  */
	      acc = (v2sf) {0.0, 0.0};

	      for (row = 0; row < 4; row += 2)
	        {
	          /* Pack two consecutive elements of a[] and the matching two
	             elements of column col of b[].  */
	          a_pair = (v2sf) {a[row], a[row + 1]};
	          b_pair = (v2sf) {b[row][col], b[row + 1][col]};

	          /* Element-wise multiply, then accumulate.  */
	          acc += a_pair * b_pair;
	        }

	      /* Reduction add of the accumulator with itself, then convert the
	         lower half of the pair to the scalar result.  */
	      folded = __builtin_mips_addr_ps (acc, acc);
	      d[col] = __builtin_mips_cvt_s_pl (folded);
	    }
	}

	/* Variant 3: the inner loop is unrolled by hand.  a[] is packed into two
	   pairs once, outside the loop; each iteration packs one column of b[]
	   and produces one element of e[].  */
	NOMIPS16 void matrix_multiply3()
	{
	  int col;
	  v2sf a01, a23, b01, b23;
	  v2sf sum, folded;

	  /* Loop-invariant: the four elements of a[] as two pairs.  */
	  a01 = (v2sf) {a[0], a[1]};
	  a23 = (v2sf) {a[2], a[3]};

	  for (col = 0; col < 4; col++)
	    {
	      /* Column col of b[], split into rows 0-1 and rows 2-3.  */
	      b01 = (v2sf) {b[0][col], b[1][col]};
	      b23 = (v2sf) {b[2][col], b[3][col]};

	      /* Two element-wise products added together.  */
	      sum = a01 * b01 + a23 * b23;

	      /* Reduction add of the sum with itself, then convert the lower
	         half of the pair to the scalar result.  */
	      folded = __builtin_mips_addr_ps (sum, sum);
	      e[col] = __builtin_mips_cvt_s_pl (folded);
	    }
	}

	/* Variant 4: no loops at all.  Every column of b[] is packed into two
	   pairs, multiplied against the two pairs of a[], and the four partial
	   sums are reduced two at a time.  Writes f[].  */
	NOMIPS16 void matrix_multiply4()
	{
	  v2sf a01, a23;
	  v2sf col0_01, col0_23, col1_01, col1_23;
	  v2sf col2_01, col2_23, col3_01, col3_23;
	  v2sf sum0, sum1, sum2, sum3;
	  v2sf fold01, fold23;

	  /* a[0], a[1] go into a01; a[2], a[3] go into a23.  */
	  a01 = (v2sf) {a[0], a[1]};
	  a23 = (v2sf) {a[2], a[3]};

	  /* Column 0 of b[]: rows 0-1 and rows 2-3, then its partial sums.  */
	  col0_01 = (v2sf) {b[0][0], b[1][0]};
	  col0_23 = (v2sf) {b[2][0], b[3][0]};
	  sum0 = a01 * col0_01 + a23 * col0_23;

	  /* Column 1.  */
	  col1_01 = (v2sf) {b[0][1], b[1][1]};
	  col1_23 = (v2sf) {b[2][1], b[3][1]};
	  sum1 = a01 * col1_01 + a23 * col1_23;

	  /* Column 2.  */
	  col2_01 = (v2sf) {b[0][2], b[1][2]};
	  col2_23 = (v2sf) {b[2][2], b[3][2]};
	  sum2 = a01 * col2_01 + a23 * col2_23;

	  /* Column 3.  */
	  col3_01 = (v2sf) {b[0][3], b[1][3]};
	  col3_23 = (v2sf) {b[2][3], b[3][3]};
	  sum3 = a01 * col3_01 + a23 * col3_23;

	  /* One reduction add per pair of columns.  */
	  fold01 = __builtin_mips_addr_ps (sum0, sum1);
	  fold23 = __builtin_mips_addr_ps (sum2, sum3);

	  /* The upper-half conversion supplies the even-indexed outputs and the
	     lower-half conversion the odd-indexed ones.  */
	  f[0] = __builtin_mips_cvt_s_pu (fold01);
	  f[1] = __builtin_mips_cvt_s_pl (fold01);
	  f[2] = __builtin_mips_cvt_s_pu (fold23);
	  f[3] = __builtin_mips_cvt_s_pl (fold23);
	}