blob: 625c60484d12e1c75169edd5bdb245b19b8998c8 [file] [log] [blame]
/* Copyright (C) 2021-2023 Free Software Foundation, Inc.
Contributed by Oracle.
This file is part of GNU Binutils.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, 51 Franklin Street - Fifth Floor, Boston,
MA 02110-1301, USA. */
/*
* -----------------------------------------------------------------------------
* This program implements the multiplication of an m by n matrix with a vector
* of length n. The POSIX Threads parallel programming model is used to
* parallelize the core matrix-vector multiplication algorithm.
* -----------------------------------------------------------------------------
*/
#include "mydefs.h"

#include <errno.h>
#include <inttypes.h>
#include <string.h>
/*
 * -----------------------------------------------------------------------------
 * Program entry point: set up the data, run the threaded matrix-vector
 * multiplication, and verify the result.
 *
 * argc, argv - the command line, handed to get_user_options ().
 *
 * Returns 0 once the run has completed, also when check_results () reported
 * differences. The program exits with status -1 if a thread cannot be created
 * or joined.
 * -----------------------------------------------------------------------------
 */
int main (int argc, char **argv)
{
  bool verbose = false;

  thread_data *thread_data_arguments;
  pthread_t *pthread_ids;

  int64_t remainder_rows;
  int64_t rows_per_thread;
  int64_t active_threads;
  int64_t number_of_rows;
  int64_t number_of_columns;
  int64_t number_of_threads;
  int64_t repeat_count;

  double **A;
  double *b;
  double *c;
  double *ref;

  int64_t errors;

/*
 * -----------------------------------------------------------------------------
 * Start the ball rolling - Get the user options and parse them.
 * -----------------------------------------------------------------------------
 */
  (void) get_user_options (argc, argv,
                           &number_of_rows,
                           &number_of_columns,
                           &repeat_count,
                           &number_of_threads,
                           &verbose);

  if (verbose) printf ("Verbose mode enabled\n");

/*
 * -----------------------------------------------------------------------------
 * Allocate storage for all data structures.
 * -----------------------------------------------------------------------------
 */
  (void) allocate_data (number_of_threads, number_of_rows,
                        number_of_columns, &A, &b, &c, &ref,
                        &thread_data_arguments, &pthread_ids);

  if (verbose) printf ("Allocated data structures\n");

/*
 * -----------------------------------------------------------------------------
 * Initialize the data.
 * -----------------------------------------------------------------------------
 */
  (void) init_data (number_of_rows, number_of_columns, A, b, c, ref);

  if (verbose) printf ("Initialized matrix and vectors\n");

/*
 * -----------------------------------------------------------------------------
 * Determine the main workload settings.
 * -----------------------------------------------------------------------------
 */
  (void) get_workload_stats (number_of_threads, number_of_rows,
                             number_of_columns, &rows_per_thread,
                             &remainder_rows, &active_threads);

  if (verbose) printf ("Defined workload distribution\n");

  /* Slots beyond the active ones get no work; no thread is created for
     them in the creation loop below.  */
  for (int64_t TID = active_threads; TID < number_of_threads; TID++)
    {
      thread_data_arguments[TID].do_work = false;
    }

  /* Fill in the per thread arguments, including the range of rows that
     determine_work_per_thread () assigns to each active thread.  */
  for (int64_t TID = 0; TID < active_threads; TID++)
    {
      thread_data_arguments[TID].thread_id = TID;
      thread_data_arguments[TID].verbose = verbose;
      thread_data_arguments[TID].do_work = true;
      thread_data_arguments[TID].repeat_count = repeat_count;

      (void) determine_work_per_thread (
                TID, rows_per_thread, remainder_rows,
                &thread_data_arguments[TID].row_index_start,
                &thread_data_arguments[TID].row_index_end);

      thread_data_arguments[TID].m = number_of_rows;
      thread_data_arguments[TID].n = number_of_columns;
      thread_data_arguments[TID].b = b;
      thread_data_arguments[TID].c = c;
      thread_data_arguments[TID].A = A;
    }

  if (verbose) printf ("Assigned work to threads\n");

/*
 * -----------------------------------------------------------------------------
 * Create and execute the threads. One thread is created for each active
 * thread slot, in addition to the main thread that is executing this code.
 *
 * Per the pthread_create () call, the threads start executing right away.
 * -----------------------------------------------------------------------------
 */
  for (int64_t TID = 0; TID < active_threads; TID++)
    {
      /* pthread_create () returns the error number directly and does not set
         errno, so the message has to come from strerror () and not from
         perror ().  */
      int rc = pthread_create (&pthread_ids[TID], NULL, driver_mxv,
                               (void *) &thread_data_arguments[TID]);
      if (rc != 0)
        {
          printf ("Error creating thread %" PRId64 "\n", TID);
          fprintf (stderr, "pthread_create: %s\n", strerror (rc));
          exit (-1);
        }

      if (verbose) printf ("Thread %" PRId64 " has been created\n", TID);
    }

/*
 * -----------------------------------------------------------------------------
 * Wait for all threads to finish.
 * -----------------------------------------------------------------------------
 */
  for (int64_t TID = 0; TID < active_threads; TID++)
    {
      /* A failed join means the results in c cannot be trusted.  */
      int rc = pthread_join (pthread_ids[TID], NULL);
      if (rc != 0)
        {
          printf ("Error joining thread %" PRId64 "\n", TID);
          fprintf (stderr, "pthread_join: %s\n", strerror (rc));
          exit (-1);
        }
    }

  if (verbose)
    {
      printf ("Matrix vector multiplication has completed\n");
      printf ("Verify correctness of result\n");
    }

/*
 * -----------------------------------------------------------------------------
 * Check the numerical results.
 * -----------------------------------------------------------------------------
 */
  errors = check_results (number_of_rows, number_of_columns, c, ref);
  if (errors == 0)
    {
      if (verbose) printf ("Error check passed\n");
    }
  else
    {
      printf ("Error: %" PRId64 " differences in the results detected\n",
              errors);
    }

/*
 * -----------------------------------------------------------------------------
 * Print a summary of the execution.
 * -----------------------------------------------------------------------------
 */
  print_all_results (number_of_rows, number_of_columns, number_of_threads,
                     errors);

/*
 * -----------------------------------------------------------------------------
 * Release the allocated memory and end execution.
 *
 * NOTE(review): thread_data_arguments is not released here and only the top
 * level pointer of A is freed. Whether that leaks memory depends on how
 * allocate_data () obtains them - confirm before adding more free () calls.
 * -----------------------------------------------------------------------------
 */
  free (A);
  free (b);
  free (c);
  free (ref);
  free (pthread_ids);

  return (0);
}
/*
 * -----------------------------------------------------------------------------
 * Convert the text of an option argument to a 64 bit signed integer.
 *
 * The text has to be a base 10 integer as accepted by strtoll (), without any
 * trailing characters, and the value has to fit in an int64_t.
 *
 * Returns true and stores the result in *value upon success. Returns false
 * and leaves *value untouched otherwise.
 * -----------------------------------------------------------------------------
 */
static bool parse_int64_option (const char *text, int64_t *value)
{
  char *end = NULL;
  long long parsed;

  /* strtoll () signals overflow through errno only, so clear it first.  */
  errno = 0;
  parsed = strtoll (text, &end, 10);

  if (end == text || *end != '\0' || errno == ERANGE
      || parsed < INT64_MIN || parsed > INT64_MAX)
    return false;

  *value = (int64_t) parsed;
  return true;
}

/*
 * -----------------------------------------------------------------------------
 * Parse user options and set variables accordingly. In case of an error, print
 * a message, but do not bail out yet. In this way we can catch multiple input
 * errors.
 *
 * Options: -m <rows> -n <columns> -r <repeat count> -t <threads> -v -h
 *
 * All output parameters are first set to their default value and then
 * overwritten by the options given. An option value that is not a valid
 * integer is reported and leaves the earlier value in place.
 *
 * Returns 0 if all settings are valid. Does not return otherwise:
 *   - exits with status 0 after printing the usage overview for -h,
 *   - exits with status -1 after printing the usage overview for an unknown
 *     option or a missing option argument,
 *   - exits with status -1 if one or more values are invalid.
 * -----------------------------------------------------------------------------
 */
int get_user_options (int argc, char *argv[],
                      int64_t *number_of_rows,
                      int64_t *number_of_columns,
                      int64_t *repeat_count,
                      int64_t *number_of_threads,
                      bool *verbose)
{
  const int64_t default_number_of_threads = 1;
  const int64_t default_rows = 2000;
  const int64_t default_columns = 3000;
  const int64_t default_repeat_count = 200;
  const bool default_verbose = false;

  int opt;
  int errors = 0;

  *number_of_rows = default_rows;
  *number_of_columns = default_columns;
  *number_of_threads = default_number_of_threads;
  *repeat_count = default_repeat_count;
  *verbose = default_verbose;

  while ((opt = getopt (argc, argv, "m:n:r:t:vh")) != -1)
    {
      /* Set for the options that take a numerical argument.  */
      int64_t *target = NULL;

      switch (opt)
        {
        case 'm':
          target = number_of_rows;
          break;
        case 'n':
          target = number_of_columns;
          break;
        case 'r':
          target = repeat_count;
          break;
        case 't':
          target = number_of_threads;
          break;
        case 'v':
          *verbose = true;
          break;
        case 'h':
        default:
          printf ("Usage: %s "
                  "[-m <number of rows>] "
                  "[-n <number of columns>] [-r <repeat count>] "
                  "[-t <number of threads>] [-v] [-h]\n", argv[0]);
          printf ("\t-m - number of rows, default = %" PRId64 "\n",
                  default_rows);
          printf ("\t-n - number of columns, default = %" PRId64 "\n",
                  default_columns);
          printf ("\t-r - the number of times the algorithm is "
                  "repeatedly executed, default = %" PRId64 "\n",
                  default_repeat_count);
          printf ("\t-t - the number of threads used, default = %" PRId64
                  "\n", default_number_of_threads);
          printf ("\t-v - enable verbose mode, %s by default\n",
                  (default_verbose) ? "on" : "off");
          printf ("\t-h - print this usage overview and exit\n");
          /* Only an explicit request for help is a successful run. An
             unknown option or a missing argument is an input error.  */
          exit ((opt == 'h') ? 0 : -1);
        }

      if (target != NULL && !parse_int64_option (optarg, target))
        {
          errors++;
          printf ("Error: The value %s of option -%c is not a valid "
                  "integer\n", optarg, opt);
        }
    }

/*
 * -----------------------------------------------------------------------------
 * Check for errors and bail out in case of problems.
 * -----------------------------------------------------------------------------
 */
  if (*number_of_rows <= 0)
    {
      errors++;
      printf ("Error: The number of rows is %" PRId64 " but should be "
              "strictly positive\n", *number_of_rows);
    }
  if (*number_of_columns <= 0)
    {
      errors++;
      printf ("Error: The number of columns is %" PRId64 " but should be "
              "strictly positive\n", *number_of_columns);
    }
  if (*repeat_count <= 0)
    {
      errors++;
      printf ("Error: The repeat count is %" PRId64 " but should be "
              "strictly positive\n", *repeat_count);
    }
  if (*number_of_threads <= 0)
    {
      errors++;
      printf ("Error: The number of threads is %" PRId64 " but should be "
              "strictly positive\n", *number_of_threads);
    }

  if (errors != 0)
    {
      printf ("There are %d input error (s)\n", errors);
      exit (-1);
    }

  return (errors);
}
/*
 * -----------------------------------------------------------------------------
 * Print a summary of the execution status.
 *
 * A single line is written to stdout with the outcome of the error check
 * ("passed" if errors is zero, "failed" otherwise), followed by the number of
 * rows, columns, and threads.
 * -----------------------------------------------------------------------------
 */
void print_all_results (int64_t number_of_rows,
                        int64_t number_of_columns,
                        int64_t number_of_threads,
                        int64_t errors)
{
  const char *status = (errors == 0) ? "passed" : "failed";

  /* PRId64 keeps the conversion in sync with int64_t on platforms where
     that type is not a long.  */
  printf ("mxv: error check %s - rows = %" PRId64 " columns = %" PRId64
          " threads = %" PRId64 "\n",
          status, number_of_rows, number_of_columns, number_of_threads);
}
/*
 * -----------------------------------------------------------------------------
 * Decide whether a computed value differs from its reference value.
 *
 * The relative error is used if the magnitude of the reference value exceeds
 * 100 * DBL_MIN, and the absolute error otherwise. The values differ if that
 * error is not within 100 * DBL_EPSILON. A NaN error counts as a difference.
 * -----------------------------------------------------------------------------
 */
static bool result_differs (double computed, double expected)
{
  const double tolerance = 100.0 * DBL_EPSILON;
  const double small = 100.0 * DBL_MIN;
  double error;

  if (fabs (expected) > small)
    error = fabs ((computed - expected) / expected);
  else
    error = fabs (computed - expected);

  /* Written as a negation so that a NaN fails the check.  */
  return !(error <= tolerance);
}

/*
 * -----------------------------------------------------------------------------
 * Check whether the computations produced the correct results.
 *
 * m   - the number of elements in c and ref that are compared
 * n   - the number of columns, only used in the diagnostic output
 * c   - the computed result vector
 * ref - the reference result vector
 *
 * Returns the number of elements of c that differ from ref. If there are any,
 * all elements are printed to stdout, with the differing ones marked by '*'.
 * No elements are compared, and 0 is returned, if m is not positive.
 *
 * The comparison is evaluated again while printing. This avoids the need for
 * a scratch buffer to remember the outcome per element.
 * -----------------------------------------------------------------------------
 */
int64_t check_results (int64_t m, int64_t n, double *c, double *ref)
{
  int64_t errors = 0;

  for (int64_t i = 0; i < m; i++)
    {
      if (result_differs (c[i], ref[i]))
        errors++;
    }

  if (errors > 0)
    {
      printf ("Found %" PRId64 " differences in results for m = %" PRId64
              " n = %" PRId64 ":\n", errors, m, n);
      for (int64_t i = 0; i < m; i++)
        {
          char marker = result_differs (c[i], ref[i]) ? '*' : ' ';

          printf (" %c c[%" PRId64 "] = %f ref[%" PRId64 "] = %f\n",
                  marker, i, c[i], i, ref[i]);
        }
    }

  return (errors);
}