/* Code: Jacobi iteration over a square grid using POSIX threads and named semaphores. */
#include <stdio.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <pthread.h>
#include <semaphore.h>
#include <string.h>
#include <limits.h>
#include <float.h>
#include <math.h>
#include <errno.h>
#include <time.h>
/* NOTE(review): SHARED is not referenced anywhere in the visible source;
 * presumably a leftover "pshared" argument for sem_init() — confirm before
 * removing. */
#define SHARED 1
/* Run configuration gathered from the command line. */
typedef struct args {
    uint32_t size;        /* width and height of the interior of the grid */
    uint8_t num_threads;  /* number of worker threads to use */
    float lft;            /* value for each element in the left column */
    float top;            /* value for each element in the top row */
    float rgt;            /* value for each element in the right column */
    float btm;            /* value for each element in the bottom row */
    float eps;            /* epsilon: workers iterate while their max delta exceeds it */
} Args;
/* Per-worker parameters handed to avg_rows() through pthread_create(). */
typedef struct threadargs {
    uint32_t size;   /* interior grid dimension (the grids are size+2 wide) */
    float eps;       /* the worker stops once its max delta is <= eps */
    uint32_t low;    /* first grid row this worker updates (inclusive) */
    uint32_t high;   /* last grid row this worker updates (inclusive) */
    float **read;    /* grid the neighbour values are read from */
    float **write;   /* grid the averaged values are stored into */
    uint8_t nt;      /* total number of worker threads */
    uint8_t cur;     /* index of this worker, used in trace output */
} ThreadArgs;
/* Print the run configuration to stdout. */
void print_args(Args args);
/* Print a size x size matrix to stdout, one row per line. */
void print_mat(uint32_t size, float **array);
/* NOTE(review): sub_mat, max_mat, min_mat and abs_mat are declared here but
 * have no definition or call in the visible source — confirm they are
 * defined elsewhere, or drop the declarations. */
void sub_mat(uint32_t size, float **mat, float **a, float **b);
float max_mat(uint32_t size, float **array);
float min_mat(uint32_t size, float **array);
void abs_mat(uint32_t size, float **mat, float **array);
/* Copy the first `size` floats of each of the first `size` rows of b into a. */
void cpy_mat(uint32_t size, float **a, float **b);
/* Worker thread entry point; `a` points to a ThreadArgs. */
void *avg_rows(void *a);
/* Named semaphores shared by all workers (opened and closed in main). */
sem_t *arrive; /* held while a worker registers its arrival at the barrier */
sem_t *go;     /* workers wait on this before computing an iteration */
sem_t *done;   /* held while the completion count or thread info is handled */

/* Barrier bookkeeping shared by the workers. */
uint8_t num_arrived = 0;  /* workers that have arrived for the next iteration */
uint8_t num_complete = 0; /* workers that have finished iterating */

/* Tracing switches selected by the optional leading command-line flag. */
bool debug = false;
bool verbose = false;
bool thread = false;
int main(int argc, char *argv[]) {
clock_t beg, end;
float exec_time;
/*****************************************************************************************/
/************************ setting all parameters for the matrix ************************/
uint8_t nflags = 0;
if (argc < 2) {
fprintf(stderr, "Usage: ./jacobi <size> <num_threads> [ lft top rgt btm] [ eps ]\n");
fprintf(stderr, " size: width and height of the array\n");
fprintf(stderr, " num_threads: number of workers to use\n");
fprintf(stderr, " lft: value for each element in the left column\n");
fprintf(stderr, " top: value for each element in the top row\n");
fprintf(stderr, " rgt: value for each element in the right column\n");
fprintf(stderr, " btm: value for each element in the bottom row\n");
fprintf(stderr, " eps: value for epsilon\n");
return1;
}
else if (strcmp(argv[1], "-d") == 0) {
debug = true;
nflags++;
}
if (strcmp(argv[1], "-t") == 0) {
thread = true;
nflags++;
}
if (strcmp(argv[1], "-dt") == 0) {
debug = true;
thread = true;
nflags++;
}
if (strcmp(argv[1], "-d2") == 0) {
debug = true;
verbose = true;
thread++;
nflags++;
}
Args args;
if (argc==(nflags+3)) {
args.size = atoi(argv[nflags+1]);
args.num_threads = atoi(argv[nflags+2]);
args.lft = 1;
args.top = 1;
args.rgt = 80;
args.btm = 80;
args.eps = 0.1;
}
else if (argc==(nflags+4)) {
args.eps = atof(argv[nflags+3]);
args.lft = 1;
args.top = 1;
args.rgt = 80;
args.btm = 80;
}
else if (argc==(nflags+7)) {
args.lft = atof(argv[nflags+3]);
args.top = atof(argv[nflags+4]);
args.rgt = atof(argv[nflags+5]);
args.btm = atof(argv[nflags+6]);
args.eps = 0.1;
}
else if (argc==(nflags+8)) {
args.lft = atof(argv[nflags+3]);
args.top = atof(argv[nflags+4]);
args.rgt = atof(argv[nflags+5]);
args.btm = atof(argv[nflags+6]);
args.eps = atof(argv[nflags+7]);
}
else {
fprintf(stderr, "Usage: ./jacobi <size> <num_threads> [ lft top rgt btm] [ eps ]\n");
fprintf(stderr, " size: width and height of the array\n");
fprintf(stderr, " num_threads: number of workers to use\n");
fprintf(stderr, " lft: value for each element in the left column\n");
fprintf(stderr, " top: value for each element in the top row\n");
fprintf(stderr, " rgt: value for each element in the right column\n");
fprintf(stderr, " btm: value for each element in the bottom row\n");
fprintf(stderr, " eps: value for epsilon\n");
return1;
}
/********************* end setting parameter values for the matrix *********************/
/*****************************************************************************************/
/*****************************************************************************************/
/************************** initializing read and write matrix *************************/
float **read = calloc(sizeof(float*)*(args.size+2), 1);
float **write = calloc(sizeof(float*)*(args.size+2), 1);
for (uint32_t i = 0; i < args.size+2; i++) {
read[i] = calloc(sizeof(float)*(args.size+2), 1);
write[i] = calloc(sizeof(float)*(args.size+2), 1);
}
for (uint32_t i = 0; i < args.size+2; i++) {
read[i][0] = args.lft;
read[i][args.size+1] = args.rgt;
read[0][i] = args.top;
read[args.size+1][i] = args.btm;
}
cpy_mat(args.size+2, write, read);
/************************ end initializing read and write matrix ***********************/
/*****************************************************************************************/
/*****************************************************************************************/
/********************** calculating nearest working thread number **********************/
if (args.num_threads > args.size)
args.num_threads = args.size;
args.num_threads = ceil(((float)args.size)/ceil(args.size/(float)args.num_threads));
if (debug) {
print_args(args);
printf("Initial array:\n");
print_mat(args.size+2, read);
}
/******************** end calculating nearest working thread number ********************/
/*****************************************************************************************/
/*****************************************************************************************/
/****************** initializing semaphore, args, and thread variables *****************/
pthread_attr_t tattr;
pthread_t tid[args.num_threads];
pthread_attr_init(&tattr);
pthread_attr_setscope(&tattr, PTHREAD_SCOPE_SYSTEM);
arrive = sem_open("arrive", O_CREAT | O_EXCL, 0600, 1);
if (arrive == SEM_FAILED)
perror("arrive open error");
go = sem_open("go", O_CREAT | O_EXCL, 0600, 0);
if (go == SEM_FAILED)
perror("go open error");
done = sem_open("complete", O_CREAT | O_EXCL, 0600, 1);
if (done == SEM_FAILED)
perror("write open error");
/*************** end initializing semaphore, args, and thread variables ****************/
/*****************************************************************************************/
/*****************************************************************************************/
/***************** initializing thread arguements and starting threads *****************/
uint32_t rem_rows = args.size;
for (uint8_t i = 0; i < args.num_threads-1; i++) {
ThreadArgs *thread_args = malloc(sizeof(ThreadArgs));
thread_args->size = args.size;
thread_args->eps = args.eps;
thread_args->read = (float**)read;
thread_args->write = (float**)write;
thread_args->nt = args.num_threads;
thread_args->low = args.size-rem_rows+1;
thread_args->high = ceil(args.size/(float)args.num_threads)+thread_args->low-1;
thread_args->cur = i;
rem_rows -= thread_args->high-thread_args->low+1;
pthread_create(&(tid[i]), &tattr, avg_rows, (void*)thread_args);
}
ThreadArgs *thread_args = malloc(sizeof(ThreadArgs));
thread_args->size = args.size;
thread_args->eps = args.eps;
thread_args->read = (float**)read;
thread_args->write = (float**)write;
thread_args->nt = args.num_threads;
thread_args->low = args.size-rem_rows+1;
thread_args->high = args.size;
thread_args->cur = args.num_threads-1;
rem_rows = 0;
/*************** end initializing thread arguements and starting threads ***************/
/*****************************************************************************************/
/*****************************************************************************************/
/********* start timer, start last thread, and wait for all threads to complete ********/
beg = clock();
pthread_create(&(tid[args.num_threads-1]), &tattr, avg_rows, (void*)thread_args);
uint16_t *iters[args.num_threads];
for (int i = 0; i < args.num_threads; i++)
pthread_join(tid[i], &(iters[i]));
/*****************************************************************************************/
end = clock();
/************************* end timer and all threads completed *************************/
/*****************************************************************************************/
if (debug) {
printf("Final array:\n");
print_mat(args.size+2, read);
}
/*****************************************************************************************/
/********************************* clean up semaphores *********************************/
if (sem_close(arrive) != 0)
perror("close arrive");
if (sem_close(go) != 0)
perror("close go");
if (sem_close(done) != 0)
perror("close complete");
if (sem_unlink("arrive") != 0)
perror("unlink arrive");
if (sem_unlink("go") != 0)
perror("unlink go");
if (sem_unlink("complete") != 0)
perror("unlink complete");
/******************************* end clean up semaphores *******************************/
/*****************************************************************************************/
/*****************************************************************************************/
/******************************** write results to file ********************************/
FILE *out = fopen("jacobi_c.out", "w");
fprintf(out, "Arguments:\n");
fprintf(out, " %s:\t%u\n", "size", args.size);
fprintf(out, " %s:\t%u\n", "threads", args.num_threads);
fprintf(out, " %s:\t%.4f\n", "left", args.lft);
fprintf(out, " %s:\t%.4f\n", "top", args.top);
fprintf(out, " %s:\t%.4f\n", "right", args.rgt);
fprintf(out, " %s:\t%.4f\n", "bottom", args.btm);
fprintf(out, " %s:\t%.4f\n", "epsilon", args.eps);
fprintf(out, "\n\nFinal Grid:\n");
for (uint32_t i = 0; i < args.size+2; i++) {
for (uint32_t j = 0; j < args.size+2; j++)
fprintf(out, "%9.4f", read[i][j]);
fprintf(out, "\n\n");
}
fclose(out);
/***************************** end writing results to file *****************************/
/*****************************************************************************************/
exec_time = ((double)(end-beg))/CLOCKS_PER_SEC;
printf("total threads: %u\n", args.num_threads);
for (uint8_t i = 0; i < args.num_threads; i++)
printf("thread %u iterations: %u\n", i, *iters[i]);
printf("execution time: %us %u\xC2\xB5s\n", (uint32_t)exec_time, ((uint32_t)(exec_time*1000000))%1000000);
}
/* Print the run configuration to stdout, one "name:<TAB>value" line per field. */
void print_args(Args args) {
    /* The real-valued fields all share one format, so drive them from a table. */
    const struct {
        const char *label;
        float value;
    } reals[] = {
        { "left", args.lft },
        { "top", args.top },
        { "right", args.rgt },
        { "bottom", args.btm },
        { "eps", args.eps },
    };

    printf("Arguments:\n");
    printf(" %s:\t%u\n", "size", args.size);
    printf(" %s:\t%u\n", "threads", args.num_threads);
    for (size_t k = 0; k < sizeof reals / sizeof reals[0]; ++k) {
        printf(" %s:\t%.4f\n", reals[k].label, reals[k].value);
    }
}
/*
 * Print a size x size matrix to stdout: every element as "%9.4f", each row
 * followed by a blank line.
 */
void print_mat(uint32_t size, float **array) {
    for (uint32_t row = 0; row != size; ++row) {
        const float *cells = array[row];
        for (uint32_t col = 0; col != size; ++col) {
            printf("%9.4f", cells[col]);
        }
        printf("\n\n");
    }
}
/*
 * Copy the leading size x size part of matrix b into matrix a.
 *
 * size: number of rows to copy and number of floats copied per row
 * a:    destination rows (each at least `size` floats long)
 * b:    source rows; a[i] and b[i] must not overlap
 */
void cpy_mat(uint32_t size, float **a, float **b) {
    /* Index with the same unsigned type as `size` to avoid a signed/unsigned
     * comparison (and signed overflow for very large sizes). */
    for (uint32_t i = 0; i < size; i++)
        memcpy(a[i], b[i], sizeof(float) * size);
}
void *avg_rows(void *a) {
float exec_time;
float wait_time;
float totl_time;
totl_time = clock();
ThreadArgs *args = a;
uint16_t count = 0;
float max_dif = args->eps+1;
float dif = 0;
sem_wait(done);
if (thread) {
printf("Thread Args:\n");
printf(" low: %u\n", args->low);
printf(" high: %u\n", args->high);
printf(" thread: %u\n", args->cur);
}
sem_post(done);
for (; max_dif > args->eps && count < 500; count++) {
max_dif = 0;
//arriving at next iteration
exec_time = (float)clock();
if (sem_wait(arrive) != 0)
perror("arrive wait error");
wait_time = ((float)clock()-exec_time)/CLOCKS_PER_SEC;
if (thread) printf("thread %u: wait_time = %us %u\xC2\xB5s\n", args->cur, (uint32_t)wait_time, ((uint32_t)(wait_time*1000000))%1000000);
num_arrived++;
if (thread) printf("thread %u: process has arrived\n", args->cur);
if (num_arrived==(args->nt-num_complete)) {
if (thread) printf("thread %u: all processes are here\n", args->cur);
if (thread) printf("thread %u: remaining = %u\n", args->cur, args->nt-num_complete);
cpy_mat(args->size+2, args->read, args->write);
sem_wait(done);
for (uint8_t i = 0; i < (args->nt-num_complete); i++) {
if (sem_post(go) != 0)
perror("go post error");
}
num_arrived = 0;
sem_post(done);
}
if (sem_post(arrive) != 0)
perror("arrive post error");
//end arriving at net iteration
if (thread) printf("thread %u: waiting for go...\n", args->cur);
if (sem_wait(go) != 0)
perror("go wait error");
//calculate the averages into write
exec_time = clock();
for (uint32_t i = args->low; i < args->high+1; i++) {
if (thread) printf("thread %u: low = %u, high = %u, cur = %u\n", args->cur, args->low, args->high, i);
for (uint32_t j = 1; j < args->size+1; j++) {
args->write[i][j] = (args->read[i-1][j]+args->read[i+1][j]+args->read[i][j-1]+args->read[i][j+1])*.25;
}
}
for (uint32_t i = args->low; i < args->high+1; i++) {
for (uint32_t j = 1; j < args->size+1; j++) {
dif = fabs(args->write[i][j]-args->read[i][j]);
if (dif > max_dif) max_dif = dif;
}
}
exec_time = ((float)clock()-exec_time)/CLOCKS_PER_SEC;
if (thread) printf("thread %u: exec_time = %us %u\xC2\xB5s\n", args->cur, (uint32_t)exec_time, ((uint32_t)(exec_time*1000000))%1000000);
if (thread) printf("thread %u: max delta = %.4f\n", args->cur, max_dif);
}
sem_wait(done);
num_complete++;
if (thread) printf("thread %u: completed = %u\n", args->cur, num_complete);
if (num_arrived==(args->nt-num_complete)) {
if (thread) printf("thread %u: all processes are here\n", args->cur);
if (thread) printf("thread %u: remaining = %u\n", args->cur, args->nt-num_complete);
cpy_mat(args->size+2, args->read, args->write);
for (uint8_t i = 0; i < (args->nt-num_complete); i++) {
if (sem_post(go) != 0)
perror("go post error");
}
num_arrived = 0;
}
sem_post(done);
uint16_t *niters = calloc(sizeof(uint16_t), 1);
*niters = count;
if (thread) printf("thread %u: iterations = %u\n", args->cur, *niters);
totl_time = ((float)clock()-totl_time)/CLOCKS_PER_SEC;
if (thread) printf("thread %u: totl_time = %us %u\xC2\xB5s\n", args->cur, (uint32_t)totl_time, ((uint32_t)(totl_time*1000000))%1000000);
return (void*)niters;
}