I have an assignment to optimize a piece of code to use pthreads. Here is the original code.
Code:
// matmul_static .c
// CS4540 Fall 2010
// kapenga
//
// This program is one of a set of three programs that show a
// matrix itteration. The difference between the three is the way
// space for the arrays is allocated:
// matmul_stack uses local (automatic) variables for the arrays,
// which use space on the stack for the arrays. The array
// sizes are dynamic.
// matmul_static uses global (external) variables for the arrays,
// which uses space on the stack for the arrays. The maximum
// array sizes are compiled in.
// matmul_heap uses dynamic (malloced) space for the arrays,
// which uses space on the heap for the arrays. The array
// sizes are dynamic.
//
// The following itteretion can be used to solve linear systems
// t_{i+1} = A t_i + b
// If the itteration converges to t, then t == t_{i+1} == t_i
// So t = A t + b
// or (I-a) t = b
// where, I is the n*n idenity matrix
// There are several important applied problems where convergence
// will take place. One such case is when for
// each row of A ( rows 0 <= i < n)
// sum(j=0 ... n-1) abs(a[i][j]) < 1.0
// Then the itteration will converge, assuming no roundoff or overflow.
// Example
// % ./matmul_static 4 10 5
//
// a=
// 0.189331 0.147829 -0.009582 0.012830
// -0.020409 0.222627 0.073037 0.042701
// 0.069882 0.228326 -0.001161 0.024936
// 0.116375 -0.100117 0.229832 0.022235
//
// b=
// 2.411774 9.837874 6.251698 6.576916
//
// itt error
// 0 2.878398e+00
// 1 8.266521e-01
// 2 2.688652e-01
// 3 8.817662e-02
// 4 2.832084e-02
// 5 9.015857e-03
//
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// These two function are not ansi C so they do not appear from the
// libstd.h header if the gcc option -std=c99 option is used.
// I should see if there is a safe way to include them from stdlib.h
// and not place them explicitly here, which is bad style.
void srand48(long int seedval);
double drand48(void);
// Note that external variables (the arrays below) can not by dynamically
// sized. So the sizes must be compiled in and are fixed at compile time.
// Using either automatic or malloced space, the arrays sizes could be
// dynamicly allocated and adjusted to the probemn size of without
// recompiling.
#define N 100
double a[N][N];// transformation matrix
double b[N]; // transformation vector
double ts[N]; // solution vector
double ts1[N]; // solution vector
int main(int argc, char *argv[])
{
int n=4; // problenm size
int seed=10;// seed for srand48() / drand48()
double *t=ts; // pointer to solution vector
double *t1=ts1;// pointer to next itteration of solution vector
double *ttemp; // used to swap t1 and t at each itteration
int itt_max=5;// number of itterations to preform
int itt; // current itteration
int i, j; // indices into arrays
double sum; // computes the inner products for A * t
double error; // max | t1[i] - t[i] |
double errori; // | t1[i] - t[i] |
char ch; // for error checking on command line args.
if( argc == 4 )
{
if( (sscanf(argv[1],"%d %[^ /t]", &n, &ch) != 1) ||
(sscanf(argv[2],"%d %[^ /t]", &seed, &ch) != 1) ||
(sscanf(argv[3],"%d %[^ /t]", &itt_max, &ch) != 1) )
{
fprintf(stderr," ERROR : useage: %s [ <n> <seed> <itt_max>]\n", argv[0]);
return(1);
}
}
else if(argc != 1 )
{
fprintf(stderr," ERROR : useage: %s [ <n> <seed> <itt_max>]\n", argv[0]);
return(1);
}
if( (n<1) || (N<n) )
{
fprintf(stderr," ERROR : n must be positive and <= %d.\n", N);
return(1);
}
// Generate matrix a with | eigenvalues | < 1
srand48((long int)seed);
printf("\n a=\n");
for(i=0; i< n; i++)
{
for(j=0; j< n; j++)
{
a[i][j] = 1.999 * (drand48() - 0.5) / n;
printf("%10.6f ", a[i][j]);
}
printf("\n");
}
printf("\n b=\n");
// Generate vector b
for(i=0; i< n; i++)
{
b[i] = 10.0 * drand48();
printf("%10.6f ", b[i]);
}
printf("\n");
// Initialize t
for(i=0; i< n; i++)
{
t[i] = b[i];
}
printf("\n itt error\n");
for(itt=0; itt<=itt_max; itt++)
{
error=0.0;
for(i=0; i< n; i++)
{
sum = 0.0;
for(j=0; j< n; j++)
{
sum += a[i][j] * t[j];
}
t1[i] = sum + b[i];
errori = fabs(t1[i]-t[i]);
if(errori > error)
{
error=errori;
}
}
ttemp = t1;
t1 = t;
t = ttemp;
printf("%5d %14.6e\n", itt, error);
}
return(0);
}
Here is my attempt:
Code:
// matmul_static .c
// CS4540 Fall 2010
// kapenga
//
// This program is one of a set of three programs that show a
// matrix itteration. The difference between the three is the way
// space for the arrays is allocated:
// matmul_stack uses local (automatic) variables for the arrays,
// which use space on the stack for the arrays. The array
// sizes are dynamic.
// matmul_static uses global (external) variables for the arrays,
// which uses space on the stack for the arrays. The maximum
// array sizes are compiled in.
// matmul_heap uses dynamic (malloced) space for the arrays,
// which uses space on the heap for the arrays. The array
// sizes are dynamic.
//
// The following itteretion can be used to solve linear systems
// t_{i+1} = A t_i + b
// If the itteration converges to t, then t == t_{i+1} == t_i
// So t = A t + b
// or (I-a) t = b
// where, I is the n*n idenity matrix
// There are several important applied problems where convergence
// will take place. One such case is when for
// each row of A ( rows 0 <= i < n)
// sum(j=0 ... n-1) abs(a[i][j]) < 1.0
// Then the itteration will converge, assuming no roundoff or overflow.
// Example
// % ./matmul_static 4 10 5
//
// a=
// 0.189331 0.147829 -0.009582 0.012830
// -0.020409 0.222627 0.073037 0.042701
// 0.069882 0.228326 -0.001161 0.024936
// 0.116375 -0.100117 0.229832 0.022235
//
// b=
// 2.411774 9.837874 6.251698 6.576916
//
// itt error
// 0 2.878398e+00
// 1 8.266521e-01
// 2 2.688652e-01
// 3 8.817662e-02
// 4 2.832084e-02
// 5 9.015857e-03
//
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <pthread.h>
// These two function are not ansi C so they do not appear from the
// libstd.h header if the gcc option -std=c99 option is used.
// I should see if there is a safe way to include them from stdlib.h
// and not place them explicitly here, which is bad style.
void srand48(long int seedval);
double drand48(void);
// Note that external variables (the arrays below) can not by dynamically
// sized. So the sizes must be compiled in and are fixed at compile time.
// Using either automatic or malloced space, the arrays sizes could be
// dynamicly allocated and adjusted to the probemn size of without
// recompiling.
#define N 100
#define Threads 2
double a[N][N];// transformation matrix
double b[N]; // transformation vector
double ts[N]; // solution vector
double ts1[N]; // solution vector
double error; // max | t1[i] - t[i] |
double errori; // | t1[i] - t[i] |
int n = 4; // problenm size
pthread_mutex_t *mutex;
void *ThreadFunction(void *id);
int main(int argc, char *argv[])
{
int seed = 10;// seed for srand48() / drand48()
double *ttemp;// used to swap t1 and t at each itteration
double *t=ts; // pointer to solution vector
double *t1=ts1;// pointer to next itteration of solution vector
int itt_max = 5;// number of itterations to preform
int itt; // current itteration
int i, j; //indexes
int ID[Threads];
double sum; // computes the inner products for A * t
char ch; // for error checking on command line args.
pthread_t thread[Threads];
if( argc == 4 )
{
if( (sscanf(argv[1],"%d %[^ /t]", &n, &ch) != 1) ||
(sscanf(argv[2],"%d %[^ /t]", &seed, &ch) != 1) ||
(sscanf(argv[3],"%d %[^ /t]", &itt_max, &ch) != 1) )
{
fprintf(stderr," ERROR : 1 useage: %s [ <n> <seed> <itt_max>]\n", argv[0]);
return(1);
}
}
else if(argc != 1 )
{
fprintf(stderr," ERROR : 2 useage: %s [ <n> <seed> <itt_max>]\n", argv[0]);
return(1);
}
if( (n<1) || (N<n) )
{
fprintf(stderr," ERROR : 3 n must be positive and <= %d.\n", N);
return(1);
}
// Generate matrix a with | eigenvalues | < 1
srand48((long int)seed);
printf("\n a=\n");
for(i=0; i< n; i++)
{
for(j=0; j< n; j++)
{
a[i][j] = 1.999 * (drand48() - 0.5) / n;
printf("%10.6f ", a[i][j]);
}
printf("\n");
}
printf("\n b=\n");
// Generate vector b
for(i=0; i< n; i++)
{
b[i] = 10.0 * drand48();
printf("%10.6f ", b[i]);
}
printf("\n");
// Initialize t
for(i=0; i< n; i++)
{
t[i] = b[i];
}
printf("\n itt error\n");
for(itt=0; itt<=itt_max; itt++)
{
error=0.0;
for(i = 0; i < Threads; i++)
{
ID[i] = i;
pthread_create(&thread[i], NULL, ThreadFunction, (void *)&ID[i]);
}
for(i = 0; i < Threads; i++)
{
pthread_join(thread[i], NULL);
}
ttemp = t1;
t1 = t;
t = ttemp;
printf("%5d %14.6e\n", itt, error);
}
return(0);
}
void *ThreadFunction(void *id)
{
int j, i ;
double sum;
double *t=ts; // pointer to solution vector
double *t1=ts1;// pointer to next itteration of solution vector
for(i = *(int *)id; i < n; i += Threads)
{
sum = 0.0;
for(j = 0; j < n; j++)
{
sum += a[i][j] * t[j];
}
pthread_mutex_lock(mutex);
t1[i] = sum + b[i];
errori = fabs(t1[i] - t[i]);
if(errori > error)
{
error = errori;
}
pthread_mutex_unlock(mutex);
}
return NULL;
}
I keep getting seg faults and I can't figure out why. I've been working on this forever and my brain is frozen. I think I could use some other pairs of eyes and perhaps a bit more understanding of pthreads. Anyone have any input? Anything at all would be very helpful.