Need help with pthreads

**TheTaoOfBill** · 04-03-2011

I have an assignment to optimize a piece of code to use pthreads. Here is the original code.

Code:

// matmul_static .c
// CS4540 Fall 2010
// kapenga
//
// This program is one of a set of three programs that show a
// matrix itteration. The difference between the three is the way
// space for the arrays is allocated:
//   matmul_stack uses local (automatic) variables for the arrays,
//        which use space on the stack for the arrays. The array
//        sizes are dynamic.
//   matmul_static uses global (external) variables for the arrays,
//        which uses space on the stack for the arrays. The maximum
//        array sizes are compiled in.
//   matmul_heap uses dynamic (malloced) space for the arrays,
//        which uses space on the heap for the arrays. The array
//        sizes are dynamic.
//
// The following itteretion can be used to solve linear systems
//   t_{i+1} = A t_i + b
// If the itteration converges to t, then t == t_{i+1} == t_i
// So t = A t + b
//   or  (I-a) t = b
//   where, I is the n*n idenity matrix
// There are several important applied problems where convergence 
// will take place. One such case is when for
// each row of A ( rows 0 <= i < n)
//             sum(j=0 ... n-1) abs(a[i][j])  < 1.0    
// Then the itteration will converge, assuming no roundoff or overflow.
// Example
// % ./matmul_static 4 10 5
//
//  a=
//  0.189331   0.147829  -0.009582   0.012830
// -0.020409   0.222627   0.073037   0.042701
//  0.069882   0.228326  -0.001161   0.024936
//  0.116375  -0.100117   0.229832   0.022235
//
//  b=
//  2.411774   9.837874   6.251698   6.576916
//
//  itt  error
//    0   2.878398e+00
//    1   8.266521e-01
//    2   2.688652e-01
//    3   8.817662e-02
//    4   2.832084e-02
//    5   9.015857e-03
//
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
// These two function are not ansi C so they do not appear from the
// libstd.h  header if the gcc option -std=c99 option is used.
// I should see if there is a safe way to include them from stdlib.h
// and not place them explicitly here, which is bad style.
void srand48(long int seedval);
double drand48(void);

// Note that external variables (the arrays below) can not by dynamically
// sized. So the sizes must be compiled in and are fixed at compile time.
// Using either automatic or malloced space, the arrays sizes could be
// dynamicly allocated and adjusted to the probemn size of without
// recompiling. 
#define N 100

double 	a[N][N];// transformation matrix
double 	b[N];   // transformation vector 
double  ts[N];	// solution vector
double  ts1[N];	// solution vector

int main(int argc, char *argv[]) 
{
	int	n=4;	// problenm size
	int	seed=10;// seed for srand48() / drand48()
	double  *t=ts;  // pointer to solution vector
	double  *t1=ts1;// pointer to next itteration of solution vector
	double	*ttemp;	// used to swap t1 and t at each itteration
	int	itt_max=5;// number of itterations to preform
	int	itt;	// current itteration 
	int	i, j;   // indices into arrays
	double	sum;	// computes the inner products for A * t
	double 	error;  // max | t1[i] - t[i] |
	double 	errori; // | t1[i] - t[i] |
	char	ch;	// for error checking on command line args.

	if( argc == 4 ) 
	{
  		if( (sscanf(argv[1],"%d %[^ /t]", &n, &ch) != 1) ||
	      (sscanf(argv[2],"%d %[^ /t]", &seed, &ch) != 1) ||
   	   (sscanf(argv[3],"%d %[^ /t]", &itt_max, &ch) != 1) ) 
		{
 			fprintf(stderr," ERROR : useage: %s [ <n> <seed> <itt_max>]\n", argv[0]); 
    		return(1);
  		}
	} 
	else if(argc != 1 ) 
	{
  		fprintf(stderr," ERROR : useage: %s [ <n> <seed> <itt_max>]\n", argv[0]); 
  		return(1);
	} 
	if( (n<1) || (N<n) ) 
	{
  		fprintf(stderr," ERROR :  n must be positive and <= %d.\n", N);
  		return(1);
	}

	// Generate matrix a with | eigenvalues | < 1
	srand48((long int)seed);
	printf("\n  a=\n");
	for(i=0; i< n; i++) 
	{
  		for(j=0; j< n; j++) 
		{
    		a[i][j] = 1.999 * (drand48() - 0.5) / n;
    		printf("%10.6f ", a[i][j]);
  		}
  		printf("\n");
	}
	printf("\n  b=\n");
	// Generate vector b 
	for(i=0; i< n; i++) 
	{
  		b[i] = 10.0 * drand48();
  		printf("%10.6f ", b[i]);
	}
	printf("\n");
	// Initialize t
	for(i=0; i< n; i++) 
	{
  		t[i] = b[i];
	}

	printf("\n  itt  error\n");
	for(itt=0; itt<=itt_max; itt++) 
	{
  		error=0.0;
  		for(i=0; i< n; i++) 
		{
    		sum = 0.0;
    		for(j=0; j< n; j++) 
			{
      		sum += a[i][j] * t[j];
    		}
    		t1[i] = sum + b[i];
    		errori = fabs(t1[i]-t[i]);
    		if(errori > error) 
			{
      		error=errori;
    		}
  		}
  		ttemp = t1;
  		t1 = t;
  		t = ttemp;
  		printf("%5d %14.6e\n", itt, error); 
	}

	return(0);
}

Here is my attempt:

Code:

// matmul_static .c
// CS4540 Fall 2010
// kapenga
//
// This program is one of a set of three programs that show a
// matrix itteration. The difference between the three is the way
// space for the arrays is allocated:
//   matmul_stack uses local (automatic) variables for the arrays,
//        which use space on the stack for the arrays. The array
//        sizes are dynamic.
//   matmul_static uses global (external) variables for the arrays,
//        which uses space on the stack for the arrays. The maximum
//        array sizes are compiled in.
//   matmul_heap uses dynamic (malloced) space for the arrays,
//        which uses space on the heap for the arrays. The array
//        sizes are dynamic.
//
// The following itteretion can be used to solve linear systems
//   t_{i+1} = A t_i + b
// If the itteration converges to t, then t == t_{i+1} == t_i
// So t = A t + b
//   or  (I-a) t = b
//   where, I is the n*n idenity matrix
// There are several important applied problems where convergence 
// will take place. One such case is when for
// each row of A ( rows 0 <= i < n)
//             sum(j=0 ... n-1) abs(a[i][j])  < 1.0    
// Then the itteration will converge, assuming no roundoff or overflow.
// Example
// % ./matmul_static 4 10 5
//
//  a=
//  0.189331   0.147829  -0.009582   0.012830
// -0.020409   0.222627   0.073037   0.042701
//  0.069882   0.228326  -0.001161   0.024936
//  0.116375  -0.100117   0.229832   0.022235
//
//  b=
//  2.411774   9.837874   6.251698   6.576916
//
//  itt  error
//    0   2.878398e+00
//    1   8.266521e-01
//    2   2.688652e-01
//    3   8.817662e-02
//    4   2.832084e-02
//    5   9.015857e-03
//
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <pthread.h>
// These two function are not ansi C so they do not appear from the
// libstd.h  header if the gcc option -std=c99 option is used.
// I should see if there is a safe way to include them from stdlib.h
// and not place them explicitly here, which is bad style.
void srand48(long int seedval);
double drand48(void);

// Note that external variables (the arrays below) can not by dynamically
// sized. So the sizes must be compiled in and are fixed at compile time.
// Using either automatic or malloced space, the arrays sizes could be
// dynamicly allocated and adjusted to the probemn size of without
// recompiling. 
#define N 100
#define Threads 2
double 				a[N][N];// transformation matrix
double 				b[N];   // transformation vector 
double  				ts[N];	// solution vector
double  				ts1[N];	// solution vector
double 				error;  // max | t1[i] - t[i] |
double 				errori; // | t1[i] - t[i] |
int					n			=	4;	// problenm size
pthread_mutex_t 	*mutex;

void *ThreadFunction(void *id);

int main(int argc, char *argv[]) 
{
	int			seed		=	10;// seed for srand48() / drand48()
	double		*ttemp;// used to swap t1 and t at each itteration
	double  		*t=ts;  // pointer to solution vector
	double  		*t1=ts1;// pointer to next itteration of solution vector 	
	int			itt_max	=	5;// number of itterations to preform
	int			itt;	// current itteration 
	int 			i, j; //indexes
	int			ID[Threads];
	double		sum;	// computes the inner products for A * t
	char			ch;	// for error checking on command line args.
	pthread_t 	thread[Threads];

	if( argc == 4 ) 
	{
  		if( (sscanf(argv[1],"%d %[^ /t]", &n, &ch) != 1) ||
      	(sscanf(argv[2],"%d %[^ /t]", &seed, &ch) != 1) ||
      	(sscanf(argv[3],"%d %[^ /t]", &itt_max, &ch) != 1) ) 
		{
    		fprintf(stderr," ERROR : 1 useage: %s [ <n> <seed> <itt_max>]\n", argv[0]); 
    		return(1);
  		}
	} 
	else if(argc != 1 ) 
	{
	  	fprintf(stderr," ERROR : 2 useage: %s [ <n> <seed> <itt_max>]\n", argv[0]); 
  		return(1);
	} 
	if( (n<1) || (N<n) ) 
	{
  		fprintf(stderr," ERROR : 3 n must be positive and <= %d.\n", N);
  		return(1);
	}

	// Generate matrix a with | eigenvalues | < 1
	srand48((long int)seed);
	printf("\n  a=\n");
	for(i=0; i< n; i++) 
	{
  		for(j=0; j< n; j++) 
		{
    		a[i][j] = 1.999 * (drand48() - 0.5) / n;
    		printf("%10.6f ", a[i][j]);
  		}
  		printf("\n");
	}
	printf("\n  b=\n");
	// Generate vector b 
	for(i=0; i< n; i++) 
	{
  		b[i] = 10.0 * drand48();
  		printf("%10.6f ", b[i]);
	}
	printf("\n");
	// Initialize t
	for(i=0; i< n; i++) 
	{
  		t[i] = b[i];
	}

	printf("\n  itt  error\n");
	for(itt=0; itt<=itt_max; itt++) 
	{
  		error=0.0;
		for(i = 0; i < Threads; i++)
		{
			ID[i] = i;
			pthread_create(&thread[i], NULL, ThreadFunction, (void *)&ID[i]);
		}
		for(i = 0; i < Threads; i++)
		{
			pthread_join(thread[i], NULL);
		}
		
		ttemp = t1;
  		t1 = t;
  		t = ttemp;
  		printf("%5d %14.6e\n", itt, error); 
	}

	return(0);
}

void *ThreadFunction(void *id)
{
	int j, i ;
	double sum; 
	double  *t=ts;  // pointer to solution vector
	double  *t1=ts1;// pointer to next itteration of solution vector 	

	for(i = *(int *)id; i < n; i += Threads)
	{
		sum = 0.0;
		for(j = 0; j < n; j++)
		{
			sum += a[i][j] * t[j];
		}
		pthread_mutex_lock(mutex);
			t1[i] = sum + b[i];
			errori = fabs(t1[i] - t[i]);
			if(errori > error)
			{
				error = errori;
			}
		pthread_mutex_unlock(mutex);
	}
	return NULL;
}

I keep getting seg faults and I can't figure out why. I've been working on this forever and my brain is frozen. I think I could use some other pairs of eyes and perhaps a bit more understanding of pthreads. Anyone have any input? Anything at all would be very helpful.

**cas** · 04-03-2011

When you get a segfault, the best thing to do is run your program under a debugger. Good free debuggers are Valgrind and gdb; if you're using an IDE, it might have an integrated debugger.

Also, see if you can turn up your compiler's warning level. A helpful compiler will tell you that this:

Code:

for(i = *(int *)id; i < n; id + Threads)

has an expression that does nothing ("id + Threads" specifically). Presumably you want to increment i, but I'm not entirely sure. In fact, id + Threads is actually invalid code, since id is a void*, and you can't perform arithmetic on a void*.

At any rate, a debugger should at least help you pinpoint where the problem is, if not why it's a problem.

**TheTaoOfBill** · 04-03-2011

Originally Posted by cas

When you get a segfault, the best thing to do is run your program under a debugger. Good free debuggers are Valgrind and gdb; if you're using an IDE, it might have an integrated debugger.

Also, see if you can turn up your compiler's warning level. A helpful compiler will tell you that this:

Code:

for(i = *(int *)id; i < n; id + Threads)

has an expression that does nothing ("id + Threads" specifically). Presumably you want to increment i, but I'm not entirely sure. In fact, id + Threads is actually invalid code, since id is a void*, and you can't perform arithmetic on a void*.

At any rate, a debugger should at least help you pinpoint where the problem is, if not why it's a problem.

Ooops! yeah okay that should be for(i = *(int *)id; i < n; i += Threads)

thanks! Still getting a segfault though...I've tried going through gdb for hours but I cannot find the problem...

EDIT: okay I fixed the segfault by changing the mutex initialization. Instead of creating a pointer to a mutex I just created a mutex and when I locked it I sent in the address of the mutex. Not sure why that changed anything but it worked.

Now the problem is it's giving the same error in every iteration.

**cas** · 04-03-2011

okay I fixed the segfault by changing the mutex initialization. Instead of creating a pointer to a mutex I just created a mutex and when I locked it I sent in the address of the mutex. Not sure why that changed anything but it worked.

When you create a pointer to a mutex, it's not actually pointing at anything. There is no mutex. You need an actual mutex (suitably initialized), which is why creating a mutex and then passing its address worked. You could use malloc() to allocate space for your mutex pointer, but there's really no point; the way you solved the problem is the proper approach.

**TheTaoOfBill** · 04-04-2011

Alright this is the updated code so far...

Code:

// matmul_static .c
// CS4540 Fall 2010
// kapenga
//
// This program is one of a set of three programs that show a
// matrix itteration. The difference between the three is the way
// space for the arrays is allocated:
//   matmul_stack uses local (automatic) variables for the arrays,
//        which use space on the stack for the arrays. The array
//        sizes are dynamic.
//   matmul_static uses global (external) variables for the arrays,
//        which uses space on the stack for the arrays. The maximum
//        array sizes are compiled in.
//   matmul_heap uses dynamic (malloced) space for the arrays,
//        which uses space on the heap for the arrays. The array
//        sizes are dynamic.
//
// The following itteretion can be used to solve linear systems
//   t_{i+1} = A t_i + b
// If the itteration converges to t, then t == t_{i+1} == t_i
// So t = A t + b
//   or  (I-a) t = b
//   where, I is the n*n idenity matrix
// There are several important applied problems where convergence 
// will take place. One such case is when for
// each row of A ( rows 0 <= i < n)
//             sum(j=0 ... n-1) abs(a[i][j])  < 1.0    
// Then the itteration will converge, assuming no roundoff or overflow.
// Example
// % ./matmul_static 4 10 5
//
//  a=
//  0.189331   0.147829  -0.009582   0.012830
// -0.020409   0.222627   0.073037   0.042701
//  0.069882   0.228326  -0.001161   0.024936
//  0.116375  -0.100117   0.229832   0.022235
//
//  b=
//  2.411774   9.837874   6.251698   6.576916
//
//  itt  error
//    0   2.878398e+00
//    1   8.266521e-01
//    2   2.688652e-01
//    3   8.817662e-02
//    4   2.832084e-02
//    5   9.015857e-03
//
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <pthread.h>
// These two function are not ansi C so they do not appear from the
// libstd.h  header if the gcc option -std=c99 option is used.
// I should see if there is a safe way to include them from stdlib.h
// and not place them explicitly here, which is bad style.
void srand48(long int seedval);
double drand48(void);

// Note that external variables (the arrays below) can not by dynamically
// sized. So the sizes must be compiled in and are fixed at compile time.
// Using either automatic or malloced space, the arrays sizes could be
// dynamicly allocated and adjusted to the probemn size of without
// recompiling. 
#define N 100
#define Threads 2
double 				a[N][N];// transformation matrix
double 				b[N];   // transformation vector 
double  				ts[N];	// solution vector
double  				ts1[N];	// solution vector
double  				*t=ts;  // pointer to solution vector
double  				*t1=ts1;// pointer to next itteration of solution vector 	
double 				error;  // max | t1[i] - t[i] |

int					n			=	4;	// problenm size
pthread_mutex_t 	*mutex;

void *ThreadFunction(void *id);

int main(int argc, char *argv[]) 
{
	int			seed		=	10;// seed for srand48() / drand48()
	double		*ttemp;// used to swap t1 and t at each itteration
	
	int			itt_max	=	5;// number of itterations to preform
	int			itt;	// current itteration 
	int 			i, j; //indexes
	int			ID[Threads];
	char			ch;	// for error checking on command line args.
	pthread_t 	thread[Threads];
	mutex = (pthread_mutex_t *) malloc (sizeof (pthread_mutex_t));
	pthread_mutex_init (mutex, NULL);
	if( argc == 4 ) 
	{
  		if( (sscanf(argv[1],"%d %[^ /t]", &n, &ch) != 1) ||
      	(sscanf(argv[2],"%d %[^ /t]", &seed, &ch) != 1) ||
      	(sscanf(argv[3],"%d %[^ /t]", &itt_max, &ch) != 1) ) 
		{
    		fprintf(stderr," ERROR : 1 useage: %s [ <n> <seed> <itt_max>]\n", argv[0]); 
    		return(1);
  		}
	} 
	else if(argc != 1 ) 
	{
	  	fprintf(stderr," ERROR : 2 useage: %s [ <n> <seed> <itt_max>]\n", argv[0]); 
  		return(1);
	} 
	if( (n<1) || (N<n) ) 
	{
  		fprintf(stderr," ERROR : 3 n must be positive and <= %d.\n", N);
  		return(1);
	}

	// Generate matrix a with | eigenvalues | < 1
	srand48((long int)seed);
	printf("\n  a=\n");
	for(i=0; i< n; i++) 
	{
  		for(j=0; j< n; j++) 
		{
    		a[i][j] = 1.999 * (drand48() - 0.5) / n;
    		printf("%10.6f ", a[i][j]);
  		}
  		printf("\n");
	}
	printf("\n  b=\n");
	// Generate vector b 
	for(i=0; i< n; i++) 
	{
  		b[i] = 10.0 * drand48();
  		printf("%10.6f ", b[i]);
	}
	printf("\n");
	// Initialize t
	for(i=0; i< n; i++) 
	{
  		t[i] = b[i];
	}


	printf("\n  itt  error\n");
	for(itt=0; itt<=itt_max; itt++) 
	{
  		error=0.0;
		for(i = 0; i < Threads; i++)
		{
			//printf("Creating thread %d\n", i);
			ID[i] = i;
			//printf("Sending in %d\n", ID[i]);
			pthread_create(&thread[i], NULL, ThreadFunction, (void *)&ID[i]);
			//printf("moving on to next thread\n");
		}
		for(i = 0; i < Threads; i++)
		{
			//printf("joining thread %d\n", i);
			pthread_join(thread[i], NULL);
		}
		
		ttemp = t1;
  		t1 = t;
  		t = ttemp;
  		printf("%5d %14.5e\n", itt, error); 
    
		
  			
  		
	}

	return(0);
}

void *ThreadFunction(void *id)
{
	int j, i = 11;
	double sum; 
	double  *t=ts;  // pointer to solution vector
	double  *t1=ts1;// pointer to next itteration of solution vector 	
	double 	errori; // | t1[i] - t[i] |
	//printf("starting thread id %d\n", *(int *)id);
	for(i = *(int *)id; i < n; i += Threads)
	{
		sum = 0.0;
		errori = 0.0;
		for(j = 0; j < n; j++)
		{
			sum += a[i][j] * t[j];
		}
		//printf("About to enter mutex with thread #%d\n", *(int *)id);
		pthread_mutex_lock(mutex);
			//printf("Thread #%d has entered the mutex!\n", *(int *)id);
			t1[i] = sum + b[i];
			errori = fabs(t1[i] - t[i]);
			//printf("%10.5e - %10.5e = %10.5e\n", t1[i], t[i], errori);
			//printf("errori = %14.5e error = %14.5e\n", errori, error);
			if(errori > error)
			{
				error = errori;
			}		
			//printf("Thread #%d is about to leave mutex!\n", *(int *)id);
		pthread_mutex_unlock(mutex);
		//printf("Thread #%d has left mutex!\n", *(int *)id);
	}
	pthread_exit(0);
}

No segfault but now the int error is giving odd results. Without threads the int error values are different with every itteration. But with the threads they are the same each time. Error is only changed under the mutex so they shouldn't be shared. I'm really stumped here

Thread: Need help with pthreads

Thread Tools

Search Thread

Display

Need help with pthreads

Similar Threads

Threads other than pthreads?

using pthreads with an API that uses pthreads

Pthreads performance

Difference between win32 and linux pthreads

pthreads and resources