PDA

View Full Version : threaded merge sort



AusTex
05-01-2005, 12:23 PM
I am working on a merge sort of two files of integers, and am fuzzy on some of the logic\syntax. I need two threads, each of which will open a file, read its contents into an array, and then sort the array using qsort. One thread will operate on file f1.dat(10000 numbers) and leave its sorted contents in array x, while the other will use file f2.dat(11999 numbers) and array y. When the threads have finished, I need to merge x and y into array z so that z is also sorted. Also, I am wanting to use one function for both threads, passing the array, its size, and the name of the file in a structure.

Currently, I have a struct set up with the arrays, their sizes, and the name of the two files, a comparison function for qsort, a sort function for the contents of the files, a merge sort for the z array, and of course the creation of the threads.

I think the biggest area I have questions about is how to properly pass a structure to a generalized sort function and then sort data using that structure parameter.

So far:



#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>

typedef struct Data
{
int x[10000];
int size;
char *filename;
} data_t;


// Comparison function for qsort
int cmpint (int *a, int *b)
{
if (*a < *b)
return -1;
else
if (*a > *b)
return 1;
else
return 0;
}


// Sort contents of files
// How to generalize this to work with two separate arrays???????
void *sort(void *data)
{
FILE *fp;
//long lSize = ftell (fp);
fp = fopen(filename.x[], "rb");
fread(x, 10000, size, fp);

// data_t *my_data = (data_t *) data;

qsort(x,1000,sizeof(int),cmpint);
pthread_exit(0);

}


int main() {

int i=0,j=0,k=0;
char z[22000];
pthread_t px, py;

pthread_create(&px, NULL, sort, (void *) &data_x);
pthread_create(&py, NULL, sort, (void *) &data_x);

pthread_join(px, 0);
pthread_join (py, 0);

// Merge x and y into z, sorted
while (i<1000 || j < 1000)
{
if (j == 1000)
z[k++]=x[i++];

else
if (i<1000 && (x[i]<y[j]))
z[k++]=x[i++];
else
z[k++]=y[j++];
}


}

Salem
05-02-2005, 05:17 AM
> pthread_create(&px, NULL, sort, (void *) &data_x);
> pthread_create(&py, NULL, sort, (void *) &data_x);
You're supposed to create two data instances, so each thread has something to work with



data_t data[2];
data[0].filename = "foo";
data[1].filename = "bar";
// do you set the size here, or get the size when you open the file?
pthread_create(&px, NULL, sort, &data[0] );
pthread_create(&py, NULL, sort, &data[1] );


Then your thread function is


void *sort(void *p)
{
FILE *fp;
data_t *data = p; // cast back to the correct type
fp = fopen( data->filename, "rb");

// read upto min of buffer size or file size
data->size = fread( data->x, sizeof(int), 10000, fp );

qsort( data->x, data->size, sizeof(int), cmpint );

pthread_exit(0);
return NULL;
}


> while (i<1000 || j < 1000)
Use the output data[0].size and data[1].size

AusTex
05-02-2005, 11:17 AM
Thanks for helping Salem. Everything seems to be ok, but I keep getting a compile error, 18: syntax error before '->', and I can't for the life of me figure out what could be causing the error before line 18.




#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <pthread.h>

#define MAX_ARRAY_SIZE 12000

typedef struct
{
int x[MAX_ARRAY_SIZE];
int size;
char *filename;
}data_t;

data_t *data_x;
data_t *data_y;

data_x->filename="file1";
data_x->size=10000;

data_y->filename="file2";
data_y->size=12000;




// Comparison function for qsort
int cmpint (int *a, int *b)
{
if (*a < *b)
return -1;
else
if (*a > *b)
return 1;
else
return 0;
}

// Sort contents of files
void *sort(void *data)
{
FILE *fp;
data_t *my_data = (data_t *) data;
fp = fopen(my_data->filename, "rb");
fread(my_data->x, 10000, my_data->size, fp);
qsort(my_data->x,1000,sizeof(int),cmpint);
pthread_exit(0);

}


int main() {

data_t *data_x=malloc(sizeof(data_t));
data_t *data_y=malloc(sizeof(data_t));

int i=0,j=0,k=0;
char z[22000];
pthread_t px, py;

pthread_create(&px, NULL, sort, (void *) &data_x);
pthread_create(&py, NULL, sort, (void *) &data_y);

pthread_join(px, 0);
pthread_join (py, 0);

// Merge x and y into z, sorted
while (i<10000 || j < 12000)
{
if (j == 12000)
z[k++]=data_x->x[i++];

else
if (i<10000 && (data_x->x[i]<data_y->x[j]))
z[k++]=data_x->x[i++];
else
z[k++]=data_y->x[j++];
}


}

Salem
05-02-2005, 11:28 AM
You have to put the statements inside functions!

0rion
05-04-2005, 04:03 AM
typedef struct
{
int x[MAX_ARRAY_SIZE];
int size;
char *filename;
}data_t;

data_t *data_x;
data_t *data_y;

data_x->filename="file1";
data_x->size=10000;

data_y->filename="file2";
data_y->size=12000;


You need to malloc data_x and data_y before use.