The problem is that the threading has too much overhead to be useful here.(..as evident from the "sys" time taken.)
Yet, I've read in many places that merge sort is a good candidate for concurrency.
(Also, I'm not testing this on a very good computer now... When I can do so..(tomorrow), the results may improve. If you can test this on a juicy machine now, please do.)
I understand the following implementation could be little naive (any tips to improve is welcome).
Here is the code:
Code:
#include<algorithm>
#include<thread>
#include<cstdlib>
void merge_mix(int *array1,int count1,int* array2,int count2) //Merges and distributes the result over array1 and array2
{
int ptr_t(0),ptr_1(0),ptr_2(0); //offsets to temp, array1, array2
int * temp = new int[count1+count2];
while((ptr_1<count1) && (ptr_2<count2))
{
while((ptr_1<count1) && (*(array1+ptr_1)<=*(array2+ptr_2)))
{
*(temp+ptr_t++) = *(array1+ptr_1);
ptr_1++;
}
if (ptr_1<count1)
{
while((ptr_2<count2) && (*(array2+ptr_2)<=*(array1+ptr_1)))
{
*(temp+ptr_t++) = *(array2+ptr_2);
ptr_2++;
}
}
}
std::copy(array1+ptr_1,array1+ptr_1 + (count1-ptr_1),temp+ptr_t);
std::copy(temp,temp+count1,array1);
std::copy(array2+ptr_2,array2+ptr_2+(count2-ptr_2),temp+ptr_t);
std::copy(temp+count1,temp+count1+count2,array2);
delete [] temp;
}
void merge_sort(int data[],int size)
{
if(size==1)return;
if(size<10)
{
std::__insertion_sort(data,data+size);
return;
}
merge_sort(data,size/2);
merge_sort(data+size/2,(size+1)/2);
merge_mix(data,size/2,data+size/2,(size+1)/2);
return;
}
void threaded_merge_sort(int data[],int size)
{
if(size==1)return;
if(size<100)
{
std::__insertion_sort(data,data+size);
return;
}
std::thread foo(threaded_merge_sort,data,size/2);
std::thread bar(threaded_merge_sort,data+size/2,(size+1)/2);
foo.join();
bar.join();
merge_mix(data,size/2,data+size/2,(size+1)/2);
return;
}
int main(int argc,char** argv)
//arg1: Size of random int array.
//arg2: -t or -n ..threaded or normal
{
srand(time(NULL));
int* dat;
int n = atoi(argv[1]);
dat = new int[n];
for(int i=0;i<n;i++)
*(dat+i)=rand();
if(argv[2][1]=='t')threaded_merge_sort(dat,n);
else if(argv[2][1]=='n')merge_sort(dat,n);
}
Here is the testing script:
Code:
#!/bin/bash
#$1 has the number of ints to be sorted.
g++ a.cpp -std=c++0x -lpthread -o test
echo "Time Taken for normal version ($1 nos):"
time ./test $1 -normal
echo "Time Taken for threaded version ($1 nos):"
time ./test $1 -threaded
Result:
[manasij7479@manasijn ~]$ ./test.sh 10000
Time Taken for normal version (10000 nos):
real 0m0.013s
user 0m0.011s
sys 0m0.001s
Time Taken for threaded version (10000 nos):
real 0m0.315s
user 0m0.018s
sys 0m0.470s
(I also have another question about the merging part. Is it possible to skip the memory allocation and copy() ....and do all operations on the two arrays themselves ?)