# Thread: Urgent problem with a merge sort.

1. ## Urgent problem with a merge sort.

Hello people, I'm new here but got recommended by a friend so here I am.

For one of my university exercises, I have to create and implement a merge sorting algorithm, which is to sort data provided to us, which happens to be an example set of data for books from Amazon, and the case study is that somebody would like the cheapest one that is best, although this is irrelevant.

My problem is, that my merge sort only works for sorting things from low to high, so the sorting of 'price' in my algorithm, works fine, but when I sort by 'rating' or 'relevance' (which does high to low), I get something similar to the following.

5 7.99 7.6 2
4.5 12.49 7.9 1
3.5 10.49 5.2 9
3.5 20.99 6 5
3 19.49 0.3 18
3 22.99 8.9 11
3 27.99 7.7 10
2.5 26.49 7.2 19
2.5 15.49 9.1 4
2 21.49 3.5 20
2 16.99 7.6 17
2 27.49 8.5 16
2 11.49 9.1 12
2 16.99 6.2 3
1.5 19.49 3.4 14
1.5 13.99 1 7
1 3.99 0.6 15
1 26.49 0.8 13
1 9.99 8 6
4.43497e-312 2.42092e-322 0 0

The last line clearly doesn't make sense, and I have no idea why it's doing it.

Just a note, if I reverse the 'rating' and 'relevance' search functions to do low to high, they work fine.

Here is my code:
Code:
```void merge(B* a, int low, int high, int mid, int(*compar)(const void *, const void *))
{
int i, j, k;
B* c = (B *)malloc((high - low + 1) * sizeof(B));
i=low;
j=mid+1;
k=0;

while((i<=mid)&&(j<=high))
{

if(compar(&a[i],&a[j]) <= 0)
{
c[k]=a[i];
k++;
i++;
}

else
{
c[k]=a[j];
k++;
j++;
}
}

while(i<=mid)
{
c[k]=a[i];
k++;
i++;
}

while(j<=high)
{
c[k]=a[j];
k++;
j++;
}

memcpy(a + low, c, (high - low + 1) * sizeof(B));
free(c);
}

void mergesort(B* a, int low, int high, int(*compar)(const void *, const void *))
{
int mid;

if(low<high)
{
mid=(low+high)/2;
mergesort(a,low,mid,compar);
mergesort(a,mid+1,high,compar);
merge(a,low,high,mid,compar);
}
}```
Just so you know what "compar" is, when mergesort is called, we provide it with which compare function is used, so we can allow the user to select whether to sort by price, rating or relevance. These are defined as:

Code:
```int comp_on_rating(const void *a, const void *b)
{
if ((*(B *)a).rating < (*(B *)b).rating)
return -1;
else if ((*(B *)a).rating > (*(B *)b).rating)
return 1;
else
return 0;
}

int comp_on_relev(const void *a, const void *b)
{

if ((*(B *)a).relevance < (*(B *)b).relevance)
return -1;
else if ((*(B *)a).relevance > (*(B *)b).relevance)
return 1;
else
return 0;
}

int comp_on_price(const void *a, const void *b)
{

if ((*(B *)a).price < (*(B *)b).price)
return 1;
else if ((*(B *)a).price > (*(B *)b).price)
return -1;
else
return 0;
}```
The data we are sorting is as follows, if this is required:
Code:
```Stars	Price	Relv	ID
4.5  	12.49	7.9  	 1
5    	 7.99	7.6  	 2
2    	16.99	6.2  	 3
2.5  	15.49	9.1  	 4
3.5  	20.99	6    	 5
1    	 9.99	8    	 6
1.5  	13.99	1    	 7
5    	 8.49	8.3  	 8
3.5  	10.49	5.2  	 9
3    	27.99	7.7  	10
3    	22.99	8.9  	11
2    	11.49	9.1  	12
1    	26.49	0.8  	13
1.5  	19.49	3.4  	14
1    	 3.99	0.6  	15
2    	27.49	8.5  	16
2    	16.99	7.6  	17
3    	19.49	0.3  	18
2.5  	26.49	7.2  	19
2    	21.49	3.5  	20```
This has been driving me insane for hours so I really hope someone can help me out here.

2. Oh yeah and I have searched high and low on the Internet for a solution and I cannot seem to find one, so sorry if this is something stupid.

3. Would you post up the rest of the program? I don't see anything obvious as it is. Off by one errors can be hard to spot.

5. The code, exactly as it currently stands.

Code:
```#include<string.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>

FILE *fp;

typedef struct book{
double rating;
double price;
double relevance;
int ID;
}B;

B *list;

{
int c;
if((fp=fopen(infile, "rb")))
{
fscanf(fp, "%*s\t%*s\t%*s\t%*s\n");
c=0;
while((!feof(fp))&&(c<N))
{
fscanf(fp, "%lf\t%lf\t%lf\t%d\n", &list[c].rating,  &list[c].price, &list[c].relevance, &list[c].ID);
c++;
}
fclose(fp);
}
else
{
fprintf(stderr,"%s did not open. Exiting.\n",infile);
exit(-1);
}
return(c);
}

int comp_on_rating(const void *a, const void *b)
{
if ((*(B *)a).rating < (*(B *)b).rating)
return -1;
else if ((*(B *)a).rating > (*(B *)b).rating)
return 1;
else
return 0;
}

int comp_on_relev(const void *a, const void *b)
{

if ((*(B *)a).relevance < (*(B *)b).relevance)
return -1;
else if ((*(B *)a).relevance > (*(B *)b).relevance)
return 1;
else
return 0;
}

int comp_on_price(const void *a, const void *b)
{

if ((*(B *)a).price < (*(B *)b).price)
return 1;
else if ((*(B *)a).price > (*(B *)b).price)
return -1;
else
return 0;
}

/* This code is taken and adapted from
http://en.allexperts.com/q/C-1587/Merge-Sort-C.htm
and http://paste-it.net/public/d2c2d20

The merge sort works by taking the items that need to be sorted,
splitting the list into two (each about the same size) and
recursively sorting each sublist, by recursively calling the merge
sort on each sublist, which resulting in smaller and smaller lists.

One the sublists have all been sorted, they are all merged together
into one sorted list. */

void merge(B* a, int low, int high, int mid, int(*compar)(const void *, const void *))
{
int i, j, k;
B* c = (B *)malloc((high - low + 1) * sizeof(B));
i=low;
j=mid+1;
k=0;

while((i<=mid)&&(j<=high))
{

if(compar(&a[i],&a[j]) <= 0)
{
c[k]=a[i];
k++;
i++;
}

else
{
c[k]=a[j];
k++;
j++;
}
}

while(i<=mid)
{
c[k]=a[i];
k++;
i++;
}

while(j<=high)
{
c[k]=a[j];
k++;
j++;
}

memcpy(a + low, c, (high - low + 1) * sizeof(B));
free(c);
}

void mergesort(B* a, int low, int high, int(*compar)(const void *, const void *))
{
int mid;

if(low<high)
{
mid=(low+high)/2;
mergesort(a,low,mid,compar);
mergesort(a,mid+1,high,compar);
merge(a,low,high,mid,compar);
}
}

char *getUserInput() {
/* Create and allocate space for the users response. */
char* userResponse = malloc(200);
fgets(userResponse, 100, stdin);

/* Remove the carry line character and replace it with a 0. */
int responseLength = strlen(userResponse);
userResponse[(responseLength - 1)] = 0;

/* Return the result. */
return userResponse;
}

void user_interface(int N)
{
// For Part 1 this function calls the sort function to sort on Price only
// mergesort(list, 0, N, comp_on_price);

// For Part 2 this function
// (1) asks the user if they would like to sort their search results
// (2) asks for the most important field (or key), the next most etc
// (3) calls your sort function

printf("How many fields would like to sort by? (Maximum of 3)\n");
printf("> ");
char *numberOfFields = getUserInput();
int noOfFields = atoi(numberOfFields);
while(noOfFields > 3 || noOfFields < 1)
{
printf("The number of fields to sort by must be 1, 2 or 3, please try again.\n");
printf("> ");
numberOfFields = getUserInput();
noOfFields = atoi(numberOfFields);
}

if(noOfFields == 1)
{
printf("Which field is the most important for you to sort by?\n");
printf("> ");
char* mostImportantField;
mostImportantField = getUserInput();

if(strcmp(mostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(mostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(mostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}
}

else if(noOfFields == 2)
{
printf("Which field is the 2nd most important for you to sort by?\n");
printf("> ");
char* secondMostImportantField;
secondMostImportantField = getUserInput();

if(strcmp(secondMostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(secondMostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(secondMostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}

printf("Which field is the most important for you to sort by?\n");
printf("> ");
char* mostImportantField;
mostImportantField = getUserInput();

if(strcmp(mostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(mostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(mostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}
}

else if(noOfFields == 3)
{
printf("Which field is the 3rd most important for you to sort by?\n");
printf("> ");
char* thirdMostImportantField;
thirdMostImportantField = getUserInput();

if(strcmp(thirdMostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(thirdMostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(thirdMostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}

printf("Which field is the 2nd most important for you to sort by?\n");
printf("> ");
char* secondMostImportantField;
secondMostImportantField = getUserInput();

if(strcmp(secondMostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(secondMostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(secondMostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}

printf("Which field is the most important for you to sort by?");
printf("> ");
char* mostImportantField;
mostImportantField = getUserInput();

if(strcmp(mostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(mostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(mostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}
}
}

void print_results(int N)
{
int i;
if((fp=fopen("top20.txt","w")))
{
for(i=N-1;i>=N-20;i--)
{

printf("%g %g %g %d\n", list[i].rating, list[i].price, list[i].relevance, list[i].ID);
fprintf(fp, "%g %g %g %d\n", list[i].rating, list[i].price, list[i].relevance, list[i].ID);

}
fclose(fp);
}
else
{
fprintf(stderr,"Trouble opening output file top20.txt\n");
exit(-1);
}

}

int main(int argc, char *argv[])
{
int N;

if(argc!=3)
{
fprintf(stderr, "./exec <input_size> <filename>\n");
exit(-1);
}

N=atoi(argv[1]);

list = (B *)malloc(N*sizeof(B));

user_interface(N);

print_results(N);

return(0);
}```

6. I've got some running around to do, but I'll look at it later this evening.

7. No problem. Got some help from another friend of mine who suggested that it should be N-1 in the calling of mergesort and that seems to have solved it - although I'd appreciate it if you could still look at it for me. Cheers.

8. I'm running it through now. What I've noticed initially is:

1) The cast from malloc's pointer should be removed. C++ may need them, but C does not.

2) In the get user input function, you have repeated declarations for most important Field and three declarations for second most important Field!

Your compiler should be warning you. Do you have your warnings turned up all the way?

3) You're using feof() as part of your while reading loop. That has been found inaccurate. Check our FAQ on this forum about feof() problems. (link to it on the top blue bar on the forum page).

You fixed the problem by passing N-1, instead of N?

Does that make it work with any of the keys being chosen?

I'm just getting started with debugging, so more later.

Edit:

The merge sorting logic is off. Comments are in the affected code, but the problem is deeper than just j going beyond the bounds of the array.

When you're developing a program like this, it's a BIG help if you make it work with a fixed array first, with a minimum of pointers and no malloc's in sight. With just the most basic of code present, it's easy to find any bugs, and fix them. Then, once you know that everything is working correctly, then add in your pointers and mallocs, and etc.

I can make this program work great for relevance, or for rating, but not both.

Code:
```#include<string.h>
#include <stdlib.h>
#include <stdio.h>
#include <math.h>

FILE *fp;

typedef struct book{
double rating;
double price;
double relevance;
int ID;
}B;

B *list;

/*
ignore this - this is a bug fix only for Turbo C failing to link in the floating point formats
automatically. It has no other purpose.
*/
static void force_fpf()
{
float x, *y; /* Just declares two variables */
y = &x;      /* Forces linkage of FP formats */
x = *y;      /* Suppress warning message about x */
}

{
int c;
if((fp=fopen(infile, "rb")) != NULL)
{
fscanf(fp, "%*s\t%*s\t%*s\t%*s\n");
c=0;
while((!feof(fp))&&(c<N))
{
fscanf(fp, "%lf\t%lf\t%lf\t%d\n", &list[c].rating,  &list[c].price, &list[c].relevance, &list[c].ID);
c++;
}
fclose(fp);
}
else
{
fprintf(stderr,"%s did not open. Exiting.\n",infile);
exit(-1);
}
return(c);
}

int comp_on_rating(const void *a, const void *b)
{
if ((*(B *)a).rating < (*(B *)b).rating)
return -1;
else if ((*(B *)a).rating > (*(B *)b).rating)
return 1;
else
return 0;
}

int comp_on_relev(const void *a, const void *b)
{

if ((*(B *)a).relevance < (*(B *)b).relevance)
return -1;
else if ((*(B *)a).relevance > (*(B *)b).relevance)
return 1;
else
return 0;
}

int comp_on_price(const void *a, const void *b)
{

if ((*(B *)a).price < (*(B *)b).price)
return 1;
else if ((*(B *)a).price > (*(B *)b).price)
return -1;
else
return 0;
}

/* This code is taken and adapted from
http://en.allexperts.com/q/C-1587/Merge-Sort-C.htm
and http://paste-it.net/public/d2c2d20

The merge sort works by taking the items that need to be sorted,
splitting the list into two (each about the same size) and
recursively sorting each sublist, by recursively calling the merge
sort on each sublist, which resulting in smaller and smaller lists.

One the sublists have all been sorted, they are all merged together
into one sorted list. */

void merge(B* a, int low, int high, int mid, int(*compar)(const void *, const void *))
{
int i, j, k;
B* c = (B *)malloc((high - low + 1) * sizeof(B));
i=low;
j=mid+1;
k=0;

<=high is needed for rating sort. < no = is needed for relevance! ;-)
while((i<=mid)&&(j<=high))  //  <<< this is the immediate problem
{

if(compar(&a[i],&a[j]) <= 0)
{
c[k]=a[i];
k++;
if(k > 20) printf("\nk1 in merge!! \a\a\n");
i++;
if(i > 20) printf("\ni1 in merge!! \a\a\n");
}

else   //allows j to go to 21
{
c[k]=a[j];  //and here's the problem with that
k++;
if(k > 20) printf("\nk2 in merge!! \a\a\n");
j++;
if(j > 20) printf("\nj2 in merge! \a\a\n");
}
}

while(i<=mid)
{
c[k]=a[i];
k++;
if(k > 20) printf("\nk3 in merge!! \a\a\n");
i++;
if(i > 20) printf("\ni2 in merge!! \a\a\n");
}

while(j<=high)
{
c[k]=a[j];
k++;
if(k > 20) printf("\nk3 in merge!! \a\a\n");
j++;
if(j > 20) printf("\nj3 in merge!! \a\a\n");
}

memcpy(a + low, c, (high - low + 1) * sizeof(B));
free(c);
}

void mergesort(B* a, int low, int high, int(*compar)(const void *, const void *))
{
int mid;

if(low<high)
{
mid=(low+high)/2;
mergesort(a,low,mid,compar);
mergesort(a,mid+1,high,compar);
merge(a,low,high,mid,compar);
}
}

char *getUserInput() {
/* Create and allocate space for the users response. */
int responseLength;
char* userResponse = malloc(200);
fgets(userResponse, 100, stdin);

/* Remove the carry line character and replace it with a 0. */
responseLength = strlen(userResponse);
userResponse[(responseLength - 1)] = 0;

/* Return the result. */
return userResponse;
}

void user_interface(int N)
{
// For Part 1 this function calls the sort function to sort on Price only
// mergesort(list, 0, N, comp_on_price);

// For Part 2 this function
// (1) asks the user if they would like to sort their search results
// (2) asks for the most important field (or key), the next most etc
// (3) calls your sort function
char *numberOfFields;
char* mostImportantField;
//char* mostImportantField;
//char* mostImportantField;
char* secondMostImportantField;
//char* secondMostImportantField;
char* thirdMostImportantField;

int noOfFields;

printf("How many fields would like to sort by? (Maximum of 3)\n");
printf("> ");
numberOfFields = getUserInput();
noOfFields = atoi(numberOfFields);
while(noOfFields > 3 || noOfFields < 1)
{
printf("The number of fields to sort by must be 1, 2 or 3, please try again.\n");
printf("> ");
numberOfFields = getUserInput();
noOfFields = atoi(numberOfFields);
}

if(noOfFields == 1)
{
printf("Which field is the most important for you to sort by?\n");
printf("> ");

mostImportantField = getUserInput();

if(strcmp(mostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(mostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(mostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}
}

else if(noOfFields == 2)
{
printf("Which field is the 2nd most important for you to sort by?\n");
printf("> ");

secondMostImportantField = getUserInput();

if(strcmp(secondMostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(secondMostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(secondMostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}

printf("Which field is the most important for you to sort by?\n");
printf("> ");

mostImportantField = getUserInput();

if(strcmp(mostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(mostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(mostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}
}

else if(noOfFields == 3)
{
printf("Which field is the 3rd most important for you to sort by?\n");
printf("> ");

thirdMostImportantField = getUserInput();

if(strcmp(thirdMostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(thirdMostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(thirdMostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}

printf("Which field is the 2nd most important for you to sort by?\n");
printf("> ");

secondMostImportantField = getUserInput();

if(strcmp(secondMostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(secondMostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(secondMostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}

printf("Which field is the most important for you to sort by?");
printf("> ");

mostImportantField = getUserInput();

if(strcmp(mostImportantField,"price") == 0)
{
mergesort(list, 0, N, comp_on_price);
}
else if(strcmp(mostImportantField,"relevance") == 0)
{
mergesort(list, 0, N, comp_on_relev);
}
else if(strcmp(mostImportantField,"rating") == 0)
{
mergesort(list, 0, N, comp_on_rating);
}
}
}

void print_results(int N)
{
int i;
//if((fp=fopen("top20.txt","w")) != NULL)
//  {
for(i=N-1;i>=0;i--)  //N-20?
{

printf("%g %g %g %d\n", list[i].rating, list[i].price, list[i].relevance, list[i].ID);

// don't print to the input file - when it's not right!
//	  fprintf(fp, "%g %g %g %d\n", list[i].rating, list[i].price, list[i].relevance, list[i].ID);

}
//fclose(fp);
/*    }
else
{
fprintf(stderr,"Trouble opening output file top20.txt\n");
exit(-1);
}
*/
}

int main() //int argc, char *argv[])
{
int N;

/*  if(argc!=3)
{
fprintf(stderr, "./exec <input_size> <filename>\n");
exit(-1);
}

N=atoi(argv[1]);
*/
N = 20;

list = malloc(N*sizeof(B));  //removed cast of malloc's pointer

user_interface(N);

print_results(N);

getch();
return(0);
}

/*
Stars	Price	Relv	ID
4.5  	12.49	7.9  	 1
5    	 7.99	7.6  	 2
2    	16.99	6.2  	 3
2.5  	15.49	9.1  	 4
3.5  	20.99	6    	 5
1    	 9.99	8    	 6
1.5  	13.99	1    	 7
5    	 8.49	8.3  	 8
3.5  	10.49	5.2  	 9
3    	27.99	7.7  	10
3    	22.99	8.9  	11
2    	11.49	9.1  	12
1    	26.49	0.8  	13
1.5  	19.49	3.4  	14
1    	 3.99	0.6  	15
2    	27.49	8.5  	16
2    	16.99	7.6  	17
3    	19.49	0.3  	18
2.5  	26.49	7.2  	19
2    	21.49	3.5  	20

*/```