Code:
/* Sorts the lines read from file2.txt, then, for each line of
file1.txt, counts how many lines of file2.txt match it.
status: ok, but not thoroughly tested
*/
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#define ROW 1793
#define COL 15
int binarySearch(char c2[ROW][COL], char c1[COL], int i);
int compare( const void *a, const void *b);
int getC2(char c2[ROW][COL], FILE *fpt2);
void quicksort(char c2[ROW][COL], int l, int r);
/*
 * Loads up to ROW lines of file2.txt, sorts them, and then reports, for
 * every line of file1.txt, how many of the loaded lines are identical to it.
 *
 * Lines are compared exactly as fgets() delivers them, i.e. including the
 * trailing newline when there is one.
 *
 * Returns EXIT_FAILURE if either input file cannot be opened, 0 otherwise.
 */
int main(void) {
    int count, index, first, last, match;
    FILE *fpt1, *fpt2;
    char c1[COL];       /* current line of file1.txt */
    char c2[ROW][COL];  /* lines of file2.txt */

    printf("\n\n\n\n\n New Run \n\n");

    fpt1 = fopen("file1.txt", "rt");
    fpt2 = fopen("file2.txt", "rt");
    if (fpt1 == NULL || fpt2 == NULL) {
        printf("\n Error opening file1.txt or file2.txt - terminating");
        /* One of the two may have opened successfully; do not leak it. */
        if (fpt1 != NULL) {
            fclose(fpt1);
        }
        if (fpt2 != NULL) {
            fclose(fpt2);
        }
        return EXIT_FAILURE;
    }

    /* getC2() returns the index of the last row it filled (-1 if it read
       nothing), so the number of valid rows is one more than that. */
    count = getC2(c2, fpt2) + 1;
    fclose(fpt2);

    /* Sort only the rows that were actually loaded; the remaining rows of
       c2 are uninitialised and must not be handed to strcmp(). */
    printf("\n\n Sorting - Standby \n");
    if (count > 1) {
        qsort(c2, (size_t)count, sizeof c2[0], compare);
    }

    /* main processing loop */
    putchar('\n');
    while (fgets(c1, COL, fpt1) != NULL) {
        /* binarySearch() takes the number of valid rows and returns the
           index of some row equal to c1, or -1. */
        index = binarySearch(c2, c1, count);
        if (index > -1) {
            /* Equal rows are adjacent after sorting: widen [first, last]
               in both directions without leaving the loaded range. */
            first = index;
            while (first > 0 && strcmp(c2[first - 1], c1) == 0) {
                first--;
            }
            last = index;
            while (last < count - 1 && strcmp(c2[last + 1], c1) == 0) {
                last++;
            }
            match = last - first + 1;
            printf("%d matches of %s", match, c1);
        } else {
            printf("0 matches of %s", c1);
        }
    }
    fclose(fpt1);

    printf("\n\n\t\t\t press enter when ready");
    (void)getchar();  /* hold the console open until the user presses enter */
    return 0;
}
/*
 * Binary search for the string c1 among rows c2[0] .. c2[i-1].
 *
 * Those rows must already be in ascending strcmp() order.
 *
 * c2  table of strings to search
 * c1  string to look for
 * i   number of rows to search (the highest index examined is i - 1)
 *
 * Returns the index of a row equal to c1, or -1 if there is none. When
 * several rows are equal to c1, the index returned is whichever one the
 * bisection lands on, not necessarily the first or the last.
 */
int binarySearch(char c2[ROW][COL], char c1[COL], int i) {
    int first = 0;
    int last = i - 1;

    while (first <= last) {
        int probe = (first + last) / 2;
        int order = strcmp(c2[probe], c1);

        if (order == 0) {
            return probe;       /* found, return index */
        }
        if (order > 0) {
            last = probe - 1;   /* c1 sorts before the probed row */
        } else {
            first = probe + 1;  /* c1 sorts after the probed row */
        }
    }
    return -1;                  /* not found */
}
/*
 * Loads lines from fpt2 into consecutive rows of c2, starting at row 0,
 * until fgets() fails (end of file or read error) or ROW rows are filled.
 *
 * Each fgets() call stores at most COL - 1 characters, so an input line
 * longer than that is spread over more than one row.
 *
 * Returns the index of the last row filled, i.e. the number of rows read
 * minus one; this is -1 when nothing could be read.
 */
int getC2(char c2[ROW][COL], FILE * fpt2) {
    int row;

    for (row = 0; row < ROW; ++row) {
        if (fgets(c2[row], COL, fpt2) == NULL) {
            break;
        }
    }
    /* row is now the count of rows filled; report the last index instead */
    return row - 1;
}
/*
 * qsort() comparison callback: treats both elements as NUL-terminated
 * strings and orders them with strcmp().
 *
 * Returns a negative, zero, or positive value when a sorts before, equal
 * to, or after b respectively.
 */
int compare( const void *a, const void *b)
{
    const char *lhs = a;
    const char *rhs = b;

    return strcmp(lhs, rhs);
}
/* Notes:
Strings below are the contents of file1.txt,
(the small file). file2.txt had 1793 lines
in it, made up of multiple copies of this file,
along with original variations of this string.
042,0101,182
042,0101,181
042,0101,183
042,0101,184
042,0101,185
054,0101,194
054,0101,191
054,0101,170
054,0101,193
054,0101,172
054,0101,170
000,test,000 << unique testing string
054,0101,174
054,0101,192
*/
If this program doesn't suit your needs, use the algorithm I highlighted in blue above. It will work, and it is much better than your original while still being similar to it.