Hi everyone,
New poster here, keen to get involved in some serious C programming. I'm currently working on a program to take ASCII input from a file, and count the occurrences of each individual word within that file. The program should ignore punctuation and treat whitespace as the end of a particular string. It should also convert uppercase letters to lowercase letters to avoid duplicates. All of these strings and their counts should be stored in an array, which should then be sorted and printed to a CSV file.
This is what I have so far. Bear in mind that this is a partial implementation, and the bits that work are by no means perfect:
Code:
#include <stdio.h>#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <errno.h>
#define TRUE 1
#define FALSE 0
/* This program is designed to take an ASCII input file, count the occurrences of words in it
* and write an output file displaying the data. I intend for it to convert uppercase to
* lowercase, so as not to generate duplicate words in the data structure. It should also
* ignore whitespace and punctuation.
*/
void getWords(void);
void printFile(void);
void save(char *input);
struct word {
char *str;
int wc;
};
struct word *warray = NULL;
FILE *infile;
FILE *outfile;
int elements = 0;
void getWords(void)
{
char cw[100]; // Current word storage
int fullWord = FALSE;
int i = 0, j = 0, c;
while((c = fgetc(infile)) != EOF)
{
if(isalpha(c))
{
if(isupper(c))
{
cw[i] = tolower(c);
++i;
}
else
{
cw[i] = c;
++i;
}
fullWord = TRUE;
}
else
{
if(fullWord)
{
cw[i] = '\0';
i = 0;
fullWord = FALSE;
save(cw);
}
}
}
return;
}
void printFile(void)
{
int i, c;
printf("Printing the file to be counted in lowercase...\n");
for(i = 0; (c = fgetc(infile)) != EOF; i++)
{
if(ispunct(c) || isdigit(c))
{
++i;
}
else
{
putchar(tolower(c));
}
}
printf("\n");
rewind(infile);
}
void save(char *input)
{
int i = 0;
if(!warray)
{
warray = malloc(sizeof(struct word));
printf("Made array.\n");
}
else
{
printf("New\n");
for(i = 0; i < elements; i++)
{
printf("in for loop.\n");
if (strcmp(input, warray[i].str) == 0)
{
printf("exists\n");
warray[i].wc++;
}
else
{
printf("New element\n");
warray = realloc(warray, (elements+1)*sizeof(struct word));
printf("Elements = %d\n", elements);
warray[elements].str = malloc(strlen(input)+1);
strcpy(warray[elements].str,input);
warray[elements].wc = 1;
}
}
}
return;
}
int main (int argc, char *argv[])
{
if (argc < 3)
{
puts("Please supply the input filename and desired output filename as arguments.");
return 1;
}
infile = fopen(argv[1], "r");
if(infile == NULL)
{
printf("File failed to open. Error: %d\n", errno);
return 1;
}
else
{
puts("File opened successfully.");
printFile();
getWords();
}
printf("%d", elements);
return 0;
}
The problem I'm having is that my "elements" variable doesn't seem to be incrementing, so the save function also doesn't seem to actually be doing anything. I assumed that the reason for this was the following:
Code:
for(i = 0; i < elements; i++) {
printf("in for loop.\n");
if (strcmp(input, warray[i].str) == 0)
{
printf("exists\n");
warray[i].wc++;
}
else
{
printf("New element\n");
warray = realloc(warray, (elements+1)*sizeof(struct word));
printf("Elements = %d\n", elements);
warray[elements].str = malloc(strlen(input)+1);
strcpy(warray[elements].str,input);
warray[elements].wc = 1;
}
}
The "elements" variable is set globally to 0, and is supposed to be incremented within the save function. I assumed that this was causing the problem with regards to the above for loop, so I changed the initial value of elements to 1, which resulted in a segmentation fault.
I'm not exactly sure what to do here, can anyone help me out?
Thanks in advance