Code:
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <dirent.h> ///to access files folder
#include "stem.h" ///stemming library
typedef struct {
char text[100];
int nbrofwords;
struct string *next;
} String;
char *stopwords[39] =
{ "I", "a", "about", "an", "are", "as", "at", "be", "by", "do", "in",
"for", "from", "how", "they", "have", "is", "it", "all", "of", "on",
"or", "that", "the", "this", "to", "was", "what", "when", "where",
"who", "will", "with", "and", "the", "com", "www", "org", "net" };
char *smallletter[26] =
{ "a", "b", "c", "d", "e", "f", "j", "h", "i", "g", "k", "l", "m",
"n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" };
char *Capitalletter[26] =
{ "A", "B", "C", "D", "E", "F", "J", "H", "I", "G", "K", "L", "M",
"N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z" };
FILE *filef;
String *words;
long filenb;
void addterms(char word[100])
{
int test = 0;
String *p, *new;
p = words;
new = malloc(sizeof(String));
strcpy(new->text, word);
new->nbrofwords = 1;
new->next = NULL;
if (!words)
words = new;
else {
while (p->next != NULL) {
if (strcmp(p->text, word) == 0) {
test = 1;
p->nbrofwords += 1;
}
p = p->next;
}
if (test == 0)
p->next = new;
}
}
/// test if it's a character or not
int isCharacter(char c)
{
int i;
for (i = 0; i < 26; i++) {
if (c == smallletter[i] || c == Capitalletter[i]) {
return 1;
}
}
return 0;
}
/// test if it's a stop word or not
int findstopwords(char *text)
{
int i;
for (i = 0; i < 39; i++) {
if (strcmp(text, stopwords[i]) == 0)
return 1;
}
return 0;
}
/// empty arrays to read other files and words
void emptyarray(char array[100])
{
int i;
for (i = 0; i < 100; i++)
array[i] = '\0';
}
/// return words to their roots and adds them to the outputted file
void addtofile(FILE * file_s)
{
char c;
int i = 0;
int j, k;
char text[100];
c = fgetc(file_s);
do {
if (isCharacter(c) == 1) /// lower Capital letters to small letters
{
if (c < 97)
c += 32;
text[i] = c;
i++;
} else {
text[i] = '\0';
if (findstopwords(text) == 0 && text[0] != '\0') {
k = stem(text, 0, strlen(text) - 1);
if (k != (strlen(text) - 1))
text[k] = '\0';
if (findstopwords(text) == 0 && text[0] != '\0') {
fputs(text, filef);
fputc(' ', filef);
addterms(text);
}
}
emptyarray(text);
i = 0;
}
c = fgetc(file_s);
} while (c != EOF);
fputc('\n', filef);
}
/// open the file to read the documents
void openFile(char *filename)
{
char text[10000] = "files/";
FILE *f;
int i;
j = strlen(text);
for (i = 0; i < strlen(filename); i++)
text[i + 11] = filename[i];
text[strlen(filename) + 11] = '\0';
f = fopen(text, "r+");
if (f) {
addtofile(f);
}
filenb++;
fclose(f);
}
/// measure the weight and tf and idf to find the
float MeasureWeight(int nbwordf, int nbofW, int nbF)
{
float weight_measured;
float TF, DF, IDF, nbword_infiles, nb_words_tot, nbfw_term, nbof_files;
nbword_infiles = nbwordf; /// number of a term in file
nb_words_tot = nbofW; /// number of terms in file
nbfw_term = nbF; /// number of files with the term
nbof_files = filenb; /// number of files
TF = nbword_infiles / nb_words_tot; /// TermFrequency=number of times term appear in file/doc divided by number of terms in file/doc
DF = nbfw_term; /// number of files with term
IDF = nbof_files / DF; /// InverseDocFrequency(calculate the importance of a term) = number of files/docs divided by number of files/docs with the term
weight_measured = TF * IDF;
return weight_measured;
}
void Display(char *word, int nb)
{
int frequent[2][filenb];
int nbF = 0;
int i, k, j = 0;
for (i = 0; i < filenb; i++) {
frequent[0][i] = 0;
frequent[1][i] = 0;
}
FILE *f = fopen("C:\Users\AMANIB\Desktop\ri\IR\output.txt", "r+");
char c;
char text[100];
int count = 0;
do {
c = fgetc(f);
if (isCharacter(c) == 1) {
text[i] = c;
i++;
} else {
text[i] = '\0';
if (strcmp(word, text) == 0)
count++;
i = 0;
emptyarray(text);
if (c == '\n') {
frequent[0][j] = count;
frequent[1][j] = j;
j++;
count = 0;
}
}
} while (c != EOF);
for (i = 0; i < filenb; i++) {
if (frequent[0][i] != 0) {
nbF++;
}
}
float weight;
printf("%s ", word);
for (i = 0; i < filenb; i++) {
if (frequent[0][i] != 0) {
weight = MeasureWeight(frequent[0][i], nb, nbF);
printf("\n \n filenb%d = %d times | weight = %f \n \n \n", frequent[1][i] + 1, frequent[0][i], weight);
}
}
}
int main()
{
int i, j, k;
char c;
String *p;
char word[50];
filenb = 0;
filef = fopen("C:\Users\AMANIB\Desktop\ri\IR\output.txt", "r+");
struct dirent *read;
DIR *files;
files = opendir("files");
char *text;
FILE *f;
while ((read = readdir(files))) {
if ((strcmp(read->doc_name, ".") == 0) || (strcmp(read->doc_name, "..") == 0))
continue;
openFile(read->doc_name);
}
closedir(files);
fclose(filef);
p = words;
while (p) {
Display(p->text, p->nbrofwords);
p = p->next;
}
return 0;
}
> I would like to know where is the problem as I tried to debug it but with no success of finding the error
Code:
$ gcc -Wall -Wextra foo.c
In file included from foo.c:5:
stem.h: In function ‘m’:
stem.h:46:7: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
46 | if (! cons(i)) break; i++;
| ^~
stem.h:46:29: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
46 | if (! cons(i)) break; i++;
| ^
stem.h: In function ‘step4’:
stem.h:237:18: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
237 | if (ends("\04" "ence")) break; return;
| ^~
stem.h:237:49: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
237 | if (ends("\04" "ence")) break; return;
| ^~~~~~
stem.h:241:18: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
241 | if (ends("\04" "ible")) break; return;
| ^~
stem.h:241:49: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
241 | if (ends("\04" "ible")) break; return;
| ^~~~~~
stem.h:245:18: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
245 | if (ends("\03" "ent")) break; return;
| ^~
stem.h:245:48: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
245 | if (ends("\03" "ent")) break; return;
| ^~~~~~
stem.h:247:18: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
247 | if (ends("\02" "ou")) break; return;
| ^~
stem.h:247:47: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
247 | if (ends("\02" "ou")) break; return;
| ^~~~~~
stem.h:251:18: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
251 | if (ends("\03" "iti")) break; return;
| ^~
stem.h:251:48: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
251 | if (ends("\03" "iti")) break; return;
| ^~~~~~
stem.h: In function ‘step5’:
stem.h:267:27: warning: suggest parentheses around ‘&&’ within ‘||’ [-Wparentheses]
267 | if (a > 1 || a == 1 && !cvc(k-1)) k--;
| ~~~~~~~^~~~~~~~~~~~
foo.c: In function ‘addterms’:
foo.c:46:9: warning: assignment to ‘String *’ {aka ‘struct <anonymous> *’} from incompatible pointer type ‘struct string *’ [-Wincompatible-pointer-types]
46 | p = p->next;
| ^
foo.c:49:15: warning: assignment to ‘struct string *’ from incompatible pointer type ‘String *’ {aka ‘struct <anonymous> *’} [-Wincompatible-pointer-types]
49 | p->next = new;
| ^
foo.c: In function ‘isCharacter’:
foo.c:58:11: warning: comparison between pointer and integer
58 | if (c == smallletter[i] || c == Capitalletter[i]) {
| ^~
foo.c:58:34: warning: comparison between pointer and integer
58 | if (c == smallletter[i] || c == Capitalletter[i]) {
| ^~
foo.c: In function ‘addtofile’:
foo.c:103:15: warning: comparison of integer expressions of different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} [-Wsign-compare]
103 | if (k != (strlen(text) - 1))
| ^~
foo.c:89:7: warning: unused variable ‘j’ [-Wunused-variable]
89 | int j, k;
| ^
foo.c: In function ‘openFile’:
foo.c:126:17: warning: comparison of integer expressions of different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} [-Wsign-compare]
126 | for (i = 0; i < strlen(filename); i++)
| ^
foo.c: In function ‘Display’:
foo.c:162:19: error: incomplete universal character name \U
162 | FILE *f = fopen("C:\Users\AMANIB\Desktop\ri\IR\output.txt", "r+");
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
foo.c:162:19: warning: unknown escape sequence: '\A'
foo.c:162:19: warning: unknown escape sequence: '\D'
foo.c:162:19: warning: unknown escape sequence: '\I'
foo.c:162:19: warning: unknown escape sequence: '\o'
foo.c:157:10: warning: unused variable ‘k’ [-Wunused-variable]
157 | int i, k, j = 0;
| ^
foo.c: In function ‘main’:
foo.c:207:17: error: incomplete universal character name \U
207 | filef = fopen("C:\Users\AMANIB\Desktop\ri\IR\output.txt", "r+");
| ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
foo.c:207:17: warning: unknown escape sequence: '\A'
foo.c:207:17: warning: unknown escape sequence: '\D'
foo.c:207:17: warning: unknown escape sequence: '\I'
foo.c:207:17: warning: unknown escape sequence: '\o'
foo.c:214:23: error: ‘struct dirent’ has no member named ‘doc_name’; did you mean ‘d_name’?
214 | if ((strcmp(read->doc_name, ".") == 0) || (strcmp(read->doc_name, "..") == 0))
| ^~~~~~~~
| d_name
foo.c:214:61: error: ‘struct dirent’ has no member named ‘doc_name’; did you mean ‘d_name’?
214 | if ((strcmp(read->doc_name, ".") == 0) || (strcmp(read->doc_name, "..") == 0))
| ^~~~~~~~
| d_name
foo.c:216:20: error: ‘struct dirent’ has no member named ‘doc_name’; did you mean ‘d_name’?
216 | openFile(read->doc_name);
| ^~~~~~~~
| d_name
foo.c:224:7: warning: assignment to ‘String *’ {aka ‘struct <anonymous> *’} from incompatible pointer type ‘struct string *’ [-Wincompatible-pointer-types]
224 | p = p->next;
| ^
foo.c:212:9: warning: unused variable ‘f’ [-Wunused-variable]
212 | FILE *f;
| ^
foo.c:211:9: warning: unused variable ‘text’ [-Wunused-variable]
211 | char *text;
| ^~~~
foo.c:205:8: warning: unused variable ‘word’ [-Wunused-variable]
205 | char word[50];
| ^~~~
foo.c:203:8: warning: unused variable ‘c’ [-Wunused-variable]
203 | char c;
| ^
foo.c:202:13: warning: unused variable ‘k’ [-Wunused-variable]
202 | int i, j, k;
| ^
foo.c:202:10: warning: unused variable ‘j’ [-Wunused-variable]
202 | int i, j, k;
| ^
foo.c:202:7: warning: unused variable ‘i’ [-Wunused-variable]
202 | int i, j, k;
| ^
All of your upper/lower case stuff can be simplified by knowing about ctype.h