Thread: trying to treat a few documents and print it as normal text but my code won't output

  1. #1
    Registered User
    Join Date
    Feb 2022
    Posts
    3

    Unhappy trying to treat a few documents and print it as normal text but my code won't output

    my treatment of the files include checking whether they are characters or not, to lower capital letters to small letters, stem the words to their root, and calculate TF and IDF and the weight of the terms too and as a result I try to output the resulted text in a normal text document

    and the stem library I used stem.h - Google Drive
    (the files I used are from 20newsgroup like 5 of them as a test)


    but when it builds it it shows nothing on the command window and text document


    I would like to know where is the problem as I tried to debug it but with no success of finding the error
    suggestions to improve the code are most welcome too

    [CODE]
    [COLOR=var(--highlight-keyword)]#[COLOR=var(--highlight-keyword)]include [COLOR=var(--highlight-variable)]<stdio.h>[/COLOR][/COLOR]
    [COLOR=var(--highlight-keyword)]#[COLOR=var(--highlight-keyword)]include[/COLOR] [COLOR=var(--highlight-variable)]<stdlib.h>[/COLOR][/COLOR]
    [COLOR=var(--highlight-keyword)]#[COLOR=var(--highlight-keyword)]include[/COLOR] [COLOR=var(--highlight-variable)]<sys/types.h>[/COLOR][/COLOR]
    [COLOR=var(--highlight-keyword)]#[COLOR=var(--highlight-keyword)]include[/COLOR] [COLOR=var(--highlight-variable)]<dirent.h>[/COLOR] [COLOR=var(--highlight-comment)]///to access files folder[/COLOR][/COLOR]
    [COLOR=var(--highlight-keyword)]#[COLOR=var(--highlight-keyword)]include[/COLOR] [COLOR=var(--highlight-variable)]"stem.h"[/COLOR] [COLOR=var(--highlight-comment)]///stemming library[/COLOR][/COLOR]

    [COLOR=var(--highlight-keyword)]typedef[/COLOR] [COLOR=var(--highlight-keyword)]struct[/COLOR]
    {

    [COLOR=var(--highlight-namespace)]char[/COLOR] text[[COLOR=var(--highlight-namespace)]100[/COLOR]];
    [COLOR=var(--highlight-namespace)]int[/COLOR] nbrofwords;
    [COLOR=var(--highlight-keyword)]struct[/COLOR] [COLOR=var(--highlight-literal)]string[/COLOR] *[COLOR=var(--highlight-literal)]next[/COLOR];
    } String;

    [COLOR=var(--highlight-namespace)]char[/COLOR] *stopwords[[COLOR=var(--highlight-namespace)]39[/COLOR]] = {[COLOR=var(--highlight-variable)]"I"[/COLOR], [COLOR=var(--highlight-variable)]"a"[/COLOR], [COLOR=var(--highlight-variable)]"about"[/COLOR], [COLOR=var(--highlight-variable)]"an"[/COLOR], [COLOR=var(--highlight-variable)]"are"[/COLOR], [COLOR=var(--highlight-variable)]"as"[/COLOR], [COLOR=var(--highlight-variable)]"at"[/COLOR], [COLOR=var(--highlight-variable)]"be"[/COLOR], [COLOR=var(--highlight-variable)]"by"[/COLOR], [COLOR=var(--highlight-variable)]"do"[/COLOR], [COLOR=var(--highlight-variable)]"in"[/COLOR], [COLOR=var(--highlight-variable)]"for"[/COLOR], [COLOR=var(--highlight-variable)]"from"[/COLOR], [COLOR=var(--highlight-variable)]"how"[/COLOR], [COLOR=var(--highlight-variable)]"they"[/COLOR], [COLOR=var(--highlight-variable)]"have"[/COLOR], [COLOR=var(--highlight-variable)]"is"[/COLOR], [COLOR=var(--highlight-variable)]"it"[/COLOR], [COLOR=var(--highlight-variable)]"all"[/COLOR], [COLOR=var(--highlight-variable)]"of"[/COLOR], [COLOR=var(--highlight-variable)]"on"[/COLOR], [COLOR=var(--highlight-variable)]"or"[/COLOR], [COLOR=var(--highlight-variable)]"that"[/COLOR], [COLOR=var(--highlight-variable)]"the"[/COLOR], [COLOR=var(--highlight-variable)]"this"[/COLOR], [COLOR=var(--highlight-variable)]"to"[/COLOR], [COLOR=var(--highlight-variable)]"was"[/COLOR], [COLOR=var(--highlight-variable)]"what"[/COLOR], [COLOR=var(--highlight-variable)]"when"[/COLOR], [COLOR=var(--highlight-variable)]"where"[/COLOR], [COLOR=var(--highlight-variable)]"who"[/COLOR], [COLOR=var(--highlight-variable)]"will"[/COLOR], [COLOR=var(--highlight-variable)]"with"[/COLOR], [COLOR=var(--highlight-variable)]"and"[/COLOR], [COLOR=var(--highlight-variable)]"the"[/COLOR], [COLOR=var(--highlight-variable)]"com"[/COLOR], [COLOR=var(--highlight-variable)]"www"[/COLOR], [COLOR=var(--highlight-variable)]"org"[/COLOR], [COLOR=var(--highlight-variable)]"net"[/COLOR]};
    [COLOR=var(--highlight-namespace)]char[/COLOR] *smallletter[[COLOR=var(--highlight-namespace)]26[/COLOR]] = {[COLOR=var(--highlight-variable)]"a"[/COLOR], [COLOR=var(--highlight-variable)]"b"[/COLOR], [COLOR=var(--highlight-variable)]"c"[/COLOR], [COLOR=var(--highlight-variable)]"d"[/COLOR], [COLOR=var(--highlight-variable)]"e"[/COLOR], [COLOR=var(--highlight-variable)]"f"[/COLOR], [COLOR=var(--highlight-variable)]"j"[/COLOR], [COLOR=var(--highlight-variable)]"h"[/COLOR], [COLOR=var(--highlight-variable)]"i"[/COLOR], [COLOR=var(--highlight-variable)]"g"[/COLOR], [COLOR=var(--highlight-variable)]"k"[/COLOR], [COLOR=var(--highlight-variable)]"l"[/COLOR], [COLOR=var(--highlight-variable)]"m"[/COLOR], [COLOR=var(--highlight-variable)]"n"[/COLOR], [COLOR=var(--highlight-variable)]"o"[/COLOR], [COLOR=var(--highlight-variable)]"p"[/COLOR], [COLOR=var(--highlight-variable)]"q"[/COLOR], [COLOR=var(--highlight-variable)]"r"[/COLOR], [COLOR=var(--highlight-variable)]"s"[/COLOR], [COLOR=var(--highlight-variable)]"t"[/COLOR], [COLOR=var(--highlight-variable)]"u"[/COLOR], [COLOR=var(--highlight-variable)]"v"[/COLOR], [COLOR=var(--highlight-variable)]"w"[/COLOR], [COLOR=var(--highlight-variable)]"x"[/COLOR], [COLOR=var(--highlight-variable)]"y"[/COLOR], [COLOR=var(--highlight-variable)]"z"[/COLOR]};
    [COLOR=var(--highlight-namespace)]char[/COLOR] *Capitalletter[[COLOR=var(--highlight-namespace)]26[/COLOR]] = {[COLOR=var(--highlight-variable)]"A"[/COLOR], [COLOR=var(--highlight-variable)]"B"[/COLOR], [COLOR=var(--highlight-variable)]"C"[/COLOR], [COLOR=var(--highlight-variable)]"D"[/COLOR], [COLOR=var(--highlight-variable)]"E"[/COLOR], [COLOR=var(--highlight-variable)]"F"[/COLOR], [COLOR=var(--highlight-variable)]"J"[/COLOR], [COLOR=var(--highlight-variable)]"H"[/COLOR], [COLOR=var(--highlight-variable)]"I"[/COLOR], [COLOR=var(--highlight-variable)]"G"[/COLOR], [COLOR=var(--highlight-variable)]"K"[/COLOR], [COLOR=var(--highlight-variable)]"L"[/COLOR], [COLOR=var(--highlight-variable)]"M"[/COLOR], [COLOR=var(--highlight-variable)]"N"[/COLOR], [COLOR=var(--highlight-variable)]"O"[/COLOR], [COLOR=var(--highlight-variable)]"P"[/COLOR], [COLOR=var(--highlight-variable)]"Q"[/COLOR], [COLOR=var(--highlight-variable)]"R"[/COLOR], [COLOR=var(--highlight-variable)]"S"[/COLOR], [COLOR=var(--highlight-variable)]"T"[/COLOR], [COLOR=var(--highlight-variable)]"U"[/COLOR], [COLOR=var(--highlight-variable)]"V"[/COLOR], [COLOR=var(--highlight-variable)]"W"[/COLOR], [COLOR=var(--highlight-variable)]"X"[/COLOR], [COLOR=var(--highlight-variable)]"Y"[/COLOR], [COLOR=var(--highlight-variable)]"Z"[/COLOR]};
    FILE *filef;
    String *words;
    [COLOR=var(--highlight-namespace)]long[/COLOR] filenb;

    [COLOR=var(--highlight-namespace)]void[/COLOR] [COLOR=var(--highlight-literal)]addterms[/COLOR]([COLOR=var(--highlight-namespace)]char[/COLOR] word[[COLOR=var(--highlight-namespace)]100[/COLOR]])
    {
    [COLOR=var(--highlight-namespace)]int[/COLOR] test = [COLOR=var(--highlight-namespace)]0[/COLOR];
    String *p, *new;
    p = words;
    new = [COLOR=var(--highlight-literal)]malloc[/COLOR]([COLOR=var(--highlight-keyword)]sizeof[/COLOR](String));
    [COLOR=var(--highlight-literal)]strcpy[/COLOR](new->text, word);
    new->nbrofwords = [COLOR=var(--highlight-namespace)]1[/COLOR];
    new->next = [COLOR=var(--highlight-literal)]NULL[/COLOR];
    [COLOR=var(--highlight-keyword)]if[/COLOR] (!words)
    words = new;
    [COLOR=var(--highlight-keyword)]else[/COLOR]
    {
    [COLOR=var(--highlight-keyword)]while[/COLOR] (p->next != [COLOR=var(--highlight-literal)]NULL[/COLOR])
    {
    [COLOR=var(--highlight-keyword)]if[/COLOR] ([COLOR=var(--highlight-literal)]strcmp[/COLOR](p->text, word) == [COLOR=var(--highlight-namespace)]0[/COLOR])
    {
    test = [COLOR=var(--highlight-namespace)]1[/COLOR];
    p->nbrofwords += [COLOR=var(--highlight-namespace)]1[/COLOR];
    }
    p = p->next;
    }
    [COLOR=var(--highlight-keyword)]if[/COLOR] (test == [COLOR=var(--highlight-namespace)]0[/COLOR])
    p->next = new;
    }
    }

    [COLOR=var(--highlight-comment)]/// test if it's a character or not[/COLOR]
    [COLOR=var(--highlight-namespace)]int[/COLOR] [COLOR=var(--highlight-literal)]isCharacter[/COLOR]([COLOR=var(--highlight-namespace)]char[/COLOR] c)
    {
    [COLOR=var(--highlight-namespace)]int[/COLOR] i;
    [COLOR=var(--highlight-keyword)]for[/COLOR] (i = [COLOR=var(--highlight-namespace)]0[/COLOR]; i < [COLOR=var(--highlight-namespace)]26[/COLOR]; i++)
    {
    [COLOR=var(--highlight-keyword)]if[/COLOR] (c == smallletter[i] || c == Capitalletter[i])
    {
    [COLOR=var(--highlight-keyword)]return[/COLOR] [COLOR=var(--highlight-namespace)]1[/COLOR];
    }
    }
    [COLOR=var(--highlight-keyword)]return[/COLOR] [COLOR=var(--highlight-namespace)]0[/COLOR];
    }

    [COLOR=var(--highlight-comment)]/// test if it's a stop word or not[/COLOR]
    [COLOR=var(--highlight-namespace)]int[/COLOR] [COLOR=var(--highlight-literal)]findstopwords[/COLOR]([COLOR=var(--highlight-namespace)]char[/COLOR] *text)
    {
    [COLOR=var(--highlight-namespace)]int[/COLOR] i;
    [COLOR=var(--highlight-keyword)]for[/COLOR] (i = [COLOR=var(--highlight-namespace)]0[/COLOR]; i < [COLOR=var(--highlight-namespace)]39[/COLOR]; i++)
    {
    [COLOR=var(--highlight-keyword)]if[/COLOR] ([COLOR=var(--highlight-literal)]strcmp[/COLOR](text, stopwords[i]) == [COLOR=var(--highlight-namespace)]0[/COLOR])
    [COLOR=var(--highlight-keyword)]return[/COLOR] [COLOR=var(--highlight-namespace)]1[/COLOR];
    }
    [COLOR=var(--highlight-keyword)]return[/COLOR] [COLOR=var(--highlight-namespace)]0[/COLOR];
    }

    [COLOR=var(--highlight-comment)]/// empty arrays to read other files and words[/COLOR]
    [COLOR=var(--highlight-namespace)]void[/COLOR] [COLOR=var(--highlight-literal)]emptyarray[/COLOR]([COLOR=var(--highlight-namespace)]char[/COLOR] [COLOR=var(--highlight-literal)]array[/COLOR][[COLOR=var(--highlight-namespace)]100[/COLOR]])
    {
    [COLOR=var(--highlight-namespace)]int[/COLOR] i;
    [COLOR=var(--highlight-keyword)]for[/COLOR] (i = [COLOR=var(--highlight-namespace)]0[/COLOR]; i < [COLOR=var(--highlight-namespace)]100[/COLOR]; i++)
    [COLOR=var(--highlight-literal)]array[/COLOR][i] = [COLOR=var(--highlight-variable)]'\0'[/COLOR];
    }

    [COLOR=var(--highlight-comment)]/// return words to their roots and adds them to the outputted file[/COLOR]
    [COLOR=var(--highlight-namespace)]void[/COLOR] [COLOR=var(--highlight-literal)]addtofile[/COLOR](FILE *file_s)
    {
    [COLOR=var(--highlight-namespace)]char[/COLOR] c;
    [COLOR=var(--highlight-namespace)]int[/COLOR] i = [COLOR=var(--highlight-namespace)]0[/COLOR];
    [COLOR=var(--highlight-namespace)]int[/COLOR] j, k;
    [COLOR=var(--highlight-namespace)]char[/COLOR] text[[COLOR=var(--highlight-namespace)]100[/COLOR]];
    c = fgetc(file_s);
    [COLOR=var(--highlight-keyword)]do[/COLOR]
    {
    [COLOR=var(--highlight-keyword)]if[/COLOR] (isCharacter(c) == [COLOR=var(--highlight-namespace)]1[/COLOR]) [COLOR=var(--highlight-comment)]/// lower Capital letters to small letters[/COLOR]
    {
    [COLOR=var(--highlight-keyword)]if[/COLOR] (c < [COLOR=var(--highlight-namespace)]97[/COLOR])
    c += [COLOR=var(--highlight-namespace)]32[/COLOR];
    text[i] = c;
    i++;
    }
    [COLOR=var(--highlight-keyword)]else[/COLOR]
    {
    text[i] = [COLOR=var(--highlight-variable)]'\0'[/COLOR];
    [COLOR=var(--highlight-keyword)]if[/COLOR] (findstopwords(text) == [COLOR=var(--highlight-namespace)]0[/COLOR] && text[[COLOR=var(--highlight-namespace)]0[/COLOR]] != [COLOR=var(--highlight-variable)]'\0'[/COLOR])
    {
    k = stem(text, [COLOR=var(--highlight-namespace)]0[/COLOR], [COLOR=var(--highlight-literal)]strlen[/COLOR](text) - [COLOR=var(--highlight-namespace)]1[/COLOR]);
    [COLOR=var(--highlight-keyword)]if[/COLOR] (k != ([COLOR=var(--highlight-literal)]strlen[/COLOR](text) - [COLOR=var(--highlight-namespace)]1[/COLOR]))
    text[k] = [COLOR=var(--highlight-variable)]'\0'[/COLOR];
    [COLOR=var(--highlight-keyword)]if[/COLOR] (findstopwords(text) == [COLOR=var(--highlight-namespace)]0[/COLOR] && text[[COLOR=var(--highlight-namespace)]0[/COLOR]] != [COLOR=var(--highlight-variable)]'\0'[/COLOR])
    {
    [COLOR=var(--highlight-literal)]fputs[/COLOR](text, filef);
    fputc([COLOR=var(--highlight-variable)]' '[/COLOR], filef);
    addterms(text);
    }
    }
    emptyarray(text);
    i = [COLOR=var(--highlight-namespace)]0[/COLOR];
    }
    c = fgetc(file_s);
    } [COLOR=var(--highlight-keyword)]while[/COLOR] (c != EOF);
    fputc([COLOR=var(--highlight-variable)]'\n'[/COLOR], filef);
    }

    [COLOR=var(--highlight-comment)]/// open the file to read the documents[/COLOR]
    [COLOR=var(--highlight-namespace)]void[/COLOR] [COLOR=var(--highlight-literal)]openFile[/COLOR]([COLOR=var(--highlight-namespace)]char[/COLOR] *filename)
    {
    [COLOR=var(--highlight-namespace)]char[/COLOR] text[[COLOR=var(--highlight-namespace)]10000[/COLOR]] = [COLOR=var(--highlight-variable)]"files/"[/COLOR];
    FILE *f;
    [COLOR=var(--highlight-namespace)]int[/COLOR] i;
    j = [COLOR=var(--highlight-literal)]strlen[/COLOR](text);
    [COLOR=var(--highlight-keyword)]for[/COLOR] (i = [COLOR=var(--highlight-namespace)]0[/COLOR]; i < [COLOR=var(--highlight-literal)]strlen[/COLOR](filename); i++)
    text[i + [COLOR=var(--highlight-namespace)]11[/COLOR]] = filename[i];
    text[[COLOR=var(--highlight-literal)]strlen[/COLOR](filename) + [COLOR=var(--highlight-namespace)]11[/COLOR]] = [COLOR=var(--highlight-variable)]'\0'[/COLOR];
    f = fopen(text, [COLOR=var(--highlight-variable)]"r+"[/COLOR]);
    [COLOR=var(--highlight-keyword)]if[/COLOR] (f)
    {
    addtofile(f);
    }
    filenb++;
    fclose(f);
    }

    [COLOR=var(--highlight-comment)]/// measure the weight and tf and idf to find the[/COLOR]
    [COLOR=var(--highlight-namespace)]float[/COLOR] [COLOR=var(--highlight-literal)]MeasureWeight[/COLOR]([COLOR=var(--highlight-namespace)]int[/COLOR] nbwordf, [COLOR=var(--highlight-namespace)]int[/COLOR] nbofW, [COLOR=var(--highlight-namespace)]int[/COLOR] nbF)
    {
    [COLOR=var(--highlight-namespace)]float[/COLOR] weight_measured;
    [COLOR=var(--highlight-namespace)]float[/COLOR] TF, DF, IDF, nbword_infiles, nb_words_tot, nbfw_term, nbof_files;
    nbword_infiles = nbwordf; [COLOR=var(--highlight-comment)]/// number of a term in file[/COLOR]
    nb_words_tot = nbofW; [COLOR=var(--highlight-comment)]/// number of terms in file[/COLOR]
    nbfw_term = nbF; [COLOR=var(--highlight-comment)]/// number of files with the term[/COLOR]
    nbof_files = filenb; [COLOR=var(--highlight-comment)]/// number of files[/COLOR]
    TF = nbword_infiles / nb_words_tot; [COLOR=var(--highlight-comment)]/// TermFrequency=number of times term appear in file/doc divided by number of terms in file/doc[/COLOR]
    DF = nbfw_term; [COLOR=var(--highlight-comment)]/// number of files with term[/COLOR]
    IDF = nbof_files / DF; [COLOR=var(--highlight-comment)]/// InverseDocFrequency(calculate the importance of a term) = number of files/docs divided by number of files/docs with the term[/COLOR]
    weight_measured = TF * IDF;
    [COLOR=var(--highlight-keyword)]return[/COLOR] weight_measured;
    }

    [COLOR=var(--highlight-namespace)]void[/COLOR] [COLOR=var(--highlight-literal)]Display[/COLOR]([COLOR=var(--highlight-namespace)]char[/COLOR] *word, [COLOR=var(--highlight-namespace)]int[/COLOR] nb)
    {
    [COLOR=var(--highlight-namespace)]int[/COLOR] frequent[[COLOR=var(--highlight-namespace)]2[/COLOR]][filenb];
    [COLOR=var(--highlight-namespace)]int[/COLOR] nbF = [COLOR=var(--highlight-namespace)]0[/COLOR];
    [COLOR=var(--highlight-namespace)]int[/COLOR] i, k, j = [COLOR=var(--highlight-namespace)]0[/COLOR];
    [COLOR=var(--highlight-keyword)]for[/COLOR] (i = [COLOR=var(--highlight-namespace)]0[/COLOR]; i < filenb; i++)
    {
    frequent[[COLOR=var(--highlight-namespace)]0[/COLOR]][i] = [COLOR=var(--highlight-namespace)]0[/COLOR];
    frequent[[COLOR=var(--highlight-namespace)]1[/COLOR]][i] = [COLOR=var(--highlight-namespace)]0[/COLOR];
    }
    FILE *f = fopen([COLOR=var(--highlight-variable)]"C:\Users\AMANIB\Desktop\ri\IR\output.txt"[/COLOR], [COLOR=var(--highlight-variable)]"r+"[/COLOR]);
    [COLOR=var(--highlight-namespace)]char[/COLOR] c;
    [COLOR=var(--highlight-namespace)]char[/COLOR] text[[COLOR=var(--highlight-namespace)]100[/COLOR]];
    [COLOR=var(--highlight-namespace)]int[/COLOR] count = [COLOR=var(--highlight-namespace)]0[/COLOR];
    [COLOR=var(--highlight-keyword)]do[/COLOR]
    {
    c = fgetc(f);
    [COLOR=var(--highlight-keyword)]if[/COLOR] (isCharacter(c) == [COLOR=var(--highlight-namespace)]1[/COLOR])
    {
    text[i] = c;
    i++;
    }
    [COLOR=var(--highlight-keyword)]else[/COLOR]
    {
    text[i] = [COLOR=var(--highlight-variable)]'\0'[/COLOR];
    [COLOR=var(--highlight-keyword)]if[/COLOR] ([COLOR=var(--highlight-literal)]strcmp[/COLOR](word, text) == [COLOR=var(--highlight-namespace)]0[/COLOR])
    count++;
    i = [COLOR=var(--highlight-namespace)]0[/COLOR];
    emptyarray(text);
    [COLOR=var(--highlight-keyword)]if[/COLOR] (c == [COLOR=var(--highlight-variable)]'\n'[/COLOR])
    {
    frequent[[COLOR=var(--highlight-namespace)]0[/COLOR]][j] = count;
    frequent[[COLOR=var(--highlight-namespace)]1[/COLOR]][j] = j;
    j++;
    count = [COLOR=var(--highlight-namespace)]0[/COLOR];
    }
    }
    } [COLOR=var(--highlight-keyword)]while[/COLOR] (c != EOF);
    [COLOR=var(--highlight-keyword)]for[/COLOR] (i = [COLOR=var(--highlight-namespace)]0[/COLOR]; i < filenb; i++)
    {
    [COLOR=var(--highlight-keyword)]if[/COLOR] (frequent[[COLOR=var(--highlight-namespace)]0[/COLOR]][i] != [COLOR=var(--highlight-namespace)]0[/COLOR])
    {
    nbF++;
    }
    }
    [COLOR=var(--highlight-namespace)]float[/COLOR] weight;
    [COLOR=var(--highlight-literal)]printf[/COLOR]([COLOR=var(--highlight-variable)]"%s "[/COLOR], word);
    [COLOR=var(--highlight-keyword)]for[/COLOR] (i = [COLOR=var(--highlight-namespace)]0[/COLOR]; i < filenb; i++)
    {
    [COLOR=var(--highlight-keyword)]if[/COLOR] (frequent[[COLOR=var(--highlight-namespace)]0[/COLOR]][i] != [COLOR=var(--highlight-namespace)]0[/COLOR])
    {
    weight = MeasureWeight(frequent[[COLOR=var(--highlight-namespace)]0[/COLOR]][i], nb, nbF);
    [COLOR=var(--highlight-literal)]printf[/COLOR]([COLOR=var(--highlight-variable)]"\n \n filenb%d = %d times | weight = %f \n \n \n"[/COLOR], frequent[[COLOR=var(--highlight-namespace)]1[/COLOR]][i] + [COLOR=var(--highlight-namespace)]1[/COLOR], frequent[[COLOR=var(--highlight-namespace)]0[/COLOR]][i], weight);
    }
    }
    }

    [COLOR=var(--highlight-namespace)]int[/COLOR] [COLOR=var(--highlight-literal)]main[/COLOR]()
    {
    [COLOR=var(--highlight-namespace)]int[/COLOR] i, j, k;
    [COLOR=var(--highlight-namespace)]char[/COLOR] c;
    String *p;
    [COLOR=var(--highlight-namespace)]char[/COLOR] word[[COLOR=var(--highlight-namespace)]50[/COLOR]];
    filenb = [COLOR=var(--highlight-namespace)]0[/COLOR];
    filef = fopen([COLOR=var(--highlight-variable)]"C:\Users\AMANIB\Desktop\ri\IR\output.txt"[/COLOR], [COLOR=var(--highlight-variable)]"r+"[/COLOR]);
    [COLOR=var(--highlight-keyword)]struct[/COLOR] [COLOR=var(--highlight-literal)]dirent[/COLOR] *[COLOR=var(--highlight-literal)]read[/COLOR];
    DIR *files;
    files = opendir([COLOR=var(--highlight-variable)]"files"[/COLOR]);
    [COLOR=var(--highlight-namespace)]char[/COLOR] *text;
    FILE *f;
    [COLOR=var(--highlight-keyword)]while[/COLOR] ((read = readdir(files)))
    {
    [COLOR=var(--highlight-keyword)]if[/COLOR] (([COLOR=var(--highlight-literal)]strcmp[/COLOR](read->doc_name, [COLOR=var(--highlight-variable)]"."[/COLOR]) == [COLOR=var(--highlight-namespace)]0[/COLOR]) || ([COLOR=var(--highlight-literal)]strcmp[/COLOR](read->doc_name, [COLOR=var(--highlight-variable)]".."[/COLOR]) == [COLOR=var(--highlight-namespace)]0[/COLOR]))
    [COLOR=var(--highlight-keyword)]continue[/COLOR];
    openFile(read->doc_name);
    }

    closedir(files);
    fclose(filef);
    p = words;
    [COLOR=var(--highlight-keyword)]while[/COLOR] (p)
    {
    Display(p->text, p->nbrofwords);
    p = p->next;
    }
    [COLOR=var(--highlight-keyword)]return[/COLOR] [COLOR=var(--highlight-namespace)]0[/COLOR];
    }
    [/COLOR]


    Last edited by kilig27; 02-01-2022 at 07:27 AM. Reason: fixed the code

  2. #2
    and the hat of int overfl Salem's Avatar
    Join Date
    Aug 2001
    Location
    The edge of the known universe
    Posts
    39,667
    > Last edited by kilig27; 48 Minutes Ago at 01:27 PM. Reason: fixed the code
    Try again!
    This time, use "copy as text" in your IDE, and/or "paste as text" in your browser.
    If necessary, go via something like notepad++ which washes away all that silly markup that's in your post.

    Nobody is going to read your code until it's actually readable.
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper.

  3. #3
    Registered User rstanley's Avatar
    Join Date
    Jun 2014
    Location
    New York, NY
    Posts
    1,127
    Also please add a closing [/CODE] to the end of you code block!

  4. #4
    Registered User
    Join Date
    Feb 2022
    Posts
    3
    I kept trying to fix it but it just kept appearing that way, I will try your idea and hopefully it gets viewed normally.

  5. #5
    Registered User
    Join Date
    Feb 2022
    Posts
    3
    It would appear that the edit box doesn't appear anymore and it will stay that way unless there's away to just delete it?

  6. #6
    Registered User rstanley's Avatar
    Join Date
    Jun 2014
    Location
    New York, NY
    Posts
    1,127
    Quote Originally Posted by kilig27 View Post
    It would appear that the edit box doesn't appear anymore and it will stay that way unless there's away to just delete it?
    Just repost with plain text if you still need help.

  7. #7
    and the hat of int overfl Salem's Avatar
    Join Date
    Aug 2001
    Location
    The edge of the known universe
    Posts
    39,667
    The OP's code.
    Code:
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>
    #include <dirent.h>             ///to access files folder
    #include "stem.h"               ///stemming library
    
    typedef struct {
      char text[100];
      int nbrofwords;
      struct string *next;
    } String;
    
    char *stopwords[39] =
        { "I", "a", "about", "an", "are", "as", "at", "be", "by", "do", "in", 
          "for", "from", "how", "they", "have", "is", "it", "all", "of", "on", 
          "or", "that", "the", "this", "to", "was", "what", "when", "where", 
          "who", "will", "with", "and", "the", "com", "www", "org", "net" };
    char *smallletter[26] =
        { "a", "b", "c", "d", "e", "f", "j", "h", "i", "g", "k", "l", "m", 
          "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z" };
    char *Capitalletter[26] =
        { "A", "B", "C", "D", "E", "F", "J", "H", "I", "G", "K", "L", "M", 
          "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z" };
    
    FILE *filef;
    String *words;
    long filenb;
    
    void addterms(char word[100])
    {
      int test = 0;
      String *p, *new;
      p = words;
      new = malloc(sizeof(String));
      strcpy(new->text, word);
      new->nbrofwords = 1;
      new->next = NULL;
      if (!words)
        words = new;
      else {
        while (p->next != NULL) {
          if (strcmp(p->text, word) == 0) {
            test = 1;
            p->nbrofwords += 1;
          }
          p = p->next;
        }
        if (test == 0)
          p->next = new;
      }
    }
    
    /// test if it's a character or not
    int isCharacter(char c)
    {
      int i;
      for (i = 0; i < 26; i++) {
        if (c == smallletter[i] || c == Capitalletter[i]) {
          return 1;
        }
      }
      return 0;
    }
    
    /// test if it's a stop word or not
    int findstopwords(char *text)
    {
      int i;
      for (i = 0; i < 39; i++) {
        if (strcmp(text, stopwords[i]) == 0)
          return 1;
      }
      return 0;
    }
    
    /// empty arrays to read other files and words
    void emptyarray(char array[100])
    {
      int i;
      for (i = 0; i < 100; i++)
        array[i] = '\0';
    }
    
    /// return words to their roots and adds them to the outputted file
    void addtofile(FILE * file_s)
    {
      char c;
      int i = 0;
      int j, k;
      char text[100];
      c = fgetc(file_s);
      do {
        if (isCharacter(c) == 1)    /// lower Capital letters to small letters
        {
          if (c < 97)
            c += 32;
          text[i] = c;
          i++;
        } else {
          text[i] = '\0';
          if (findstopwords(text) == 0 && text[0] != '\0') {
            k = stem(text, 0, strlen(text) - 1);
            if (k != (strlen(text) - 1))
              text[k] = '\0';
            if (findstopwords(text) == 0 && text[0] != '\0') {
              fputs(text, filef);
              fputc(' ', filef);
              addterms(text);
            }
          }
          emptyarray(text);
          i = 0;
        }
        c = fgetc(file_s);
      } while (c != EOF);
      fputc('\n', filef);
    }
    
    /// open the file to read the documents
    void openFile(char *filename)
    {
      char text[10000] = "files/";
      FILE *f;
      int i;
      j = strlen(text);
      for (i = 0; i < strlen(filename); i++)
        text[i + 11] = filename[i];
      text[strlen(filename) + 11] = '\0';
      f = fopen(text, "r+");
      if (f) {
        addtofile(f);
      }
      filenb++;
      fclose(f);
    }
    
    /// measure the weight and tf and idf to find the
    float MeasureWeight(int nbwordf, int nbofW, int nbF)
    {
      float weight_measured;
      float TF, DF, IDF, nbword_infiles, nb_words_tot, nbfw_term, nbof_files;
      nbword_infiles = nbwordf;     /// number of a term in file
      nb_words_tot = nbofW;         /// number of terms in file
      nbfw_term = nbF;              /// number of files with the term
      nbof_files = filenb;          /// number of files
      TF = nbword_infiles / nb_words_tot; /// TermFrequency=number of times term appear in file/doc divided by number of terms in file/doc
      DF = nbfw_term;               /// number of files with term
      IDF = nbof_files / DF;        /// InverseDocFrequency(calculate the importance of a term) = number of files/docs divided by number of files/docs with the term
      weight_measured = TF * IDF;
      return weight_measured;
    }
    
    void Display(char *word, int nb)
    {
      int frequent[2][filenb];
      int nbF = 0;
      int i, k, j = 0;
      for (i = 0; i < filenb; i++) {
        frequent[0][i] = 0;
        frequent[1][i] = 0;
      }
      FILE *f = fopen("C:\Users\AMANIB\Desktop\ri\IR\output.txt", "r+");
      char c;
      char text[100];
      int count = 0;
      do {
        c = fgetc(f);
        if (isCharacter(c) == 1) {
          text[i] = c;
          i++;
        } else {
          text[i] = '\0';
          if (strcmp(word, text) == 0)
            count++;
          i = 0;
          emptyarray(text);
          if (c == '\n') {
            frequent[0][j] = count;
            frequent[1][j] = j;
            j++;
            count = 0;
          }
        }
      } while (c != EOF);
      for (i = 0; i < filenb; i++) {
        if (frequent[0][i] != 0) {
          nbF++;
        }
      }
      float weight;
      printf("%s ", word);
      for (i = 0; i < filenb; i++) {
        if (frequent[0][i] != 0) {
          weight = MeasureWeight(frequent[0][i], nb, nbF);
          printf("\n \n filenb%d = %d times | weight = %f \n \n \n", frequent[1][i] + 1, frequent[0][i], weight);
        }
      }
    }
    
    int main()
    {
      int i, j, k;
      char c;
      String *p;
      char word[50];
      filenb = 0;
      filef = fopen("C:\Users\AMANIB\Desktop\ri\IR\output.txt", "r+");
      struct dirent *read;
      DIR *files;
      files = opendir("files");
      char *text;
      FILE *f;
      while ((read = readdir(files))) {
        if ((strcmp(read->doc_name, ".") == 0) || (strcmp(read->doc_name, "..") == 0))
          continue;
        openFile(read->doc_name);
      }
    
      closedir(files);
      fclose(filef);
      p = words;
      while (p) {
        Display(p->text, p->nbrofwords);
        p = p->next;
      }
      return 0;
    }
    > I would like to know where is the problem as I tried to debug it but with no success of finding the error
    Well compiling with warnings throws out a whole host of problems.
    Code:
    $ gcc -Wall -Wextra foo.c
    In file included from foo.c:5:
    stem.h: In function ‘m’:
    stem.h:46:7: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
       46 |       if (! cons(i)) break; i++;
          |       ^~
    stem.h:46:29: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
       46 |       if (! cons(i)) break; i++;
          |                             ^
    stem.h: In function ‘step4’:
    stem.h:237:18: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
      237 |                  if (ends("\04" "ence")) break; return;
          |                  ^~
    stem.h:237:49: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
      237 |                  if (ends("\04" "ence")) break; return;
          |                                                 ^~~~~~
    stem.h:241:18: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
      241 |                  if (ends("\04" "ible")) break; return;
          |                  ^~
    stem.h:241:49: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
      241 |                  if (ends("\04" "ible")) break; return;
          |                                                 ^~~~~~
    stem.h:245:18: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
      245 |                  if (ends("\03" "ent")) break; return;
          |                  ^~
    stem.h:245:48: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
      245 |                  if (ends("\03" "ent")) break; return;
          |                                                ^~~~~~
    stem.h:247:18: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
      247 |                  if (ends("\02" "ou")) break; return;
          |                  ^~
    stem.h:247:47: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
      247 |                  if (ends("\02" "ou")) break; return;
          |                                               ^~~~~~
    stem.h:251:18: warning: this ‘if’ clause does not guard... [-Wmisleading-indentation]
      251 |                  if (ends("\03" "iti")) break; return;
          |                  ^~
    stem.h:251:48: note: ...this statement, but the latter is misleadingly indented as if it were guarded by the ‘if’
      251 |                  if (ends("\03" "iti")) break; return;
          |                                                ^~~~~~
    stem.h: In function ‘step5’:
    stem.h:267:27: warning: suggest parentheses around ‘&&’ within ‘||’ [-Wparentheses]
      267 |       if (a > 1 || a == 1 && !cvc(k-1)) k--;
          |                    ~~~~~~~^~~~~~~~~~~~
    foo.c: In function ‘addterms’:
    foo.c:46:9: warning: assignment to ‘String *’ {aka ‘struct <anonymous> *’} from incompatible pointer type ‘struct string *’ [-Wincompatible-pointer-types]
       46 |       p = p->next;
          |         ^
    foo.c:49:15: warning: assignment to ‘struct string *’ from incompatible pointer type ‘String *’ {aka ‘struct <anonymous> *’} [-Wincompatible-pointer-types]
       49 |       p->next = new;
          |               ^
    foo.c: In function ‘isCharacter’:
    foo.c:58:11: warning: comparison between pointer and integer
       58 |     if (c == smallletter[i] || c == Capitalletter[i]) {
          |           ^~
    foo.c:58:34: warning: comparison between pointer and integer
       58 |     if (c == smallletter[i] || c == Capitalletter[i]) {
          |                                  ^~
    foo.c: In function ‘addtofile’:
    foo.c:103:15: warning: comparison of integer expressions of different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} [-Wsign-compare]
      103 |         if (k != (strlen(text) - 1))
          |               ^~
    foo.c:89:7: warning: unused variable ‘j’ [-Wunused-variable]
       89 |   int j, k;
          |       ^
    foo.c: In function ‘openFile’:
    foo.c:126:17: warning: comparison of integer expressions of different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} [-Wsign-compare]
      126 |   for (i = 0; i < strlen(filename); i++)
          |                 ^
    foo.c: In function ‘Display’:
    foo.c:162:19: error: incomplete universal character name \U
      162 |   FILE *f = fopen("C:\Users\AMANIB\Desktop\ri\IR\output.txt", "r+");
          |                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    foo.c:162:19: warning: unknown escape sequence: '\A'
    foo.c:162:19: warning: unknown escape sequence: '\D'
    foo.c:162:19: warning: unknown escape sequence: '\I'
    foo.c:162:19: warning: unknown escape sequence: '\o'
    foo.c:157:10: warning: unused variable ‘k’ [-Wunused-variable]
      157 |   int i, k, j = 0;
          |          ^
    foo.c: In function ‘main’:
    foo.c:207:17: error: incomplete universal character name \U
      207 |   filef = fopen("C:\Users\AMANIB\Desktop\ri\IR\output.txt", "r+");
          |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    foo.c:207:17: warning: unknown escape sequence: '\A'
    foo.c:207:17: warning: unknown escape sequence: '\D'
    foo.c:207:17: warning: unknown escape sequence: '\I'
    foo.c:207:17: warning: unknown escape sequence: '\o'
    foo.c:214:23: error: ‘struct dirent’ has no member named ‘doc_name’; did you mean ‘d_name’?
      214 |     if ((strcmp(read->doc_name, ".") == 0) || (strcmp(read->doc_name, "..") == 0))
          |                       ^~~~~~~~
          |                       d_name
    foo.c:214:61: error: ‘struct dirent’ has no member named ‘doc_name’; did you mean ‘d_name’?
      214 |     if ((strcmp(read->doc_name, ".") == 0) || (strcmp(read->doc_name, "..") == 0))
          |                                                             ^~~~~~~~
          |                                                             d_name
    foo.c:216:20: error: ‘struct dirent’ has no member named ‘doc_name’; did you mean ‘d_name’?
      216 |     openFile(read->doc_name);
          |                    ^~~~~~~~
          |                    d_name
    foo.c:224:7: warning: assignment to ‘String *’ {aka ‘struct <anonymous> *’} from incompatible pointer type ‘struct string *’ [-Wincompatible-pointer-types]
      224 |     p = p->next;
          |       ^
    foo.c:212:9: warning: unused variable ‘f’ [-Wunused-variable]
      212 |   FILE *f;
          |         ^
    foo.c:211:9: warning: unused variable ‘text’ [-Wunused-variable]
      211 |   char *text;
          |         ^~~~
    foo.c:205:8: warning: unused variable ‘word’ [-Wunused-variable]
      205 |   char word[50];
          |        ^~~~
    foo.c:203:8: warning: unused variable ‘c’ [-Wunused-variable]
      203 |   char c;
          |        ^
    foo.c:202:13: warning: unused variable ‘k’ [-Wunused-variable]
      202 |   int i, j, k;
          |             ^
    foo.c:202:10: warning: unused variable ‘j’ [-Wunused-variable]
      202 |   int i, j, k;
          |          ^
    foo.c:202:7: warning: unused variable ‘i’ [-Wunused-variable]
      202 |   int i, j, k;
          |       ^
    All of your upper/lower case stuff can be simplified by knowing about ctype.h
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. MAC C Scanning text documents, merge and summing lines
    By deon0783 in forum C Programming
    Replies: 3
    Last Post: 09-14-2017, 11:05 PM
  2. Code Help - File Prompt+Add Text+Print Contents
    By IR_logicAl in forum C Programming
    Replies: 2
    Last Post: 07-23-2015, 01:19 PM
  3. Output does not print on text file fputc();
    By mohsen in forum C Programming
    Replies: 1
    Last Post: 07-19-2013, 04:10 PM
  4. Replies: 0
    Last Post: 03-28-2003, 08:20 AM

Tags for this Thread