Thread: Help with a program to spell check a file

  1. #46
    Noobin to the max
    Join Date
    Mar 2013
    Posts
    73
    As soon as I do that I get 2 errors, the new one being:

    load.c:39:58: error: expected ';' at end of declaration
    int input(FILE *fp, char *words[DLENGTH], int getData)

  2. #47
    Registered User
    Join Date
    Sep 2006
    Posts
    8,868
    Move line 59 to line 40.

  3. #48
    Noobin to the max
    Join Date
    Mar 2013
    Posts
    73
    OK, that makes sense, I need to close main, inside of main! I have done that and the 1 error is gone, but I still have the one I started with:

    load.c:26:9: error: array type 'char *[29]' is not assignable
    words=malloc(count * sizeof(char *));
    ~~~~~^


    Here is my code:

    Code:
    #include <stdbool.h>
    #include <stdio.h>
    #include "dictionary.h"
    #include <string.h>
    #include <stdlib.h>
    
    
    #define MAXWORDS 26
    #define DLENGTH 29
     
    int input(FILE *fp,char *words[DLENGTH],int getData);
    /*
     * Two-pass loader: call input() once to count the lines of dictionary.txt,
     * allocate one buffer per line, then call input() again to copy the lines in.
     */
    int main(void)
    {
    
    
       char *words[DLENGTH];   // an ARRAY of DLENGTH pointers; an array cannot be assigned to, so the words=malloc(...) line below is rejected by the compiler
       int getData;            // never used in this function
       int count;
       char buff[BUFSIZ];  // BUFSIZ is the buffer-size macro from <stdio.h>; nothing in main ever writes to this buffer
     
       //char **words;
       FILE *fp=fopen("dictionary.txt","r");   // result is not checked: fp is NULL if the file cannot be opened
       
       count=input(fp,words,0);   //just counting this time (NOTE: input() as written in this listing returns -1 unless getData==1)
       rewind(fp);              //going back to the start of the file
     
       //malloc the right number of words here
       words=malloc(count * sizeof(char *));
       for(int i=0;i<count;i++) 
       {
            words[i]=malloc(DLENGTH * sizeof(char));  // one DLENGTH-byte buffer per word
       }
       input(fp,words,1);   //now getting the words
      
       //all the other stuff, here (mostly calling some functions)
        
       printf("%s\n",buff);   // prints buff (never filled in), not the loaded words
     
       return 0;
    }   
       /*
        * Reads fp one line at a time. When getData is non-zero each line is
        * copied into words[i]; otherwise the lines are only counted.
        * Returns the number of lines read when getData == 1, and -1 otherwise.
        */
       int input(FILE *fp, char *words[DLENGTH], int getData)
       { 
            int i=0;
            char buff[128];
            while((fgets(buff, BUFSIZ, fp)) != NULL)   // NOTE(review): fgets is told the limit is BUFSIZ, but buff holds only 128 bytes
            {
                 if(getData) 
                 {
                      //remove the newline here (not done yet: the line is copied with its '\n')
                      strcpy(words[i],buff);   // unbounded copy; words[i] must be big enough for the whole line
                 }
            ++i;
            }
            if(getData==1)
                return i;    // the count is reported on the copying pass...
            else
                return -1;   // ...so a counting-only call (getData == 0) receives -1
       }

  4. #49
    Registered User
    Join Date
    Sep 2006
    Posts
    8,868
    Line 16, change it to

    char **words;

    I believe I confused you with **words and *words[]. Sorry.

  5. #50
    Noobin to the max
    Join Date
    Mar 2013
    Posts
    73
    OK, that fixed my problem, once I also initialized it to NULL and removed my unused variable "getData". But when I try to run it after it compiles successfully, I get a seg fault.

    I ran it through GDB and this is what I got at the point of the seg fault:

    Program received signal SIGSEGV, Segmentation fault.
    _IO_fgets (buf=0xbfffd304 "\230\016\002", n=8192, fp=0x0) at iofgets.c:52
    52 _IO_acquire_lock (fp);
    (gdb) bt
    #0 _IO_fgets (buf=0xbfffd304 "\230\016\002", n=8192, fp=0x0) at iofgets.c:52
    #1 0x0804871a in input (fp=0x0, words=0x0, getData=0) at load.c:41
    #2 0x080485fd in main () at load.c:20


    Here is the program now:

    Code:
    #include <stdbool.h>
    #include <stdio.h>
    #include "dictionary.h"
    #include <string.h>
    #include <stdlib.h>
    
    
    #define MAXWORDS 26
    #define DLENGTH 29
     
    int input(FILE *fp,char *words[DLENGTH],int getData);
    /*
     * Two-pass loader: call input() once to count the lines of dictionary.txt,
     * allocate one buffer per line, then call input() again to copy the lines in.
     */
    int main(void)
    {
    
    
       char **words = NULL;   // pointer to the (not yet allocated) array of word buffers
       int count;
       char buff[BUFSIZ];  // BUFSIZ is the buffer-size macro from <stdio.h>; nothing in main ever writes to this buffer
     
       FILE *fp=fopen("dictionary.txt","r");   // result is not checked: a failed open leaves fp NULL, and fgets() on a NULL stream is what the backtrace above shows (fp=0x0)
       
       count=input(fp,words,0);   //just counting this time (NOTE: input() as written in this listing returns -1 unless getData==1)
       rewind(fp);              //going back to the start of the file
     
       //malloc the right number of words here
       words=malloc(count * sizeof(char *));
       for(int i=0;i<count;i++) 
       {
            words[i]=malloc(DLENGTH * sizeof(char));  // one DLENGTH-byte buffer per word
       }
       input(fp,words,1);   //now getting the words
      
       //all the other stuff, here (mostly calling some functions)
        
       printf("%s\n",buff);   // prints buff (never filled in), not the loaded words
     
       return 0;
    }   
       /*
        * Reads fp one line at a time. When getData is non-zero each line is
        * copied into words[i]; otherwise the lines are only counted.
        * Returns the number of lines read when getData == 1, and -1 otherwise.
        */
       int input(FILE *fp, char *words[DLENGTH], int getData)
       { 
            int i=0;
            char buff[128];
            while((fgets(buff, BUFSIZ, fp)) != NULL)   // NOTE(review): fgets is told the limit is BUFSIZ, but buff holds only 128 bytes
            {
                 if(getData) 
                 {
                      //remove the newline here (not done yet: the line is copied with its '\n')
                      strcpy(words[i],buff);   // unbounded copy; words[i] must be big enough for the whole line
                 }
            ++i;
            }
            if(getData==1)
                return i;    // the count is reported on the copying pass...
            else
                return -1;   // ...so a counting-only call (getData == 0) receives -1
       }

  6. #51
    Registered User
    Join Date
    Sep 2006
    Posts
    8,868
    Logic error was returning count on the wrong read of the file.

    Code:
    #include <stdio.h>
    //#include "dictionary.h"
    #include <string.h>
    #include <stdlib.h>
     
     
    #define MAXWORDS 26
    #define DLENGTH 29
      
    int input(FILE *fp,char *words[DLENGTH],int getData);
    /*
     * Loads every line of "words26.txt" into a heap-allocated array of strings
     * and prints them, one per line.
     *
     * The file is read twice through input(): the first pass (getData == 0)
     * only counts the lines so the right amount of memory can be allocated,
     * the second pass (getData == 1) copies the lines into that memory.
     *
     * Returns 0 on success and 1 if the file cannot be opened or memory
     * runs out.
     */
    int main(void)
    {
       FILE *fp = fopen("words26.txt", "r");
       if (!fp) {
          printf("Error! file failed to open\n");
          return 1;
       }

       /* Pass 1: nothing is stored, so no word array is needed yet. */
       int count = input(fp, NULL, 0);
       rewind(fp);              /* back to the start of the file for pass 2 */

       if (count <= 0) {        /* empty file: nothing to allocate or print */
          fclose(fp);
          return 0;
       }

       /* One pointer per word, then one DLENGTH-byte buffer per pointer. */
       char **words = malloc((size_t)count * sizeof *words);
       if (!words) {
          fprintf(stderr, "Error! out of memory\n");
          fclose(fp);
          return 1;
       }
       for (int i = 0; i < count; i++) {
          words[i] = malloc(DLENGTH);
          if (!words[i]) {
             fprintf(stderr, "Error! out of memory\n");
             while (i-- > 0) {  /* release the buffers obtained so far */
                free(words[i]);
             }
             free(words);
             fclose(fp);
             return 1;
          }
       }

       /* Pass 2: copy the words in. */
       input(fp, words, 1);
       fclose(fp);

       //all the other stuff, here (mostly calling some functions)

       for (int i = 0; i < count; i++) {
          printf("%s\n", words[i]);
       }

       /* Give back everything that was allocated above. */
       for (int i = 0; i < count; i++) {
          free(words[i]);
       }
       free(words);

       return 0;
    }
    /* Size of each words[i] buffer. The #define near the top of the file is the
       real definition; this fallback only applies if that one is missing. */
    #ifndef DLENGTH
    #define DLENGTH 29
    #endif

    /*
     * Reads fp one line at a time until end of file.
     *
     * fp      - open text stream, positioned where reading should start.
     * words   - array of caller-allocated buffers, each at least DLENGTH bytes;
     *           only touched when getData is non-zero (may be NULL otherwise).
     * getData - 0: just count the lines; non-zero: also store line i in
     *           words[i], without its line terminator and cut to DLENGTH-1
     *           characters so the buffer can never be overrun.
     *
     * Returns the number of lines read, except that a call with getData == 1
     * returns 0 (the caller already knows the count from the counting pass).
     */
    int input(FILE *fp, char **words, int getData)
    {
       int lines = 0;
       char buff[BUFSIZ];

       while (fgets(buff, sizeof buff, fp) != NULL)
       {
          if (getData)
          {
             /* fgets keeps the '\n' (and a '\r' on DOS files): cut it off */
             buff[strcspn(buff, "\r\n")] = '\0';
             /* bounded copy: always NUL-terminated, never past DLENGTH bytes */
             snprintf(words[lines], DLENGTH, "%s", buff);
          }
          ++lines;
       }

       return (getData == 1) ? 0 : lines;
    }
    I changed the name of the file so I could test it.
    Last edited by Adak; 03-15-2013 at 01:58 PM.

  7. #52
    Noobin to the max
    Join Date
    Mar 2013
    Posts
    73
    AWESOME!!! IT WORKS!!!!!

    Thanks so much for all your help on this program, this is 1 section of a larger program and the part I am working on now involves taking a smaller section of this program, to simply count the number of words in the dictionary. I should just be able to take the count part of the program, but am having some problems.

    Here is what I have so far:

    Code:
    #include <stdio.h>
    //#include "dictionary.h"
    #include <string.h>
    #include <stdlib.h>
      
      
    //#define MAXWORDS 26
    #define DLENGTH 29
       
    
    
    /*
     * Cut-down program meant to print only the number of lines in words1.txt.
     */
    int main(void)
    {
    int input(FILE *fp,char *words[DLENGTH],int getData);   // only a prototype: this listing contains no definition of input(), so the call below cannot be resolved when linking
      
       char **words = NULL;   // stays NULL; passed to input() for a counting-only call
       int count=0;
       //char buff[BUFSIZ];  //BUFSIZ or BUFSIZE is a macro for your system - usually 256 or 512 char's in size. A "natural" buffer length, for your system.
       
       FILE *fp=fopen("words1.txt","r");
       if(!fp) {
          printf("Error! file failed to open\n");
          return 1;
       }
         
       count=input(fp,words,0);   //just counting this time
       printf("count: %d",count); //getchar();
       // fp is never closed before main ends
    }
    This is the error that I am getting:

    clang: error: linker command failed with exit code 1 (use -v to see invocation)
    make: *** [size] Error 1


    I have never seen that error before, and after Googling it, I couldn't find anything that helps me. Any ideas on what might be wrong?

  8. #53
    Registered User
    Join Date
    Sep 2006
    Posts
    8,868
    Your program gets an error because you're calling the "input()" function, (on line 26), and although you have the prototype for the input function, there is no ACTUAL input function present in your program.

    So the boat is going to FarLand, and you have the map to give you directions, but there is no FarLand, so you're goofed.

    The program you had before did count up all the words in the dictionary - but now you want just the words to be counted, by itself, is that right?
    Last edited by Adak; 03-16-2013 at 12:39 AM.

  9. #54
    Noobin to the max
    Join Date
    Mar 2013
    Posts
    73
    Yes, I want the words to be counted without the rest of the program. How would I fix the issue with input???

  10. #55
    Registered User
    Join Date
    Sep 2006
    Posts
    8,868
    You can just make a small block of code, in another function, but not in input.

    Code:
    #include <stdio.h>
    //#include "dictionary.h"
    #include <string.h>
    #include <stdlib.h>
      
      
    //#define MAXWORDS 26
    #define DLENGTH 29
       
    
    int countOnly(void);
    
    
    /* Entry point: counts the words via countOnly() and reports the total. */
    int main(void)
    {
       const int total = countOnly();

       printf("count: %d\n", total);
       return 0;
    }
    /*
     * Counts the whitespace-separated words in "words26.txt", echoing each
     * word on its own line as it is read.
     *
     * Returns the number of words, or 0 if the file cannot be opened (an
     * error message is printed in that case).
     */
    int countOnly(void) {
       int count = 0;
       char str[40];
       FILE *fp = fopen("words26.txt", "r");
       if (!fp) {
          printf("Error! file failed to open\n");
          return 0;   /* no words could be counted */
       }
       /* %39s leaves room for the terminating NUL, so str cannot overflow */
       while (fscanf(fp, "%39s", str) == 1) {
          /* throw away the tail of a word longer than 39 characters so it is
             still counted once; matches nothing for ordinary words */
          (void)fscanf(fp, "%*[^ \t\n\v\f\r]");
          ++count;
          printf("%s\n", str);
       }

       fclose(fp);
       return count;
    }
    The above is one way to do it - but ONLY if the words are STRICTLY formatted correctly. fscanf() (and scanf() ), are flighty little birds, that will happily sing for someone else, in another county - they are not robust.
    Last edited by Adak; 03-16-2013 at 01:06 AM.

  11. #56
    Noobin to the max
    Join Date
    Mar 2013
    Posts
    73
    AWESOME, thanks so much, that works GREAT!!!

    I am going to try putting it all together in the larger program, I will let you know sometime Saturday, how it works!!!

    Thanks,
    Josh

  12. #57
    Registered User
    Join Date
    Sep 2006
    Posts
    8,868
    Good luck with the big program, Josh.

    I was motivated to make a binary tree to load words into, just from your thread here. Just for fun though.

  13. #58
    Noobin to the max
    Join Date
    Mar 2013
    Posts
    73
    Ok, so I am now working on combining it into 1 whole program, there is 1 program that has all the functions in it, then another program where they are called (the program where they are called was supplied).

    Here is my program so far:

    Code:
    #include <stdbool.h>
    #include <stdio.h>
    #include "dictionary.h"
    #include <string.h>
    #include <stdlib.h>
    
    
    #define MAXWORDS 26
    #define DLENGTH 29
    
    
    /**
     * Returns true if word is in dictionary else false.
     *
     * NOTE(review): this draft does not compile yet; the comments below mark
     * what is missing.
     */
    bool check(const char* word)
    {
    
    
    //I need to know how many words in the dictionary, 
    //would it be best to use the count function again, 
    //or is there a way to declare a global variable.
    
    
    int times_looped = 0;   // index of the dictionary entry being examined
    
    
        // words_in_dictionary, dictionary and text are not declared in this
        // function; the parameter `word` is never used.
        while(times_looped < words_in_dictionary)
        {
            // The strcmp call is missing its closing ')' and is handed one
            // argument (the result of ==) instead of two strings. strcmp
            // returns 0 for equal strings, so the test needs "== 0".
            if(strcmp(dictionary[times_looped] == text[times_looped])
            {
                return true;
                times_looped++;   // unreachable: follows a return
            }
            
            // Can never be true here: the loop condition already guarantees
            // times_looped < words_in_dictionary.
            else if(times_looped == words_in_dictionary)
            {
                return false;
                break;   // unreachable: follows a return
            }
            else
                times_looped++;
        }
        
        // Control reaches the end of a bool function with no return value
        // when the loop finishes without a match.
    }
    
    
    
    
    /**
     * Loads dictionary into memory.  Returns true if successful else false.
     *
     * NOTE(review): draft pasted from the stand-alone test program; the
     * comments below mark what still differs from that contract.
     */
    bool load(const char* dictionary)
    {
     
        int input(FILE *fp,char *words[DLENGTH],int getData);
    
    
        char **words = NULL;   // local to load(): the pointer is lost when load() returns
        int count;
        char buff[BUFSIZ];  // never written before it is printed below
     
        // "DICTIONARY" in quotes is a literal file name; the `dictionary`
        // parameter is not used. The result is not checked for NULL.
        FILE *fp=fopen("DICTIONARY","r");
       
        count=input(fp,words,0);   //just counting this time (NOTE: input() below returns -1 unless getData==1)
        rewind(fp);              //going back to the start of the file
     
        //malloc the right number of words here
        words=malloc(count * sizeof(char *));
        for(int i=0;i<count;i++) 
        {
            words[i]=malloc(DLENGTH * sizeof(char));  // one DLENGTH-byte buffer per word
        }
        input(fp,words,1);   //now getting the words
      
        //all the other stuff, here (mostly calling some functions)
        
        printf("%s\n",buff);   // prints buff (never filled in), not the loaded words
     
        return 0;   // 0 converts to false, which the calling program reports as "Could not load"
     
        // A function definition nested inside another function is not
        // standard C; input() needs to be defined at file scope.
        int input(FILE *fp, char *words[DLENGTH], int getData)
       { 
            int i=0;
            char buff[128];
            while((fgets(buff, BUFSIZ, fp)) != NULL)   // NOTE(review): limit is BUFSIZ but buff holds only 128 bytes
            {
                 if(getData) 
                 {
                      //remove the newline here (not done yet: the line is copied with its '\n')
                      strcpy(words[i],buff);
                 }
            ++i;
            }
            if(getData==1)
                return i;    // count reported on the copying pass...
            else
                return -1;   // ...so the counting-only call receives -1
       }
     
     
    }
    
    
    /**
     * Returns number of words in dictionary if loaded else 0 if not yet loaded.
     *
     * NOTE(review): draft pasted from the stand-alone counting program; as
     * written it prints the count and then always returns 0.
     */
    unsigned int size(void)
    {
    //Again, Would it be best to use this count function,
    //Or is there a way to declare a global variable so I don't have to count it so many times.
    
    
    int countOnly(void);
     
     
    {
       // This inner block is what used to be the body of main().
       printf("count: %d\n",countOnly()); //getchar();
       return 0;   // the count is printed, not returned
    }
       // A function definition nested inside another function is not
       // standard C; countOnly() needs to be defined at file scope.
       int countOnly(void)
    {
       int i=0;      // number of words read so far
       char str[40];
       FILE *fp=fopen("DICTIONARY","r");   // literal file name "DICTIONARY"; re-opens and re-reads the file on every call
       if(!fp) 
       {
          printf("Error! file failed to open\n");
          return 0;
       }
       while((fscanf(fp, "%s",str))>0)   // %s has no width limit, str holds 40 bytes
       {
          ++i;
          printf("%s\n",str);   // echoes every word that is read
       }
        
       fclose(fp);
       return i;
        
    }
    }
    
    
    /**
     * Unloads dictionary from memory.  Returns true if successful else false.
     */
    bool unload(void)
    {
    //Is inptr the right thing to unload from memory???
    
    
        // inptr is not declared anywhere in this listing. fclose() closes a
        // stream; it does not release the buffers load() obtained with
        // malloc() - those need free().
        fclose(inptr);
        //how do you "Return true if successful else false"
        // (fclose() itself returns 0 on success and EOF on failure)
        return true;
    }


    And here is the program where the functions are called:


    Code:
    #include <ctype.h>
    #include <stdio.h>
    #include <sys/resource.h>
    #include <sys/time.h>
    
    
    #include "dictionary.h"
    
    
    // default dictionary
    #define DICTIONARY "/home/cs50/pset5/dictionaries/large"
    
    
    // prototype
    double calculate(const struct rusage* b, const struct rusage* a);
    
    
    /**
     * Spell-checks the text file named on the command line against a
     * dictionary (the default one, or the one given as the first of two
     * arguments), prints every misspelled word, then reports word counts and
     * the time spent in load, check, size and unload.
     *
     * Returns 0 on success and 1 on a usage, load, read or unload error.
     */
    int main(int argc, char* argv[])
    {
        // check for correct number of args
        if (argc != 2 && argc != 3)
        {
            printf("Usage: speller [dictionary] text\n");
            return 1;
        }

        // structs for timing data
        struct rusage before, after;

        // benchmarks
        double ti_load = 0.0, ti_check = 0.0, ti_size = 0.0, ti_unload = 0.0;

        // determine dictionary to use
        char* dictionary = (argc == 3) ? argv[1] : DICTIONARY;

        // load dictionary
        getrusage(RUSAGE_SELF, &before);
        bool loaded = load(dictionary);
        getrusage(RUSAGE_SELF, &after);

        // abort if dictionary not loaded
        if (!loaded)
        {
            printf("Could not load %s.\n", dictionary);
            return 1;
        }

        // calculate time to load dictionary
        ti_load = calculate(&before, &after);

        // try to open text
        char* text = (argc == 3) ? argv[2] : argv[1];
        FILE* fp = fopen(text, "r");
        if (fp == NULL)
        {
            printf("Could not open %s.\n", text);
            unload();
            return 1;
        }

        // prepare to report misspellings
        printf("\nMISSPELLED WORDS\n\n");

        // prepare to spell-check
        int index = 0, misspellings = 0, words = 0;
        char word[LENGTH+1];

        // spell-check each word in text
        for (int c = fgetc(fp); c != EOF; c = fgetc(fp))
        {
            // allow only alphabetical characters and apostrophes
            if (isalpha(c) || (c == '\'' && index > 0))
            {
                // append character to word
                word[index] = c;
                index++;

                // ignore alphabetical strings too long to be words
                if (index > LENGTH)
                {
                    // consume remainder of alphabetical string
                    while ((c = fgetc(fp)) != EOF && isalpha(c));

                    // prepare for new word
                    index = 0;
                }
            }

            // ignore words with numbers (like MS Word can)
            else if (isdigit(c))
            {
                // consume remainder of alphanumeric string
                while ((c = fgetc(fp)) != EOF && isalnum(c));

                // prepare for new word
                index = 0;
            }

            // we must have found a whole word
            else if (index > 0)
            {
                // terminate current word
                word[index] = '\0';

                // update counter
                words++;

                // check word's spelling
                getrusage(RUSAGE_SELF, &before);
                bool misspelled = !check(word);
                getrusage(RUSAGE_SELF, &after);

                // update benchmark
                ti_check += calculate(&before, &after);

                // print word if misspelled
                if (misspelled)
                {
                    printf("%s\n", word);
                    misspellings++;
                }

                // prepare for next word
                index = 0;
            }
        }

        // check whether there was an error
        if (ferror(fp))
        {
            fclose(fp);
            printf("Error reading %s.\n", text);
            unload();
            return 1;
        }

        // close text
        fclose(fp);

        // determine dictionary's size
        getrusage(RUSAGE_SELF, &before);
        unsigned int n = size();
        getrusage(RUSAGE_SELF, &after);

        // calculate time to determine dictionary's size
        ti_size = calculate(&before, &after);

        // unload dictionary
        getrusage(RUSAGE_SELF, &before);
        bool unloaded = unload();
        getrusage(RUSAGE_SELF, &after);

        // abort if dictionary not unloaded
        if (!unloaded)
        {
            printf("Could not unload %s.\n", dictionary);
            return 1;
        }

        // calculate time to unload dictionary
        ti_unload = calculate(&before, &after);

        // report benchmarks (n is unsigned int, so it is printed with %u)
        printf("\nWORDS MISSPELLED:     %d\n", misspellings);
        printf("WORDS IN DICTIONARY:  %u\n", n);
        printf("WORDS IN TEXT:        %d\n", words);
        printf("TIME IN load:         %.2f\n", ti_load);
        printf("TIME IN check:        %.2f\n", ti_check);
        printf("TIME IN size:         %.2f\n", ti_size);
        printf("TIME IN unload:       %.2f\n", ti_unload);
        printf("TIME IN TOTAL:        %.2f\n\n", 
         ti_load + ti_check + ti_size + ti_unload);

        // that's all folks
        return 0;
    }
    
    
    /**
     * Elapsed CPU time, in seconds, from snapshot b ("before") to snapshot a
     * ("after"): user time plus system time. Yields 0.0 if either pointer is
     * NULL.
     */
    double calculate(const struct rusage* b, const struct rusage* a)
    {
        // nothing to measure without both snapshots
        if (!b || !a)
        {
            return 0.0;
        }

        // each bracketed pair is one timestamp in microseconds; the sum of
        // the user and system deltas is scaled to seconds at the very end
        return (
            (
                (a->ru_utime.tv_sec * 1000000 + a->ru_utime.tv_usec)
                - (b->ru_utime.tv_sec * 1000000 + b->ru_utime.tv_usec)
            )
            +
            (
                (a->ru_stime.tv_sec * 1000000 + a->ru_stime.tv_usec)
                - (b->ru_stime.tv_sec * 1000000 + b->ru_stime.tv_usec)
            )
        ) / 1000000.0;
    }

    I have highlighted my questions in red in the first program, mainly I need help with the count, as that is what both questions are about. Any other advice would also be GREAT!


    Just in case you are wondering, there is a makefile that compiles them together.
    Last edited by Dude22; 03-16-2013 at 04:37 PM.

  14. #59
    Stoned Witch Barney McGrew's Avatar
    Join Date
    Oct 2012
    Location
    astaylea
    Posts
    420
    Have you heard of those programs called profilers?

  15. #60
    Registered User
    Join Date
    Sep 2006
    Posts
    8,868
    You CAN'T be using a linear search - that's at least a felony sin in the Church of Coding.

    So replace this with a nice binary search. For a 140k+ dictionary, it's a function straight from heaven:

    Take out this:

    Code:
    /**
     * Returns true if word is in dictionary else false.
     *
     * NOTE(review): linear-search draft; it does not compile as quoted, and
     * the comments below mark why.
     */
    bool check(const char* word)
    {
    
        // times_looped, words_in_dictionary, dictionary and text are not
        // declared in this function; the parameter `word` is never used.
        while(times_looped < words_in_dictionary)
        {
            // The strcmp call is missing its closing ')' and is handed one
            // argument (the result of ==) instead of two strings. strcmp
            // returns 0 for equal strings, so the test needs "== 0".
            if(strcmp(dictionary[times_looped] == text[times_looped])
            {
                return true;
                times_looped++;   // unreachable: follows a return
            }
            
            // Can never be true here: the loop condition already guarantees
            // times_looped < words_in_dictionary.
            else if(times_looped == words_in_dictionary)
            {
                return false;
                break;   // unreachable: follows a return
            }
            else
                times_looped++;
        }
        
        // Control reaches the end of a bool function with no return value
        // when the loop finishes without a match.
    }
    and replace it with this:
    Code:
    /*
     * Binary search for the string buff in words[0..n-1].
     *
     * buff  - NUL-terminated string to look for (not modified).
     * words - array of n NUL-terminated strings, sorted in ascending
     *         strcmp() order; the search is only correct on sorted input.
     * n     - number of entries in words; n <= 0 simply finds nothing.
     *
     * Returns true if some words[i] compares equal to buff, false otherwise.
     */
    bool binsearch(const char *buff, char *words[], int n) {
       int lo = 0;
       int hi = n - 1;

       while (lo <= hi) {
          /* midpoint written this way cannot overflow, unlike (lo+hi)/2 */
          int mid = lo + (hi - lo) / 2;
          /* compare once per probe and reuse the result */
          int cmp = strcmp(words[mid], buff);

          if (cmp > 0) {
             hi = mid - 1;      /* target sorts before words[mid] */
          } else if (cmp < 0) {
             lo = mid + 1;      /* target sorts after words[mid] */
          } else {
             return true;
          }
       }
       return false;
    }
    A binary search (and this is not the fastest one possible, just the most clear and easy to debug), will reduce your average look ups from approximately 72,000 on average to about 16 - 18 maximum. (With a binary search, most of the words found, will be found very close to the maximum number of searches possible for that logic.)

    //I need to know how many words in the dictionary,
    //would it be best to use the count function again,
    //or is there a way to declare a global variable.

    Save count - it was returned from the input function, AND it was re-counted in the getData function.

    You don't want a global variable, 95% of the time. Use local variables, and pass them to the functions as needed.

    You NEVER want to recount something big that your program has already spent the time counting, once.
    Last edited by Adak; 03-16-2013 at 06:35 PM.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. Spell checking program
    By JYorke2097 in forum C Programming
    Replies: 3
    Last Post: 01-15-2009, 08:28 PM
  2. how to check a file for every 15 mins thr' program
    By nitinmhetre in forum Linux Programming
    Replies: 10
    Last Post: 01-05-2007, 01:53 AM
  3. Spell Checker
    By DeepFyre in forum Tech Board
    Replies: 2
    Last Post: 02-11-2005, 12:17 PM
  4. spell check in C using a dictionary file
    By goron350 in forum C Programming
    Replies: 10
    Last Post: 11-25-2004, 06:44 PM
  5. I can't spell...
    By Cheeze-It in forum A Brief History of Cprogramming.com
    Replies: 0
    Last Post: 05-08-2003, 08:07 AM