Thread: Help with reading a file

  1. #1
    Registered User
    Join Date
    Mar 2003
    Posts
    1

    Help with reading a file

    Iam trying to read a file, count the number of words in it and list each word with it's frequency. I used the code bellow but when l try to run it, it just hungs and nothing is outputed. lam relatively new in c/c++ field trying to do it by way of self study. I will appliciate the help


    #include <stdio.h>
    #include <ctype.h>
    #include <string.h>
    #include <stdlib.h>

    #define MAXWORD 10000


    struct tnode {
    char *word;
    int count;
    struct tnode *left;
    struct tnode *right;
    };

    struct tnode *addtree(struct tnode *, char *);
    void treeprint(struct tnode *);
    struct tnode *talloc(void);
    int getword(char *, int);
    char *strdupl(char *);


    /* word frequency count */

    int main(void)
    {
    struct tnode *root;
    char word[MAXWORD];
    root = NULL;

    while (getword(word, MAXWORD) != EOF)
    if (isprint(word[0]))
    root = addtree(root, word);
    treeprint(root);
    exit (0);
    }

    /* addtree: add a node with w, at or below p */

    struct tnode *addtree(struct tnode *p, char *w)
    {
    int cond;

    if (p == NULL) { /* a new word has arrived */
    p = talloc(); /* make a new node */
    p->word = strdupl(w);
    p->count = 1;
    p->left = p->right = NULL;
    }
    else if ((cond = strcmp(w, p->word)) == 0)
    (p->count)++; /* repeated word */
    else if (cond < 0) /* less than into left subtree */
    p->left = addtree(p->left,w);
    else /* greater than into right subtree */
    p->right = addtree(p->right,w);
    return p;
    }

    /* treeprint: in-order print of tree p */

    void treeprint(struct tnode *p)
    {
    if (p !=NULL) {
    treeprint(p->left);
    printf("%4d %s\n", p->count, p->word);
    treeprint(p->right);
    }
    }


    /* talloc: make a tnode */

    struct tnode *talloc(void)
    {
    return (struct tnode *) malloc(sizeof(struct tnode));
    }


    /* getword: get next word or character from input */

    int getword(char *word, int lim)
    {

    FILE *pFile;
    int c;
    char *w = word;

    pFile = fopen ("/mnt/samba/k/patrick/testp.txt","r");


    while (!isspace(c = fgetc(pFile)) && c!=EOF)


    *w++ = tolower(c);

    if (!isprint(c))
    {
    *w = '\0';
    return c;
    }
    for ( ; --lim > 0; w++)
    if(!isalnum(*w = fgetc(pFile)) && !ispunct(*w))
    {
    ungetc(*w,pFile);
    break;
    }

    *w = '\0';
    fclose(pFile);
    return word[0];
    }

    /* strdupl: make duplicate of s. (strdup builtin) */

    char *strdupl(char *s)
    {
    char *p;
    p = (char *) malloc(strlen(s)+1);
    if (p != NULL)
    strcpy(p, s);
    return p;
    }

  2. #2
    Confused Magos's Avatar
    Join Date
    Sep 2001
    Location
    Sweden
    Posts
    3,145
    It seems like you're reopening the file in every call to GetWord().
    Oh, and use code tags when posting code (read the FAQ).

    You could try this pseudo code:
    Code:
    OpenFile();
    
    while(Looping)
    {
       GetWordFromFile();
    
       if(FindWordInList())
       {
          AddOneToCounterOfThatWord();
       }
       else
       {
          AddWordToList();
       }
    
       if(EndOfFile()) Looping = false;
    }
    
    CloseFile();
    MagosX.com

    Give a man a fish and you feed him for a day.
    Teach a man to fish and you feed him for a lifetime.

  3. #3
    Confused Magos's Avatar
    Join Date
    Sep 2001
    Location
    Sweden
    Posts
    3,145
    BTW, I don't think so many words contain 10000 letters .
    MagosX.com

    Give a man a fish and you feed him for a day.
    Teach a man to fish and you feed him for a lifetime.

  4. #4
    Code Goddess Prelude's Avatar
    Join Date
    Sep 2001
    Posts
    9,897
    Change the prototype of getword to this:
    Code:
    int getword(char *, int, FILE *);
    The definition to this:
    Code:
    int getword(char *word, int lim, FILE *pFile)
    {
      int c;
      char *w = word;
      
      while (isspace(c = fgetc(pFile)))
        ;
      
      if (c != EOF)
        *w++ = c;
      
      if (!isalpha(c))
      {
        *w = '\0';
        return c;
      }
      
      for ( ; --lim > 0; w++)
      {
        if(!isalnum(*w = fgetc(pFile)))
        {
          ungetc(*w, pFile);
          break;
        }
    
        *w = tolower(*w);
      }
      
      *w = '\0';
      
      return word[0];
    }
    And main to this:
    Code:
    int main(void)
    {
      FILE *pFile;
      struct tnode *root;
      char word[MAXWORD];
      
      root = NULL;
      pFile = fopen("/mnt/samba/k/patrick/testp.txt", "r");
      
      if (pFile != NULL)
      {
        while (getword(word, MAXWORD, pFile) != EOF) {
          if (isalpha(word[0]))
            root = addtree(root, word);
        }
    
        fclose(pFile);
      }
      else
        perror("File open failure");
      
      treeprint(root);
      exit (0);
    }
    You chose a good book to learn from, K&R is the bible for C.

    -Prelude
    My best code is written with the delete key.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. opening empty file causes access violation
    By trevordunstan in forum C Programming
    Replies: 10
    Last Post: 10-21-2008, 11:19 PM
  2. Formatting the contents of a text file
    By dagorsul in forum C++ Programming
    Replies: 2
    Last Post: 04-29-2008, 12:36 PM
  3. Replies: 3
    Last Post: 03-04-2005, 02:46 PM
  4. Possible circular definition with singleton objects
    By techrolla in forum C++ Programming
    Replies: 3
    Last Post: 12-26-2004, 10:46 AM
  5. System
    By drdroid in forum C++ Programming
    Replies: 3
    Last Post: 06-28-2002, 10:12 PM