Thread: how to extract words from a string

  1. #1
    Registered User
    Join Date
    Sep 2005
    Posts
    84

    how to extract words from a string

    Hello

    I'm having trouble figuring out how to extract words from a string. My program is supposed to translate English sentences into Morse code, and Morse code sentences into English.

    So far my program takes in the input, one line at a time, and figures out if it is morse or english, at that point it should send the line one word at a time to a function that would translate it...

    This is where I run into trouble: the Morse code has three spaces separating the words, so I am trying to use this fact to distinguish between words; the letters within a word have one space between them.

    I set the variables a, b and c to 1, 2 and 3, and use them to check whether there are three blanks next to each other in the string.

    If there are I want it to send the word before the blanks to a new string called word...

    After this point I would send word to the function (which I haven't written yet).

    but my "word" string keeps appending words to itself getting longer, instead of just one word at a time.....

    I don't know if this makes any sense. This is the code I've written so far; I might be going about this the long way. If anyone knows of a shorter way to do this, or why mine doesn't work, any suggestion would be greatly appreciated.

    This is my code:

    Code:
    /* Preprocessor Directives and Macros */
    
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #define Max_Size 80
    
    /*
     * Translation table: one row per supported character.
     * In every row the first byte is the plain-text character and the
     * remaining bytes are its Morse code ('.' = dot, '-' = dash).
     * The longest row is 7 characters plus the terminating NUL, hence [8].
     */
    char morsecode[45][8] = {
          /* letters a-z */
          "a.-",    "b-...",  "c-.-.",  "d-..",   "e.",     "f..-.",
          "g--.",   "h....",  "i..",    "j.---",  "k-.-",   "l.-..",
          "m--",    "n-.",    "o---",   "p.--.",  "q--.-",  "r.-.",
          "s...",   "t-",     "u..-",   "v...-",  "w.--",   "x-..-",
          "y-.--",  "z--..",

          /* digits 0-9 */
          "0-----", "1.----", "2..---", "3...--", "4....-",
          "5.....", "6-....", "7--...", "8---..", "9----.",

          /* punctuation: . , : ? ' - / ( " */
          "..-.-.-", ",--..--", ":---...", "?..--..", "'.----.",
          "--....-", "/-..-.",  "(-.--.-", "\".-..-."
    };
     
                    
                 
                 
                 
    /*
     * emit_morse_words - write every Morse word found in `line` to `out`.
     *
     * Words are separated by a run of three or more blanks; the letters
     * inside a word are separated by a single blank.  `line` must be
     * NUL-terminated and must not contain the trailing newline.
     *
     * Each word is copied into its own NUL-terminated buffer before it is
     * printed, so one call never sees the text of a previous word.  The
     * last word of the line (which has no separator after it) is emitted
     * as well.
     */
    static void emit_morse_words(FILE *out, const char *line)
    {
        char word[Max_Size + 1];
        const char *start = line + strspn(line, " ");   /* skip leading blanks */

        while (*start != '\0') {
            const char *gap = strstr(start, "   ");
            size_t len = (gap != NULL) ? (size_t)(gap - start) : strlen(start);

            /* The final word may be followed by one or two stray blanks. */
            while (len > 0 && start[len - 1] == ' ') {
                len--;
            }
            if (len > Max_Size) {
                len = Max_Size;
            }

            if (len > 0) {
                memcpy(word, start, len);
                word[len] = '\0';
                fprintf(out, "word: %s \n", word);
            }

            if (gap == NULL) {
                break;                      /* that was the last word */
            }
            start = gap + strspn(gap, " "); /* jump over the whole separator */
        }
    }

    /*
     * Reads "morse.txt" line by line, echoes each line to "morse_out.txt"
     * prefixed with "Input: ", and, for lines made up only of '-', '.' and
     * blanks (i.e. Morse code), writes each word on its own "word: " line.
     *
     * Returns 0 on success, EXIT_FAILURE if a file cannot be opened or the
     * output cannot be flushed.
     */
    int main(void) {

         FILE *fin;
         FILE *fout;
         char string1[Max_Size + 1];  /* one input line, including the NUL */

         fin = fopen("morse.txt", "r");
         if (fin == NULL) {
             perror("morse.txt");
             return EXIT_FAILURE;
         }

         fout = fopen("morse_out.txt", "w");
         if (fout == NULL) {
             perror("morse_out.txt");
             fclose(fin);
             return EXIT_FAILURE;
         }

         while (fgets(string1, sizeof string1, fin) != NULL) {

             fprintf(fout, "Input: ");
             fputs(string1, fout);         /* For echo check */

             /* Drop the newline kept by fgets so it never ends up in a word. */
             string1[strcspn(string1, "\n")] = '\0';

             /* The line is Morse code when nothing but '-', '.' and ' ' occurs. */
             if (string1[strspn(string1, "-. ")] == '\0') {
                 emit_morse_words(fout, string1);
             }
         }

         fclose(fin);
         if (fclose(fout) != 0) {
             perror("morse_out.txt");
             return EXIT_FAILURE;
         }

         return 0;
    }

  2. #2
    Registered User
    Join Date
    Sep 2005
    Posts
    84
    ummm... I was looking at my post and it looked confusing with all the code, my question about how to extract a word and put it into a new string only corresponds to this piece of code:

    Code:
        a = 1; 
        b = 2; 
        c = 3; 
        n = 0; 
    count = 0; 
    
                
    do {      
               if ( string1[a] == ' ' && string1[b] == ' ' && string1[c] == ' ') { 
                    
                    j = n+1; 
                    
                    count = 0; 
    
                  while( (a-j) <= n ) { 
    
                          
                         word1[ (count) ] = string1[ (a-j) ];  
    
                         count += 1; 
                         j = j - 1;            
                          
                          
                         }  /* Closes while loop */ 
                          
                         fprintf(fout,"word: %s \n",word1); 
                          
                         
                              
                         }  /* Closes if loop */ 
    
                          
                          a++; b++; c++; n++;   } /* Closes do loop */ 
    
                          
                         while( string1[c] != '\0' );

  3. #3
    vae victus! skorman00's Avatar
    Join Date
    Nov 2003
    Posts
    594
    You never set count to 0, so each time it starts where you left off.

    For an easier solution, you may want to check out strtok(), and various other str***() functions.

  4. #4
    Registered User
    Join Date
    Sep 2005
    Posts
    84
    Doesn't it set count to 0 in the third line of the do-while statement? Shouldn't this restart my word array, so that I store the next word starting at word1[0]?

  5. #5
    Registered User
    Join Date
    Mar 2005
    Location
    Mountaintop, Pa
    Posts
    1,058
    You may want to approach this problem in the following fashion:

    Create two functions, Encrypt and Decrypt. The Encrypt function will convert English to Morse, Decrypt will convert Morse to English. Concentrate on only one function at a time. For instance, get the Encrypt function to work properly and then work on the Decrypt function. Also, initially, I wouldn't worry about loading the string(s) from a text file. This can be done later when your core processing is perfected... both Encrypt and Decrypt are working perfectly. Doing this will prevent you from being distracted by the clutter in main. Main should only have a string defined for input to Encrypt and a string for the return from Encrypt. Encrypt will be a char * function.

    BTW, the 2D array looks a little scary to me.

    Have fun

    Bob

    Here's a starter.....

    Code:
    /*
     * EncryptEnglish2Morse - starter for an English-to-Morse translator.
     *
     * pEnglishLine: NUL-terminated English text (not modified).
     * Returns a pointer to a static buffer holding the translation, with
     * '.' for a dot, '_' for a dash and one blank after every letter.  The
     * buffer is overwritten by the next call.
     *
     * Only 'a'/'A' and 'b'/'B' are handled so far; every other character
     * is skipped until its branch is added.
     */
    char *EncryptEnglish2Morse(char* pEnglishLine)
    {
        /* static: the buffer is handed back to the caller, so it has to
           outlive this call */
        static char szResult[1000];
        int iIndex;
        int iK=0;
        for(iIndex=0; pEnglishLine[iIndex]!='\0'; iIndex++){
            /* longest sequence below is 5 characters; keep room for it
               and for the terminating NUL */
            if(iK + 6 > (int)sizeof szResult) break;
            if(pEnglishLine[iIndex]=='a' || pEnglishLine[iIndex]=='A'){szResult[iK++]='.'; szResult[iK++]='_';szResult[iK++]=' ';}
            else if(pEnglishLine[iIndex]=='b' || pEnglishLine[iIndex]=='B'){szResult[iK++]='_'; szResult[iK++]='.'; szResult[iK++]='.'; szResult[iK++]='.'; szResult[iK++]=' ';}
            /* else if ... : add the remaining characters following the
               same pattern */
        }
        szResult[iK]='\0';
        return szResult;
    }

  6. #6
    vae victus! skorman00's Avatar
    Join Date
    Nov 2003
    Posts
    594
    you're right, I didn't see that first time around....hmm. Upon further inspection, I'm not quite sure what the condition of the inner loop is for. What is your thought process on that?

  7. #7
    Registered User
    Join Date
    Sep 2005
    Posts
    84
    Well basically what it's doing right now is, it goes through the string1 array and when it hits three blank spaces in a row that if statement kicks in

    Once we are in the if statement, I have that while statement there to begin putting the word before the three blanks into a new array called word1.

    since i have a=1, b=2, c=3 and n=0 incrementing at the same time, the n is always 3 spaces behind the c, which puts it at the last character of the word I am moving into the new array

    so once a-j = n I am at the end of my word and I get out of the loop

    after we get out of that loop, a, b, c and n are incremented again and are looking for the next 3 blanks in a row to start the process over again

    Thats my thinking, these are my troubles

    1) instead of the array word just containing a word, printing it and then emptying itself out
    its adding each word, appending it, so it's printing the whole input string in chunks, each time it prints it appends a new word to it

    2) Since my do-while loop terminates once string1[c] is the null character, it does not print out the last word. I'm not sure how to correct this.


    Thank You

  8. #8
    Registered User
    Join Date
    Mar 2005
    Location
    Mountaintop, Pa
    Posts
    1,058
    Here's an example on how you may want to approach translating English to Morse. It does the translation fine but I intentionally flawed the design of the function so that you cannot use it "as is" in your homework assignment. If you do, it will become your worst nightmare.

    Now, I would suggest you think in terms of your Morse to English function. In particular, how you are going to parse out the words and sentences from the morse code. Figure this out and make the necessary design changes to the code below and the whole assignment becomes a piece of cake.

    Have fun
    Bob
    Code:
    #include <stdio.h>
    #include <string.h>
    /*
     * English2Morse - translate English text into Morse symbols.
     *
     * input: NUL-terminated text (not modified).  Letters are matched
     *        case-insensitively.
     *
     * Returns a pointer to a static buffer, overwritten by the next call,
     * holding '.' for a dot and '_' for a dash.  A blank in the input is
     * copied through as a blank.  As in the original design, nothing
     * separates the codes of adjacent letters.
     *
     * A character with no entry in the table prints
     * "Error: Cannot translate" on stdout and contributes "* " to the
     * result.  Output that would not fit in the buffer is truncated
     * instead of being written past its end.
     */
    char *English2Morse(char* input)
    {
        /* Each row lists every input character that maps to `code`. */
        static const struct {
            const char *chars;
            const char *code;
        } table[] = {
            {"aA", "._"},     {"bB", "_..."},   {"cC", "_._."},   {"dD", "_.."},
            {"eE", "."},      {"fF", ".._."},   {"gG", "__."},    {"hH", "...."},
            {"iI", ".."},     {"jJ", ".___"},   {"kK", "_._"},    {"lL", "._.."},
            {"mM", "__"},     {"nN", "_."},     {"oO", "___"},    {"pP", ".__."},
            {"qQ", "__._"},   {"rR", "._."},    {"sS", "..."},    {"tT", "_"},
            {"uU", ".._"},    {"vV", "..._"},   {"wW", ".__"},    {"xX", "_.._"},
            {"yY", "_.__"},   {"zZ", "__.."},
            {"0", "_____"},   {"1", ".____"},   {"2", "..___"},   {"3", "...__"},
            {"4", "...._"},   {"5", "....."},   {"6", "_...."},   {"7", "__..."},
            {"8", "___.."},   {"9", "____."},
            {",", "__..__"},  {".", "._._._"},  {"?", "..__.."},  {";", "_._._."},
            {":", "___..."},  {"/", "_.._."},   {"-", "_...._"},  {"'", ".____."},
            {"()", "_.__._"}, {"_", "..__._"},
            {" ", " "}
        };
        static char output[1024];
        const size_t entries = sizeof table / sizeof table[0];
        size_t k = 0;
        size_t i;

        for (i = 0; input[i] != '\0'; i++) {
            const char *code = NULL;
            size_t t;
            size_t len;

            for (t = 0; t < entries; t++) {
                if (strchr(table[t].chars, input[i]) != NULL) {
                    code = table[t].code;
                    break;
                }
            }

            if (code == NULL) {
                printf("Error: Cannot translate\n");
                code = "* ";
            }

            /* Always leave room for the terminating NUL. */
            len = strlen(code);
            if (k + len >= sizeof output) {
                break;
            }
            memcpy(output + k, code, len);
            k += len;
        }

        output[k] = '\0';
        return output;
    }
    /*
     * Demo driver: translates one fixed sentence with English2Morse, copies
     * the result into a local buffer and prints it followed by a newline.
     */
    int main(void)
    {
        char sample[] = "This is a test of the national broadcasting system";
        char morse[1024] = {0};
        char *translated = English2Morse(sample);

        /* Keep a private copy; the translator hands back its own buffer. */
        strcpy(morse, translated);
        printf("%s\n", morse);

        return 0;
    }

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. C++ ini file reader problems
    By guitarist809 in forum C++ Programming
    Replies: 7
    Last Post: 09-04-2008, 06:02 AM
  2. Replies: 8
    Last Post: 04-25-2008, 02:45 PM
  3. We Got _DEBUG Errors
    By Tonto in forum Windows Programming
    Replies: 5
    Last Post: 12-22-2006, 05:45 PM
  4. Custom String class gives problem with another prog.
    By I BLcK I in forum C++ Programming
    Replies: 1
    Last Post: 12-18-2006, 03:40 AM
  5. Replies: 2
    Last Post: 05-05-2002, 01:38 PM