Thread: Lexer works almost perfectly - one small problem

  1. #1
    Registered User
    Join Date
    Dec 2012
    Posts
    34

    Lexer works almost perfectly - one small problem

    Hi all -

    I've made huge progress with my lexer in the last day or so! The code is quite clean now and easy to follow now that I have functions for each lexeme type.

    One small problem - the code is tacking on a bit of junk onto the "from" token which means that it isn't recognised as a keyword. So, if someone can track down why this is, that'd be *great* and the lexer will work perfectly!

    Here's the code -
    Code:
      
    
    /* This code is released to the public domain. */ 
    /* "Share and enjoy......"  :)     */ 
    
    
    #include <stdio.h>
    #include <ctype.h>
    #include <string.h>
    #include <stdlib.h>
    
    #define NUMBER_OF_KEYWORDS 9
    
    
    /* Array of our keywords in string form. */ 
    char *kw_strings[] = { 
       "select", "from", "where", "and", "or", "not", "in", "is", "null" 
        } ; 
       
        
    /*  Search function to search the array of keywords. */ 
    int search(char *arr[], int dim, char *str) { 
        
        int i;      
        int found_match;
        
        for (i=0; i<dim; i++) { 
            if ( !strcmp(arr[i] , str ) )  {   
                found_match = 1;        
                break; 
        }   else found_match = 0;    
     }  /* For */     
    
        return found_match; 
    }  /* search */ 
    
    
    
    /* Forward declarations. */ 
    void lex(char *str) ; 
    
    void parse(char token[],  char *toktype); 
    
    
    void lex_kwident(char *str) { 
       char token[20];    
       char *toktype;
       int i=0;    
        
        while (isalnum(*str) && *str != '\0' && i<20) {         
            token[i] = *str;
            i++;
            str++;
       } 
       
        if (search(kw_strings, NUMBER_OF_KEYWORDS, token) == 1 )
              toktype = "Keyword";
        else
              toktype = "Identifier" ;
          
       parse(token, toktype); 
       memset(&token[0], 0, sizeof(token));      
       lex(str);            
    } 
    
    
    void lex_string(char *str) { 
       char token[20];    
       char *toktype;
       int i=0;    
        
        while ( (*str != '"') && *str != '\0' && i<20) {         
            token[i] = *str;
            i++;
            str++;
       } 
           
       /* Add the end double-quote. */ 
       token[i] = '"' ;      
           
       toktype = "String" ;      
       parse(token, toktype); 
       memset(&token[0], 0, sizeof(token));      
       lex(str);            
    } 
    
    
    void lex_number(char *str) { 
       char token[20];    
       char *toktype;
       int i=0;    
        
        while (isdigit(*str) && *str != '\0' && i<20) {         
            token[i] = *str;
            i++;
            str++;
       } 
       
       toktype = "Number" ;
          
       parse(token, toktype); 
       memset(&token[0], 0, sizeof(token));      
       lex(str);            
    } 
    
    
    void lex_punct(char *str) { 
       char token[20];    
       char *toktype;
       int i=0;    
        
        while (ispunct(*str) && *str != '\0' && i<20) {         
            token[i] = *str;
            i++;
            str++;
       } 
       
       toktype = "Punct" ;
          
       parse(token, toktype); 
       memset(&token[0], 0, sizeof(token));      
       lex(str);            
    } 
    
    
    void lex_space(char *str) {  
       char token[80] = " ";         
       char *toktype = "Space";
        
      while ( isspace(*str) && *str != '\0') {                
            str++;
       } 
       
      toktype = "Space" ; 
      parse(token, toktype); 
      memset(&token[0], 0, sizeof(token)); 
      lex(str);             
    }     
    
    
    void lex(char *str) {     
            
       if (isalpha(*str) || *str == '_')       lex_kwident(str) ; 
       else if ( (*str == '"') )               lex_string(str); 
       else if (isspace(*str))                 lex_space(str); 
       else if (isdigit(*str))                 lex_number(str); 
       else if (ispunct(*str) && *str != '_')  lex_punct(str);          
                             
    } 
    
    
    /* Not a parser (yet) - just prints the tokens. */ 
    void parse(char token[],  char *toktype) { 
      printf("Token: %s Tokentype: %s\n", token, toktype); 
    }        
    
              
    int main() { 
    
    char *mystr = "select mycol8 from mytable" ; 
    
    lex(mystr); 
    
    return 0; 
    
    }
    Many thanks in advance.....
    - Andy

  2. #2
    misoturbutc Hodor's Avatar
    Join Date
    Nov 2013
    Posts
    1,787
    You're not terminating the "token" with the nul character and the fact that it is "working" for other tokens is just luck. Try this:

    Code:
        while (isalnum(*str) && *str != '\0' && i<19) {
            token[i] = *str;
            i++;
            str++;
       }
       token[i] = '\0';

  3. #3
    Registered User
    Join Date
    Dec 2012
    Posts
    34
    Quote Originally Posted by Hodor View Post
    You're not terminating the "token" with the nul character and the fact that it is "working" for other tokens is just luck. Try this:

    Code:
        while (isalnum(*str) && *str != '\0' && i<19) {
            token[i] = *str;
            i++;
            str++;
       }
       token[i] = '\0';
    Hi Hodor - thanks for that! Yes!!! It worked! Many thanks!

    It's a simple app but quite satisfying to get it working.
    Quite a nice example of mutually-recursive functions too.....

    Cheers - bye for now
    - Andy

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. SQL lexer - almost there, just have a printf problem
    By latte123 in forum C Programming
    Replies: 4
    Last Post: 12-31-2017, 06:34 PM
  2. Replies: 4
    Last Post: 03-26-2013, 06:42 PM
  3. Replies: 15
    Last Post: 09-23-2010, 02:19 PM
  4. Replies: 4
    Last Post: 08-18-2009, 03:32 PM
  5. Small code works on VS6 crash on VS2005 WHY???
    By salvadoravi in forum C Programming
    Replies: 7
    Last Post: 02-10-2008, 09:32 AM

Tags for this Thread