Thread: simple lexical analyzer

  1. #1
    Registered User
    Join Date
    Aug 2009
    Posts
    4

    simple lexical analyzer

    im a computer science student and our professor is asking us to make a simple lexical analyzer which can determine if the entered value is a string literal, character literal, floating liferal, integer, or identifier. now, here are my problems...

    Code:
    #include <stdio.h>
    #include <conio.h>
    #include <string.h>
    char a[30], l;
    int x, y, z, ind1;
    void scancharstring()
    	{
    		if ((a[0]=='"' && a[l]=='"'))
    			{
    			 if (a[2]=='"')
    			  {
    				printf("Character Literal");
    			  }
    			 else
    			  {
    				printf("String Literal");
    			  }
    			 }
    	}
    
    main()
    	{
    
    		l=strlen(a);
    		clrscr();
    		printf("Input: ");
    		gets(a);
    		scancharstring();
    		for (y=0; y<=l; y++)
    
    			{
    
    					if (a[y]=='_')
    			  		 {
    			   		  ind1=1;
    			  		 }
    			 		else if (a[y]=='A')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='a')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='B')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='b')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='C')
    			  		 {
    			   		  ind1=1;
                              		 }
    					else if (a[y]=='c')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='D')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='d')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='E')
    			  		 {
    			   		  ind1=1;
                              		 }
    					else if (a[y]=='e')
    			  		 {
    					  ind1=0;
                              		 }
    			 		else if (a[y]=='F')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='f')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='G')
    			  		 {
    			   		  ind1=1;
                              		 }
    				 	else if (a[y]=='g')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='H')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='h')
    			  		 {
    			   		  ind1=1;
                              		 }
    			 		else if (a[y]=='I')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='i')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='J')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='j')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='K')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='k')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='L')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='l')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='M')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='m')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='N')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='n')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='O')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='o')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='P')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='p')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='Q')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='q')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='R')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='r')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='S')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='s')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='T')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='t')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='U')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='u')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='V')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='v')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='W')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='w')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='X')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='x')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='Y')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='y')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='Z')
    			 	 	 {
    			  	  	  ind1=1;
                             	 	 }
    			 		else if (a[y]=='z')
    			  	 	 {
    			   	  	  ind1=1;
                              	 	 }
    					else if (a[y]=='!')
    			 	 	 {
    			  	  	  ind1=0;
                             	 	 }
    			 		else if (a[y]=='@')
    			  	 	 {
    			   	  	  ind1=0;
                              	 	 }
    					else if (a[y]=='#')
    			 	 	 {
    			  	  	  ind1=0;
                             	 	 }
    			 		else if (a[y]=='$')
    			  	 	 {
    			   	  	  ind1=0;
                              	 	 }
    					else if (a[y]=='%')
    			 	 	 {
    			  	  	  ind1=0;
                             	 	 }
    			 		else if (a[y]=='^')
    			  	 	 {
    			   	  	  ind1=0;
                              	 	 }
    					else if (a[y]=='&')
    			 	 	 {
    			  	  	  ind1=0;
                             	 	 }
    					else if (a[y]=='*')
    			  	 	 {
    			   	  	  ind1=0;
                              	 	 }
    					else if (a[y]=='(')
    			 	 	 {
    			  	  	  ind1=0;
    					 }
    
    
    				if (ind1==1)
    			 	 {
    				  printf("Identifier\n");
    			 	 }
    				else if (ind1==0)
    				 {
    				  printf("Invalid Identifier\n");
    				 }
    			}
    
    		getch();
    
    	}

    now, i think i'm almost done with the literals. what i did is to scan the array for " " to determine whether the entered characters will fall under string or character literal. but im not sure if i did it right.


    now, my 2nd problem is with the identifiers. the only rule with the identifiers is that it must consists of only letters and underscore. if any other character is detected, then it's not an identifier. what i did is to compare every index of the array with the valid characters for identifiers. well, i dont know what the problem is but it wont loop. it only scans the first index of the array. please help. i really suck at programming so please explain it to me in easy-to-understand terms. thanks guys.
    Last edited by ^Son_Gokou08; 08-22-2009 at 10:51 AM. Reason: edited the code

  2. #2
    and the Hat of Guessing tabstop's Avatar
    Join Date
    Nov 2007
    Posts
    14,336
    1. If y only goes from 0 to 1, that's not much of a loop in the first place.
    2. You print your results inside the loop, meaning you print results after every character instead of just once at the end.
    3. Every time through, you change your answer -- so *f counts as an identifier, since finding the f sets your flag back to true.

    There's probably more, but that's a start.

  3. #3
    spurious conceit MK27's Avatar
    Join Date
    Jul 2008
    Location
    segmentation fault
    Posts
    8,300
    You might want to look at

    Table of ASCII Characters

    since that entire ridiculously long series of else if could be done in 3 or 4 lines, eg:
    Code:
    if ((a[y]>=65) && (a[y]<=122)) ind1=1;
    C programming resources:
    GNU C Function and Macro Index -- glibc reference manual
    The C Book -- nice online learner guide
    Current ISO draft standard
    CCAN -- new CPAN like open source library repository
    3 (different) GNU debugger tutorials: #1 -- #2 -- #3
    cpwiki -- our wiki on sourceforge

  4. #4
    Registered User
    Join Date
    Aug 2009
    Posts
    4
    Quote Originally Posted by tabstop View Post
    1. If y only goes from 0 to 1, that's not much of a loop in the first place.
    2. You print your results inside the loop, meaning you print results after every character instead of just once at the end.
    3. Every time through, you change your answer -- so *f counts as an identifier, since finding the f sets your flag back to true.

    There's probably more, but that's a start.
    oh, that's an "L" and not a number "1".

    it wont work if i do it like this:

    Code:
    for (y=0; y<=l; y++)

    but this one works:

    Code:
    for (y=0; y<=strlen(a); y++)
    i dunno why...

    thanks for the 2nd and 3rd corrections. gonna fix that.



    @MK27 wow! thanks for the tip. that's very helpful=)

  5. #5
    C++ Witch laserlight's Avatar
    Join Date
    Oct 2003
    Location
    Singapore
    Posts
    28,413
    MK27's tip is good, but it would be better to use isalpha() for readability. You also should not be calling strlen() in a loop's comparison statement without a very good reason.
    Quote Originally Posted by Bjarne Stroustrup (2000-10-14)
    I get maybe two dozen requests for help with some sort of programming or design problem every day. Most have more sense than to send me hundreds of lines of code. If they do, I ask them to find the smallest example that exhibits the problem and send me that. Mostly, they then find the error themselves. "Finding the smallest program that demonstrates the error" is a powerful debugging tool.
    Look up a C++ Reference and learn How To Ask Questions The Smart Way

  6. #6
    Registered User
    Join Date
    Aug 2009
    Posts
    4
    i ended up redoing everything and here it is

    Code:
    #include <stdio.h>
    #include <conio.h>
    #include <string.h>
    char input[20];
    int l, x, y, z, a, b, c, val1, val2, fl1, fl2, dec, decloc, invfp, inttemp, idetemp, identifier, integer, op, chlit, stlit, floatlit, string;
    main()
     {
    	identifier=0;
    	idetemp=0;
    	integer=0;
    	inttemp=0;
    	invfp=0;
    	floatlit=0;
    	fl1=0;
    	fl2=0;
    	val1=0;
    	val2=0;
    	dec=0;
    	l=strlen(input);
    	op=0;
    	chlit=0;
    	stlit=0;
    	string=0;
    	clrscr();
    	printf("Input: ");
    	gets(input);
    
     /* For Operators */
    
    	if (strlen(input)==1)
    	 {
    	  if ((input[0]=='+' || input[0]=='-' || input[0]=='*' || input[0]=='/'))
    	  op=1;
    	 }
    
     /* End */
    
     /*For Character and String Literals */
    
    	if ((input[0]=='"' && input[l]=='"'))
    	 {
    	  if (input[2]=='"')
    	  chlit=1;
    	  else
    	  stlit=1;
    	 }
     /* End */
    
     /* For Floating Literals */
    
    	 for (x=0; x<=strlen(input); x++)
    	  {
    	   if (input[x]=='.')
    	    {
    	     dec=dec+1;
    	     decloc=x;
    	    }
    	  }
    	 if (dec==1)
    	  {
    	   for (y=0; y<decloc; y++)
    	    {
    	     if ((input[y]=='0' || input[y]=='1' || input[y]=='2' || input[y]=='3' || input[y]=='4' || input[y]=='5' || input[y]=='6' || input[y]=='7' || input[y]=='8' || input[y]=='9' || input[y]=='.'))
    	     val1=val1+1;
    	    }
    	   for (z=decloc+1; z<=strlen(input); z++)
    	    {
    	     if ((input[z]=='0' || input[z]=='1' || input[z]=='2' || input[z]=='3' || input[z]=='4' || input[z]=='5' || input[z]=='6' || input[z]=='7' || input[z]=='8' || input[z]=='9'))
    	     val2=val2+1;
    	    }
    	  }
    	 else if (dec>=2)
    	 invfp=1;
    
    	 if (val1==decloc)
    	  {
    	   fl1=1;
    	  }
    	 if (val2==(strlen(input)-decloc)-1)
    	  {
    	   fl2=1;
    	  }
    
     /* End */
    
     /* For Integer */
    
    	for (a=0; a<=strlen(input); a++)
    	  {
    	   if ((input[a]=='0' || input[a]=='1' || input[a]=='2' || input[a]=='3' || input[a]=='4' || input[a]=='5' || input[a]=='6' || input[a]=='7' || input[a]=='8' || input[a]=='9'))
    	    {
    	     inttemp=inttemp+1;
    	    }
    	  }
    
    
     /* End */
    
     /* For Identifiers */
    
    	for (c=0; c<=strlen(input); c++)
    	 {
    	  if ((input[c]=='_' || input[c]>=65 && input[c]<=122))
    	  idetemp=idetemp+1;
    	 }
    	if ((idetemp==strlen(input) && strlen(input)!=0))
    	 {
    	  identifier=1;
    	 }
    
     /* End */
    
    	 if (op==1)
    	 printf("Operator");
    	 else if (chlit==1)
    	 printf("Character Literal");
    	 else if (stlit==1)
    	 printf("String Literal");
    	 else if (identifier==1)
    	 printf("Identifier");
    
    	 if ((fl1==1 && fl2==1 && dec!=0))
    	  {
    	   floatlit=1;
    	  }
    	 if (floatlit==1)
    	 printf("Floating Literal");
    	 if ((inttemp==strlen(input) && strlen(input)!=0))
    	  {
    	   integer=1;
    	   printf("Integer Literal");
    	  }
    	 else if ((op==0 && chlit==0 && stlit==0 && floatlit==0 && integer==0 && identifier==0))
    	       {
    		if (invfp==0 && input[0]!=0)
    		printf("INVALID");
    		else if ((invfp==0 && input[0]==0))
    		printf("Please Enter a Value!");
    		else if (invfp==1)
    		printf("Invalid Floating Point Literal");
    	       }
    
    	getch();
     }

    for the integer part, what i really wanted to do was to use For Loop and increment the varable being compared with the content of the array index. sadly, i got lost in using nested for loop that's why i decided to code it from 0 to 9. would you guys please tell me how i should have done it?


    @laserlight im a newbie and i dont know how to use that function, but thanks, i'll try to use it after learning how.


    thanks guys=)

  7. #7
    Registered User
    Join Date
    Aug 2009
    Posts
    4
    i have another question...how will i make the program to end only when the user typed "exit"?

    i think i should use a looping statement but i have no idea how.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. creating very simple text editor using c
    By if13121 in forum C Programming
    Replies: 9
    Last Post: 10-19-2010, 05:26 PM
  2. Simple message encryption
    By Vicious in forum C++ Programming
    Replies: 10
    Last Post: 11-07-2004, 11:48 PM
  3. Binary Search Trees Part III
    By Prelude in forum A Brief History of Cprogramming.com
    Replies: 16
    Last Post: 10-02-2004, 03:00 PM
  4. Simple simple program
    By Ryback in forum C++ Programming
    Replies: 10
    Last Post: 09-09-2004, 05:48 AM
  5. Need help with simple DAQ program
    By canada-paul in forum C++ Programming
    Replies: 12
    Last Post: 03-15-2002, 08:52 AM