This is a sort of "bare-bones" LR parser: all it's supposed to do is prompt for a parsing table (table.txt), then prompt for an expression like:
id * id
or:
(id + id) * id
or:
id + *
and it will respond either "OK" (for inputs like the first 2 examples) or "syntax error" (for input like the third).
I guess I must be doing something wrong with the pointers, probably char **table, but WHAT?
***Everything seems to work correctly when I compile and run it in MSVC 6.0.***
When I compile it with GCC it compiles OK but I get a segmentation fault when I run it. And when I tried to debug it with some printf statements, I get some unexpected results: when i give strlen() a pointer to "id" (at least I thought it was pointing to "id") it returns the length as 3 instead of 2.
And where I expected my printf's (in parser.c) to give:
Code:
2 is the length of
id
ho ho ho
id
0
instead I get:
Code:
3 is the length of
id
ho ho ho
id
1
And this section (line 63 of parser.c)
Code:
while(strcmp(*(table+col),lexeme_tbl[lookup(nextToken)])!=0){
//DEBUG
printf("%10s %10s\n", *(table+col),lexeme_tbl[lookup(nextToken)] );
//ENDDEBUG
col++;
}
is supposed to find the column of the parsing table that corresponds to the token returned by lex. So if I just give it
id
as the input (since id is the first token in the table), it should immediately drop out of the while loop. Instead it seems to be finding that "id" and "id" are not the same, keeps incrementing col until it runs beyond "table"s memory, & segfaults.
parser.c:
Code:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "readTable.h"
#include "sstack.h"
#include "lex.h"
char *lexeme_tbl[] = {0,"$","(", ")", "*", "+", "id"};
int lexeme_idx[] = {0,36,40,41,42,43,128};
char *stk[STACK_SIZE];
int main() {
int i,j,rows,cols,states,actions,gotos,top=0,nextToken,rule,row,col;
char *action;
char **table;
// the lhs array is a lookup table of just the LHSs of the grammar rules,
// indexed by rule number; lhs[0] is not used
char *lhs[7] = {"VOID","E","E","T","T","F","F"};
// the rhs_count array tells how many symbols to pop for a reduce action
int rhs_count[7] = {0,3,1,3,1,3,1};
// (it would probably be more realistic for the grammar rules
// to be read in from a file, or interactive input)
// read in the parsing table
table = readTable(&states, &actions, &gotos);
rows = states + 1;
cols = actions + gotos + 1;
// get user input string
printf("\n\nEnter input expression.\n\n>");
// the actual parsing begins here
// start with state 0 on the stack
push("0");
// put first input character into buffer to begin lexical analysis
myGetChar();
nextToken = lex();
//DEBUG
printf("parser.c: returned from lex\n");
//ENDDEBUG
// continue to process the rest of the input
while (nextToken != 0){
// find the table column for nextToken
col=1;
//DEBUG
printf("%d is the length of\n",strlen(*(table+col)));
printf("%10s\n", *(table+col));
printf(" ho ho ho\n");
printf("%10s\n",lexeme_tbl[lookup(nextToken)]);
printf("%d\n",strcmp(*(table+col),lexeme_tbl[lookup(nextToken)]));
//ENDDEBUG
while(strcmp(*(table+col),lexeme_tbl[lookup(nextToken)])!=0){
//DEBUG
printf("%10s %10s\n", *(table+col),lexeme_tbl[lookup(nextToken)] );
//ENDDEBUG
col++;
}
//DEBUG
printf("parser.c: out of first while loop\n");
//ENDDEBUG
// find the appropriate action for this token as shown by the parse table;
// row 0 has the column headings, so the action row number = state+1
action = *(table+(1+atoi(peek()))*cols+col);
//DEBUG
printf("action is %s\n",action);
printf("parser.c: ready to check S-rules\n");
//ENDDEBUG
// S-rules:
if(*action=='S'){
push(lexeme_tbl[lookup(nextToken)]);
push(action+1); // this trims off the "S" leaving just the state
// nextToken was consumed (pushed onto stack) so call lex to get new nextToken
nextToken = lex();
} // end if(*action=='S')
// R-rules:
else if (*action=='R'){
rule=atoi(action+1);
for(i=0;i<rhs_count[rule];i++){
pop(); pop();
}
// find the row for the goto rule (1st row is column headings, so row = state+1)
// peek() gives the state currently at top of stack
row = 1+atoi(peek());
push(lhs[rule]);
// find table column for goto rule
col = actions+1; //this locates beginning of GOTO section of table
// next, find the column headed by the nonterminal on the lhs of the reduction rule
while(strcmp(*(table+col),lhs[rule]))
col++;
if( **(table+row*cols+col)!= 'B'){
push (*(table+row*cols+col));
}
else{
printf("Syntax error.\n");
nextToken=0;
}
} // end if(*action=='R')
// accept rule:
else if (*action=='a'){
printf("OK.\n");
nextToken = 0;
} // end if(*action=='a')
else{ // default: *action=='B'; parse table error
printf("Syntax error.\n");
nextToken = 0;
}
} //end while
// the while loop ends when input is accepted, or the parse table rules
// reveal a syntax error, or lex detects a syntax error and returns a '0'
return 0;
}
lex.c
Code:
/*
a simple lexical analyzer for lr_parser assignment.
lex() prompts for keyboard input, returning c-strings that it recognizes.
lex() recognizes inputs of "id", "+", "*", "(", ")", and whitespace.
anything else is reported as a syntax error.
*/
#include <string.h>
#include <stdio.h>
#include "lex.h"
#define ID_CODE 128
#define IDENT_MAXSIZE 25
extern int lexeme_idx[];
int lex() {
char lexeme[IDENT_MAXSIZE];
int i=0, temp;
// myGetChar(); //moved this to parser.c; to be done only before the first call to lex
/* consume blank spaces between lexemes */
while(charClass == SPACE)
myGetChar();
switch(charClass) {
case LETTER:
lexeme[i]=ch;
i++;
lexeme[i]='\0';
myGetChar();
while(charClass == LETTER){
lexeme[i]=ch;
i++;
lexeme[i]='\0';
if(i>2){
printf("Syntax error; invalid identifier: %s\n",lexeme);
return 0;
}
myGetChar();
}
/* in this implementation, if the lexeme is a char string other than "id" it is an error: */
if(strcmp(lexeme, "id")){
printf("Syntax error: invalid identifier: %s\n",lexeme);
return 0;
}
return ID_CODE;
break;
case OPER:
temp = ch;
myGetChar(); // to load next character into buffer
return temp;
break;
case CR: // put a $ to terminate the input
ch = '$';
return ch;
break;
case INVALID:
lexeme[i]=ch;
i++;
lexeme[i]='\0';
printf("Syntax error: invalid token: %s\n",lexeme);
return 0;
break;
}
}
int myGetChar(){
ch = getchar();
if ((ch > 96 && ch < 123))
charClass = LETTER;
else if ((ch > 39 && ch < 44))
charClass = OPER;
else if (ch == ' ')
charClass = SPACE;
else if (ch == 13 || ch == 10)
charClass = CR;
else
charClass = INVALID;
return ch;
}
int lookup(int lex_code){
int i=0;
while(lexeme_idx[i] != lex_code)
i++;
return i;
}
sstack.c
Code:
#include <assert.h>
#include <stdio.h>
#include "sstack.h"
extern char *stk[STACK_SIZE];
void clear(){
top = 0;
}
Bool is_empty(){
return top==0;
}
Bool is_full(){
return (top==STACK_SIZE);
}
void push(char *str){
assert (! is_full());
stk[top] = str;
top++;
}
char *pop(){
assert (! is_empty());
top--;
return stk[top];
}
char *peek(){
assert (! is_empty());
return stk[top-1];
}
readTable.c
Code:
/*
function to read a parsing table from a text file & return it as an array
the file structure is:
line 1: number of states
line 2: number of action columns
line 3: number of goto columns
lines 4...: the entries in a single column (one entry per line) for row 1 (the column headings),
followed by row 2, and so on...
readTable returns a pointer to the array
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int ch; // ch holds character input from keyboard; also used in file lex.c
char **readTable (int *states, int *actions, int *gotos) {
FILE *infile;
int rows, cols;
char filename[81];
char inbuffer[10];
char **p_table;
char *table_entry;
int i,j,k;
/* get the name of the input file */
printf("Enter the name of the parsing table file.\n\n>");
for(i=0; (i<80) && ((ch = getchar()) != EOF) && (ch != '\n'); i++)
filename[i] = ch;
filename[i] = '\0';
/* read in the dimensions of the table & allocate an array of pointers to hold it */
infile = fopen(filename,"r");
for(i=0; (i<9) && ((ch = fgetc(infile)) != EOF) && (ch != '\n'); i++)
inbuffer[i] = ch;
inbuffer[i] = '\0';
*states = atoi(inbuffer);
for(i=0; (i<9) && ((ch = fgetc(infile)) != EOF) && (ch != '\n'); i++)
inbuffer[i] = ch;
inbuffer[i] = '\0';
*actions = atoi(inbuffer);
for(i=0; (i<9) && ((ch = fgetc(infile)) != EOF) && (ch != '\n'); i++)
inbuffer[i] = ch;
inbuffer[i] = '\0';
*gotos = atoi(inbuffer);
rows = (*states) + 1;
cols = (*actions) + (*gotos) + 1;
p_table = malloc((rows)*(cols)*sizeof(char*));
/* read in each table entry (string), allocate an array to hold it,
and set the corresponding entry in the table array to point to the string */
/* first do row 1 (column headings) */
*p_table = "state"; // needed only if the table is to be printed
for(j=1; j<(cols); j++){
for(k=0; (k<9) && ((ch = fgetc(infile)) != EOF) && (ch != '\n'); k++)
inbuffer[k] = ch;
inbuffer[k] = '\0';
table_entry = malloc(strlen(inbuffer)+1);
strcpy(table_entry,inbuffer);
*(p_table+j)=table_entry;
}
/* now the rest of the table */
for(i=1; i<rows; i++)
for(j=0; j<cols; j++){
for(k=0; (k<9) && ((ch = fgetc(infile)) != EOF) && (ch != '\n'); k++)
inbuffer[k] = ch;
inbuffer[k] = '\0';
table_entry = malloc(strlen(inbuffer)+1);
strcpy(table_entry,inbuffer);
*(p_table+i*cols+j)=table_entry;
}
//DEBUG
for(k=0; k<rows*cols; k++)
printf("%s\n",*(p_table+k));
printf("leaving readTable\n");
for(i=0;i<rows;i++){
for(j=0;j<cols;j++)
printf("%8s",*(p_table+i*cols+j));
printf("\n");
}
//ENDDEBUG
return p_table;
}
readTable.h
Code:
#ifndef readTable_h
#define readTable_h
char **readTable(int *states, int *actions, int *gotos);
#endif
lex.h
Code:
#ifndef lex_h
#define lex_h
int lex();
int myGetChar();
int lookup(int);
int ch;
enum {LETTER, OPER, SPACE, CR, INVALID} charClass;
#endif
sstack.h
Code:
#ifndef sstack_h
#define sstack_h
#define STACK_SIZE 30
int top;
typedef int Bool;
void clear();
Bool is_empty();
Bool is_full();
void push(char*);
char *pop();
char *peek();
#endif
this is the makefile I'm using for gcc (NOT for msvc):
Code:
parser.exe: parser.o sstack.o readTable.o lex.o
gcc -o parser.exe parser.o sstack.o readTable.o lex.o
parser.o: parser.c sstack.h readTable.h lex.h
gcc -c parser.c
sstack.o: sstack.c sstack.h
gcc -c sstack.c
readTable.o: readTable.c
gcc -c readTable.c
lex.o: lex.c lex.h
gcc -c lex.c
table.txt
Code:
12
6
3
id
+
*
(
)
$
E
T
F
0
S5
B
B
S4
B
B
1
2
3
1
B
S6
B
B
B
acc
B
B
B
2
B
R2
S7
B
R2
R2
B
B
B
3
B
R4
R4
B
R4
R4
B
B
B
4
S5
B
B
S4
B
B
8
2
3
5
B
R6
R6
B
R6
R6
B
B
B
6
S5
B
B
S4
B
B
B
9
3
7
S5
B
B
S4
B
B
B
B
10
8
B
S6
B
B
S11
B
B
B
B
9
B
R1
S7
B
R1
R1
B
B
B
10
B
R3
R3
B
R3
R3
B
B
B
11
B
R5
R5
B
R5
R5
B
B
B
the parse table looks like this:
Code:
state id + * ( ) $ E T F
0 S5 B B S4 B B 1 2 3
1 B S6 B B B acc B B B
2 B R2 S7 B R2 R2 B B B
3 B R4 R4 B R4 R4 B B B
4 S5 B B S4 B B 8 2 3
5 B R6 R6 B R6 R6 B B B
6 S5 B B S4 B B B 9 3
7 S5 B B S4 B B B B 10
8 B S6 B B S11 B B B B
9 B R1 S7 B R1 R1 B B B
10 B R3 R3 B R3 R3 B B B
11 B R5 R5 B R5 R5 B B B