Hi again all -
Sorry to say - I'm still having problems with the SQL lexer.
It compiles but when the executable is run, it doesn't do anything - it just hangs.
The latest code is here - hoping someone may be able to help.......
Code:
/* sql_lexer.c */
/* A toy lexer for a small part of SQL. */
/* This code is released to the public domain. */
/* "Share and enjoy......" :) */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include <stdlib.h>
/* Declare the current character. */
int current_char;
/* Next_char function */
int next_char() {
return current_char = getchar();
}
/* Enum for the token types. */
typedef enum tokentype {
KEYWORD, PUNCTUATION, IDENTIFIER, STRING,
INTEGER, FLOAT, OTHER
} tokentype;
/* Struct to store TOKENS. */
typedef struct {
tokentype toktype;
union {
char *string_value;
int int_value;
float float_value;
} value;
} token;
/* Allocate memory for tokens. */
void *allocate_memory(size_t n) {
return malloc(n);
}
/* Lex a keyword or identifier. */
token lex_keyword_or_identifier() {
token mytok;
char mystr[80] ;
int i=0;
while ( i<80 && (isalnum(current_char)
|| current_char == '_' ) ) {
mystr[i] = current_char ;
i++;
//next_char();
}
if ( strcmp(mystr, "select") ) {
mytok.toktype = KEYWORD ;
mytok.value.string_value = "select" ; }
else if (strcmp(mystr, "from") ) {
mytok.toktype = KEYWORD ;
mytok.value.string_value = "from" ; }
else if (strcmp(mystr, "where")) {
mytok.toktype = KEYWORD ;
mytok.value.string_value = "where" ; }
else if (strcmp(mystr, "and")) {
mytok.toktype = KEYWORD ;
mytok.value.string_value = "and" ; }
else if (strcmp(mystr, "or")) {
mytok.toktype = KEYWORD ;
mytok.value.string_value = "or" ; }
else if (strcmp(mystr, "and")) {
mytok.toktype = KEYWORD ;
mytok.value.string_value = "and" ; }
else if (strcmp(mystr, "is")) {
mytok.toktype = KEYWORD ;
mytok.value.string_value = "is" ; }
else if (strcmp(mystr, "in")) {
mytok.toktype = KEYWORD ;
mytok.value.string_value = "in" ; }
else if (strcmp(mystr, "not")) {
mytok.toktype = KEYWORD ;
mytok.value.string_value = "not" ; }
else if (strcmp(mystr, "null")) {
mytok.toktype = KEYWORD ;
mytok.value.string_value = "null" ; }
else mytok.toktype = IDENTIFIER ;
mytok.value.string_value = mystr ;
return mytok;
}
/* Lex a string constant. */
token lex_string() {
token mytok;
char mystr[80];
int i=0;
while ( (i<80 || current_char != '"' )) {
mystr[i] = current_char ;
// next_char();
i++;
}
mystr[i+1] = '"' ;
mytok.toktype = STRING ;
mytok.value.string_value = mystr ;
return mytok;
}
/* Lex an integer constant. */
token lex_number() {
token mytok;
char mystr[80];
char *myptr;
long myint;
int i=0;
while ( i<80 && isdigit(current_char) ) {
mystr[i] = current_char ;
i++;
//next_char();
}
/* Convert the text "number" to a long integer. */
myint = strtol(mystr, &myptr, 10);
mytok.toktype = INTEGER ;
mytok.value.int_value = myint;
return mytok;
}
/* Lex the "star" operator. */
token lex_star() {
token mytok;
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "*" ;
next_char();
return mytok;
}
/* Lex various operators. */
token lex_operator() {
token mytok;
if (current_char == ',') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "," ; }
else if (current_char == '.') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "." ; }
else if (current_char == ';') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = ";" ; }
else if (current_char == '(') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "(" ; }
else if (current_char == ')') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = ")" ; }
else if (current_char == '+') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "+" ; }
else if (current_char == '-') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "-" ; }
else if (current_char == '*') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "*" ; }
else if (current_char == '/') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "/" ; }
else if (current_char == ';') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = ";" ; }
else if (current_char=='<') {
if (next_char()=='=') {
next_char();
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "<=" ;
}
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "<" ;
}
else if (current_char=='>') {
if (next_char()=='=') {
next_char();
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = ">=" ;
}
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = ">" ;
}
else if (current_char=='!') {
if (next_char()=='=') {
next_char();
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "!=" ;
}
mytok.toktype = OTHER ;
mytok.value.string_value = "!" ;
}
else if (current_char == '=') {
mytok.toktype = PUNCTUATION ;
mytok.value.string_value = "=" ; }
else {
mytok.toktype = OTHER ;
mytok.value.string_value = " " ; }
next_char();
return mytok;
}
/* Lex white space. */
void lex_white_space() {
while (isspace(current_char)) {
next_char();
}
}
token lex_eof() {
token mytok;
mytok.toktype = OTHER ;
mytok.value.string_value = "EOF" ;
return mytok;
}
/* Error in lexing. */
token lex_error() {
token mytok;
mytok.toktype = OTHER ;
mytok.value.string_value = "ERROR" ;
return mytok;
}
/* Main lexer */
token lexer(char *str) {
char current_char = str[0] ;
lex_white_space();
if (isalpha(current_char) || current_char == '_') {
return lex_keyword_or_identifier();
} else if (isdigit(current_char)) {
return lex_number();
} else if ( current_char=='"' ) {
return lex_string();
} else if (ispunct(current_char)) {
return lex_operator();
} else if (current_char==EOF) {
return lex_eof();
} else {
return lex_error();
}
}
int main() {
token curr_token;
char *mystr = "select var1 from mytable where city = \"Sydney\" ; " ;
curr_token = lexer(mystr);
while (curr_token.toktype != OTHER) {
switch (curr_token.toktype) {
case KEYWORD:
case PUNCTUATION:
case IDENTIFIER:
case STRING:
printf("%s", curr_token.value.string_value);
break;
case INTEGER:
printf("%d", curr_token.value.int_value);
break;
case FLOAT:
printf("%f", curr_token.value.float_value);
break;
case OTHER:
printf("%s", curr_token.value.string_value);
break;
default:
printf("Unknown token.type: %d\n", curr_token.toktype);
} /* switch */
} /* while */
return 0;
}
Many thanks in advance (and happy New Year.... )
Andy (latte123)