Sorry if this is a double post, I didn't see my post on the board so I am reposting.
I've written a rudimentary syntax checking program for c code, which just checks to make sure every {, (, or [ is closed in the appropriate order. It works fine right now, except for when I try and break it by adding (,[,{ characters inside strings.
So if it checks this file:
Code:
#include <stdio.h>
int main(int argc, char **argv){
printf("hello world");
return(1);
}
It works fine, but if I do this:
Code:
printf("there is escaped quotes \" in this string and now ( ( [ ) } will break the syntax check");
I'm trying to figure out how to parse the character " differently than \" if that makes any sense. Here is my code, and I'll bold out the relevant section.
Code:
#include <stdio.h>
#include <stdlib.h>
#include "stack.h"
#define STRLEN 256
#define IN_LINE 1
#define IN_MULTILINE 2
#define IN_SINGLE_STRING 3
#define IN_DOUBLE_STRING 4
#define OUT 0
#define TRUE 1
#define FALSE 0
#define BRACKETS 1
#define PARENTHESES 2
int check_syntax(FILE *fp);
int main(int argc, char *argv[]){
FILE *fp;
int bad_line;
if (argc < 2){
printf("Usage is syntax [FILE]");
exit(EXIT_FAILURE);
}
if( (fp=fopen(argv[1], "r")) == NULL)
exit(EXIT_FAILURE);
bad_line = check_syntax(fp);
fclose(fp);
if(bad_line)
printf("Error on line %d\n", bad_line);
else
printf("syntax is correct\n");
exit(EXIT_SUCCESS);
}//}99[}
int check_syntax(FILE *fp){
STACK *stack;
char cp, nextc;
int state, ln;
int error;
stack = (STACK *)malloc(sizeof stack);
state = OUT;
ln = 1;
error = FALSE;
while( (cp = fgetc(fp)) != EOF){
nextc = fgetc(fp);
ungetc(nextc, fp);
//check for comments first
if(state == OUT && cp == '/'){
if( nextc == '/'){ //we're in a single line comment
state = IN_LINE;
continue;
}
if(nextc == '*'){ //we're in a multi line comment
state = IN_MULTILINE;
continue;
}
}
//check for end of comments next
if(state == IN_LINE && cp == '\n'){
state = OUT;
continue;
}
if(state == IN_MULTILINE && cp == '*' && nextc == '/'){
state = OUT;
continue;
}
//check for in string
if(state == OUT && cp == '\"')
state = IN_DOUBLE_STRING;
if(state == OUT && cp == '\'')
state = IN_SINGLE_STRING;
if(state == IN_DOUBLE_STRING && cp == '\\' && nextc == '\"'){
cp = fgetc(fp);
cp = fgetc(fp);
}
if(state == IN_SINGLE_STRING && cp == '\\' && nextc == '\''){
cp = fgetc(fp);
cp = fgetc(fp);
}
if(state == IN_DOUBLE_STRING && cp == '\"')
state = OUT;
if(state == IN_SINGLE_STRING && cp == '\'')
state = OUT;
//we are out of a comment, time to check syntax
if(state == OUT){
switch(cp){
case '\n':
ln++;
break;
case '{':
case '}':
if(stack->state != OUT)
error = TRUE;
break;
case '(':
stack = add_to_stack(stack, PARENTHESES);
break;
case ')':
if(stack->state != PARENTHESES)
error = TRUE;
stack = pop_stack(stack);
break;
case '[':
stack = add_to_stack(stack, BRACKETS);
break;
case ']':
if(stack->state != BRACKETS)
error = TRUE;
stack = pop_stack(stack);
break;
}
if(error == TRUE)
return(ln);
}
}
return(0);
}
I hope this makes sense, thanks for looking