Ah what the hell, I look like an idiot talking about code no-one has seen.
Here's the 165-line (now that I've added the final state checks), 7-state machine that should be able to detect errors in quoting, parentheses, braces, brackets, and comments. I think it looks clean and should be maintainable (although I should definitely add comments explaining the intent behind each code block), but make up your own mind:
Code:
#include <stdio.h>
#define MAX_ACTIVES 1024
enum comment_states {
NORMAL_CODE = 0,
SINGLE_QUOTED,
DOUBLE_QUOTED,
AFTER_SLASH,
CPP_COMMENT,
C_COMMENT,
C_COMMENT_ASTERISK
};
/* Current input line number (1-based). Incremented by next() each time it
 * reads a line terminator; printed by main() in its diagnostics. */
static unsigned long line = 1UL;
/*
 * Read one logical character from stdin and echo it to stdout.
 *
 * A line terminator may be a lone '\n', a lone '\r', or either two-character
 * combination "\r\n" / "\n\r". Whatever bytes were read are echoed unchanged,
 * the global line counter is bumped once, and '\n' is returned so that
 * callers only ever see a single newline character.
 *
 * Returns the character read (newlines normalised to '\n'), or EOF.
 */
static inline int next(void)
{
    const int first = getc(stdin);

    if (first == EOF)
        return EOF;

    /* Anything that is not a line terminator is passed straight through. */
    if (first != '\n' && first != '\r') {
        fputc(first, stdout);
        return first;
    }

    /* A terminator may be followed by its partner ('\r' after '\n' or the
     * reverse), in which case both bytes form one line ending. */
    const int partner = (first == '\n') ? '\r' : '\n';
    const int second = getc(stdin);

    line++;
    fputc(first, stdout);
    if (second == partner)
        fputc(second, stdout);
    else
        ungetc(second, stdin);  /* not part of the terminator; no-op for EOF */

    return '\n';
}
/*
 * Map a bracket character to its counterpart: '(' <-> ')', '[' <-> ']',
 * '{' <-> '}'. Any other value yields '\0'.
 */
static inline int pair(const int c)
{
    /* Partners sit next to each other, so flipping the lowest bit of the
     * index moves from an opener to its closer and vice versa. */
    static const char brackets[] = "()[]{}";
    int i;

    for (i = 0; brackets[i] != '\0'; i++) {
        if (brackets[i] == c)
            return brackets[i ^ 1];
    }
    return '\0';
}
/*
 * Copy standard input to standard output (next() does the echoing) while
 * checking that parentheses, brackets and braces are balanced and that
 * quoted literals and C-style block comments are terminated. Brackets inside
 * quotes and comments are ignored.
 *
 * Diagnostics go to stderr, prefixed with the current line number.
 *
 * Returns 1 if bracket nesting exceeds MAX_ACTIVES, otherwise 0 (also when
 * diagnostics were printed).
 */
int main(void)
{
    enum comment_states state = NORMAL_CODE;
    char active[MAX_ACTIVES];   /* stack of currently open brackets */
    int actives = 0;            /* number of entries used in active[] */
    int c;

    while (EOF != (c = next())) {
        switch (state) {

        case AFTER_SLASH:
            if (c == '/') {
                state = CPP_COMMENT;
                break;
            }
            if (c == '*') {
                state = C_COMMENT;
                break;
            }
            /* The slash did not start a comment, so the current character
             * is ordinary code and must be examined as such. */
            state = NORMAL_CODE;
            /* fallthrough */

        case NORMAL_CODE:
            if (c == '/') {
                state = AFTER_SLASH;
            } else if (c == '"') {
                state = DOUBLE_QUOTED;
            } else if (c == '\'') {
                state = SINGLE_QUOTED;
            } else if (c == '(' || c == '[' || c == '{') {
                if (actives >= MAX_ACTIVES) {
                    fprintf(stderr, "Line %lu: Too deep nesting.\n", line);
                    return 1;
                }
                active[actives++] = (char)c;
            } else if (c == ')' || c == ']' || c == '}') {
                if (actives < 1)
                    fprintf(stderr, "Line %lu: '%c' without a prior '%c'.\n", line, c, pair(c));
                else if (active[actives - 1] != pair(c))
                    fprintf(stderr, "Line %lu: '%c', but expected '%c'.\n", line, c, pair(active[actives - 1]));
                else
                    actives--;
            }
            /* Always stop here. Falling into SINGLE_QUOTED would let the
             * opening ' be taken for the closing one, so character literals
             * such as '(' would never be skipped. */
            break;

        case SINGLE_QUOTED:
            if (c == '\\')
                next();         /* skip the escaped character */
            else if (c == '\'')
                state = NORMAL_CODE;
            break;

        case DOUBLE_QUOTED:
            if (c == '\\')
                next();         /* skip the escaped character */
            else if (c == '\"')
                state = NORMAL_CODE;
            break;

        case CPP_COMMENT:
            if (c == '\n')
                state = NORMAL_CODE;
            break;

        case C_COMMENT:
            if (c == '*')
                state = C_COMMENT_ASTERISK;
            break;

        case C_COMMENT_ASTERISK:
            if (c == '/')
                state = NORMAL_CODE;
            else if (c != '*')
                state = C_COMMENT;  /* a run of '*' keeps this state */
            break;
        }
    }

    /* End of input: report whatever construct is still open. */
    if (state == C_COMMENT || state == C_COMMENT_ASTERISK)
        fprintf(stderr, "Line %lu: Expected end of comment */ before end of input.\n", line);
    else if (state == DOUBLE_QUOTED)
        fprintf(stderr, "Line %lu: Expected '\"' before end of input.\n", line);
    else if (state == SINGLE_QUOTED)
        fprintf(stderr, "Line %lu: Expected '\'' before end of input.\n", line);

    /* Unwind the bracket stack, innermost first. */
    while (actives > 0)
        fprintf(stderr, "Line %lu: Expected '%c' before end of input.\n", line, pair(active[--actives]));

    return 0;
}
Edited to add the nine if-else lines near the end. I forgot to verify that the state is sane at end of input.
Also, please remember I wrote the above for testing purposes only. I did not intend to show it at all to anyone, just decided to show it because I'm stupid.