I am a bit puzzled because I have not been able to produce a short sample of code that reproduces the problem. I am going to post two things: 1.) the short code I thought ought to capture / reproduce the problem, but which fails to do so, and 2.) my actual code in its entirety. Sorry for the large postings; I hesitated to do this, but I have been unable to reproduce the problem in a short example.
Here is the code I believe captures the flow of the original program and ought to reproduce the error, but does not. In comments off to the side I indicate what the different parts of this program are analogous to in the original.
Code:
#include <iostream>
#include <string>
using namespace std;
// Reduced stand-in for Parser: the call chain f1() -> f2() -> f3() mirrors
// parse() -> stmt() -> match() in the real program.
class Foo { // class Parser
public:
    // Exception payload carrying a message; stands in for Parser::SyntaxError.
    struct Bar { // struct SyntaxError
        std::string s; // message text, owned by the exception object
        // Initialize the message directly rather than default-construct + assign.
        Bar(const char *t) : s(t) { }
    };

    // Public entry point; forwards to f2().
    void f1() { // double parse(bool get = true)
        f2();
    }

private:
    // Calls f3() and reports any exception on std::cout instead of
    // letting it propagate to the caller.
    void f2() { // double stmt(bool)
        try {
            f3(); // match(NEWLINE, false); line 93 of Parser.cpp
        }
        // Catch by const reference: no copy of the exception object is made.
        catch (const Foo::Bar& e) {
            std::cout << "Foo::Bar Caugh in f2(). " << e.s << std::endl;
        }
        catch (...) {
            std::cout << "Unknown exception caught in f2().\n";
        }
    }

    // Always throws a Bar.
    void f3() { // bool match(Token, bool)
        throw Bar("Catch me if you can!");
    }
};
int main() {
Foo foo;
try {
foo.f1(); // Call to Parser::parse()
}
catch (Foo::Bar e) {
cout << "Foo::Bar Caugh in main(). " << e.s << endl;
}
catch (...) {
cout << "Unknown exception caught in main().\n";
}
return 0;
}
Running this program produces "good" results:
Code:
./test
Foo::Bar Caugh in f2(). Catch me if you can!
Now what follows are the source files that make up my actual program:
main.cpp
Code:
#include <iostream>
#include "Parser.h"
using namespace std;
int main() {
SymbolTable table;
Lexer lex = Lexer(&cin);
Parser parser = Parser(lex, table, INTERACTIVE, true, true, &cout);
try {
parser.parse(true);
}
catch (Parser::SyntaxError e) {
cout << "In Parser::SyntaxError handler in main.cpp\n";
}
catch (...) {
cout << "In default handler in main.cpp\n";
}
return 0;
}
Parser.h
Code:
#ifndef _PARSER_H_
#define _PARSER_H_
#include <iostream>
#include <stack>
#include "Lexer.h"
#include "Symbol.h"
/* are there more? */
// How the parser is being driven. In INTERACTIVE mode Parser::stmt() also
// accepts a newline as an expression terminator and echoes results.
enum ParserMode {
    INTERACTIVE,
    NONINTERACTIVE
};
/* Recursive-descent parser and evaluator.
 * Pulls tokens from a Lexer, resolves names through a SymbolTable and
 * returns the numeric value of what it parsed.
 */
class Parser {
public:
    /* lex/t are held by reference and must outlive the Parser.
     * chkSem/chkN seed the initial ParserState (see below).
     * out receives echoed results in INTERACTIVE mode.
     */
    Parser(Lexer& lex, SymbolTable& t, ParserMode m,
           bool chkSem, bool chkN, std::ostream *out);
    //Parser(Lexer lex, SymbolTable t, std::ostream *out);

    /* Parse statements until END; returns the value of the last one. */
    double parse(bool get = true);

    /* Thrown on division / modulus by zero while semantics are checked. */
    struct ZeroDivide {
        ZeroDivide() { }
    };

    /* Thrown when the token stream does not match the grammar.
     * The message is stored by value: the previous `std::string&` member was
     * never bound to an object, so assigning through it was undefined
     * behavior. A null pointer yields an empty message.
     */
    struct SyntaxError {
        std::string s;
        SyntaxError(const char* t) : s(t ? t : "") { }
    };

private:
    /* Succeeds if the current token has type t (reading a new token first
     * when get is true); throws SyntaxError otherwise. */
    bool match(TokenType t, bool get);

    /* One method per grammar level, lowest precedence first. */
    double stmt(bool);            // if-statement or terminated expression
    double ifBlock(bool, bool);   // statements up to ELSE / ENDIF
    double expr(bool);            // |
    double lunion(bool);          // &
    double test(bool);            // < <= > >= == !=
    double additive(bool);        // + -
    double term(bool);            // * / %
    double expn(bool);            // ^
    double prim(bool);            // number, name, unary op, parenthesized expr

    /* Private member type to hold state of the parser
     * If chkSemantics is false, Syntax only will be checked
     * If chkNames is false, will not check that Names have been declared
     */
    class ParserState {
    public:
        const bool chkSemantics; // If Semantics should be checked
        const bool chkNames;     // If Names should be checked
        ParserState(bool chkSem, bool chkN)
            : chkSemantics(chkSem), chkNames(chkN) { }
    };

    Lexer& lexer;                         // token source (not owned)
    SymbolTable& table;                   // variable storage (not owned)
    ParserMode mode;
    std::stack<ParserState> stateStack;   // one entry per nested if/else block
    std::ostream* output;                 // destination for echoed results
    double rslt;                          // value of the last statement parsed
    int numErrors;
};
#endif // _Parser_H_
Parser.cpp
Code:
#include <cmath>
#include "Parser.h"
/* Binds the parser to its lexer, symbol table and output stream and installs
 * the initial ParserState built from chkSem / chkN.
 */
Parser::Parser(Lexer& lex, SymbolTable& t, ParserMode m,
               bool chkSem, bool chkN, std::ostream *out)
    : lexer(lex), table(t), mode(m), output(out), rslt(0), numErrors(0)
{
    // Open question kept from the original author: running in interactive
    // mode without semantics seems pointless (except perhaps for testing /
    // debugging), so chkSem could be forced on here:
    //   if (m == INTERACTIVE)
    //       chkSem = true;
    const ParserState initialState(chkSem, chkN);
    stateStack.push(initialState);
}
/* Top level call to begin parsing
* If get is true, a new token will read
* Returns the numerical value of the last expression evaluated
*/
double Parser::parse(bool get) {
if (get) lexer.getNextToken();
// Read statements until end of input is reached
while (lexer.getCurrentToken().getTokenType() != END)
(*this).rslt = stmt(false);
return (*this).rslt;
}
/* Checks that the current token has type t, reading a new token first when
 * get is true. Returns true on a match and throws SyntaxError otherwise.
 * The matched token is not consumed.
 */
bool Parser::match(TokenType t, bool get) {
    if (get) {
        lexer.getNextToken();
    }
    const TokenType actual = lexer.getCurrentToken().getTokenType();
    if (actual != t) {
        throw SyntaxError("Token Expected");
    }
    return true;
}
/* Match a statement
* Can be either an if-statement or a
* numerical/logical expression
*/
double Parser::stmt(bool get) {
double retVal = 0;
// IF ( expr ) ifBlock ENDIF
// IF ( expr ) ifBlock ELSE ifBlock ENDIF
if (lexer.getCurrentToken().getTokenType() == IF) {
// Match an if-statement
match(LP, true);
double condition = expr(true);
match(RP, false);
// Parse the if true part of the if statement
// Push a new Parser state onto the stack
bool chkSem = (stateStack.top().chkSemantics && condition);
bool chkNames = (stateStack.top().chkNames);
stateStack.push(ParserState(chkSem, chkNames));
retVal = ifBlock(false, true);
stateStack.pop();
// Check for an else clause
if (lexer.getCurrentToken().getTokenType() == ELSE) {
// Parse the else part of the if statement
chkSem = (stateStack.top().chkSemantics && !condition);
chkNames = (stateStack.top().chkNames);
stateStack.push(ParserState(chkSem, chkNames));
if (chkSem)
retVal = ifBlock(true, true);
else
ifBlock(true, true);
stateStack.pop();
lexer.getNextToken();
}
} else {
// expr ;
retVal = expr(false);
// If in interactive mode, allow either a semicolon or
// a newline character to terminate an expression
bool matchedNL = false;
if (mode == INTERACTIVE) {
try {
match(NEWLINE, false);
matchedNL = true;
}
catch (Parser::SyntaxError& e) {
// Error handling
std::cout << "Caught Parser::SyntaxError in stmt()\n";
}
catch (...) {
std::cout << "Caught unknown exception in stmt()\n";
}
}
if (!matchedNL)
match(SEMI, false);
}
// member variable rslt will hold value of last evaluated expression
if ((mode == INTERACTIVE) && (stateStack.top().chkSemantics) &&
(stateStack.size() == 1))
(*output) << retVal << '\n';
return retVal;
}
double Parser::ifBlock(bool inElseBlock, bool get) {
double retVal = 0;
if (get) lexer.getNextToken();
for (;;)
switch (lexer.getCurrentToken().getTokenType()) {
case ELSE:
if (inElseBlock)
throw SyntaxError("'else' without matching 'if'");
else
return retVal;
case ENDIF:
return retVal;
default:
retVal = stmt(false);
}
// Cannot reach this point
}
/* expr := lunion { '|' lunion }
 * Logical OR; the result of each '|' is 1 or 0.
 * get is forwarded to the first operand (read a new token first if true).
 */
double Parser::expr(bool get) {
    double left = lunion(get);
    while (lexer.getCurrentToken().getTokenType() == OR) {
        // The right operand must always be PARSED, even when left is already
        // true. `left || lunion(true)` short-circuited and left the operand's
        // tokens unread in the stream.
        const double right = lunion(true);
        left = (left || right);
    }
    return left;
}
/* lunion := test { '&' test }
 * Logical AND; the result of each '&' is 1 or 0.
 * get is forwarded to the first operand (read a new token first if true).
 */
double Parser::lunion(bool get) {
    double left = test(get);
    while (lexer.getCurrentToken().getTokenType() == AND) {
        // Parse the right operand unconditionally so its tokens are always
        // consumed, then combine with && (the original used || here, which
        // made '&' behave like '|').
        const double right = test(true);
        left = (left && right);
    }
    return left;
}
/* test := additive { relop additive },  relop in  < <= > >= == !=
 * Each comparison yields 1 or 0 and chains left to right.
 * get is forwarded to the first operand (read a new token first if true).
 */
double Parser::test(bool get) {
    double left = additive(get);
    for (;;) {
        switch (lexer.getCurrentToken().getTokenType()) {
        case LT:   // additive < additive
            left = (left < additive(true));
            break;
        case LE:   // additive <= additive
            left = (left <= additive(true));
            break;
        case GT:   // additive > additive
            left = (left > additive(true));
            break;
        case GE:   // additive >= additive
            left = (left >= additive(true));
            break;
        case EQ:   // additive == additive
            left = (left == additive(true));
            break;
        case NEQ:  // additive != additive
            left = (left != additive(true));
            break; // was missing: '!=' fell through to the return below
        default:   // additive
            return left;
        }
    }
}
double Parser::additive(bool get) {
double left = term(get);
for (;;)
switch(lexer.getCurrentToken().getTokenType()) {
case PLUS: // term + term
left += term(true);
break;
case MINUS: // term - term
left -= term(true);
break;
default: // term
return left;
}
// Cannot reach this point
}
/* term := expn { ('*' | '/' | '%') expn }
 * get is forwarded to the first operand (read a new token first if true).
 * '%' works on the operands truncated to int.
 * Throws ZeroDivide on a zero divisor when semantics are being checked;
 * otherwise a zero divisor leaves the left value unchanged.
 */
double Parser::term(bool get) {
    double left = expn(get);
    for (;;) {
        switch (lexer.getCurrentToken().getTokenType()) {
        case MULT:  // expn * expn
            left *= expn(true);
            break;
        case DIV:   // expn / expn
        {
            const double divisor = expn(true);  // fetch divisor in advance
            if (divisor != 0)
                left /= divisor;
            else if (stateStack.top().chkSemantics)
                throw ZeroDivide();
            break;
        }
        case MOD:   // expn % expn modulus operator
        {
            // Test the TRUNCATED divisor: a value such as 0.5 becomes 0 and
            // would otherwise reach an integer modulo by zero.
            const int divisor = int(expn(true));
            if (divisor != 0)
                left = int(left) % divisor;
            // Only a zero divisor is an error. The original fell through to
            // the throw even after a successful modulus.
            else if (stateStack.top().chkSemantics)
                throw ZeroDivide();
            break;
        }
        default:    // expn
            return left;
        }
    }
}
/* expn := prim [ '^' expn ]
 * Exponentiation binds to the right: a ^ b ^ c = a ^ (b ^ c), which the
 * recursive call on the exponent provides.
 * get is forwarded to prim (read a new token first if true).
 */
double Parser::expn(bool get) {
    const double base = prim(get);
    double result = base;
    while (lexer.getCurrentToken().getTokenType() == POW) {
        const double exponent = expn(true);
        result = pow(base, exponent);
    }
    return result;
}
/* prim := NUMBER | NAME | NAME '=' expr | '!' prim | '-' prim | '+' prim
 *       | '(' expr ')'
 * When get is true a new token is read first.
 * Throws SyntaxError when no primary can be parsed or ')' is missing.
 * Reading a name that was never assigned propagates whatever
 * SymbolTable::getSym throws.
 */
double Parser::prim(bool get) {
    if (get) lexer.getNextToken();

    switch (lexer.getCurrentToken().getTokenType()) {
    case NUMBER:
    {
        double v = lexer.getCurrentToken().getNumberValue();
        lexer.getNextToken();
        return v;
    }
    case NAME:
    {
        // Remember the name and look one token ahead BEFORE touching the
        // symbol table. Looking the name up first made `x = 1` throw for any
        // new variable, so nothing could ever be stored.
        const std::string name = lexer.getCurrentToken().getStringValue();
        lexer.getNextToken();
        if (lexer.getCurrentToken().getTokenType() == ASSIGN) {
            const double value = expr(true);
            return table.putSym(name, value);   // creates or overwrites
        }
        return table.getSym(name);
    }
    case NOT:    // unary negate
        return !prim(true);
    case MINUS:  // unary minus
        return -prim(true);
    case PLUS:   // unary plus
        return +prim(true);
    case LP:
    {
        double e = expr(true);
        if (lexer.getCurrentToken().getTokenType() != RP)
            throw SyntaxError("')' expected");
        lexer.getNextToken(); // eat ')'
        return e;
    }
    default:
        throw SyntaxError("primary expected");
    }
}
Lexer.h
Code:
#ifndef _LEXER_H_
#define _LEXER_H_
#include <iostream>
#include "Token.h"
// Splits a character stream into Tokens, one at a time.
class Lexer {
public:
    // Thrown by the lexer on input it cannot tokenize.
    // p is stored as given; the text it points to is not copied.
    struct LexerError {
        const char *p;
        LexerError(const char* q) : p(q) { }
    };

    // The stream pointer is stored, not owned.
    Lexer(std::istream *in);

    // Reads the next token from the stream, remembers it and returns it.
    Token getNextToken();

    // Returns the most recently read token without reading any input.
    Token getCurrentToken();

private:
    Token currentToken;     // last token produced by getNextToken()
    std::istream* input;    // character source
};
#endif //_LEXER_H_
Lexer.cpp
Code:
#include <iostream>
#include <cstdio>
#include "Lexer.h"
using namespace std;
// Stores the input stream pointer; the current token starts out
// default-constructed.
Lexer::Lexer(std::istream* in)
    : currentToken(), input(in)
{
}
// Returns a copy of the token most recently produced by getNextToken().
// (Small enough that it could be defined inline in the class.)
Token Lexer::getCurrentToken() {
    return currentToken;
}
// Consumes the next character of `in` only if it equals `expected`.
// Returns true when the character was consumed. Uses peek() so nothing has
// to be put back.
static bool acceptChar(std::istream& in, char expected) {
    if (in.peek() != expected)
        return false;
    in.get();
    return true;
}

// Reads the next token from the input stream, stores it as the current token
// and returns it.
//  - Whitespace other than '\n' is skipped; '\n' is a token of its own.
//  - "//" starts a comment that runs to the end of the line. The comment
//    yields NEWLINE, or END if the input ends first.
//  - End of input yields END.
// Throws LexerError for a character that cannot start a token, or for a
// numeric literal that cannot be read as a double.
Token Lexer::getNextToken() {
    int ch = 0; // int, rather than char so large enough to hold EOF
    do { // Skip over whitespace other than newline
        ch = input->get();
        if (ch == EOF) return currentToken = Token(END);
    } while (ch != '\n' && isspace(ch));

    switch (ch) {
    case '\n':
        return currentToken = Token(NEWLINE);
    case ';':
        return currentToken = Token(SEMI);

    case '/':
    {
        if (!acceptChar(*input, '/'))
            return currentToken = Token(DIV); // Division

        // Read over the comment until the end of the line (or end of input).
        // The original loop condition used `look = '\n'` (assignment), which
        // made the comment swallow the rest of the input.
        int look = 0;
        do {
            look = input->get();
        } while (look != EOF && look != '\n');
        return currentToken = Token(look == '\n' ? NEWLINE : END);
    }

    // Simple operators: the enumerator value is the character itself
    case '^':
    case '*':
    case '%':
    case '+':
    case '-':
    case '|':
    case '&':
    case '(':
    case ')':
        return currentToken = Token(TokenType(ch));

    // Operators that may be followed by '='
    case '!': // Unary NOT, or not-equal-to
        return currentToken = Token(acceptChar(*input, '=') ? NEQ : NOT);
    case '=': // Assignment, or equality test
        return currentToken = Token(acceptChar(*input, '=') ? EQ : ASSIGN);
    case '<': // Less than, or less than or equal to
        return currentToken = Token(acceptChar(*input, '=') ? LE : LT);
    case '>': // Greater than, or greater than or equal to
        return currentToken = Token(acceptChar(*input, '=') ? GE : GT);

    // Numbers
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case '.':
    {
        input->putback(char(ch));
        double d = 0;
        if (!(*input >> d)) {
            // e.g. a lone '.'. Clear the failure so the stream stays usable
            // and report it, instead of silently producing 0 and leaving the
            // stream in a failed state.
            input->clear();
            throw LexerError("bad number");
        }
        return currentToken = Token(NUMBER, d);
    }

    // Keywords and names
    default:
    {
        // Names can begin with letters or underscore
        if (isalpha(ch) || ch == '_') {
            std::string s(1, char(ch));
            // And can contain letters, numbers, and underscores.
            // Peek first so the terminating character (or EOF) is never
            // removed from the stream and never has to be put back.
            for (;;) {
                const int next = input->peek();
                if (next == EOF || !(isalnum(next) || next == '_'))
                    break;
                s.push_back(char(input->get()));
            }
            // Check for keywords
            if (s == "if")    return currentToken = Token(IF);
            if (s == "else")  return currentToken = Token(ELSE);
            if (s == "endif") return currentToken = Token(ENDIF);
            // Not a keyword, must be a name
            return currentToken = Token(NAME, s);
        }
        // Not good if we reach this point
        throw LexerError("bad token");
    }
    }
}
Token.h
Code:
#ifndef _TOKEN_H_
#define _TOKEN_H_
// Kinds of token produced by the lexer.
// Single-character tokens use their character code as the enumerator value.
// The multi-character tokens and keywords that follow GT continue numbering
// from '>' + 1.
enum TokenType {
    END,                // value 0
    NEWLINE = '\n',
    SEMI    = ';',
    DIV     = '/',
    MULT    = '*',
    POW     = '^',
    MOD     = '%',
    PLUS    = '+',
    MINUS   = '-',
    OR      = '|',
    AND     = '&',
    LP      = '(',
    RP      = ')',
    ASSIGN  = '=',
    NOT     = '!',
    LT      = '<',
    GT      = '>',
    EQ,                 // ==
    NEQ,                // !=
    LE,                 // <=
    GE,                 // >=
    IF,
    ELSE,
    ENDIF,
    NAME,
    NUMBER
};
// A lexical token: its kind plus an optional text or numeric payload.
// Payloads that a constructor does not receive are empty / zero.
struct Token {
    // END token with no payload.
    Token()
        : tokenType(END), stringValue(), numberValue(0.0) { }

    // Token of kind t with no payload.
    Token(const TokenType t)
        : tokenType(t), stringValue(), numberValue(0.0) { }

    // Token of kind t carrying text s.
    Token(const TokenType t, const std::string& s)
        : tokenType(t), stringValue(s), numberValue(0.0) { }

    // Token of kind t carrying the number d.
    Token(const TokenType t, double d)
        : tokenType(t), stringValue(), numberValue(d) { }

    TokenType getTokenType() const { return tokenType; }
    std::string getStringValue() const { return stringValue; }
    double getNumberValue() const { return numberValue; }

private:
    TokenType tokenType;
    std::string stringValue;
    double numberValue;
};
#endif // TOKEN_H
Symbol.h
Code:
#ifndef _SYMBOL_H_
#define _SYMBOL_H_
#include <string>
#include <map>
// Maps variable names to their numeric values.
class SymbolTable {
public:
    SymbolTable();

    // Returns a reference to the value stored under key.
    // Throws SymbolNotFound when key is not in the table.
    double& getSym(std::string key);

    // Stores val under key (creating or overwriting) and returns val.
    double putSym(std::string key, double val);

    // Thrown by getSym() for an unknown name.
    // The text is copied into the exception object, so p stays valid for as
    // long as the exception lives. The previous version kept only the
    // caller's pointer, which getSym() takes from a by-value string that is
    // destroyed while the exception propagates.
    class SymbolNotFound {
    private:
        std::string text;   // owned copy; declared before p so it is built first
    public:
        const char *p;      // always points at text's characters
        SymbolNotFound(const char* q)
            : text(q ? q : ""), p(text.c_str()) { }
        // Copies must re-point p at their own text.
        SymbolNotFound(const SymbolNotFound& other)
            : text(other.text), p(text.c_str()) { }
        SymbolNotFound& operator=(const SymbolNotFound& other) {
            text = other.text;
            p = text.c_str();
            return *this;
        }
    };

private:
    std::map<std::string, double> table;
};
#endif // _SYMBOL_H
Symbol.cpp
Code:
#include "Symbol.h"
// Creates an empty symbol table.
SymbolTable::SymbolTable()
    : table()
{
}
// Returns a reference to the value stored under key.
// Throws SymbolNotFound, built from key.c_str(), when key is absent.
// NOTE(review): key is a by-value parameter, so if SymbolNotFound keeps only
// the pointer (rather than copying the text) it dangles once key is destroyed.
double& SymbolTable::getSym(std::string key) {
    typedef std::map<std::string, double>::iterator Entry;
    const Entry found = table.find(key);
    if (found != table.end()) {
        return found->second;
    }
    throw SymbolNotFound(key.c_str());
}
// Stores val under key, creating the entry if needed, and returns the
// stored value.
double SymbolTable::putSym(std::string key, double val) {
    double& slot = table[key];
    slot = val;
    return slot;
}
I just got this thing to compile and would like to test / debug it; I'm sure there are lots of bugs in there, and I will work through those as I find them. I am looking for help catching my Parser::SyntaxError exception. To trigger the exception, you can simply enter a constant followed by a semicolon. When run in interactive mode, I want the parser to let the user get away with ending expressions with a newline rather than a semicolon.
As a generic way to match/eat expected tokens, I use the match() function. If I don't match what I am expecting, the program throws an exception. This will happen when parsing an expression that is terminated in a semicolon, because first the parser will try to match a newline and if that fails (or if it is not in interactive mode), then it tries to match a semicolon.
The problem is that the exception being thrown when the newline isn't matched is not being caught in Parser::stmt(). It gets caught in main() instead.
I compile the program with g++ like so:
Code:
g++ -Wall -g Symbol.cpp Lexer.cpp Parser.cpp main.cpp -o calc
I get a clean compile, with no warning or error messages. Below, I run the program, parsing a constant followed by a semicolon:
Code:
./calc
2;
Caught Parser::SyntaxError in stmt()
2
In Parser::SyntaxError handler in main.cpp
Again, I am just trying to get my head around why the SyntaxError exception being thrown in Parser::match() is blowing past my try-catch block in Parser::stmt and getting caught in main instead.
Jason