I still haven't mastered static members and initialization. I'm working on a little calculator program, and I would like to have a std::map that can associate a TokenType with a std::string representation. Mainly I want this ability right now to be able to provide better error messages.
My program has three objects that work together to do the work, a Parser, Lexer, and SymbolTable. Inside the parser, I have a function that matches Tokens from the input, like a "(" for example as part of an if statement: "if ( expr ) if-block [else if-block] endif". That function looks like:
Code:
// Verifies that the current token (optionally fetching a fresh one first)
// has type t.  Returns true on a match; throws SyntaxError otherwise, so a
// false return never happens.
bool Parser::match(TokenType t, bool get) {
    if (get)
        lexer.getNextToken();
    const bool matched = (lexer.getCurrentToken().getTokenType() == t);
    if (!matched)
        throw SyntaxError("Token Expected");
    return true;
}
"Token Expected" isn't a very descriptive message. My first thought when thinking about associating strings with TokenTypes was to use a map. Then after some deliberation, I decided that such a map belongs in the Lexer, since that is the only part of the program that should have any say about what character input results in which Tokens. So, without further introduction, here is my Lexer code, with the sections I am interested in bolded.
Lexer.h
Code:
#ifndef LEXER_H_
#define LEXER_H_
// (guard renamed from _LEXER_H_: identifiers starting with an underscore
// followed by an uppercase letter are reserved for the implementation)
#include <iostream>
#include <map>
#include <string>
#include "Token.h"
// Tokenizer for the calculator: turns a character stream into Tokens and
// remembers the most recently produced one for the parser to inspect.
class Lexer {
public:
    // The stream pointer is borrowed, not owned; it must outlive the Lexer.
    Lexer(std::istream *in);
    Token getNextToken();     // consume input and return the next token
    Token getCurrentToken();  // last token returned by getNextToken()
    int getLine();            // current 1-based input line, for diagnostics
    // Human-readable spelling of t's token type, for error messages.
    static const std::string getTokenString(Token t);
    // Thrown by getNextToken() on a character that cannot start any token.
    // NOTE(review): consider deriving from std::exception for uniform catching.
    class LexerError {
    public:
        const char *p;
        LexerError(const char* q) { p = q; }
    };
private:
    Token currentToken;
    std::istream* input;
    int currentLine;
    // Declaration only; the definition with its initializer lives in Lexer.cpp.
    static std::map<TokenType, std::string> tokenToString;
};
#endif // LEXER_H_
Lexer.cpp
Code:
#include "Lexer.h"
#include <cctype>   // isspace, isalpha, isalnum — used below, include explicitly
#include <cstdio>
#include <iostream>
using namespace std;
// Builds the TokenType -> display-string table used for diagnostics.
// File-local (static) helper: it exists only so the Lexer's static map can be
// initialized in a single expression.
static std::map<TokenType, std::string> createTokenStringMap() {
    std::map<TokenType, std::string> m;
    m[END] = "EOF";    m[NEWLINE] = "new line";
    m[SEMI] = ";";     m[DIV] = "/";
    m[MULT] = "*";     m[POW] = "^";
    m[MOD] = "%";      m[PLUS] = "+";
    m[MINUS] = "-";    m[OR] = "|";   // MINUS was '-' (a char); now a string literal like every other entry
    m[AND] = "&";      m[LP] = "(";
    m[RP] = ")";       m[ASSIGN] = "=";
    m[NOT] = "!";      m[LT] = "<";
    m[GT] = ">";       m[EQ] = "==";
    m[NEQ] = "!=";     m[LE] = "<=";
    m[GE] = ">=";      m[IF] = "if";
    m[ELSE] = "else";  m[ENDIF] = "endif";
    m[NAME] = "name";  m[NUMBER] = "number";
    return m;
}
Lexer::tokenToString = createTokenStringMap();
// Returns a human-readable spelling for t's token type.
// Maybe provide more information in the case of a NAME or NUMBER?
const std::string Lexer::getTokenString(Token t) {
    // find() instead of operator[]: [] would silently insert an empty string
    // into the table for any TokenType that has no entry.
    std::map<TokenType, std::string>::const_iterator it =
        tokenToString.find(t.getTokenType());
    return it != tokenToString.end() ? it->second : "unknown token";
}
// Constructs a Lexer reading from *in; line counting starts at 1.
// NOTE(review): the stream pointer is borrowed, not owned — the caller must
// keep the stream alive for the Lexer's lifetime.  currentToken stays
// default-constructed (END) until getNextToken() is first called.
Lexer::Lexer(std::istream* in)
: input(in), currentLine(1) { }
// Accessor for the most recently lexed token.
// (Small enough to be defined inline in the class body.)
Token Lexer::getCurrentToken() {
    return currentToken;
}
// Could inline this one too
int Lexer::getLine() {
return (*this).currentLine;
}
// Consumes characters from *input and returns the next Token, also caching it
// in currentToken.  Spaces and tabs are skipped; '\n' is significant and is
// produced as a NEWLINE token.  "//" comments run to the end of the line.
// Two-character operators (==, !=, <=, >=) are recognized with one character
// of lookahead.  Throws LexerError on a character that cannot start any token.
Token Lexer::getNextToken() {
    int ch = 0; // int, rather than char, so it is large enough to hold EOF
    do { // Skip over whitespace (newline is itself a token, so it ends the skip)
        ch = input->get();
        if (ch == EOF) return currentToken = Token(END);
    } while (ch != '\n' && isspace(ch));
    switch (ch) {
    // (no "case EOF" needed: the loop above already returned on end of input)
    case '\n':
        currentLine++;
        return currentToken = Token(NEWLINE);
    case ';':
        return currentToken = Token(SEMI);
    case '/':
    {
        int look = input->get(); // Look ahead a character to detect comments
        if (look != '/') {
            if (look != EOF) input->putback(char(look));
            return currentToken = Token(TokenType(ch)); // Division
        }
        // Read over comment until the end of the line (or end of input)
        while (look != EOF && look != '\n') look = input->get();
        if (look == '\n') {
            currentLine++; // BUG FIX: line count previously drifted past comments
            return currentToken = Token(NEWLINE);
        }
        return currentToken = Token(END);
    }
    // Simple operators: the TokenType value is the character's own code
    case '^': case '*': case '%': case '+': case '-':
    case '|': case '&': case '(': case ')':
        return currentToken = Token(TokenType(ch));
    case '!':
    {
        int look = input->get();
        if (look == '=')
            return currentToken = Token(NEQ); // Not equal to
        if (look != EOF) input->putback(char(look));
        return currentToken = Token(TokenType(ch)); // Unary NOT
    }
    case '=':
    {
        int look = input->get();
        if (look == '=')
            return currentToken = Token(EQ); // Equality test
        if (look != EOF) input->putback(char(look));
        return currentToken = Token(TokenType(ch)); // Assignment
    }
    case '<':
    {
        int look = input->get();
        if (look == '=')
            return currentToken = Token(LE); // Less than or equal to
        if (look != EOF) input->putback(char(look)); // cast added: putback takes char
        return currentToken = Token(TokenType(ch)); // Less than
    }
    case '>':
    {
        int look = input->get();
        if (look == '=')
            return currentToken = Token(GE); // Greater than or equal to
        if (look != EOF) input->putback(char(look));
        return currentToken = Token(TokenType(ch)); // Greater than
    }
    // Numbers: put the first character back and let the stream parse a double
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
    case '.':
    {
        input->putback(char(ch));
        double d = 0; // initialized: extraction can fail (e.g. a lone '.')
        *input >> d;
        return currentToken = Token(NUMBER, d);
    }
    // Keywords and names
    default:
    {
        // Names can begin with letters or underscore
        if (isalpha(ch) || ch == '_') {
            string s;
            s.push_back(char(ch));
            // And can contain letters, numbers, and underscores
            while ((ch = input->get()) != EOF && (isalnum(ch) || ch == '_'))
                s.push_back(char(ch));
            // Read one character too many; but never putback EOF (sets failbit)
            if (ch != EOF) input->putback(char(ch));
            // Check for keywords
            if (s == "if")    return currentToken = Token(IF);
            if (s == "else")  return currentToken = Token(ELSE);
            if (s == "endif") return currentToken = Token(ENDIF);
            // Not a keyword, must be a name
            return currentToken = Token(NAME, s);
        }
        // Not good if we reach this point
        throw LexerError("bad token");
    }
    }
}
And for completeness I guess, Token.h
Code:
#ifndef TOKEN_H_
#define TOKEN_H_
// (guard renamed from _TOKEN_H_: identifiers starting with an underscore
// followed by an uppercase letter are reserved for the implementation)
#include <string>  // BUG FIX: this header uses std::string but relied on the
                   // including file to have pulled in <string> first
// Token categories.  Single-character operators use the character's own code
// as the enumerator value, so the lexer can build them as Token(TokenType(ch));
// the multi-character and abstract tokens (EQ .. NUMBER) continue from GT.
enum TokenType {
    END, NEWLINE='\n', SEMI=';', DIV='/',
    MULT='*', POW='^', MOD='%', PLUS='+',
    MINUS='-', OR='|', AND='&', LP='(',
    RP=')', ASSIGN='=', NOT='!', LT='<',
    GT='>', EQ, NEQ, LE,
    GE, IF, ELSE, ENDIF,
    NAME, NUMBER
};
// A single lexical token: its category plus an optional payload — a string
// for NAME tokens, a number for NUMBER tokens.
struct Token {
private:
    TokenType tokenType;
    std::string stringValue;
    double numberValue;
public:
    Token()
        : tokenType(END), stringValue(""), numberValue(0) { }
    Token(const TokenType t)
        : tokenType(t), stringValue(""), numberValue(0) { }
    Token(const TokenType t, const std::string& s)
        : tokenType(t), stringValue(s), numberValue(0) { }
    Token(const TokenType t, double d)
        : tokenType(t), stringValue(""), numberValue(d) { }
    TokenType getTokenType() const { return tokenType; }
    std::string& getStringValue() { return stringValue; }
    double getNumberValue() const { return numberValue; }
};
#endif // TOKEN_H_
My basis for this approach was the second response at:
Initializing a static std::map<int, int> in C++ - Stack Overflow
However, when I compile (gcc) I get the following message:
Lexer.cpp:27: error: expected constructor, destructor, or type conversion before ‘=’ token
Lexer.cpp:7: warning: ‘std::map<TokenType, std::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::less<TokenType>, std::allocator<std::pair<const TokenType, std::basic_string<char, std::char_traits<char>, std::allocator<char> > > > > createTokenStringMap()’ defined but not used
How can I go about creating this static map? Or is that not even a good solution for what I am trying to accomplish?
I have not posted the Parser or SymbolTable as I don't think they are relevant to the syntax / procedure here. I can do so if necessary.
Jason