Hey, i have writen a spell corrector class. But now i wanted it to be optimized/reviewed so that i can learn from my errors. Readability, performance, style, every suggestions is worths now:
Code:
/*
* SpellChecker.h
*
* Copyright (C) 2007 Felipe Farinon <[email protected]>
*
* Version: 1.0
* Author: Felipe Farinon <[email protected]>
* Maintainer: Felipe Farinon <[email protected]>
* URL: http://scarvenger.wordpress.com/
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* Commentary:
*
* See http://scarvenger.wordpress.com/.
*
* Code:
*/
#ifndef _SPELLCHEKER_H_
#define _SPELLCHEKER_H_
#include <iostream>
#include <map>
#include <fstream>
#include <sstream>
#include <algorithm>
#include <vector>
#include <boost/foreach.hpp>
using namespace std;
typedef map<string, int> Dictionary;
typedef pair<string, int> Pair;
typedef vector<string> Vector;
bool max(const Pair& left, const Pair& right)
{
return left.second < right.second;
}
class SpellChecker
{
private:
static const char alphabet[];
Dictionary dictionary;
void edits(string& word, Vector& result)
{
for (string::size_type i = 0;i < word.size(); i++) result.push_back(word.substr(0, i) + word.substr(i + 1)); //deletions
for (string::size_type i = 0;i < word.size() - 1;i++) result.push_back(word.substr(0, i) + word[i+1] + word.substr(i + 2)); //transposition
for (char j = 'a';j <= 'z';++j)
{
for (string::size_type i = 0;i < word.size(); i++) result.push_back(word.substr(0, i) + j + word.substr(i + 1)); //alterations
for (string::size_type i = 0;i < word.size() + 1;i++) result.push_back(word.substr(0, i) + j + word.substr(i) ); //insertion
}
}
void known(Vector& results, Dictionary& candidates)
{
Dictionary::iterator end = dictionary.end();
for (unsigned int i = 0;i < results.size();i++)
{
Dictionary::iterator value = dictionary.find(results[i]);
if (value != end) candidates[value->first] = value->second;
}
}
void stringToLower(string& text)
{
int lenght = text.size();
for (int i = 0;i < lenght;i++)
{
text[i] = tolower(text[i]);
}
}
public:
bool load(const char* filename)
{
ifstream file(filename);
char* data;
file.seekg(0, ios_base::end);
int lenght = file.tellg();
file.seekg(0, ios_base::beg);
data = new char[lenght+1];
file.read(data, lenght);
string line(data);
delete [] data;
stringToLower(line);
string::size_type position = 0;
while ((position = line.find_first_of(alphabet, position)) != string::npos)
{
string::size_type endPosition = line.find_first_not_of(alphabet, position);
dictionary[line.substr(position, endPosition - position)]++;
position = endPosition;
}
return true;
}
string check(string word)
{
Vector result;
Dictionary candidates;
if (dictionary.find(word) != dictionary.end()) { return word; }
edits(word, result);
known(result, candidates);
if (candidates.size() > 0) { return max_element(candidates.begin(), candidates.end(), max)->first; }
for (unsigned int i = 0;i < result.size();i++)
{
Vector subResult;
edits(result[i], subResult);
known(subResult, candidates);
}
if (candidates.size() > 0) { return max_element(candidates.begin(), candidates.end(), max)->first; }
return "";
}
};
const char SpellChecker::alphabet[] = "abcdefghijklmnopqrstuvwxyz";
#endif