Ok, i have done some modifications, here is the result:
Code:
#ifndef _SPELLCORRECTOR_H_
#define _SPELLCORRECTOR_H_
#include <map>
#include <vector>
class SpellCorrector
{
private:
typedef std::map<std::string, int> Dictionary;
typedef std::vector<std::string> Vector;
static const char alphabet[];
Dictionary dictionary;
void edits(std::string& word, Vector& result);
void known(Vector& results, Dictionary& candidates);
public:
void load(std::string filename);
std::string correct(std::string word);
};
#endif
Code:
#include <fstream>
#include <string>
#include <algorithm>
#include "SpellCorrector.h"
using namespace std;
const char SpellCorrector::alphabet[] = "abcdefghijklmnopqrstuvwxyz";
bool sortBySecond(const pair<std::string, int>& left, const pair<std::string, int>& right)
{
return left.second < right.second;
}
void SpellCorrector::load(string filename)
{
ifstream file(filename.c_str());
char* data;
file.seekg(0, ios_base::end);
int length = file.tellg();
file.seekg(0, ios_base::beg);
data = new char[length+1];
file.read(data, length);
string line(data);
delete [] data;
transform(line.begin(), line.end(), line.begin(), tolower);
string::size_type position = 0;
while ((position = line.find_first_of(alphabet, position)) != string::npos)
{
string::size_type endPosition = line.find_first_not_of(alphabet, position);
dictionary[line.substr(position, endPosition - position)]++;
position = endPosition;
}
}
string SpellCorrector::correct(string word)
{
Vector result;
Dictionary candidates;
if (dictionary.find(word) != dictionary.end()) { return word; }
edits(word, result);
known(result, candidates);
if (candidates.size() > 0) { return max_element(candidates.begin(), candidates.end(), sortBySecond)->first; }
for (unsigned int i = 0;i < result.size();i++)
{
Vector subResult;
edits(result[i], subResult);
known(subResult, candidates);
}
if (candidates.size() > 0) { return max_element(candidates.begin(), candidates.end(), sortBySecond)->first; }
return "";
}
void SpellCorrector::known(Vector& results, Dictionary& candidates)
{
Dictionary::iterator end = dictionary.end();
for (unsigned int i = 0;i < results.size();i++)
{
Dictionary::iterator value = dictionary.find(results[i]);
if (value != end) candidates[value->first] = value->second;
}
}
void SpellCorrector::edits(string& word, Vector& result)
{
for (string::size_type i = 0;i < word.size(); i++) result.push_back(word.substr(0, i) + word.substr(i + 1)); //deletions
for (string::size_type i = 0;i < word.size() - 1;i++) result.push_back(word.substr(0, i) + word[i+1] + word.substr(i + 2)); //transposition
for (char j = 'a';j <= 'z';++j)
{
for (string::size_type i = 0;i < word.size(); i++) result.push_back(word.substr(0, i) + j + word.substr(i + 1)); //alterations
for (string::size_type i = 0;i < word.size() + 1;i++) result.push_back(word.substr(0, i) + j + word.substr(i) ); //insertion
}
}
I have some especific questions:
1) In the load functions isn´t there any easier way to read all data from a file without affecting too much performance?
2) I have tried
Code:
string data;
getline(file, data, file.widen(EOF));
but it seems that performance gets affected by it, i tought it is because it checks each character of the file until EOF is reached, am i right?
3) It seems that the most intensive loop in my application, besides I/O, is the one that i break the lines and insert them into the map... is there any way i could do this faster?