Ok I'm not getting the stack overflow problem anymore. Is there a way to change the thread title?
I am having another problem - this is very slow:
main.cpp
Code:
#include <iostream>
#include <fstream>
#include <string>
#include <list>
#include "HashTable.h"
using namespace std;
bool isContained(HashTable *table, string the_string);
bool contCapital(string the_string);
bool goodLength(string the_string);
string format(string the_string);
bool isFormatted(string the_string);
int main(){
list<string> filenames;
HashTable *table = new HashTable();
filenames.insert(filenames.end(), "american-words.10");
filenames.insert(filenames.end(), "american-words.20");
filenames.insert(filenames.end(), "american-words.35");
filenames.insert(filenames.end(), "american-words.40");
filenames.insert(filenames.end(), "american-words.50");
filenames.insert(filenames.end(), "american-words.55");
filenames.insert(filenames.end(), "american-words.60");
filenames.insert(filenames.end(), "american-words.70");
filenames.insert(filenames.end(), "american-words.80");
filenames.insert(filenames.end(), "american-words.95");
ifstream* inFiles = new ifstream[filenames.size()];
int i=0;
for(list<string>::iterator itr = filenames.begin(); itr != filenames.end(); itr++){
inFiles[i].open(itr->c_str());
i++;
}
string temp;
for(int i=0;i<filenames.size();i++){
if(inFiles[i]){
cout << "Working on file " << i << endl;
while(inFiles[i] >> temp){
if(!isContained(table, format(temp))&&!contCapital(temp)&&goodLength(temp)){
table->addWord(format(temp));
cout << format(temp) << endl;
}
}
}
else cout << "Could not open file " << i << endl;
inFiles[i].close();
}
delete inFiles;
ofstream outFile("outFile.txt");
for(int i=0;i<TABLESIZE;i++)
if(!table->table[i].empty())
for(list<string>::iterator itr = table->table[i].begin(); itr!= table->table[i].end(); itr++)
outFile << *itr << endl;
return 0;
}
bool isContained(HashTable *table, string the_string){
for(int i=0;i<TABLESIZE;i++)
for(list<string>::iterator itr = table->table[i].begin(); itr != table->table[i].end(); itr++)
if(itr->compare(the_string) == 0)
return true;
return false;
}
bool contCapital(string the_string){
for(int i=0;i<the_string.size();i++)
if(the_string.at(i)>='A'&&the_string.at(i)<='Z')
return true;
return false;
}
bool goodLength(string the_string){
return the_string.size()>=3&&the_string.size()<=10;
}
string format(string the_string){
while(!isFormatted(the_string))
for(int i=0;i<the_string.size();i++)
if(the_string.at(i)=='\'')
the_string.erase(i,1);
else if(the_string.at(i)=='q'&&i+1<the_string.size()&&the_string.at(i+1)=='u'){
the_string.erase(i,2);
the_string.insert(i, "1");
}
return the_string;
}
bool isFormatted(string the_string){
for(int i=0;i<the_string.size();i++)
if(the_string.at(i)=='\'')
return false;
else if(the_string.at(i)=='q')
if(i+1<the_string.size()&&the_string.at(i+1)=='u')
return false;
return true;
}
HashTable.h
Code:
#ifndef HASHTABLE_H
#define HASHTABLE_H
#define TABLESIZE 1000000
#include <list>
#include <string>
using namespace std;
class HashTable{
private:
public:
HashTable();
~HashTable();
list<string> table[TABLESIZE];
void addWord(string word);
int getIndex(string word);
};
#endif
HashTable.cpp
Code:
#include "HashTable.h"
#include <string>
#include <list>
#include <cmath>
using namespace std;
HashTable::HashTable(){
}
HashTable::~HashTable(){
}
void HashTable::addWord(string word){
int index = getIndex(word);
table[index].insert(table[index].begin(), word);
}
int HashTable::getIndex(string word){
//this causes negative values for some reason
/*int sum=0;
for(int i=0;i<(int)word.length();i++)
sum+=(word.at(i)!='1'?(int(word.at(i)-'a')):27)*pow(27,(double)word.length()-i);
return sum%TABLESIZE;*/
//so I replaced it with this
return (word.at(0)*word.at(1)*word.at(2))%TABLESIZE;
}
I figured out that the large TABLESIZE was the problem, so I changed it to a much lower value. But I would still like to keep the load factor low to prevent collisions, so what do you suggest for a table size? I want to get as many words as possible.