Ok I'm not getting the stack overflow problem anymore. Is there a way to change the thread title?
I am having another problem - this is very slow:
main.cpp
HashTable.hCode:#include <iostream> #include <fstream> #include <string> #include <list> #include "HashTable.h" using namespace std; bool isContained(HashTable *table, string the_string); bool contCapital(string the_string); bool goodLength(string the_string); string format(string the_string); bool isFormatted(string the_string); int main(){ list<string> filenames; HashTable *table = new HashTable(); filenames.insert(filenames.end(), "american-words.10"); filenames.insert(filenames.end(), "american-words.20"); filenames.insert(filenames.end(), "american-words.35"); filenames.insert(filenames.end(), "american-words.40"); filenames.insert(filenames.end(), "american-words.50"); filenames.insert(filenames.end(), "american-words.55"); filenames.insert(filenames.end(), "american-words.60"); filenames.insert(filenames.end(), "american-words.70"); filenames.insert(filenames.end(), "american-words.80"); filenames.insert(filenames.end(), "american-words.95"); ifstream* inFiles = new ifstream[filenames.size()]; int i=0; for(list<string>::iterator itr = filenames.begin(); itr != filenames.end(); itr++){ inFiles[i].open(itr->c_str()); i++; } string temp; for(int i=0;i<filenames.size();i++){ if(inFiles[i]){ cout << "Working on file " << i << endl; while(inFiles[i] >> temp){ if(!isContained(table, format(temp))&&!contCapital(temp)&&goodLength(temp)){ table->addWord(format(temp)); cout << format(temp) << endl; } } } else cout << "Could not open file " << i << endl; inFiles[i].close(); } delete inFiles; ofstream outFile("outFile.txt"); for(int i=0;i<TABLESIZE;i++) if(!table->table[i].empty()) for(list<string>::iterator itr = table->table[i].begin(); itr!= table->table[i].end(); itr++) outFile << *itr << endl; return 0; } bool isContained(HashTable *table, string the_string){ for(int i=0;i<TABLESIZE;i++) for(list<string>::iterator itr = table->table[i].begin(); itr != table->table[i].end(); itr++) if(itr->compare(the_string) == 0) return true; return false; } bool contCapital(string the_string){ for(int i=0;i<the_string.size();i++) if(the_string.at(i)>='A'&&the_string.at(i)<='Z') return true; return false; } bool goodLength(string the_string){ return the_string.size()>=3&&the_string.size()<=10; } string format(string the_string){ while(!isFormatted(the_string)) for(int i=0;i<the_string.size();i++) if(the_string.at(i)=='\'') the_string.erase(i,1); else if(the_string.at(i)=='q'&&i+1<the_string.size()&&the_string.at(i+1)=='u'){ the_string.erase(i,2); the_string.insert(i, "1"); } return the_string; } bool isFormatted(string the_string){ for(int i=0;i<the_string.size();i++) if(the_string.at(i)=='\'') return false; else if(the_string.at(i)=='q') if(i+1<the_string.size()&&the_string.at(i+1)=='u') return false; return true; }
HashTable.cppCode:#ifndef HASHTABLE_H #define HASHTABLE_H #define TABLESIZE 1000000 #include <list> #include <string> using namespace std; class HashTable{ private: public: HashTable(); ~HashTable(); list<string> table[TABLESIZE]; void addWord(string word); int getIndex(string word); }; #endif
I figured out that the large TABLESIZE was the problem, so I changed it to a much lower value. But I would still like to keep the load factor low to prevent collisions, so what do you suggest for a table size? I want to get as many words as possible.Code:#include "HashTable.h" #include <string> #include <list> #include <cmath> using namespace std; HashTable::HashTable(){ } HashTable::~HashTable(){ } void HashTable::addWord(string word){ int index = getIndex(word); table[index].insert(table[index].begin(), word); } int HashTable::getIndex(string word){ //this causes negative values for some reason /*int sum=0; for(int i=0;i<(int)word.length();i++) sum+=(word.at(i)!='1'?(int(word.at(i)-'a')):27)*pow(27,(double)word.length()-i); return sum%TABLESIZE;*/ //so I replaced it with this return (word.at(0)*word.at(1)*word.at(2))%TABLESIZE; }



LinkBack URL
About LinkBacks



. And have you tried to find out which part of it is very slow (using profiler)?