Code:
#ifndef _hashTable_h
#define _hashTable_h
#include <iostream>
#include <string>
using namespace std;
template<class KeyType, class DataType>
struct mypair
{ KeyType key;//words
DataType data;//frequencies
};
template<class KeyType, class DataType>
class hashTable
{
public:
hashTable(long int tSize = 0);//defined
//Constructor function. Dynamically allocates the arrays, table
//and status, with dimensions of the next prime number > tSize.
//Initializes the status array to show all positions in the table
//are empty and initializes size, tableSize, numSearches and
//numProbes. Uses non-member function nextPrime.
~hashTable();//defined
void find(const KeyType& k, long int& i, bool& found);
//Looks for key, k, in table. If found then sets i to the subscript
//where the key is located and sets found to true. If not found
//then sets i to the subscript where the key could be inserted
//and sets found to false. Uses linear probing for collision
//resolution with inc determined from the hash value as follows:
//i = hash(...); inc = i; if (inc == 0) inc = tableSize - 1 ; This
//is a little better than always taking inc = 1. Uses non-member
//function hash.
void insert(const KeyType& k, const DataType& d);
//Inserts a pair with key k and data d into the table. If a pair
//with the same key part already occurs in the table then it prints
//an error message and does not insert the pair. If the table is
//now more than 75% full then it rehashes into a larger table. Uses
//the public member function find and the private member function
//rehash.
//************************************************************************
//The following functions provide simialr capabilties to iterators in STL.
//They allow one to examine all elements of the hash table. You could use
//these for example to output all elements of the hash table or to copy all
//elements to a vector outside the hashTable class for sorting, etc.
const mypair<KeyType, DataType>& element(long int i) const;//defined
//Returns the pair at subscript i in the table. Cannot be used
//to assign a new value to the pair.
DataType& data(long int i) const;//defined
//Returns a reference to the data at subscript i in the table.
//Can be used to assign a new value to the data.
long int begin() const;//defined
//Returns the subscript of the first element in the table; returns
//the same value as end(), i.e. tableSize, if the table is empty.
long int end() const;//defined
//Returns subscript just past end of table, i.e. tableSize.
long int next(long int i) const;//defined
//Returns the subscript of the next element after i; if there is no
//next element then returns the same as end(), i.e. tableSize.
//*************************************************************************
float averageSearchLength() const;//defined
//returns the average length of all searches numProbes/numSearches.
private:
mypair<KeyType, DataType>* table; //dynamically allocated array of pairs
short int* status; //dynamically allocated status array; contains
//staus info: 0=empty, 1=occupied
long int size; //number of elements in table
long int tableSize; //size of table array
long int numSearches; //total number of searches done in looking up keys.
//Increment numSearches each time find called.
long int numProbes; //total number of probes done during all searches.
//Increment each time find looks at a spot.
void rehash();
//The rehash function remembers the current table, status and tableSize
//by assigning them to local variables, oldTable, oldStatus and
//oldTableSize. It then dynamically allocates new table and status
//arrays of size the next larger prime than oldTableSize and sets
//tableSize to the new size. It then goes through the old table
//and rehashes each element into the new table using the insert
//function. Uses non-member function nextPrime.
};
template<class KeyType, class DataType>
const mypair<KeyType, DataType>&
hashTable<KeyType, DataType>::element(long int i) const
{ return table[i];
}
template<class KeyType, class DataType>
DataType& hashTable<KeyType, DataType>::data(long int i) const
{ return table[i].data;
}
template<class KeyType, class DataType>
long int hashTable<KeyType, DataType>::begin() const
{ long int i = 0;
while(i < tableSize && status[i] != 1)
i++;
return i;
}
template<class KeyType, class DataType>
long int hashTable<KeyType, DataType>::end() const
{ return tableSize;
}
template<class KeyType, class DataType>
long int hashTable<KeyType, DataType>::next(long int i) const
{
i++;
while(i < tableSize && status[i] != 1)
i++;
return i;
}
template<class KeyType, class DataType>
float hashTable<KeyType, DataType>::averageSearchLength()const
{
return float(numProbes/numSearches);
}
template<class KeyType, class DataType>
hashTable<KeyType, DataType>::hashTable(long int tSize=0)
{
tableSize=nextPrime(tSize);
table = new mypair<KeyType, DataType>[tableSize];
status=new short int [tableSize];
for(int k=0;k<tableSize;k++)status[k]=0;
size=0;
numSearches=0;
numProbes=0;
}
template<class KeyType, class DataType>
hashTable<KeyType, DataType>::~hashTable()
{
delete [] table;
delete [] status;
}
template<class KeyType, class DataType>
void hashTable<KeyType, DataType>::insert(const KeyType& k, const DataType& d)
{
bool found=false;
long int i=0;
find( k, i, found);
if(found == true)cout<<"duplicated not allowed"<<endl;
if(found==false)
{
table[i].data=d;
table[i].key=k;
status[i]=1;
size++;
}
if(size > .75*(tableSize))rehash();
}
template<class KeyType, class DataType>
void hashTable<KeyType, DataType>::find(const KeyType& k, long int& i, bool& found)
{
numSearches++;
i=hash(k, tableSize);
long int inc = i;
if(inc == 0)inc = tableSize -1;
while(status[i] != 1 && found == false)
{
if(table[inc].key==k)
{
found=true;
i=inc;
table[inc].data++;
}
inc++;
numProbes++;
if(inc == tableSize)inc=0;
}
if(found == false)
{
long int k =i;
while(status[k]!=0)
{
k++;
numProbes++;
if(k==tableSize)k=0;
}
i=k;
found=false;
}
}
template<class KeyType, class DataType>
void hashTable<KeyType, DataType>::rehash()
{
//The rehash function remembers the current table, status and tableSize
//by assigning them to local variables, oldTable, oldStatus and
//oldTableSize
mypair<KeyType, DataType> *oldTable=table;
short int* oldStatus = status;
long int oldTableSize = tableSize;
bool doesIt=false;
//It then dynamically allocates new table and status
//arrays of size the next larger prime than oldTableSize and sets
//tableSize to the new size. It then goes through the old table
//and rehashes each element into the new table using the insert
//function. Uses non-member function nextPrime.
table = new mypair<KeyType, DataType>[nextPrime(oldTableSize)];
status = new short int[nextPrime(oldTableSize)];
tableSize=nextPrime(oldTableSize);
for(int h =0; h < tableSize;h++)status[h]=0;
for(int j=0; j < oldTableSize;j++)
{
while(oldStatus[j] != 0)
{
insert(oldTable[j].key, oldTable[j].data);
}
}
delete[] oldTable;
delete[] oldStatus;
}
//************************************************************************
//Non-member functions used in hashTable class.
//This is the hash function to use for strings. If the KeyType is
//anything other than string then this non-member function would
//need to be changed to hash that type of key.
long int hash(string s, int tableSize)
{ unsigned long int j;
unsigned long int sum = 0;
for (j = 0; j < s.length(); j++)
sum = (sum << 5) + s[j];
return sum % tableSize;
}
//Returns the next prime >= n from a list of primes where
//each is about twice as big as the previous one.
long int nextPrime(long int n)
{ long int i;
long int p[18] = { 31, 61, 127, 251, 509, 1021,
2039, 4093, 8191, 16381, 32749, 65521,
131071, 262139, 524287, 1048573, 2097143, 4194301
};
for (i = 0; i < 18; i++)
if (p[i] > n) return p[i];
cerr<<"Next prime too large"<<endl;
return p[17];
}
bool operator < (const mypair<string, int>& p,const mypair<string, int>& q)
{
if(p.data < q.data)return true;
else if ( p.data > q.data)return false;
else
return (p.key < q.key);
}
//************************************************************************
#endif
main file with next word
Code:
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
#include <fstream>
#include <cctype>
#include "hashTable.h"
using namespace std;
void nextWord(ifstream& ifs, string& word, bool& found);
int main()
{
char fname[20];
ifstream infile1;
char ch;
string word;
hashTable<string, int>ht(31);
vector< mypair<string, int> >h;
bool found,doesIt;
long int i=0;
int dat =1;
cout<< "Enter File Name:";
cin>>fname;
infile1.open(fname);
if(infile1.fail())
{
cerr<<"Input file" <<fname<<"doesnt exist"<<endl;
cin.get(ch);
exit(1);
}
while( !infile1.fail())
{
nextWord(infile1, word, found);
if(found == true)
{
ht.find(word, i, doesIt);
if(doesIt == false)
{
ht.insert(word,dat);
}//end of if
}//end of outer if
dat=1;
}//end of while
infile1.close();
for(int s = ht.begin(); s < ht.end(); s++)
{
h.push_back(ht.element(i));
}
sort(&h[0],&h[h.size()]);
cout<<"Average search length in hash table = " <<ht.averageSearchLength()<<endl;
cout<<"Frequency"<<"Word" <<endl;
for(unsigned long int i=0; i < h.size(); i++)
{
cout<<h[i].data << h[1].key<<endl;
}
return 0;
}
void nextWord(ifstream& ifs, string& word, bool& found)
{
int k=0;
int j=0;
string word2;
getline(ifs, word2);
if(ifs == NULL)found=false;
else
{
found=true;
while(isalnum(word2[k]))k++;
while(j < k)
{
word[j]=word2[j++];
}
}
}