Thread: Hash Table review

  1. #1
    C++まいる!Cをこわせ!
    Join Date
    Oct 2007
    Location
    Inside my computer
    Posts
    24,654

    Hash Table review

    Hello all,
    Thought I'd get some quick input on this hash table. Especially on the const part (see Find, IFind and Delete), all of which I really don't know how to implement properly.
    This is a rather naive implementation, but it works, and performance is reasonable, so I'm happy.
    Suggestions are welcome.

    Code:
    Code:
    #include <list>
    #include <vector>
    #include <utility>
    #include <fstream>
    #include <string>
    #include <iostream>
    #include "spooky.h"
    #include <cstdlib>
    #include <ctime>
    #include <cassert>
    #include <exception>
    
    #define BASIC_TYPE(type2) \
    	template<typename T> struct BasicType<type2 T> { typedef typename BasicType<T>::type type; }
    
    template<typename T> struct BasicType { typedef T type; };
    template<typename T> struct BasicType<T*> { typedef typename BasicType<T>::type type; };
    BASIC_TYPE(const);
    BASIC_TYPE(volatile);
    
    #define IIS_PRIMITIVE_FLOAT(type) \
    	template<> struct IIsPrimitive<type> { static const bool value = true; }
    
    #define IIS_PRIMITIVE(type) \
    	template<> struct IIsPrimitive<unsigned type> { static const bool value = true; }; \
    	template<> struct IIsPrimitive<signed type> { static const bool value = true; }
    
    template<typename Key_t> struct IIsPrimitive { static const bool value = false; };
    IIS_PRIMITIVE(char);
    IIS_PRIMITIVE(short);
    IIS_PRIMITIVE(int);
    IIS_PRIMITIVE(long);
    IIS_PRIMITIVE(long long);
    IIS_PRIMITIVE_FLOAT(float);
    IIS_PRIMITIVE_FLOAT(double);
    IIS_PRIMITIVE_FLOAT(long double);
    IIS_PRIMITIVE_FLOAT(void);
    
    template<typename Key_t> struct IsPrimitive { static const bool value = IIsPrimitive<typename BasicType<Key_t>::type>::value; };
    
    template<typename Key_t, typename Type_t>
    class XHashTable
    {
    protected:
    	typedef std::pair<Key_t, Type_t> Pair_t;
    	typedef std::list<Pair_t> List_t;
    	typedef std::vector<List_t> Vector_t;
    
    public:
    	XHashTable(float LoadFactor = 0.9): m_Items(0), m_Collisions(0), m_Values(100), m_LoadFactor(LoadFactor)
    	{
    		assert(LoadFactor > 0);
    	}
    
    	void Add(const Key_t& Key, const Type_t& Value)
    	{
    		assert(m_Items > 0);
    		if ((float)m_Items / m_Values.size() > m_LoadFactor)
    		{
    			m_Values.resize(m_Values.size() * 2);
    			Rehash();
    		}
    
    		bool success;
    		IFind(Key, success);
    		if (success)
    			return;
    		
    		m_Items++;
    		auto Hash = GetHash<IsPrimitive<Key_t>::value>(Key);
    		auto pair = std::make_pair(Key, Value);
    		auto & list = m_Values.at(Hash);
    		if (list.size() > 0)
    			m_Collisions++;
    		assert(m_Collisions >= 0);
    		list.push_back(pair);
    	}
    
    	Type_t& Find(const Key_t& Key)
    	{
    		return (Type_t&)Find2(Key);
    	}
    
    	const Type_t& Find(const Key_t& Key) const
    	{
    		return Find2(Key);
    	}
    
    	bool Delete(const Key_t& key)
    	{
    		bool success;
    		auto ret = IFind(key, success);
    		if (!success)
    			return false;
    		
    		auto it = ret.first;
    		auto list = (List_t*)ret.second;
    		list->erase(it);
    
    		m_Items--;
    		return true;
    	}
    
    	unsigned int GetCollisions() { return m_Collisions; }
    	std::size_t GetStorageSize() { return m_Values.size(); }
    	unsigned int GetMapSize() { return m_Items; }
    
    protected:
    	const Type_t& Find2(const Key_t& Key) const
    	{
    		bool success;
    		auto ret = IFind(Key, success);
    		if (!success)
    			throw std::runtime_error("Key not found!");
    		return ret.first->second;
    	}
    
    	template<bool IsPrimitive /* = true */>
    	unsigned int GetHash(const Key_t& Key) const
    	{
    		return SpookyHash::Hash32(&Key, sizeof(Key), 0) % m_Values.size();
    	}
    
    	template<>
    	unsigned int GetHash<false>(const Key_t& Key) const //!IsPrimitive
    	{
    		return SpookyHash::Hash32(&Key[0], Key.size(), 0) % m_Values.size();
    	}
    
    	std::pair<typename List_t::const_iterator, const List_t*> IFind(const Key_t& Key, bool& Success) const
    	{
    		auto Hash = GetHash<IsPrimitive<Key_t>::value>(Key); 
    		auto & list = m_Values.at(Hash);
    		auto list_end = list.end();
    		for (auto it = list.begin(); it != list_end; ++it)
    		{
    			if (it->first == Key)
    			{
    				Success = true;
    				return std::make_pair(it, &list);
    			}
    		}
    		Success = false;
    		return std::make_pair(list_end, &list);
    	}
    
    	void Rehash()
    	{
    		for (std::size_t it = 0; it < m_Values.size(); ++it)
    		{
    			auto & list = m_Values.at(it);
    			for (auto list_it = list.begin(); list_it != list.end();)
    			{
    				const auto & key = list_it->first;
    				const auto & value = list_it->second;
    				auto NewHash = GetHash<IsPrimitive<Key_t>::value>(key);
    				if (NewHash != it)
    				{
    					m_Values.at(NewHash).push_back(std::make_pair(key, value));
    					auto old_it = list_it++;
    					list.erase(old_it);
    				}
    				else
    					++list_it;
    			}
    		}
    	}
    	
    	Vector_t m_Values;
    	int m_Items;
    	int m_Collisions;
    	float m_LoadFactor;
    };
    Test program (for those who wish to compile and test):
    Code:
    template class XHashTable<std::string, int>;
    
    int main()
    {
    	int lines = 0;
    	XHashTable<std::vector<char>, int> HashTable;
    
    	std::vector<char> buf(1024);
    
    	{
    		std::ifstream testfile("Insert favorite file here", std::ios_base::binary);
    		while (testfile.read(&buf[0], buf.size()))
    		{
    			lines++;
    			HashTable.Add(buf, lines);
    		}
    	}
    
    	lines = 0;
    	{
    		std::ifstream testfile("Insert favorite file here", std::ios_base::binary);
    
    		while (testfile.read(&buf[0], buf.size()))
    		{
    			lines++;
    			if (HashTable.Find(buf) != lines)
    				std::cout << "MISMATCH AT ROW " << lines << ".\n";
    		}
    	}
    
    	std::cout << "Collisions: " << HashTable.GetCollisions() << " (" << (float)HashTable.GetCollisions() / HashTable.GetMapSize() * 100 << "%)\n";
    	std::cout << "Number of items in map: " << HashTable.GetMapSize() << "\nSize of internal vector: " << HashTable.GetStorageSize() << "\n";
    	
    	return 0;
    }
    Quote Originally Posted by Adak View Post
    io.h certainly IS included in some modern compilers. It is no longer part of the standard for C, but it is nevertheless, included in the very latest Pelles C versions.
    Quote Originally Posted by Salem View Post
    You mean it's included as a crutch to help ancient programmers limp along without them having to relearn too much.

    Outside of your DOS world, your header file is meaningless.

  2. #2
    - - - - - - - - oogabooga's Avatar
    Join Date
    Jan 2008
    Posts
    2,808
    Is there supposed to be a spooky.h?
    I get a lot of errors trying to compile it.

  3. #3
    C++まいる!Cをこわせ!
    Join Date
    Oct 2007
    Location
    Inside my computer
    Posts
    24,654
    Did I forget to mention that? Weird. I thought I did. Very sorry about that.
    I used the Spooky Hash algorithm: SpookyHash: a 128-bit noncryptographic hash
    Last edited by Elysia; 01-14-2012 at 11:33 AM.
    Quote Originally Posted by Adak View Post
    io.h certainly IS included in some modern compilers. It is no longer part of the standard for C, but it is nevertheless, included in the very latest Pelles C versions.
    Quote Originally Posted by Salem View Post
    You mean it's included as a crutch to help ancient programmers limp along without them having to relearn too much.

    Outside of your DOS world, your header file is meaningless.

  4. #4
    - - - - - - - - oogabooga's Avatar
    Join Date
    Jan 2008
    Posts
    2,808
    Here's direct links to spooky.h and spooky.cpp:
    http://burtleburtle.net/bob/c/spooky.h
    http://burtleburtle.net/bob/c/spooky.cpp

    But I still can't get it to compile (with CodeBlocks, GCC).
    I created a main.cpp with your test code added to the end of the hash table code.
    Here's the first two errors I get:
    HashTable\main.cpp 124 error: explicit specialization in non-namespace scope 'class XHashTable<Key_t, Type_t>'

    HashTable\main.cpp 125 error: template-id 'GetHash<false>' in declaration of primary template

  5. #5
    C++まいる!Cをこわせ!
    Join Date
    Oct 2007
    Location
    Inside my computer
    Posts
    24,654
    I honestly don't know how to fix all the problems.
    It's the const implementation that screws everything up.
    Here is updated code, but I don't know if it will compile (a perfect compiler shouldn't compile it due to the Delete function):
    Code:
    // Hash Queue.cpp : Defines the entry point for the console application.
    //
    
    #include <list>
    #include <vector>
    #include <utility>
    #include <fstream>
    #include <string>
    #include <iostream>
    #include "spooky.h"
    #include <cstdlib>
    #include <ctime>
    #include <cassert>
    #include <exception>
    
    #define BASIC_TYPE(type2) \
    	template<typename T> struct BasicType<type2 T> { typedef typename BasicType<T>::type type; }
    
    template<typename T> struct BasicType { typedef T type; };
    template<typename T> struct BasicType<T*> { typedef typename BasicType<T>::type type; };
    BASIC_TYPE(const);
    BASIC_TYPE(volatile);
    
    #define IIS_PRIMITIVE_FLOAT(type) \
    	template<> struct IIsPrimitive<type> { static const bool value = true; }
    
    #define IIS_PRIMITIVE(type) \
    	template<> struct IIsPrimitive<unsigned type> { static const bool value = true; }; \
    	template<> struct IIsPrimitive<signed type> { static const bool value = true; }
    
    template<typename Key_t> struct IIsPrimitive { static const bool value = false; };
    IIS_PRIMITIVE(char);
    IIS_PRIMITIVE(short);
    IIS_PRIMITIVE(int);
    IIS_PRIMITIVE(long);
    IIS_PRIMITIVE(long long);
    IIS_PRIMITIVE_FLOAT(float);
    IIS_PRIMITIVE_FLOAT(double);
    IIS_PRIMITIVE_FLOAT(long double);
    IIS_PRIMITIVE_FLOAT(void);
    
    template<typename Key_t> struct IsPrimitive { static const bool value = IIsPrimitive<typename BasicType<Key_t>::type>::value; };
    
    template<bool IsPrimitive, typename Key_t> class XHashTableImpl { };
    
    
    template<typename Key_t>
    class XHashTableImpl<true, Key_t>
    {
    public:
    	static unsigned int GetHash(const Key_t& Key, std::size_t Modulus)
    	{
    		return SpookyHash::Hash64(&Key, sizeof(Key), 0) % Modulus;
    	}
    };
    
    template<typename Key_t>
    class XHashTableImpl<false, Key_t>
    {
    public:
    	static unsigned int GetHash(const Key_t& Key, std::size_t Modulus)
    	{
    		return SpookyHash::Hash64(&Key[0], Key.size(), 0) % Modulus;
    	}
    };
    
    template<typename Key_t, typename Type_t>
    class XHashTable
    {
    protected:
    	typedef std::pair<Key_t, Type_t> Pair_t;
    	typedef std::list<Pair_t> List_t;
    	typedef std::vector<List_t> Vector_t;
    
    public:
    	XHashTable(float LoadFactor = 0.9): m_Items(0), m_Collisions(0), m_Values(100), m_LoadFactor(LoadFactor)
    	{
    		assert(LoadFactor > 0);
    	}
    
    	void Add(const Key_t& Key, const Type_t& Value)
    	{
    		assert(m_Items > 0);
    		if ((float)m_Items / m_Values.size() > m_LoadFactor)
    		{
    			m_Values.resize(m_Values.size() * 2);
    			Rehash();
    		}
    
    		bool success;
    		IFind(Key, success);
    		if (success)
    			return;
    		
    		m_Items++;
    		auto Hash = GetHash<IsPrimitive<Key_t>::value>(Key);
    		auto pair = std::make_pair(Key, Value);
    		auto & list = m_Values.at(Hash);
    		if (list.size() > 0)
    			m_Collisions++;
    		assert(m_Collisions >= 0);
    		list.push_back(pair);
    	}
    
    	Type_t& Find(const Key_t& Key)
    	{
    		return (Type_t&)Find2(Key);
    	}
    
    	const Type_t& Find(const Key_t& Key) const
    	{
    		return Find2(Key);
    	}
    
    	bool Delete(const Key_t& key)
    	{
    		bool success;
    		auto ret = IFind(key, success);
    		if (!success)
    			return false;
    		
    		auto it = ret.first;
    		auto list = (List_t*)ret.second;
    		list->erase(it);
    
    		m_Items--;
    		return true;
    	}
    
    	unsigned int GetCollisions() { return m_Collisions; }
    	std::size_t GetStorageSize() { return m_Values.size(); }
    	unsigned int GetMapSize() { return m_Items; }
    
    	unsigned int GetLongestChainLength()
    	{
    		unsigned int MaxChainLength = 0;
    		for (auto it = m_Values.begin(); it != m_Values.end(); ++it)
    			MaxChainLength = std::max((*it).size(), MaxChainLength);
    		return MaxChainLength;
    	}
    
    	double GetAverageChainLength()
    	{
    		long long ChainLength = 0;
    		long long DivideBy = 0;
    		for (auto it = m_Values.begin(); it != m_Values.end(); ++it)
    		{
    			auto size = (*it).size();
    			if (size > 0)
    			{
    				ChainLength += size;
    				DivideBy++;
    			}
    			assert(ChainLength > 0);
    		}
    
    		double ret = (double)ChainLength / DivideBy;
    		return ret;
    	}
    
    protected:
    	template<bool IsPrimitive>
    	unsigned int GetHash(const Key_t& Key) const
    	{
    		return XHashTableImpl<IsPrimitive, Key_t>::GetHash(Key, m_Values.size());
    	}
    
    	const Type_t& Find2(const Key_t& Key) const
    	{
    		bool success;
    		auto ret = IFind(Key, success);
    		if (!success)
    			throw std::runtime_error("Key not found!");
    		return ret.first->second;
    	}
    
    	std::pair<typename List_t::const_iterator, const List_t*> IFind(const Key_t& Key, bool& Success) const
    	{
    		auto Hash = GetHash<IsPrimitive<Key_t>::value>(Key); 
    		auto & list = m_Values.at(Hash);
    		auto list_end = list.end();
    		for (auto it = list.begin(); it != list_end; ++it)
    		{
    			if (it->first == Key)
    			{
    				Success = true;
    				return std::make_pair(it, &list);
    			}
    		}
    		Success = false;
    		return std::make_pair(list_end, &list);
    	}
    
    	void Rehash()
    	{
    		for (std::size_t it = 0; it < m_Values.size(); ++it)
    		{
    			auto & list = m_Values.at(it);
    			for (auto list_it = list.begin(); list_it != list.end();)
    			{
    				const auto & key = list_it->first;
    				const auto & value = list_it->second;
    				auto NewHash = GetHash<IsPrimitive<Key_t>::value>(key);
    				if (NewHash != it)
    				{
    					m_Values.at(NewHash).push_back(std::make_pair(key, value));
    					auto old_it = list_it++;
    					list.erase(old_it);
    				}
    				else
    					++list_it;
    			}
    		}
    	}
    	
    	Vector_t m_Values;
    	int m_Items;
    	int m_Collisions;
    	float m_LoadFactor;
    };
    
    template class XHashTable<std::string, int>;
    
    int main()
    {
    	int lines = 0;
    	XHashTable<std::vector<char>, int> HashTable;
    
    	std::vector<char> buf(1024);
    	const int NumLines = 2000000;
    
    	{
    		std::ifstream testfile("C:/Server/Video/Walkthrough and let's plays/Agarest/Record of Agarest War Zero - Digest Mode Fifth Generation [HD] Part 1.mp4", std::ios_base::binary);
    		while (testfile.read(&buf[0], buf.size()) && lines < NumLines)
    		{
    			lines++;
    			HashTable.Add(buf, lines);
    		}
    	}
    
    	lines = 0;
    	{
    		std::ifstream testfile("C:/Server/Video/Walkthrough and let's plays/Agarest/Record of Agarest War Zero - Digest Mode Fifth Generation [HD] Part 1.mp4", std::ios_base::binary);
    
    		while (testfile.read(&buf[0], buf.size()) && lines < NumLines)
    		{
    			lines++;
    			if (HashTable.Find(buf) != lines)
    				std::cout << "MISMATCH AT ROW " << lines << ".\n";
    		}
    	}
    
    	std::cout << "Collisions: " << HashTable.GetCollisions() << " (" << (float)HashTable.GetCollisions() / HashTable.GetMapSize() * 100 << "%)\n";
    	std::cout << "Number of items in map: " << HashTable.GetMapSize() << "\nSize of internal vector: " << HashTable.GetStorageSize() << "\n";
    	std::cout << "Max chain length: " << HashTable.GetLongestChainLength() << "\nAverage chain length: " << HashTable.GetAverageChainLength() << "\n";
    	
    	return 0;
    }
    Quote Originally Posted by Adak View Post
    io.h certainly IS included in some modern compilers. It is no longer part of the standard for C, but it is nevertheless, included in the very latest Pelles C versions.
    Quote Originally Posted by Salem View Post
    You mean it's included as a crutch to help ancient programmers limp along without them having to relearn too much.

    Outside of your DOS world, your header file is meaningless.

  6. #6
    - - - - - - - - oogabooga's Avatar
    Join Date
    Jan 2008
    Posts
    2,808
    My main problem was not enabling C++0x support. Oops!
    But, as you said about the Delete function, I still get these errors:
    Code:
    HashTable\main.cpp||In member function 'bool XHashTable<Key_t, Type_t>::Delete(const Key_t&) [with Key_t = std::basic_string<char>, Type_t = int]':|
    HashTable\main.cpp:223|16|instantiated from here|
    HashTable\main.cpp|125|error: no matching function for call to 'std::list<std::pair<std::basic_string<char>, int>, std::allocator<std::pair<std::basic_string<char>, int> > >::erase(std::_List_const_iterator<std::pair<std::basic_string<char>, int> >&)'|
    HashTable\main.cpp|125|note: candidates are:|
    
    c:\mingw\bin\..\lib\gcc\mingw32\4.6.1\include\c++\bits\list.tcc|109|note: std::list<_Tp, _Alloc>::iterator std::list<_Tp, _Alloc>::erase(std::list<_Tp, _Alloc>::iterator) [with _Tp = std::pair<std::basic_string<char>, int>, _Alloc = std::allocator<std::pair<std::basic_string<char>, int> >, std::list<_Tp, _Alloc>::iterator = std::_List_iterator<std::pair<std::basic_string<char>, int> >]|
    c:\mingw\bin\..\lib\gcc\mingw32\4.6.1\include\c++\bits\list.tcc|109|note:   no known conversion for argument 1 from 'std::_List_const_iterator<std::pair<std::basic_string<char>, int> >' to 'std::_List_iterator<std::pair<std::basic_string<char>, int> >'|
    c:\mingw\bin\..\lib\gcc\mingw32\4.6.1\include\c++\bits\stl_list.h|1160|note: std::list<_Tp, _Alloc>::iterator std::list<_Tp, _Alloc>::erase(std::list<_Tp, _Alloc>::iterator, std::list<_Tp, _Alloc>::iterator) [with _Tp = std::pair<std::basic_string<char>, int>, _Alloc = std::allocator<std::pair<std::basic_string<char>, int> >, std::list<_Tp, _Alloc>::iterator = std::_List_iterator<std::pair<std::basic_string<char>, int> >]|
    c:\mingw\bin\..\lib\gcc\mingw32\4.6.1\include\c++\bits\stl_list.h|1160|note:   candidate expects 2 arguments, 1 provided|
    
    ||=== Build finished: 6 errors, 0 warnings ===|

  7. #7
    C++まいる!Cをこわせ!
    Join Date
    Oct 2007
    Location
    Inside my computer
    Posts
    24,654
    Well, comment it out, write a new find function or switch to msvc to get it to compile. Const issues must be solved before I can get it to compile on gcc.

  8. #8
    Algorithm Dissector iMalc's Avatar
    Join Date
    Dec 2005
    Location
    New Zealand
    Posts
    6,318
    Any particular reason you're doing (*it).size() rather than just it->size()?

    Should use !list.empty() here:
    Code:
    if (list.size() > 0)
    IIRC on gcc list size() is O(n) in order to perform constant-time splice.

    Do you really need the cast here:
    Code:
            auto it = ret.first;
            auto list = (List_t*)ret.second;
            list->erase(it);
    What about just:
    Code:
    ret.second->erase(ret.first);
    These 5 Get methods are missing their const:
    GetCollisions
    GetStorageSize
    GetMapSize
    GetLongestChainLength
    GetAverageChainLength
    My homepage
    Advice: Take only as directed - If symptoms persist, please see your debugger

    Linus Torvalds: "But it clearly is the only right way. The fact that everybody else does it some other way only means that they are wrong"

  9. #9
    C++まいる!Cをこわせ!
    Join Date
    Oct 2007
    Location
    Inside my computer
    Posts
    24,654
    Re erase: it is to make it more readable. What is it->first? As for the cast... Yes, since it is a pointer to const. I want to get rid of it, however.

  10. #10
    Algorithm Dissector iMalc's Avatar
    Join Date
    Dec 2005
    Location
    New Zealand
    Posts
    6,318
    You could make Find2 a templated friend function that either takes a const XHashTable or non-const XHashTable as the first argument. Oh and you'd also make the return value just an out-parameter. One implementation, and const optional on all the required arguments.
    Oh wait, you'd have to do that on IFind as well. That's a few find layers you have there.
    Last edited by iMalc; 01-14-2012 at 02:32 PM.
    My homepage
    Advice: Take only as directed - If symptoms persist, please see your debugger

    Linus Torvalds: "But it clearly is the only right way. The fact that everybody else does it some other way only means that they are wrong"

  11. #11
    C++まいる!Cをこわせ!
    Join Date
    Oct 2007
    Location
    Inside my computer
    Posts
    24,654
    Quote Originally Posted by iMalc View Post
    Any particular reason you're doing (*it).size() rather than just it->size()?
    Ah, I think I simply mistook it for a pair instead of an iterator. I should fix that.

    Should use !list.empty() here:
    Code:
    if (list.size() > 0)
    IIRC on gcc list size() is O(n) in order to perform constant-time splice.
    Good idea.

    These 5 Get methods are missing their const:
    GetCollisions
    GetStorageSize
    GetMapSize
    GetLongestChainLength
    GetAverageChainLength
    I really tend to forget about const until I run into const issues.
    (For some reason, I always tend to be picky about passing by const reference, though.)
    But you are right, I should fix that.

    Quote Originally Posted by iMalc View Post
    You could make Find2 a templated friend function that either takes a const XHashTable or non-const XHashTable as the first argument. Oh and you'd also make the return value just an out-parameter. One implementation, and const optional on all the required arguments.
    Oh wait, you'd have to do that on IFind as well. That's a few find layers you have there.
    I'm going to have to try that. Thanks.
    Yeah, the reason for the layers of Find is because both Find functions take a const reference, so calling the non-const function from the const function isn't possible (I would just get infinite recursion unless I am missing a way to convince the compiler to call the other function).
    Quote Originally Posted by Adak View Post
    io.h certainly IS included in some modern compilers. It is no longer part of the standard for C, but it is nevertheless, included in the very latest Pelles C versions.
    Quote Originally Posted by Salem View Post
    You mean it's included as a crutch to help ancient programmers limp along without them having to relearn too much.

    Outside of your DOS world, your header file is meaningless.

  12. #12
    C++まいる!Cをこわせ!
    Join Date
    Oct 2007
    Location
    Inside my computer
    Posts
    24,654
    I finally got it to compile without warnings with comeau, at the cost of increased vulgarity:

    Code:
    #include <list>
    #include <vector>
    #include <utility>
    #include <fstream>
    #include <string>
    #include <iostream>
    #include "spooky.h"
    #include <ctime>
    #include <cassert>
    #include <exception>
    
    #define BASIC_TYPE(type2) \
    	template<typename T> struct BasicType<type2 T> { typedef typename BasicType<T>::type type; }
    
    template<typename T> struct BasicType { typedef T type; };
    template<typename T> struct BasicType<T*> { typedef typename BasicType<T>::type type; };
    BASIC_TYPE(const);
    BASIC_TYPE(volatile);
    
    #define IIS_PRIMITIVE_FLOAT(type) \
    	template<> struct IIsPrimitive<type> { static const bool value = true; }
    
    #define IIS_PRIMITIVE(type) \
    	template<> struct IIsPrimitive<unsigned type> { static const bool value = true; }; \
    	template<> struct IIsPrimitive<signed type> { static const bool value = true; }
    
    template<typename Key_t> struct IIsPrimitive { static const bool value = false; };
    IIS_PRIMITIVE(char);
    IIS_PRIMITIVE(short);
    IIS_PRIMITIVE(int);
    IIS_PRIMITIVE(long);
    IIS_PRIMITIVE(long long);
    IIS_PRIMITIVE_FLOAT(float);
    IIS_PRIMITIVE_FLOAT(double);
    IIS_PRIMITIVE_FLOAT(long double);
    IIS_PRIMITIVE_FLOAT(void);
    
    template<typename Key_t> struct IsPrimitive { static const bool value = IIsPrimitive<typename BasicType<Key_t>::type>::value; };
    
    template<bool IsPrimitive, typename Key_t> class XHashTableImpl { };
    
    template<typename Key_t, typename Type_t> class XHashTable;
    
    template<typename Key_t>
    class XHashTableImpl<true, Key_t>
    {
    	template<typename Key_t_, typename Value_t_> friend class XHashTable;
    	static unsigned int GetHash(const Key_t& Key, std::size_t Modulus)
    	{
    		return SpookyHash::Hash64(&Key, sizeof(Key), 0) % Modulus;
    	}
    };
    
    template<typename Key_t>
    class XHashTableImpl<false, Key_t>
    {
    	template<typename Key_t_, typename Value_t_> friend class XHashTable;
    	static unsigned int GetHash(const Key_t& Key, std::size_t Modulus)
    	{
    		return SpookyHash::Hash64(&Key[0], Key.size(), 0) % Modulus;
    	}
    };
    
    
    template<typename Key_t, typename Type_t, bool Const> class XHashTableImpl3;
    
    template<typename Key_t, typename Type_t, bool Const> class XHashTableImpl2 {};
    
    template<typename Key_t, typename Type_t>
    class XHashTableImpl2<Key_t, Type_t, false>
    {
    	template<typename Key_t_, typename Value_t_> friend class XHashTable;
    	template<typename Key_t_, typename Value_t_, bool Const> friend class XHashTableImpl3;
    	typedef XHashTable<Key_t, Type_t> HashTable_t;
    	typedef typename XHashTable<Key_t, Type_t>::List_t List_t;
    	typedef typename List_t::iterator It_t;
    };
    
    template<typename Key_t, typename Type_t>
    class XHashTableImpl2<Key_t, Type_t, true>
    {
    	template<typename Key_t_, typename Value_t_> friend class XHashTable;
    	template<typename Key_t_, typename Value_t_, bool Const> friend class XHashTableImpl3;
    	typedef const XHashTable<Key_t, Type_t> HashTable_t;
    	typedef const typename XHashTable<Key_t, Type_t>::List_t List_t;
    	typedef typename List_t::const_iterator It_t;
    };
    
    template<typename Key_t, typename Type_t, bool Const>
    class XHashTableImpl3
    {
    	template<typename Key_t_, typename Value_t_> friend class XHashTable;
    
    	static bool IFind(typename XHashTableImpl2<Key_t, Type_t, Const>::HashTable_t& Table,
    		const Key_t& Key,
    		typename XHashTableImpl2<Key_t, Type_t, Const>::List_t*& List_out,
    		typename XHashTableImpl2<Key_t, Type_t, Const>::It_t& it_out)
    	{
    		auto Hash = Table.GetHash<IsPrimitive<Key_t>::value>(Key);
    		auto & list = Table.m_Values.at(Hash);
    		auto list_end = list.end();
    		for (auto it = list.begin(); it != list_end; ++it)
    		{
    			if (it->first == Key)
    			{
    				it_out = it;
    				List_out = &list;
    				return true;
    			}
    		}
    		return false;
    	}
    
    	static bool IFind(typename XHashTableImpl2<Key_t, Type_t, Const>::HashTable_t& Table, const Key_t& Key)
    	{
    		typename XHashTableImpl2<Key_t, Type_t, Const>::List_t* List;
    		typename XHashTableImpl2<Key_t, Type_t, Const>::It_t it;
    		return IFind(Table, Key, List, it);
    	}
    
    	static bool IFind(typename XHashTableImpl2<Key_t, Type_t, Const>::HashTable_t& Table, const Key_t& Key, typename XHashTableImpl2<Key_t, Type_t, Const>::It_t& it)
    	{
    		typename XHashTableImpl2<Key_t, Type_t, Const>::List_t* List;
    		return IFind(Table, Key, List, it);
    	}
    };
    
    template<typename Key_t, typename Type_t>
    class XHashTable
    {
    protected:
    	template<bool IsPrimitive, typename Key_t_> friend class XHashTableImpl;
    	template<typename Key_t_, typename Type_t_, bool Const> friend class XHashTableImpl2;
    	template<typename Key_t_, typename Type_t_, bool Const> friend class XHashTableImpl3;
    	#define XHASH_TABLE_IMPL_1(IsPrimitive) XHashTableImpl<IsPrimitive, Key_t>
    	#define XHASH_TABLE_IMPL_2(Const) XHashTableImpl2<Key_t, Type_t, Const>
    	#define XHASH_TABLE_IMPL_3(Const) XHashTableImpl3<Key_t, Type_t, Const>
    
    	typedef std::pair<Key_t, Type_t> Pair_t;
    	typedef std::list<Pair_t> List_t;
    	typedef std::vector<List_t> Vector_t;
    
    public:
    	XHashTable(float LoadFactor = 0.9): m_Items(0), m_Collisions(0), m_Values(100), m_LoadFactor(LoadFactor)
    	{
    		assert(LoadFactor > 0);
    	}
    
    	void Add(const Key_t& Key, const Type_t& Value)
    	{
    		assert(m_Items > 0);
    		if ((float)m_Items / m_Values.size() > m_LoadFactor)
    		{
    			m_Values.resize(m_Values.size() * 2);
    			Rehash();
    		}
    
    		if (XHASH_TABLE_IMPL_3(true)::IFind(*this, Key))
    			return;
    
    		m_Items++;
    		auto Hash = GetHash<IsPrimitive<Key_t>::value>(Key);
    		auto pair = std::make_pair(Key, Value);
    		auto & list = m_Values.at(Hash);
    		if (list.size() > 0)
    			m_Collisions++;
    		assert(m_Collisions >= 0);
    		list.push_back(pair);
    	}
    
    	Type_t& Find(const Key_t& Key)
    	{
    		return const_cast<Type_t&>(Find2(Key));
    	}
    
    	const Type_t& Find(const Key_t& Key) const
    	{
    		return Find2(Key);
    	}
    
    	bool Delete(const Key_t& key)
    	{
    		List_t* List;
    		typename List_t::iterator it;
    		if (!XHASH_TABLE_IMPL_3(false)::IFind(*this, key, List, it))
    			return false;
    
    		List->erase(it);
    		m_Items--;
    		return true;
    	}
    
    	unsigned int GetCollisions() const { return m_Collisions; }
    	std::size_t GetStorageSize() const { return m_Values.size(); }
    	unsigned int GetMapSize() const { return m_Items; }
    
    	unsigned int GetLongestChainLength() const
    	{
    		unsigned int MaxChainLength = 0;
    		for (auto it = m_Values.begin(); it != m_Values.end(); ++it)
    			MaxChainLength = std::max(it->size(), MaxChainLength);
    		return MaxChainLength;
    	}
    
    	double GetAverageChainLength() const
    	{
    		long long ChainLength = 0;
    		long long DivideBy = 0;
    		for (auto it = m_Values.begin(); it != m_Values.end(); ++it)
    		{
    			auto size = it->size();
    			if (size > 0)
    			{
    				ChainLength += size;
    				DivideBy++;
    			}
    			assert(ChainLength > 0);
    		}
    
    		double ret = (double)ChainLength / DivideBy;
    		return ret;
    	}
    
    protected:
    	template<bool IsPrimitive>
    	unsigned int GetHash(const Key_t& Key) const
    	{
    		return XHashTableImpl<IsPrimitive, Key_t>::GetHash(Key, m_Values.size());
    	}
    
    	const Type_t& Find2(const Key_t& Key) const
    	{
    		typename XHashTableImpl2<Key_t, Type_t, true>::It_t it;
    		if (!XHASH_TABLE_IMPL_3(true)::IFind(*this, Key, it))
    			throw std::runtime_error("Key not found!");
    		return it->second;
    	}
    
    	void Rehash()
    	{
    		for (std::size_t it = 0; it < m_Values.size(); ++it)
    		{
    			auto & list = m_Values.at(it);
    			for (auto list_it = list.begin(); list_it != list.end();)
    			{
    				const auto & key = list_it->first;
    				const auto & value = list_it->second;
    				auto NewHash = GetHash<IsPrimitive<Key_t>::value>(key);
    				if (NewHash != it)
    				{
    					m_Values.at(NewHash).push_back(std::make_pair(key, value));
    					auto old_it = list_it++;
    					list.erase(old_it);
    				}
    				else
    					++list_it;
    			}
    		}
    	}
    
    	Vector_t m_Values;
    	int m_Items;
    	int m_Collisions;
    	float m_LoadFactor;
    };
    
    template class XHashTable<std::string, int>;
    
    int main()
    {
    	int lines = 0;
    	XHashTable<std::vector<char>, int> HashTable;
    
    	std::vector<char> buf(1024);
    	const int NumLines = 2000000;
    
    	{
    		std::ifstream testfile("C:/Server/Video/Walkthrough and let's plays/Agarest/Record of Agarest War Zero - Digest Mode Fifth Generation [HD] Part 1.mp4", std::ios_base::binary);
    		while (testfile.read(&buf[0], buf.size()) && lines < NumLines)
    		{
    			lines++;
    			HashTable.Add(buf, lines);
    		}
    	}
    
    	lines = 0;
    	{
    		std::ifstream testfile("C:/Server/Video/Walkthrough and let's plays/Agarest/Record of Agarest War Zero - Digest Mode Fifth Generation [HD] Part 1.mp4", std::ios_base::binary);
    
    		while (testfile.read(&buf[0], buf.size()) && lines < NumLines)
    		{
    			lines++;
    			if (HashTable.Find(buf) != lines)
    				std::cout << "MISMATCH AT ROW " << lines << ".\n";
    		}
    	}
    
    	std::cout << "Collisions: " << HashTable.GetCollisions() << " (" << (float)HashTable.GetCollisions() / HashTable.GetMapSize() * 100 << "%)\n";
    	std::cout << "Number of items in map: " << HashTable.GetMapSize() << "\nSize of internal vector: " << HashTable.GetStorageSize() << "\n";
    	std::cout << "Max chain length: " << HashTable.GetLongestChainLength() << "\nAverage chain length: " << HashTable.GetAverageChainLength() << "\n";
    
    	return 0;
    }
    This seems to take care of the const issues. Is there anything else I'm missing?

    EDIT: Attaching source since it's organized into files if that's more convenient. Rename txt to zip to unpack.
    Attached Files Attached Files
    Last edited by Elysia; 01-15-2012 at 07:28 AM.
    Quote Originally Posted by Adak View Post
    io.h certainly IS included in some modern compilers. It is no longer part of the standard for C, but it is nevertheless, included in the very latest Pelles C versions.
    Quote Originally Posted by Salem View Post
    You mean it's included as a crutch to help ancient programmers limp along without them having to relearn too much.

    Outside of your DOS world, your header file is meaningless.

  13. #13
    Master Apprentice phantomotap's Avatar
    Join Date
    Jan 2008
    Posts
    5,108
    O_o

    The `std::runtime_error' class and friends live in the "stdexcept" header and a few other bits are off.

    Can you post a minimum example of what `const' issue you are trying to solve? (Note: I don't want to see how you are trying to solve the issue. I can see that as it is; I want to see a direct example of the issue you are trying to solve.)

    Soma

  14. #14
    C++まいる!Cをこわせ!
    Join Date
    Oct 2007
    Location
    Inside my computer
    Posts
    24,654
    The const issues can be seen if you look at post #5.
    Basically they are Find (casts away const-ness from Find2), Delete (casts away constness) and IFind (iterator vs const_iterator).

    Minimal source of these functions:
    Code:
        Type_t& Find(const Key_t& Key)
        {
            return (Type_t&)Find2(Key);
        }
     
        const Type_t& Find(const Key_t& Key) const
        {
            return Find2(Key);
        }
    
        const Type_t& Find2(const Key_t& Key) const
        {
            bool success;
            auto ret = IFind(Key, success);
            if (!success)
                throw std::runtime_error("Key not found!");
            return ret.first->second;
        }
     
        std::pair<typename List_t::const_iterator, const List_t*> IFind(const Key_t& Key, bool& Success) const
        {
            auto Hash = GetHash<IsPrimitive<Key_t>::value>(Key);
            auto & list = m_Values.at(Hash);
            auto list_end = list.end();
            for (auto it = list.begin(); it != list_end; ++it)
            {
                if (it->first == Key)
                {
                    Success = true;
                    return std::make_pair(it, &list);
                }
            }
            Success = false;
            return std::make_pair(list_end, &list);
        }
    
        bool Delete(const Key_t& key)
        {
            bool success;
            auto ret = IFind(key, success);
            if (!success)
                return false;
             
            auto it = ret.first;
            auto list = (List_t*)ret.second;
            list->erase(it);
     
            m_Items--;
            return true;
        }
    Quote Originally Posted by Adak View Post
    io.h certainly IS included in some modern compilers. It is no longer part of the standard for C, but it is nevertheless, included in the very latest Pelles C versions.
    Quote Originally Posted by Salem View Post
    You mean it's included as a crutch to help ancient programmers limp along without them having to relearn too much.

    Outside of your DOS world, your header file is meaningless.

  15. #15
    Registered User VirtualAce's Avatar
    Join Date
    Aug 2001
    Posts
    9,607
    I really tend to forget about const until I run into const issues.
    I used to be the same until my former engineering lead drilled const correctness into my brain. I had the same reply as you did and then he had me go back into some existing code and implement const correctness where needed. After a few files I about went crazy. Since that time I am a pre-const thought kind of fella.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. more serious hash table
    By Aisthesis in forum C++ Programming
    Replies: 9
    Last Post: 09-22-2010, 11:06 PM
  2. Hash Table
    By mrsirpoopsalot in forum C++ Programming
    Replies: 11
    Last Post: 11-14-2009, 09:10 PM
  3. hash table?
    By bennyboy in forum C Programming
    Replies: 2
    Last Post: 05-10-2007, 03:06 AM
  4. hash table
    By wazza13 in forum C++ Programming
    Replies: 0
    Last Post: 04-19-2002, 01:57 AM
  5. Hash Table
    By ihsir in forum C++ Programming
    Replies: 0
    Last Post: 04-13-2002, 12:08 AM