Can someone scan through my code and tell me what I'm doing wrong?
I get 0 as the result for my probability values, and 0.151722 for the multiplicative hash, which I'm not sure is correct either.
Code:
#include <cstdint>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

#include <boost/math/distributions/chi_squared.hpp>
// Hashes a string to its length, reduced modulo 65536.
//
// str: the string to hash; taken by const reference so that no copy is made
//      per call.
// Returns a value in [0, 65535].
int stringLengthHash(const std::string& str) {
    return static_cast<int>(str.length() % 65536);
}
// Hashes a string to the value of its first byte.
//
// str: the string to hash; taken by const reference so that no copy is made
//      per call.
// Returns the first byte as a value in [0, 255], or 0 for an empty string.
int firstCharacterHash(const std::string& str) {
    if (str.empty()) {
        return 0;
    }
    // Convert through unsigned char: plain char may be signed, in which case
    // bytes >= 0x80 (e.g. UTF-8 lead bytes) would be sign-extended to
    // 65408..65535 and the hash would differ between platforms.
    return static_cast<unsigned char>(str.front());
}
// Hashes a string to the sum of its byte values, reduced modulo 65536.
//
// str: the string to hash; taken by const reference so that no copy is made
//      per call.
// Returns a value in [0, 65535]; 0 for an empty string.
int additiveChecksumHash(const std::string& str) {
    std::uint16_t sum = 0;
    for (const char c : str) {
        // Add the byte as an unsigned value (0..255): plain char may be signed,
        // which would sign-extend bytes >= 0x80 and make the checksum
        // platform-dependent. The 16-bit accumulator wraps modulo 65536.
        sum = static_cast<std::uint16_t>(sum + static_cast<unsigned char>(c));
    }
    return sum;
}
// Hashes a string to the sum of its byte values, reduced modulo 65413 after
// every byte.
//
// str: the string to hash; taken by const reference so that no copy is made
//      per call.
// Returns a value in [0, 65412]; 0 for an empty string.
int remainderHash(const std::string& str) {
    const int modulus = 65413;
    int sum = 0;
    for (const char c : str) {
        // Use the unsigned byte value (0..255): plain char may be signed, which
        // would sign-extend bytes >= 0x80 and make the hash platform-dependent.
        sum = (sum + static_cast<unsigned char>(c)) % modulus;
    }
    return sum;
}
// Polynomial hash: for each byte, hash = (hash * 31 + byte) mod 65536,
// starting from 0.
//
// str: the string to hash; taken by const reference so that no copy is made
//      per call.
// Returns a value in [0, 65535]; 0 for an empty string.
int multiplicativeHash(const std::string& str) {
    std::uint32_t hash = 0;
    for (const char c : str) {
        // Use the unsigned byte value (0..255): plain char may be signed, which
        // would sign-extend bytes >= 0x80 and make the hash platform-dependent.
        // hash stays below 65536, so hash * 31 + 255 cannot overflow 32 bits.
        hash = (hash * 31u + static_cast<unsigned char>(c)) % 65536u;
    }
    return static_cast<int>(hash);
}
float performChiSquareTest(const std::vector<int>& hashes) {
double totalWords = 100000.0;
double expected = totalWords / 65536;
double chiSquare = 0.0;
for (int count : hashes) {
chiSquare += (count - expected) * (count - expected) / expected;
}
boost::math::chi_squared c2d(65535.0);
float p = 1.0 - boost::math::cdf(c2d, chiSquare);
return p;
}
// Reads every whitespace-separated word from the system dictionary, tallies
// how often each of the five hash functions produces each value, and prints
// the chi-square p-value for each tally.
//
// Returns 0 on success, or 1 if the dictionary file cannot be opened.
int main() {
    const char* const dictionaryPath = "/usr/share/dict/words";
    // One counter per index 0..65535; the hash results are used as indices.
    const std::size_t bucketCount = 65536;

    std::ifstream file(dictionaryPath);
    if (!file) {
        // Without this check an unreadable file leaves every tally at zero and
        // the printed p-values would be meaningless.
        std::cerr << "Error: could not open " << dictionaryPath << '\n';
        return 1;
    }

    std::vector<int> stringLengthHashes(bucketCount, 0);
    std::vector<int> firstCharacterHashes(bucketCount, 0);
    std::vector<int> additiveChecksumHashes(bucketCount, 0);
    std::vector<int> remainderHashes(bucketCount, 0);
    std::vector<int> multiplicativeHashes(bucketCount, 0);

    // Accumulate counts for each hash function.
    std::string word;
    while (file >> word) {
        ++stringLengthHashes[stringLengthHash(word)];
        ++firstCharacterHashes[firstCharacterHash(word)];
        ++additiveChecksumHashes[additiveChecksumHash(word)];
        ++remainderHashes[remainderHash(word)];
        ++multiplicativeHashes[multiplicativeHash(word)];
    }

    // Perform chi-square tests and print results.
    std::cout << "String Length Hash - P Value: "
              << performChiSquareTest(stringLengthHashes) << '\n';
    std::cout << "First Character Hash - P Value: "
              << performChiSquareTest(firstCharacterHashes) << '\n';
    std::cout << "Additive Checksum Hash - P Value: "
              << performChiSquareTest(additiveChecksumHashes) << '\n';
    std::cout << "Remainder Hash - P Value: "
              << performChiSquareTest(remainderHashes) << '\n';
    std::cout << "Multiplicative Hash - P Value: "
              << performChiSquareTest(multiplicativeHashes) << '\n';
    return 0;
}