Code:
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <string.h>
#define ARRSIZE 15
#define WORDS 370119
#define NUMRANGE 100000
#define INITSIZE 17
#define INITCAP 60 /* 60% capacity */
#define SCALEFACTOR 2 /* For the resizing function */
typedef struct assoc {
int size;
int keysize;
int occupied;
void **keys;
void **data;
} assoc;
typedef enum bool {false, true} bool;
char* strduprev(char* str);
assoc* assoc_init(int keysize);
void assoc_insert(assoc** a, void* key, void* data);
unsigned int assoc_count(assoc* a);
void* assoc_lookup(assoc* a, void* key);
void assoc_free(assoc* a);
assoc* _assoc_resize(assoc* a, void *key);
unsigned int _hash (int len, void *key);
unsigned int _hash2 (int len, void *key);
int _nextPrime (int n);
bool _isPrime (int n);
assoc* assoc_init(int keysize);
void assoc_insert(assoc** a, void* key, void* data);
unsigned int assoc_count(assoc* a);
void* assoc_lookup(assoc* a, void* key);
void assoc_todot(assoc* a);
void assoc_free(assoc* a);
void on_error(const char* s);
void* vcalloc(int n, size_t size);
void* openfile(char* fname, char* mode);
int main(void)
{
static char strs[WORDS][50]={{0}};
FILE *fp;
char* tstr;
void *p;
unsigned int lngst;
unsigned int j;
assoc* a;
static int i[WORDS];
a = assoc_init(0);
fp = openfile("eng_370k_shuffle.txt", "rt");
for(j=0; j<WORDS; j++){
assert(assoc_count(a)==j);
i[j] = j;
if(fscanf(fp, "%s", strs[j])!=1){
on_error("Failed to scan in a word?");
}
assoc_insert(&a, strs[j], &i[j]);
}
fclose(fp);
/*
What's the longest word that is still spelled
correctly when reversed, but is not a palindrome ?
*/
lngst = 0;
for(j=0; j<WORDS; j++){
/* Longest */
if(strlen(strs[j]) > lngst){
tstr = strduprev(strs[j]);
/* Not a palindrome */
if(strcmp(tstr, strs[j])){
/* Spelled correctly */
if((p = assoc_lookup(a, tstr))!=NULL){
lngst = strlen(tstr);
printf("%s <-> %s = %d (%d in the file)\n", tstr, strs[j], lngst, *(int*)p);
}
}
free(tstr);
}
}
assoc_free(a);
/*
Lets choose NUMRANGE numbers at random between 0 - (NUMRANGE-1)
and hash them. Then assoc_count() tells us how many are unique
*/
srand(time(NULL));
a = assoc_init(sizeof(int));
for(j=0; j<NUMRANGE; j++){
i[j] = rand()%NUMRANGE;
assoc_insert(&a, &i[j], NULL);
}
printf("%d unique numbers out of %d\n", assoc_count(a), j);
assoc_free(a);
return 0;
}
/*
Initialise the Associative array
keysize : number of bytes (or 0 => string)
This is important when comparing keys since
we'll need to use either memcmp() or strcmp()
*/
assoc* assoc_init(int keysize)
{
assoc *a = vcalloc(1, sizeof(assoc));
int i;
a->keys = vcalloc(INITSIZE, sizeof(void *));
a->data = vcalloc(INITSIZE, sizeof(void *));
a->size = INITSIZE;
a->occupied = 0;
a->keysize = keysize;
for (i = 0; i < a->size; i++) {
a->keys[i] = NULL;
a->data[i] = NULL;
}
return a;
}
/*
Insert key/data pair
- may cause resize, therefore 'a' might
be changed due to a realloc() etc.
*/
void assoc_insert(assoc** a, void* key, void* data)
{
assoc *ptr = *a;
int h1 = _hash(ptr->size, key), h2 = _hash2(ptr->size, key), i;
if (*a == NULL || key == NULL) {
on_error("Error: Invalid insert. Please try again");
}
if (ptr->keys[h1] == NULL) {
ptr->keys[h1] = key;
ptr->data[h1] = data;
ptr->occupied++;
} else if (ptr->data[h1] != NULL) {
ptr->data[h1] = data;
ptr->occupied++;
}
_assoc_resize(*a, key);
}
/*
Returns the number of key/data pairs
currently stored in the table
*/
unsigned int assoc_count(assoc* a)
{
if (a == NULL) {
return 0;
}
return a->occupied;
}
/*
Returns a pointer to the data, given a key
NULL => not found
*/
void* assoc_lookup(assoc* a, void* key)
{
int h1 = _hash(a->size, key), h2 = _hash2(a->size, key), hash, i;
if (key == NULL || a == NULL) {
return NULL;
}
if (a->keysize == 0) {
if (strcmp((char *)a->keys[h1], (char *)key)) {
return a->data[h1];
}
} else if (memcmp(a->keys[h1], key, sizeof(a->keysize))){
return a->data[h1];
}
return NULL;
}
/*
void assoc_todot(assoc* a)
{
}
*/
/* Free up all allocated space from 'a' */
void assoc_free(assoc* a)
{
int i;
if (a == NULL) {
printf("Error: Space not freed. Please try again");
return;
}
free(a->keys);
free(a->data);
free(a);
}
assoc* _assoc_resize(assoc* a, void *key)
{
assoc *new;
double fill = (a->size / 100.0) * INITCAP;
int i, h1 = _hash(a->size, key), h2 = _hash2(a->size, key), hash;
if (a->occupied > fill) {
new = vcalloc(1, sizeof(assoc));
new->keysize = a->keysize;
new->size = _nextPrime(a->size);
new->occupied = 0;
new->keys = vcalloc(_nextPrime(a->size), sizeof(a->keys));
new->data = vcalloc(_nextPrime(a->size), sizeof(a->data));
for (i = 0; i < a->size; i++) {
hash = (h1 + i * h2) % new->size;
new->keys[hash] = a->keys[i];
new->data[hash] = a->data[i];
}
return new;
}
return a;
}
/* Make a copy, reversed */
char* strduprev(char* str)
{
int i, j;
char* t;
j = strlen(str);
t = vcalloc(j+1, 1); /* Add null char */
strcpy(t, str);
for(i=0, j--; i<j; i++,j--){
/* Swap using bit-twiddling */
t[i] ^= t[j];
t[j] ^= t[i];
t[i] ^= t[j];
}
return t;
}
unsigned int _hash (int len, void *key)
{
unsigned long hash = 5381;
int c;
unsigned char *str = key;
while ((c = *str++)) {
hash = ((hash << 5) + hash) + c;
}
return hash % len;
}
unsigned int _hash2 (int len, void *key)
{
unsigned long hash = 5381;
int c;
unsigned char *str = key;
while ((c = (*str++))) {
hash = 33 * hash ^ c;
}
return hash % len;
}
int _nextPrime (int n)
{
int prime = n * SCALEFACTOR;
bool found = false;
if (n <= 1) {
return 2;
}
while (found == false) {
prime++;
if (_isPrime(prime)) {
found = true;
}
}
return prime;
}
bool _isPrime (int n)
{
int i;
if (n <= 1) {
return false;
}
if (n <= 3) {
return true;
}
if (n % 2 == 0 || n % 3 == 0) {
return false;
}
for (i = 5; i * i <= n; i += 6) {
if (n % i == 0 || n % (i + 2) == 0) {
return false;
}
}
return true;
}
void on_error(const char* s)
{
fprintf(stderr, "%s\n", s);
exit(EXIT_FAILURE);
}
void* vcalloc(int n, size_t size)
{
void* v = calloc(n, size);
if(v==NULL){
on_error("Cannot calloc() space");
}
return v;
}
void* openfile(char* fname, char* mode)
{
FILE* fp;
fp = fopen(fname, mode);
if(!fp){
on_error("Cannot open file");
}
return fp;
}