Thread: Polymorphic Associative Array

  1. #1
    Registered User
    Join Date
    Dec 2020
    Posts
    2

    Polymorphic Associative Array

    I'm attempting to implement a polymorphic associative array which can handle any data type, so as both the key and data are of unknown types I'm just storing void pointers to both of them. The user enters a keysize (in bytes) or 0 for strings. I'm attempting to use double hashing for dealing with collisions.

    I have written the majority of the code however I am getting incorrect results, as my code is outputting palindromes when it should be outputting anagrams, and the "unique numbers" it displays is far too low at 17, it should be around 60-70K. Any tips or pointers as to where I'm going wrong and what to do to fix it would be incredibly helpful.

    The dictionary file is too large to upload here but it can be accessed here: WeTransfer

    Code:
    #include <stdio.h>
    #include <stdlib.h>
    #include <assert.h>
    #include <time.h>
    #include <string.h>
    
    
    #define ARRSIZE 15
    #define WORDS 370119
    #define NUMRANGE 100000
    #define INITSIZE 17
    #define INITCAP 60 /* 60% capacity */
    #define SCALEFACTOR 2 /* For the resizing function */
    
    
    typedef struct assoc {
       int size;
       int keysize;
       int occupied;
       void **keys;
       void **data;
    } assoc;
    
    
    typedef enum bool {false, true} bool;
    
    
    char* strduprev(char* str);
    assoc* assoc_init(int keysize);
    void assoc_insert(assoc** a, void* key, void* data);
    unsigned int assoc_count(assoc* a);
    void* assoc_lookup(assoc* a, void* key);
    void assoc_free(assoc* a);
    assoc* _assoc_resize(assoc* a, void *key);
    unsigned int _hash (int len, void *key);
    unsigned int _hash2 (int len, void *key);
    int _nextPrime (int n);
    bool _isPrime (int n);
    assoc* assoc_init(int keysize);
    void assoc_insert(assoc** a, void* key, void* data);
    unsigned int assoc_count(assoc* a);
    void* assoc_lookup(assoc* a, void* key);
    void assoc_todot(assoc* a);
    void assoc_free(assoc* a);
    void on_error(const char* s);
    void* vcalloc(int n, size_t size);
    void* openfile(char* fname, char* mode);
    
    
    int main(void)
    {
    
    
       static char strs[WORDS][50]={{0}};
       FILE *fp;
       char* tstr;
       void *p;
       unsigned int lngst;
       unsigned int j;
       assoc* a;
       static int i[WORDS];
    
    
       a = assoc_init(0);
       fp = openfile("eng_370k_shuffle.txt", "rt");
       for(j=0; j<WORDS; j++){
          assert(assoc_count(a)==j);
          i[j] = j;
          if(fscanf(fp, "%s", strs[j])!=1){
             on_error("Failed to scan in a word?");
          }
          assoc_insert(&a, strs[j], &i[j]);
       }
       fclose(fp);
    
    
       /*
          What's the longest word that is still spelled
          correctly when reversed, but is not a palindrome ?
       */
       lngst = 0;
       for(j=0; j<WORDS; j++){
          /* Longest */
          if(strlen(strs[j]) > lngst){
             tstr = strduprev(strs[j]);
             /* Not a palindrome */
             if(strcmp(tstr, strs[j])){
                /* Spelled correctly */
                if((p = assoc_lookup(a, tstr))!=NULL){
                   lngst = strlen(tstr);
                   printf("%s <-> %s = %d (%d in the file)\n", tstr, strs[j], lngst, *(int*)p);
                }
             }
             free(tstr);
          }
       }
       assoc_free(a);
    
    
       /*
          Lets choose NUMRANGE numbers at random between 0 - (NUMRANGE-1)
          and hash them.  Then assoc_count() tells us how many are unique
       */
       srand(time(NULL));
       a = assoc_init(sizeof(int));
       for(j=0; j<NUMRANGE; j++){
          i[j] = rand()%NUMRANGE;
          assoc_insert(&a, &i[j], NULL);
       }
       printf("%d unique numbers out of %d\n", assoc_count(a), j);
    
    
       assoc_free(a);
    
    
       return 0;
    }
    
    
    /*
       Initialise the Associative array
       keysize : number of bytes (or 0 => string)
       This is important when comparing keys since
       we'll need to use either memcmp() or strcmp()
    */
    assoc* assoc_init(int keysize)
    {
       assoc *a = vcalloc(1, sizeof(assoc));
       int i;
    
    
       a->keys = vcalloc(INITSIZE, sizeof(void *));
       a->data = vcalloc(INITSIZE, sizeof(void *));
       a->size = INITSIZE;
       a->occupied = 0;
       a->keysize = keysize;
    
    
       for (i = 0; i < a->size; i++) {
          a->keys[i] = NULL;
          a->data[i] = NULL;
       }
    
    
       return a;
    }
    
    
    /*
       Insert key/data pair
       - may cause resize, therefore 'a' might
       be changed due to a realloc() etc.
    */
    void assoc_insert(assoc** a, void* key, void* data)
    {
       assoc *ptr = *a;
       int h1 = _hash(ptr->size, key), h2 = _hash2(ptr->size, key), i;
    
    
       if (*a == NULL || key == NULL) {
          on_error("Error: Invalid insert. Please try again");
       }
    
    
       if (ptr->keys[h1] == NULL) {
          ptr->keys[h1] = key;
          ptr->data[h1] = data;
          ptr->occupied++;
       } else if (ptr->data[h1] != NULL) {
          ptr->data[h1] = data;
          ptr->occupied++;
       }
    
    
       _assoc_resize(*a, key);
    }
    
    
    /*
       Returns the number of key/data pairs
       currently stored in the table
    */
    unsigned int assoc_count(assoc* a)
    {
       if (a == NULL) {
          return 0;
       }
    
    
       return a->occupied;
    }
    
    
    /*
       Returns a pointer to the data, given a key
       NULL => not found
    */
    void* assoc_lookup(assoc* a, void* key)
    {
       int h1 = _hash(a->size, key), h2 = _hash2(a->size, key), hash, i;
    
    
       if (key == NULL || a == NULL) {
          return NULL;
       }
       if (a->keysize == 0) {
          if (strcmp((char *)a->keys[h1], (char *)key)) {
             return a->data[h1];
          }
       } else if (memcmp(a->keys[h1], key, sizeof(a->keysize))){
             return a->data[h1];
       }
    
    
       return NULL;
    }
    
    
    /*
    void assoc_todot(assoc* a)
    {
    
    
    }
    */
    
    
    /* Free up all allocated space from 'a' */
    void assoc_free(assoc* a)
    {
       int i;
    
    
       if (a == NULL) {
          printf("Error: Space not freed. Please try again");
          return;
       }
    
    
       free(a->keys);
       free(a->data);
       free(a);
    }
    
    
    assoc* _assoc_resize(assoc* a, void *key)
    {
       assoc *new;
       double fill = (a->size / 100.0) * INITCAP;
       int i, h1 = _hash(a->size, key), h2 = _hash2(a->size, key), hash;
    
    
       if (a->occupied > fill) {
          new = vcalloc(1, sizeof(assoc));
          new->keysize = a->keysize;
          new->size = _nextPrime(a->size);
          new->occupied = 0;
          new->keys = vcalloc(_nextPrime(a->size), sizeof(a->keys));
          new->data = vcalloc(_nextPrime(a->size), sizeof(a->data));
    
    
          for (i = 0; i < a->size; i++) {
             hash = (h1 + i * h2) % new->size;
             new->keys[hash] = a->keys[i];
             new->data[hash] = a->data[i];
          }
    
    
          return new;
       }
    
    
       return a;
    }
    
    
    /* Make a copy, reversed */
    char* strduprev(char* str)
    {
       int i, j;
       char* t;
       j = strlen(str);
       t = vcalloc(j+1, 1); /* Add null char */
       strcpy(t, str);
       for(i=0, j--; i<j; i++,j--){
          /* Swap using bit-twiddling */
          t[i] ^= t[j];
          t[j] ^= t[i];
          t[i] ^= t[j];
       }
       return t;
    }
    
    
    unsigned int _hash (int len, void *key)
    {
       unsigned long hash = 5381;
       int c;
       unsigned char *str = key;
    
    
       while ((c = *str++)) {
          hash = ((hash << 5) + hash) + c;
       }
    
    
       return hash % len;
    }
    
    
    unsigned int _hash2 (int len, void *key)
    {
       unsigned long hash = 5381;
       int c;
       unsigned char *str = key;
    
    
       while ((c = (*str++))) {
          hash = 33 * hash ^ c;
       }
    
    
       return hash % len;
    }
    
    
    int _nextPrime (int n)
    {
       int prime = n * SCALEFACTOR;
       bool found = false;
    
    
       if (n <= 1) {
          return 2;
       }
       while (found == false) {
          prime++;
          if (_isPrime(prime)) {
             found = true;
          }
       }
       return prime;
    }
    
    
    bool _isPrime (int n)
    {
       int i;
    
    
       if (n <= 1) {
          return false;
       }
       if (n <= 3) {
          return true;
       }
       if (n % 2 == 0 || n % 3 == 0) {
          return false;
       }
       for (i = 5; i * i <= n; i += 6) {
          if (n % i == 0 || n % (i + 2) == 0) {
             return false;
          }
       }
       return true;
    }
    
    
    void on_error(const char* s)
    {
       fprintf(stderr, "%s\n", s);
       exit(EXIT_FAILURE);
    }
    
    
    void* vcalloc(int n, size_t size)
    {
       void* v = calloc(n, size);
       if(v==NULL){
          on_error("Cannot calloc() space");
       }
       return v;
    }
    
    
    void* openfile(char* fname, char* mode)
    {
       FILE* fp;
       fp = fopen(fname, mode);
       if(!fp){
          on_error("Cannot open file");
       }
       return fp;
    }

  2. #2
    and the hat of int overfl Salem's Avatar
    Join Date
    Aug 2001
    Location
    The edge of the known universe
    Posts
    39,659
    _assoc_resize returns a result you're ignoring.

    And some ignored values - are these intentional?
    Code:
    $ gcc -g -Wall -Wextra -O2 foo.c
    foo.c: In function ‘assoc_insert’:
    foo.c:157:65: warning: unused variable ‘i’ [-Wunused-variable]
        int h1 = _hash(ptr->size, key), h2 = _hash2(ptr->size, key), i;
                                                                     ^
    foo.c:157:36: warning: unused variable ‘h2’ [-Wunused-variable]
        int h1 = _hash(ptr->size, key), h2 = _hash2(ptr->size, key), i;
                                        ^
    foo.c: In function ‘assoc_lookup’:
    foo.c:200:67: warning: unused variable ‘i’ [-Wunused-variable]
        int h1 = _hash(a->size, key), h2 = _hash2(a->size, key), hash, i;
                                                                       ^
    foo.c:200:61: warning: unused variable ‘hash’ [-Wunused-variable]
        int h1 = _hash(a->size, key), h2 = _hash2(a->size, key), hash, i;
                                                                 ^
    foo.c:200:34: warning: unused variable ‘h2’ [-Wunused-variable]
        int h1 = _hash(a->size, key), h2 = _hash2(a->size, key), hash, i;
                                      ^
    foo.c: In function ‘assoc_free’:
    foo.c:231:8: warning: unused variable ‘i’ [-Wunused-variable]
        int i;
            ^
    Oh, and you should be able to test this with sample data sets that are 1/1000th the size of what you have at the moment.
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper.

  3. #3
    Registered User
    Join Date
    Dec 2020
    Posts
    2
    Ah so should I be allocating _assoc_resize to a? With a* = _assoc_resize(*a, key); ?
    As for the uignored values they are, at the moment, ignored. But I think I should be using my second hash function to help with collisions but I'm only using it one place at the moment, in the resize.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. Using Associative Arrays..
    By jewelz in forum C++ Programming
    Replies: 8
    Last Post: 03-23-2009, 04:02 PM
  2. Right associative??
    By audinue in forum C Programming
    Replies: 1
    Last Post: 07-05-2008, 10:46 PM
  3. How to use associative arrays in C
    By sreenu.daram in forum C Programming
    Replies: 2
    Last Post: 11-13-2005, 08:38 AM
  4. nonpolymorphic to polymorphic
    By kurz7 in forum C Programming
    Replies: 1
    Last Post: 08-13-2003, 05:26 PM
  5. initialising an associative container
    By sayz0 in forum C++ Programming
    Replies: 5
    Last Post: 10-17-2001, 12:10 AM

Tags for this Thread