Thread: Non-whitespace in-file sequences counter using own hashtable

  1. #1
    Registered User
    Join Date
    Sep 2018
    Posts
    53

    Non-whitespace in-file sequences counter using own hashtable

    main.c:
    Code:
    #include <stdio.h>
    #include <stdlib.h> 
    #include <stdbool.h>
    #include <iso646.h>
    #include <string.h>
    #include <ctype.h>
    #include <stdint.h>
    #include <time.h>
    
    // exit on failure                           
    #define FAIL return EXIT_FAILURE
    #define MAX_WORD_LENGTH_PRIME 251 // some high word length
    #define LAST_ELEMENT "<END_OF_STRING>" // do not use in text
    #define EMPTY_ELEMENT "<EMPTY_SYMBOL>" // do not use in text
    #define MIN_PRIME_NUMBER 2
    
    // remainders for each letter code in hash function computation
    size_t remainders[MAX_WORD_LENGTH_PRIME];
    
    // hash table entry
    typedef struct
    {
        char word[MAX_WORD_LENGTH_PRIME]; // 8-bit UTF-8
        size_t count;
    } element; // to use declaration without struct keyword over all source
    
    typedef struct
    {
        size_t index;
        size_t count;
    } found_element;
    
    typedef struct
    {
        bool result;
        bool is_error;
        size_t index;
    } insert_status;
    
    size_t hashtable_prime_size = MIN_PRIME_NUMBER; // starts from minimal prime size because of empty textfile existence probability (we do not got memory loss in this case)
    
    void hashtable_content_print(element * hashtable)
    {
        if (hashtable != NULL)
        {    
            element element_i;
            
            for(size_t i = 0; i< hashtable_prime_size; ++i)
            {
                element_i = hashtable[i];
                
                if (strcmp(element_i.word, EMPTY_ELEMENT) != 0 && strcmp(element_i.word, LAST_ELEMENT) != 0)
                    printf("%s count is %zu\n", element_i.word, element_i.count);
            }
        }    
        else
            puts("NULL table pointer passed to print.");
    }
    
    
    void recalculate_remainders()
    {
        for (size_t i = 0; i < MAX_WORD_LENGTH_PRIME; ++i)
            remainders[i] = rand() % MAX_WORD_LENGTH_PRIME;
    }
    
    size_t get_next_prime_number(size_t current_prime_number) // required to get new hashtable size on reconstruction (with rounds == 1 we will got minimal memory loss)
    {
        size_t saved = current_prime_number;
        bool is_prime = true;
        
        if (current_prime_number >= MIN_PRIME_NUMBER)
            for(current_prime_number = current_prime_number + 1; current_prime_number < SIZE_MAX; ++current_prime_number)    
                {
                    for(size_t j = 2; j < current_prime_number; ++j)
                        if (current_prime_number % j == 0)
                        {
                            is_prime = false;
                            
                            break;    
                        }
                        else
                            is_prime = true;
        
                    if (is_prime)
                        break;        
                }
        
        return (current_prime_number == saved) ? 0 : current_prime_number;
    }
    
    // argc, argv checking
    bool check_params(int params_count, char *params[])
    {
        return (params_count == 2) and (params != NULL) and (params[1] != NULL);
    }
    
    size_t hash(char value[], size_t offset) // double hashing probe
    {
        size_t result = 0;
        
        if (value != NULL and strlen(value) > 0)
            for (size_t i = 0; value[i] != '\0'; ++i)
                result += remainders[i] * value[i];
        else
            puts("NULL value pointer or empty value passed to hash function.");
        result = (result % hashtable_prime_size + offset * (1 + result % (hashtable_prime_size - 1))) % hashtable_prime_size;
        
        printf("word = %s; offset = %zu; hash = %zu\n", value, offset, result);
        
        return result;
    }
    
    bool hashtable_element_search(element * hashtable, char key[], found_element * result)
    {    
        if ((hashtable != NULL) and (key != NULL))
        {
            puts("SEARCHING...");
            
            element found;
            size_t offset = 0, index;
            
            do        
                if (strcmp((found = hashtable[index = hash(key, offset++)]).word, key) == 0)    
                {    
                    *result = (found_element) { index, found.count };
                    
                    printf("FOUND ELEMENT WORD = \"%s\" WITH INDEX = %zu AND COUNT = %zu\n", found.word, index, found.count);
                    
                    return true;            
                }
            while(offset != hashtable_prime_size);
        }
        else
            puts("NULL table pointer or NULL key pointer passed to search.");
        
        return false;
    }
    
    bool hashtable_size_increase(element * hashtable, size_t rounds) // rounds is a number of prime hashtable size rises
    {    
        if (hashtable != NULL and rounds > 0)
            {    
                printf("INCREASING HASHTABLE USING %zu ROUNDS...", rounds);
                
                size_t new_size, offset = 0, index = 0, saved_size = 0;
            
                while(rounds-- != 0)
                    saved_size = new_size = get_next_prime_number(hashtable_prime_size);
                    
                printf("NEW PRIME SIZE = %zu\n", saved_size);
                
                element new_hashtable[new_size];
                memset(&new_hashtable, 0, sizeof(new_hashtable));
                hashtable_prime_size = new_size;
                new_hashtable[--new_size] = (element) { LAST_ELEMENT, 0 };
                            
                // cleaning array
                while(--new_size > 0)
                    new_hashtable[new_size] = (element) { EMPTY_ELEMENT, 0 };
                    
                new_hashtable[new_size] = (element) { EMPTY_ELEMENT, 0 };
                element found;    
                
                puts("FINDING EMPTY ELEMENT...");
                
                // empty element in new hashtable already exists
                while(strcmp((found = hashtable[new_size++]).word, LAST_ELEMENT) != 0)
                    {    
                        printf("FINDING PLACE FOR WORD = \"%s\" WITH COUNT = %zu\n", found.word, found.count);
                         
                        if (strcmp(found.word, EMPTY_ELEMENT) == 0) // pass
                        {
                            puts("PASSED BECAUSE OF EMPTY");
                            
                            continue;
                        }
                        else
                        {
                            while(strcmp(new_hashtable[index = hash(found.word, offset++)].word, EMPTY_ELEMENT) != 0);
                            new_hashtable[index] = found;    
                            
                            printf("POSTED WITH INDEX = %zu INTO TEMP TABLE\n", index);
                        }
                    }
                
                if(realloc(hashtable, saved_size * sizeof(element)))
                {
                    memset(&hashtable, 0, sizeof(hashtable));
                    
                    if (memcpy(hashtable, new_hashtable, sizeof(new_hashtable)))
                    {
                        puts("COPIED INTO NEW MEMORY LOCATION");
                    
                        return true; // success
                    }
                }
            }
            else
                puts("NULL pointer to hashtable or incorrect rounds count passed to increase function.");
                
        return false;
    }
    
    insert_status hashtable_element_insert(element * hashtable, element new_element)
    {
        insert_status result = { false, true, 0 };
        
        if (new_element.word != NULL and strcmp(new_element.word, LAST_ELEMENT) != 0 and strcmp(new_element.word, EMPTY_ELEMENT) != 0) //validation
        {
            puts("INSERTING...");
            
            found_element search_output;
            result.is_error = false;
            
            if (hashtable_element_search(hashtable, new_element.word, &search_output))
            {    
                result.index = search_output.index;
                
                printf("STEP 1. FOUND AT INDEX = %zu. NOT INSERTED.", result.index);
                
                return result; // element already exists        
            }
            
            if (!hashtable_element_search(hashtable, EMPTY_ELEMENT, &search_output)) // key not found
            {    
                puts("STEP 2. EMPTY ELEMENT NOT FOUND.");
                
                hashtable_size_increase(hashtable, 1); // reconstruct hashtable to add empty elements            
                hashtable_element_search(hashtable, EMPTY_ELEMENT, &search_output);        
            }
            
            hashtable[search_output.index] = new_element; // rewrite empty element
            result.result = true;        
            
            printf("NEW ELEMENT WORD = \"%s\" FOUND WITH INDEX = %zu\n", new_element.word, search_output.index);
            
            return result;        
        }
        else
            puts("Incorrect new element word.");
        
        return result;
    }
    
    void hashtable_elements_fill(FILE * textfile, element * hashtable)
    {    
        char word[MAX_WORD_LENGTH_PRIME];
        word[0] = '\0';
        bool is_word = false;
        char input;
        
        while(!feof(textfile) and !ferror(textfile))
        {
            input = getc(textfile);
            
            puts("FILLING...");
            
            if (!isspace(input)) // append to a whole UTF-8 word
            {            
                is_word = true;            
                strncat(word, &input, 1);                                     
            }
            else if (is_word)
            {            
                element new_element = { "", 1 };
                strcpy(new_element.word, word);
                
                puts("===WORD===");
                puts(word);
                puts("===END===");   
                
                insert_status output = hashtable_element_insert(hashtable, new_element);
                
                printf("INSERTED INDEX = %zu\n", output.index);
                
                if(output.is_error)
                {
                    puts("OUTPUT.IS_ERROR = TRUE");
                    
                    continue; // invalid word
                }
                else if (!output.result)        
                {
                    puts("OUTPUT.RESULT = FALSE");
                    
                    ++hashtable[output.index].count; // increasing found word counter    
                }
                
                word[0] = '\0';
                is_word = false;            
            }
        }    
        
        if (ferror(textfile))    
            perror("File reading error occured.");    
    }
    
    int main(int params_count, char *params[])
    {            
        srand(time(NULL)); // setup randomizer
    
        if (!check_params(params_count, params))
        {
            puts("Params check failed. Program terminated.");
            
            FAIL;
        }
        
        char *filename;
        
        filename = params[1];                            
        FILE *textfile = fopen(filename, "r");
    
        if(textfile == NULL)           
        {    
            puts("Cannot read input file. Program terminated.");
            
            FAIL;
        }
        printf("FILENAME = %s", filename);
        recalculate_remainders();
        printf("HASHTABLE SIZE = %zu", MIN_PRIME_NUMBER * sizeof(element));
        element * hashtable = malloc(MIN_PRIME_NUMBER * sizeof(element)); 
        
        if (hashtable)
        {
            size_t counter = MIN_PRIME_NUMBER;
            hashtable[--counter] = (element){ LAST_ELEMENT, 0 };
            
            while(counter > 0)
                hashtable[--counter] = (element){ EMPTY_ELEMENT, 0 };
            // there are may be no words in a file at all (end element with NULL string always stored for barrier search)
            puts("===HASHTABLE CONTENT===");
            hashtable_content_print(hashtable);
            puts("===END===");
            hashtable_elements_fill(textfile, hashtable); 
            puts("===HASHTABLE CONTENT===");
            hashtable_content_print(hashtable);
            puts("===END===");
            free(hashtable);             
         }
         else
             perror("Cannot assign memory location.");
         
           fclose(textfile);       
        
        return EXIT_SUCCESS;
    }
    Trace from stdout:
    $ ./a.out input.txt
    FILENAME = input.txtHASHTABLE SIZE = 528===HASHTABLE CONTENT===
    <EMPTY_SYMBOL> count is 0
    <END_OF_STRING> count is 0
    ===END===
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    qwerty
    ===END===
    INSERTING...
    SEARCHING...
    word = qwerty; offset = 0; hash = 0
    word = qwerty; offset = 1; hash = 1
    SEARCHING...
    word = <EMPTY_SYMBOL>; offset = 0; hash = 1
    word = <EMPTY_SYMBOL>; offset = 1; hash = 0
    FOUND ELEMENT WORD = "<EMPTY_SYMBOL>" WITH INDEX = 0 AND COUNT = 0
    NEW ELEMENT WORD = "qwerty" FOUND WITH INDEX = 0
    INSERTED INDEX = 0
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    qwerty
    ===END===
    INSERTING...
    SEARCHING...
    word = qwerty; offset = 0; hash = 0
    FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 1
    STEP 1. FOUND AT INDEX = 0. NOT INSERTED.INSERTED INDEX = 0
    OUTPUT.RESULT = FALSE
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    qwerty
    ===END===
    INSERTING...
    SEARCHING...
    word = qwerty; offset = 0; hash = 0
    FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 2
    STEP 1. FOUND AT INDEX = 0. NOT INSERTED.INSERTED INDEX = 0
    OUTPUT.RESULT = FALSE
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    ytrewq
    ===END===
    INSERTING...
    SEARCHING...
    word = ytrewq; offset = 0; hash = 1
    word = ytrewq; offset = 1; hash = 0
    SEARCHING...
    word = <EMPTY_SYMBOL>; offset = 0; hash = 1
    word = <EMPTY_SYMBOL>; offset = 1; hash = 0
    STEP 2. EMPTY ELEMENT NOT FOUND.
    INCREASING HASHTABLE USING 1 ROUNDS...NEW PRIME SIZE = 3
    FINDING EMPTY ELEMENT...
    FINDING PLACE FOR WORD = "qwerty" WITH COUNT = 3
    word = qwerty; offset = 0; hash = 0
    POSTED WITH INDEX = 0 INTO TEMP TABLE
    Segmentation fault (core dumped)
    I am writing a program that will be able to count words in a textfile input.txt with content:
    qwerty qwerty qwerty ytrewq qazwsx hi dasf
    Compilation:
    gcc main.c -Wall -Wextra -pedantic -std=c11
    I did not understand where is the bug... Now, I've posted all code here because of it. My program crashes after first resizement. Please, tell anything about.

  2. #2
    and the hat of int overfl Salem's Avatar
    Join Date
    Aug 2001
    Location
    The edge of the known universe
    Posts
    39,659
    The first thing to do is compile with the -g flag.

    Then run the code in the debugger.
    Code:
    $ gdb -q ./a.out
    Reading symbols from ./a.out...
    (gdb) run input.txt
    Starting program: ./a.out input.txt
    FILENAME = input.txtHASHTABLE SIZE = 528===HASHTABLE CONTENT===
    ===END===
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    qwerty
    ===END===
    INSERTING...
    SEARCHING...
    word = qwerty; offset = 0; hash = 0
    word = qwerty; offset = 1; hash = 1
    SEARCHING...
    word = <EMPTY_SYMBOL>; offset = 0; hash = 1
    word = <EMPTY_SYMBOL>; offset = 1; hash = 0
    FOUND ELEMENT WORD = "<EMPTY_SYMBOL>" WITH INDEX = 0 AND COUNT = 0
    NEW ELEMENT WORD = "qwerty" FOUND WITH INDEX = 0
    INSERTED INDEX = 0
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    qwerty
    ===END===
    INSERTING...
    SEARCHING...
    word = qwerty; offset = 0; hash = 0
    FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 1
    STEP 1. FOUND AT INDEX = 0. NOT INSERTED.INSERTED INDEX = 0
    OUTPUT.RESULT = FALSE
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    qwerty
    ===END===
    INSERTING...
    SEARCHING...
    word = qwerty; offset = 0; hash = 0
    FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 2
    STEP 1. FOUND AT INDEX = 0. NOT INSERTED.INSERTED INDEX = 0
    OUTPUT.RESULT = FALSE
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    ytrewq
    ===END===
    INSERTING...
    SEARCHING...
    word = ytrewq; offset = 0; hash = 0
    word = ytrewq; offset = 1; hash = 1
    SEARCHING...
    word = <EMPTY_SYMBOL>; offset = 0; hash = 1
    word = <EMPTY_SYMBOL>; offset = 1; hash = 0
    STEP 2. EMPTY ELEMENT NOT FOUND.
    INCREASING HASHTABLE USING 1 ROUNDS...NEW PRIME SIZE = 3
    FINDING EMPTY ELEMENT...
    FINDING PLACE FOR WORD = "qwerty" WITH COUNT = 3
    word = qwerty; offset = 0; hash = 2
    word = qwerty; offset = 1; hash = 0
    POSTED WITH INDEX = 0 INTO TEMP TABLE
    
    Program received signal SIGSEGV, Segmentation fault.
    __memmove_sse2_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:416
    416	../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: No such file or directory.
    (gdb) bt
    #0  __memmove_sse2_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:416
    #1  0x0000555555555d47 in hashtable_size_increase (hashtable=0x0, rounds=18446744073709551615) at bar.c:191
    #2  0x0000555555555e93 in hashtable_element_insert (hashtable=0x55555555a890, new_element=...) at bar.c:229
    #3  0x000055555555608c in hashtable_elements_fill (textfile=0x55555555a2a0, hashtable=0x55555555a890) at bar.c:273
    #4  0x0000555555556389 in main (params_count=2, params=0x7fffffffdf48) at bar.c:337
    (gdb) frame 1
    #1  0x0000555555555d47 in hashtable_size_increase (hashtable=0x0, rounds=18446744073709551615) at bar.c:191
    191	                if (memcpy(hashtable, new_hashtable, sizeof(new_hashtable)))
    (gdb) list
    186	            
    187	            if(realloc(hashtable, saved_size * sizeof(element)))
    188	            {
    189	                memset(&hashtable, 0, sizeof(hashtable));
    190	                
    191	                if (memcpy(hashtable, new_hashtable, sizeof(new_hashtable)))
    192	                {
    193	                    puts("COPIED INTO NEW MEMORY LOCATION");
    194	                
    195	                    return true; // success
    (gdb) p hashtable
    $1 = (element *) 0x0
    The first obvious problem is you're using realloc wrong.
    The second obvious problem is your memset &ptr is trashing the pointer itself (and some data following the pointer).

    realloc may move the memory to a new location. If it does, you lose.

    What you should do is
    Code:
    void *temp = realloc(hashtable, saved_size * sizeof(element));
    if ( temp ) {
      hashtable = temp;
    } else {
      // backup plan
      free(hashtable);
      exit(1);
    }
    Which brings us to the third problem.
    hashtable is a value parameter to hashtable_size_increase, so if realloc does move the memory, the caller of the function will never know about it.
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper.

  3. #3
    Registered User
    Join Date
    Sep 2018
    Posts
    53
    It is not clear in the part of memset meaning. Is it really that I can assign struct field by field using strcpy if it contains strings only? I've added some perfect magic with your recommendation interpretation and there is infinite loop. I will investigate...

  4. #4
    Registered User
    Join Date
    Sep 2018
    Posts
    53
    #include <stdio.h>#include <stdlib.h> #include <stdbool.h>#include <iso646 - Pastebin.com - tag CODE got an error.

    FILENAME = input.txtHASHTABLE SIZE = 528===HASHTABLE CONTENT===
    HASHTABLE PRIME SIZE = 2
    <EMPTY_SYMBOL> count is 0
    <END_OF_STRING> count is 0
    ===END===
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    qwerty
    ===END===
    INSERTING...
    SEARCHING...
    word = qwerty; offset = 0; hash = 1
    PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "qwerty"
    word = qwerty; offset = 1; hash = 0
    PROBE OF ELEMENT WORD = "<EMPTY_SYMBOL>" WITH INDEX = 0 AND KEY = "qwerty"
    SEARCHING...
    word = <EMPTY_SYMBOL>; offset = 0; hash = 1
    PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "<EMPTY_SYMBOL>"
    word = <EMPTY_SYMBOL>; offset = 1; hash = 0
    FOUND ELEMENT WORD = "<EMPTY_SYMBOL>" WITH INDEX = 0 AND COUNT = 0
    NEW ELEMENT WORD = "qwerty" FOUND WITH INDEX = 0
    OUTPUT INDEX = 0
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    qwerty
    ===END===
    INSERTING...
    SEARCHING...
    word = qwerty; offset = 0; hash = 1
    PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "qwerty"
    word = qwerty; offset = 1; hash = 0
    FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 1
    STEP 1. FOUND AT INDEX = 0. NOT INSERTED.
    OUTPUT INDEX = 0
    OUTPUT.RESULT = FALSE. INCREASING COUNT...
    COUNT BEFORE = 1
    COUNT AFTER = 2
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    qwerty
    ===END===
    INSERTING...
    SEARCHING...
    word = qwerty; offset = 0; hash = 1
    PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "qwerty"
    word = qwerty; offset = 1; hash = 0
    FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 2
    STEP 1. FOUND AT INDEX = 0. NOT INSERTED.
    OUTPUT INDEX = 0
    OUTPUT.RESULT = FALSE. INCREASING COUNT...
    COUNT BEFORE = 2
    COUNT AFTER = 3
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    ytrewq
    ===END===
    INSERTING...
    SEARCHING...
    word = ytrewq; offset = 0; hash = 0
    PROBE OF ELEMENT WORD = "qwerty" WITH INDEX = 0 AND KEY = "ytrewq"
    word = ytrewq; offset = 1; hash = 1
    PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "ytrewq"
    SEARCHING...
    word = <EMPTY_SYMBOL>; offset = 0; hash = 1
    PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "<EMPTY_SYMBOL>"
    word = <EMPTY_SYMBOL>; offset = 1; hash = 0
    PROBE OF ELEMENT WORD = "qwerty" WITH INDEX = 0 AND KEY = "<EMPTY_SYMBOL>"
    STEP 2. EMPTY ELEMENT NOT FOUND.
    INCREASING HASHTABLE USING 1 ROUNDS...
    ===HASHTABLE CONTENT BEFORE RESIZEMENT===
    HASHTABLE PRIME SIZE = 2
    qwerty count is 3
    <END_OF_STRING> count is 0
    ===END===
    NEW PRIME SIZE = 3
    FINDING EMPTY ELEMENT...
    FINDING PLACE FOR WORD = "qwerty" WITH COUNT = 3
    word = qwerty; offset = 0; hash = 1
    TEMP TABLE WORD = "<EMPTY_SYMBOL>" AT INDEX = 1
    POSTED WORD = "qwerty" WITH INDEX = 1 AND COUNT = 3 INTO TEMP TABLE
    ===OLD HASHTABLE CONTENT===
    HASHTABLE PRIME SIZE = 2
    qwerty count is 3
    <END_OF_STRING> count is 0
    ===END===
    ===TEMPORARY HASHTABLE CONTENT===
    HASHTABLE PRIME SIZE = 3
    ZUUU count is 3
    <END_OF_STRING> count is 0
    count is 0
    ===END===
    COPIED INTO NEW MEMORY LOCATION
    ===NEW HASHTABLE CONTENT===
    HASHTABLE PRIME SIZE = 3
    <EMPTY_SYMBOL> count is 0
    qwerty count is 3
    <END_OF_STRING> count is 0
    ===END===
    SEARCHING...
    word = <EMPTY_SYMBOL>; offset = 0; hash = 1
    PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "<EMPTY_SYMBOL>"
    word = <EMPTY_SYMBOL>; offset = 1; hash = 0
    PROBE OF ELEMENT WORD = "ZUUU" WITH INDEX = 0 AND KEY = "<EMPTY_SYMBOL>"
    NEW ELEMENT WORD = "ytrewq" FOUND WITH INDEX = 0
    OUTPUT INDEX = 0
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    FILLING...
    ===WORD===
    qazwsx
    ===END===
    INSERTING...
    SEARCHING...
    word = qazwsx; offset = 0; hash = 1
    PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "qazwsx"
    word = qazwsx; offset = 1; hash = 0
    PROBE OF ELEMENT WORD = "ytrewq" WITH INDEX = 0 AND KEY = "qazwsx"
    SEARCHING...
    word = <EMPTY_SYMBOL>; offset = 0; hash = 1
    PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "<EMPTY_SYMBOL>"
    word = <EMPTY_SYMBOL>; offset = 1; hash = 0
    PROBE OF ELEMENT WORD = "ytrewq" WITH INDEX = 0 AND KEY = "<EMPTY_SYMBOL>"
    STEP 2. EMPTY ELEMENT NOT FOUND.
    INCREASING HASHTABLE USING 1 ROUNDS...
    ===HASHTABLE CONTENT BEFORE RESIZEMENT===
    HASHTABLE PRIME SIZE = 2
    ytrewq count is 1
    <END_OF_STRING> count is 0
    ===END===
    NEW PRIME SIZE = 3
    FINDING EMPTY ELEMENT...
    FINDING PLACE FOR WORD = "ytrewq" WITH COUNT = 1
    word = ytrewq; offset = 0; hash = 0
    TEMP TABLE WORD = "<EMPTY_SYMBOL>" AT INDEX = 0
    POSTED WORD = "ytrewq" WITH INDEX = 0 AND COUNT = 1 INTO TEMP TABLE
    ===OLD HASHTABLE CONTENT===
    HASHTABLE PRIME SIZE = 2
    ytrewq count is 1
    <END_OF_STRING> count is 0
    ===END===
    free(): double free detected in tcache 2

    Program received signal SIGABRT, Aborted.
    __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351468864) at ./nptl/pthread_kill.c:44
    44 ./nptl/pthread_kill.c: No such file or directory.
    (gdb) bt
    #0 __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351468864) at ./nptl/pthread_kill.c:44
    #1 __pthread_kill_internal (signo=6, threadid=140737351468864) at ./nptl/pthread_kill.c:78
    #2 __GI___pthread_kill (threadid=140737351468864, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
    #3 0x00007ffff7db9476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
    #4 0x00007ffff7d9f7f3 in __GI_abort () at ./stdlib/abort.c:79
    #5 0x00007ffff7e006f6 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff7f52b8c "%s\n") at ../sysdeps/posix/libc_fatal.c:155
    #6 0x00007ffff7e17d7c in malloc_printerr (str=str@entry=0x7ffff7f55710 "free(): double free detected in tcache 2") at ./malloc/malloc.c:5664
    #7 0x00007ffff7e1a12b in _int_free (av=0x7ffff7f90c80 <main_arena>, p=0x55555555a880, have_lock=1) at ./malloc/malloc.c:4473
    #8 0x00007ffff7e1bc5b in _int_realloc (av=av@entry=0x7ffff7f90c80 <main_arena>, oldp=oldp@entry=0x55555555a880, oldsize=oldsize@entry=544, nb=nb@entry=800)
    at ./malloc/malloc.c:4900
    #9 0x00007ffff7e1c989 in __GI___libc_realloc (oldmem=0x55555555a890, bytes=792) at ./malloc/malloc.c:3485
    #10 0x0000555555555fb9 in hashtable_size_increase (hashtable=0x55555555a890, rounds=0, hashtable_prime_size=2) at main.c:226
    #11 0x0000555555556242 in hashtable_element_insert (hashtable=0x55555555a890, new_element=..., hashtable_prime_size=2) at main.c:287
    #12 0x0000555555556507 in hashtable_elements_fill (textfile=0x55555555a2a0, hashtable=0x55555555a890, hashtable_prime_size=2) at main.c:345
    #13 0x00005555555568f2 in main (params_count=2, params=0x7fffffffde28) at main.c:421
    (gdb) frame 9
    #9 0x00007ffff7e1c989 in __GI___libc_realloc (oldmem=0x55555555a890, bytes=792) at ./malloc/malloc.c:3485
    3485 ./malloc/malloc.c: No such file or directory.
    (gdb) frame 10
    #10 0x0000555555555fb9 in hashtable_size_increase (hashtable=0x55555555a890, rounds=0, hashtable_prime_size=2) at main.c:226

    Reallocation fails. Why?

  5. #5
    and the hat of int overfl Salem's Avatar
    Join Date
    Aug 2001
    Location
    The edge of the known universe
    Posts
    39,659
    > Reallocation fails. Why?
    Simply put, you trashed memory.

    Your 100+ line monster function isn't doing you any favours in terms of s/w understanding.

    > element new_hashtable[new_size];
    If you're doing this, then you may as well malloc the new size to begin with, and save creating a local array (which risks running out of stack space way before you run out of memory).

    Code:
    			memset(&new_hashtable, 0, new_size * sizeof(element)); 
    			old_size = hashtable_prime_size;
    			result.new_size = new_size; 
    			printf("NEW PRIME SIZE = %zu\n", new_size);
    			new_hashtable[--new_size].count = 0;
    			new_hashtable[new_size].word[0] = '\0';
    			strcpy(new_hashtable[new_size].word, LAST_ELEMENT);
    						
    			// cleaning array
    			while(--new_size > 0)
    			{				
    				new_hashtable[new_size].count = 0;
    				new_hashtable[new_size].word[0] = '\0';
    				strcpy(new_hashtable[new_size].word, EMPTY_ELEMENT);
    			}
    				
    			new_hashtable[new_size].count = 0;
    			new_hashtable[new_size].word[0] = '\0';
    			strcpy(new_hashtable[new_size].word, EMPTY_ELEMENT);
    Much of this "cleaning" is unnecessary, with the memset.

    Code:
    typedef struct {
        element *table;
        size_t   size;
        // any other members, like your 'rounds', and 'hashtable_prime_size'?
    } hashTable;
    Pass around a pointer to one of these.

    Code:
    increase_status hashtable_size_increase(hashTable *hashtable) {
        new_size = get_next_prime_number(hashtable_prime_size);
    
        hashTable newTable;
        newTable.table = malloc(new_size * sizeof(element) );
        memset( newTable.table, 0, new_size * sizeof(element) );
        for(int i = 0 ; i < new_size-1 ; i++ ) {
            strcpy(newTable[i].word, EMPTY_ELEMENT);
        }
        strcpy(newTable[new_size-1].word, LAST_ELEMENT);
    
        // rehash all elements of hashtable->table into newTable.table
    
        // free and reassign
        free( hashtable->table );
        *hashtable = newTable;
    }
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. Identify and count word sequences within a text file
    By Freem in forum C++ Programming
    Replies: 3
    Last Post: 02-04-2012, 12:37 PM
  2. Searching for a string in txt file with whitespace!
    By invisible_wall in forum C++ Programming
    Replies: 0
    Last Post: 03-29-2009, 10:02 PM
  3. Page File counter and Private Bytes Counter
    By George2 in forum Tech Board
    Replies: 0
    Last Post: 01-31-2008, 03:17 AM
  4. Interpreting literal escape sequences from a file...
    By Sebastiani in forum C++ Programming
    Replies: 1
    Last Post: 07-08-2003, 02:00 PM
  5. parsing a FASTA file (protein sequences)
    By Sargnagel in forum C Programming
    Replies: 5
    Last Post: 04-03-2003, 10:50 AM

Tags for this Thread