Non-whitespace in-file sequences counter using own hashtable

**Wido** · 11-17-2022

main.c:

Code:

#include <stdio.h>
#include <stdlib.h> 
#include <stdbool.h>
#include <iso646.h>
#include <string.h>
#include <ctype.h>
#include <stdint.h>
#include <time.h>

// exit on failure                           
#define FAIL return EXIT_FAILURE
#define MAX_WORD_LENGTH_PRIME 251 // some high word length
#define LAST_ELEMENT "<END_OF_STRING>" // do not use in text
#define EMPTY_ELEMENT "<EMPTY_SYMBOL>" // do not use in text
#define MIN_PRIME_NUMBER 2

// remainders for each letter code in hash function computation
size_t remainders[MAX_WORD_LENGTH_PRIME];

// hash table entry
typedef struct
{
    char word[MAX_WORD_LENGTH_PRIME]; // 8-bit UTF-8
    size_t count;
} element; // to use declaration without struct keyword over all source

typedef struct
{
    size_t index;
    size_t count;
} found_element;

typedef struct
{
    bool result;
    bool is_error;
    size_t index;
} insert_status;

size_t hashtable_prime_size = MIN_PRIME_NUMBER; // starts from minimal prime size because of empty textfile existence probability (we do not got memory loss in this case)

void hashtable_content_print(element * hashtable)
{
    if (hashtable != NULL)
    {    
        element element_i;
        
        for(size_t i = 0; i< hashtable_prime_size; ++i)
        {
            element_i = hashtable[i];
            
            if (strcmp(element_i.word, EMPTY_ELEMENT) != 0 && strcmp(element_i.word, LAST_ELEMENT) != 0)
                printf("%s count is %zu\n", element_i.word, element_i.count);
        }
    }    
    else
        puts("NULL table pointer passed to print.");
}


void recalculate_remainders()
{
    for (size_t i = 0; i < MAX_WORD_LENGTH_PRIME; ++i)
        remainders[i] = rand() % MAX_WORD_LENGTH_PRIME;
}

size_t get_next_prime_number(size_t current_prime_number) // required to get new hashtable size on reconstruction (with rounds == 1 we will got minimal memory loss)
{
    size_t saved = current_prime_number;
    bool is_prime = true;
    
    if (current_prime_number >= MIN_PRIME_NUMBER)
        for(current_prime_number = current_prime_number + 1; current_prime_number < SIZE_MAX; ++current_prime_number)    
            {
                for(size_t j = 2; j < current_prime_number; ++j)
                    if (current_prime_number % j == 0)
                    {
                        is_prime = false;
                        
                        break;    
                    }
                    else
                        is_prime = true;
    
                if (is_prime)
                    break;        
            }
    
    return (current_prime_number == saved) ? 0 : current_prime_number;
}

// argc, argv checking
bool check_params(int params_count, char *params[])
{
    return (params_count == 2) and (params != NULL) and (params[1] != NULL);
}

size_t hash(char value[], size_t offset) // double hashing probe
{
    size_t result = 0;
    
    if (value != NULL and strlen(value) > 0)
        for (size_t i = 0; value[i] != '\0'; ++i)
            result += remainders[i] * value[i];
    else
        puts("NULL value pointer or empty value passed to hash function.");
    result = (result % hashtable_prime_size + offset * (1 + result % (hashtable_prime_size - 1))) % hashtable_prime_size;
    
    printf("word = %s; offset = %zu; hash = %zu\n", value, offset, result);
    
    return result;
}

bool hashtable_element_search(element * hashtable, char key[], found_element * result)
{    
    if ((hashtable != NULL) and (key != NULL))
    {
        puts("SEARCHING...");
        
        element found;
        size_t offset = 0, index;
        
        do        
            if (strcmp((found = hashtable[index = hash(key, offset++)]).word, key) == 0)    
            {    
                *result = (found_element) { index, found.count };
                
                printf("FOUND ELEMENT WORD = \"%s\" WITH INDEX = %zu AND COUNT = %zu\n", found.word, index, found.count);
                
                return true;            
            }
        while(offset != hashtable_prime_size);
    }
    else
        puts("NULL table pointer or NULL key pointer passed to search.");
    
    return false;
}

bool hashtable_size_increase(element * hashtable, size_t rounds) // rounds is a number of prime hashtable size rises
{    
    if (hashtable != NULL and rounds > 0)
        {    
            printf("INCREASING HASHTABLE USING %zu ROUNDS...", rounds);
            
            size_t new_size, offset = 0, index = 0, saved_size = 0;
        
            while(rounds-- != 0)
                saved_size = new_size = get_next_prime_number(hashtable_prime_size);
                
            printf("NEW PRIME SIZE = %zu\n", saved_size);
            
            element new_hashtable[new_size];
            memset(&new_hashtable, 0, sizeof(new_hashtable));
            hashtable_prime_size = new_size;
            new_hashtable[--new_size] = (element) { LAST_ELEMENT, 0 };
                        
            // cleaning array
            while(--new_size > 0)
                new_hashtable[new_size] = (element) { EMPTY_ELEMENT, 0 };
                
            new_hashtable[new_size] = (element) { EMPTY_ELEMENT, 0 };
            element found;    
            
            puts("FINDING EMPTY ELEMENT...");
            
            // empty element in new hashtable already exists
            while(strcmp((found = hashtable[new_size++]).word, LAST_ELEMENT) != 0)
                {    
                    printf("FINDING PLACE FOR WORD = \"%s\" WITH COUNT = %zu\n", found.word, found.count);
                     
                    if (strcmp(found.word, EMPTY_ELEMENT) == 0) // pass
                    {
                        puts("PASSED BECAUSE OF EMPTY");
                        
                        continue;
                    }
                    else
                    {
                        while(strcmp(new_hashtable[index = hash(found.word, offset++)].word, EMPTY_ELEMENT) != 0);
                        new_hashtable[index] = found;    
                        
                        printf("POSTED WITH INDEX = %zu INTO TEMP TABLE\n", index);
                    }
                }
            
            if(realloc(hashtable, saved_size * sizeof(element)))
            {
                memset(&hashtable, 0, sizeof(hashtable));
                
                if (memcpy(hashtable, new_hashtable, sizeof(new_hashtable)))
                {
                    puts("COPIED INTO NEW MEMORY LOCATION");
                
                    return true; // success
                }
            }
        }
        else
            puts("NULL pointer to hashtable or incorrect rounds count passed to increase function.");
            
    return false;
}

insert_status hashtable_element_insert(element * hashtable, element new_element)
{
    insert_status result = { false, true, 0 };
    
    if (new_element.word != NULL and strcmp(new_element.word, LAST_ELEMENT) != 0 and strcmp(new_element.word, EMPTY_ELEMENT) != 0) //validation
    {
        puts("INSERTING...");
        
        found_element search_output;
        result.is_error = false;
        
        if (hashtable_element_search(hashtable, new_element.word, &search_output))
        {    
            result.index = search_output.index;
            
            printf("STEP 1. FOUND AT INDEX = %zu. NOT INSERTED.", result.index);
            
            return result; // element already exists        
        }
        
        if (!hashtable_element_search(hashtable, EMPTY_ELEMENT, &search_output)) // key not found
        {    
            puts("STEP 2. EMPTY ELEMENT NOT FOUND.");
            
            hashtable_size_increase(hashtable, 1); // reconstruct hashtable to add empty elements            
            hashtable_element_search(hashtable, EMPTY_ELEMENT, &search_output);        
        }
        
        hashtable[search_output.index] = new_element; // rewrite empty element
        result.result = true;        
        
        printf("NEW ELEMENT WORD = \"%s\" FOUND WITH INDEX = %zu\n", new_element.word, search_output.index);
        
        return result;        
    }
    else
        puts("Incorrect new element word.");
    
    return result;
}

void hashtable_elements_fill(FILE * textfile, element * hashtable)
{    
    char word[MAX_WORD_LENGTH_PRIME];
    word[0] = '\0';
    bool is_word = false;
    char input;
    
    while(!feof(textfile) and !ferror(textfile))
    {
        input = getc(textfile);
        
        puts("FILLING...");
        
        if (!isspace(input)) // append to a whole UTF-8 word
        {            
            is_word = true;            
            strncat(word, &input, 1);                                     
        }
        else if (is_word)
        {            
            element new_element = { "", 1 };
            strcpy(new_element.word, word);
            
            puts("===WORD===");
            puts(word);
            puts("===END===");   
            
            insert_status output = hashtable_element_insert(hashtable, new_element);
            
            printf("INSERTED INDEX = %zu\n", output.index);
            
            if(output.is_error)
            {
                puts("OUTPUT.IS_ERROR = TRUE");
                
                continue; // invalid word
            }
            else if (!output.result)        
            {
                puts("OUTPUT.RESULT = FALSE");
                
                ++hashtable[output.index].count; // increasing found word counter    
            }
            
            word[0] = '\0';
            is_word = false;            
        }
    }    
    
    if (ferror(textfile))    
        perror("File reading error occured.");    
}

int main(int params_count, char *params[])
{            
    srand(time(NULL)); // setup randomizer

    if (!check_params(params_count, params))
    {
        puts("Params check failed. Program terminated.");
        
        FAIL;
    }
    
    char *filename;
    
    filename = params[1];                            
    FILE *textfile = fopen(filename, "r");

    if(textfile == NULL)           
    {    
        puts("Cannot read input file. Program terminated.");
        
        FAIL;
    }
    printf("FILENAME = %s", filename);
    recalculate_remainders();
    printf("HASHTABLE SIZE = %zu", MIN_PRIME_NUMBER * sizeof(element));
    element * hashtable = malloc(MIN_PRIME_NUMBER * sizeof(element)); 
    
    if (hashtable)
    {
        size_t counter = MIN_PRIME_NUMBER;
        hashtable[--counter] = (element){ LAST_ELEMENT, 0 };
        
        while(counter > 0)
            hashtable[--counter] = (element){ EMPTY_ELEMENT, 0 };
        // there are may be no words in a file at all (end element with NULL string always stored for barrier search)
        puts("===HASHTABLE CONTENT===");
        hashtable_content_print(hashtable);
        puts("===END===");
        hashtable_elements_fill(textfile, hashtable); 
        puts("===HASHTABLE CONTENT===");
        hashtable_content_print(hashtable);
        puts("===END===");
        free(hashtable);             
     }
     else
         perror("Cannot assign memory location.");
     
       fclose(textfile);       
    
    return EXIT_SUCCESS;
}

Trace from stdout:

$ ./a.out input.txt
FILENAME = input.txtHASHTABLE SIZE = 528===HASHTABLE CONTENT===
<EMPTY_SYMBOL> count is 0
<END_OF_STRING> count is 0
===END===
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
qwerty
===END===
INSERTING...
SEARCHING...
word = qwerty; offset = 0; hash = 0
word = qwerty; offset = 1; hash = 1
SEARCHING...
word = <EMPTY_SYMBOL>; offset = 0; hash = 1
word = <EMPTY_SYMBOL>; offset = 1; hash = 0
FOUND ELEMENT WORD = "<EMPTY_SYMBOL>" WITH INDEX = 0 AND COUNT = 0
NEW ELEMENT WORD = "qwerty" FOUND WITH INDEX = 0
INSERTED INDEX = 0
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
qwerty
===END===
INSERTING...
SEARCHING...
word = qwerty; offset = 0; hash = 0
FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 1
STEP 1. FOUND AT INDEX = 0. NOT INSERTED.INSERTED INDEX = 0
OUTPUT.RESULT = FALSE
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
qwerty
===END===
INSERTING...
SEARCHING...
word = qwerty; offset = 0; hash = 0
FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 2
STEP 1. FOUND AT INDEX = 0. NOT INSERTED.INSERTED INDEX = 0
OUTPUT.RESULT = FALSE
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
ytrewq
===END===
INSERTING...
SEARCHING...
word = ytrewq; offset = 0; hash = 1
word = ytrewq; offset = 1; hash = 0
SEARCHING...
word = <EMPTY_SYMBOL>; offset = 0; hash = 1
word = <EMPTY_SYMBOL>; offset = 1; hash = 0
STEP 2. EMPTY ELEMENT NOT FOUND.
INCREASING HASHTABLE USING 1 ROUNDS...NEW PRIME SIZE = 3
FINDING EMPTY ELEMENT...
FINDING PLACE FOR WORD = "qwerty" WITH COUNT = 3
word = qwerty; offset = 0; hash = 0
POSTED WITH INDEX = 0 INTO TEMP TABLE
Segmentation fault (core dumped)

I am writing a program that will be able to count words in a textfile input.txt with content:

qwerty qwerty qwerty ytrewq qazwsx hi dasf

Compilation:

gcc main.c -Wall -Wextra -pedantic -std=c11

I did not understand where is the bug... Now, I've posted all code here because of it. My program crashes after first resizement. Please, tell anything about.

**Salem** · 11-17-2022

The first thing to do is compile with the -g flag.

Then run the code in the debugger.

Code:

$ gdb -q ./a.out
Reading symbols from ./a.out...
(gdb) run input.txt
Starting program: ./a.out input.txt
FILENAME = input.txtHASHTABLE SIZE = 528===HASHTABLE CONTENT===
===END===
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
qwerty
===END===
INSERTING...
SEARCHING...
word = qwerty; offset = 0; hash = 0
word = qwerty; offset = 1; hash = 1
SEARCHING...
word = <EMPTY_SYMBOL>; offset = 0; hash = 1
word = <EMPTY_SYMBOL>; offset = 1; hash = 0
FOUND ELEMENT WORD = "<EMPTY_SYMBOL>" WITH INDEX = 0 AND COUNT = 0
NEW ELEMENT WORD = "qwerty" FOUND WITH INDEX = 0
INSERTED INDEX = 0
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
qwerty
===END===
INSERTING...
SEARCHING...
word = qwerty; offset = 0; hash = 0
FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 1
STEP 1. FOUND AT INDEX = 0. NOT INSERTED.INSERTED INDEX = 0
OUTPUT.RESULT = FALSE
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
qwerty
===END===
INSERTING...
SEARCHING...
word = qwerty; offset = 0; hash = 0
FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 2
STEP 1. FOUND AT INDEX = 0. NOT INSERTED.INSERTED INDEX = 0
OUTPUT.RESULT = FALSE
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
ytrewq
===END===
INSERTING...
SEARCHING...
word = ytrewq; offset = 0; hash = 0
word = ytrewq; offset = 1; hash = 1
SEARCHING...
word = <EMPTY_SYMBOL>; offset = 0; hash = 1
word = <EMPTY_SYMBOL>; offset = 1; hash = 0
STEP 2. EMPTY ELEMENT NOT FOUND.
INCREASING HASHTABLE USING 1 ROUNDS...NEW PRIME SIZE = 3
FINDING EMPTY ELEMENT...
FINDING PLACE FOR WORD = "qwerty" WITH COUNT = 3
word = qwerty; offset = 0; hash = 2
word = qwerty; offset = 1; hash = 0
POSTED WITH INDEX = 0 INTO TEMP TABLE

Program received signal SIGSEGV, Segmentation fault.
__memmove_sse2_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:416
416	../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S: No such file or directory.
(gdb) bt
#0  __memmove_sse2_unaligned_erms () at ../sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S:416
#1  0x0000555555555d47 in hashtable_size_increase (hashtable=0x0, rounds=18446744073709551615) at bar.c:191
#2  0x0000555555555e93 in hashtable_element_insert (hashtable=0x55555555a890, new_element=...) at bar.c:229
#3  0x000055555555608c in hashtable_elements_fill (textfile=0x55555555a2a0, hashtable=0x55555555a890) at bar.c:273
#4  0x0000555555556389 in main (params_count=2, params=0x7fffffffdf48) at bar.c:337
(gdb) frame 1
#1  0x0000555555555d47 in hashtable_size_increase (hashtable=0x0, rounds=18446744073709551615) at bar.c:191
191	                if (memcpy(hashtable, new_hashtable, sizeof(new_hashtable)))
(gdb) list
186	            
187	            if(realloc(hashtable, saved_size * sizeof(element)))
188	            {
189	                memset(&hashtable, 0, sizeof(hashtable));
190	                
191	                if (memcpy(hashtable, new_hashtable, sizeof(new_hashtable)))
192	                {
193	                    puts("COPIED INTO NEW MEMORY LOCATION");
194	                
195	                    return true; // success
(gdb) p hashtable
$1 = (element *) 0x0

The first obvious problem is you're using realloc wrong.
The second obvious problem is your memset &ptr is trashing the pointer itself (and some data following the pointer).

realloc may move the memory to a new location. If it does, you lose.

What you should do is

Code:

void *temp = realloc(hashtable, saved_size * sizeof(element));
if ( temp ) {
  hashtable = temp;
} else {
  // backup plan
  free(hashtable);
  exit(1);
}

Which brings us to the third problem.
hashtable is a value parameter to hashtable_size_increase, so if realloc does move the memory, the caller of the function will never know about it.

**Wido** · 11-17-2022

It is not clear in the part of memset meaning. Is it really that I can assign struct field by field using strcpy if it contains strings only? I've added some perfect magic with your recommendation interpretation and there is infinite loop. I will investigate...

**Wido** · 11-18-2022

#include <stdio.h>#include <stdlib.h> #include <stdbool.h>#include <iso646 - Pastebin.com - tag CODE got an error.

FILENAME = input.txtHASHTABLE SIZE = 528===HASHTABLE CONTENT===
HASHTABLE PRIME SIZE = 2
<EMPTY_SYMBOL> count is 0
<END_OF_STRING> count is 0
===END===
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
qwerty
===END===
INSERTING...
SEARCHING...
word = qwerty; offset = 0; hash = 1
PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "qwerty"
word = qwerty; offset = 1; hash = 0
PROBE OF ELEMENT WORD = "<EMPTY_SYMBOL>" WITH INDEX = 0 AND KEY = "qwerty"
SEARCHING...
word = <EMPTY_SYMBOL>; offset = 0; hash = 1
PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "<EMPTY_SYMBOL>"
word = <EMPTY_SYMBOL>; offset = 1; hash = 0
FOUND ELEMENT WORD = "<EMPTY_SYMBOL>" WITH INDEX = 0 AND COUNT = 0
NEW ELEMENT WORD = "qwerty" FOUND WITH INDEX = 0
OUTPUT INDEX = 0
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
qwerty
===END===
INSERTING...
SEARCHING...
word = qwerty; offset = 0; hash = 1
PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "qwerty"
word = qwerty; offset = 1; hash = 0
FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 1
STEP 1. FOUND AT INDEX = 0. NOT INSERTED.
OUTPUT INDEX = 0
OUTPUT.RESULT = FALSE. INCREASING COUNT...
COUNT BEFORE = 1
COUNT AFTER = 2
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
qwerty
===END===
INSERTING...
SEARCHING...
word = qwerty; offset = 0; hash = 1
PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "qwerty"
word = qwerty; offset = 1; hash = 0
FOUND ELEMENT WORD = "qwerty" WITH INDEX = 0 AND COUNT = 2
STEP 1. FOUND AT INDEX = 0. NOT INSERTED.
OUTPUT INDEX = 0
OUTPUT.RESULT = FALSE. INCREASING COUNT...
COUNT BEFORE = 2
COUNT AFTER = 3
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
ytrewq
===END===
INSERTING...
SEARCHING...
word = ytrewq; offset = 0; hash = 0
PROBE OF ELEMENT WORD = "qwerty" WITH INDEX = 0 AND KEY = "ytrewq"
word = ytrewq; offset = 1; hash = 1
PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "ytrewq"
SEARCHING...
word = <EMPTY_SYMBOL>; offset = 0; hash = 1
PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "<EMPTY_SYMBOL>"
word = <EMPTY_SYMBOL>; offset = 1; hash = 0
PROBE OF ELEMENT WORD = "qwerty" WITH INDEX = 0 AND KEY = "<EMPTY_SYMBOL>"
STEP 2. EMPTY ELEMENT NOT FOUND.
INCREASING HASHTABLE USING 1 ROUNDS...
===HASHTABLE CONTENT BEFORE RESIZEMENT===
HASHTABLE PRIME SIZE = 2
qwerty count is 3
<END_OF_STRING> count is 0
===END===
NEW PRIME SIZE = 3
FINDING EMPTY ELEMENT...
FINDING PLACE FOR WORD = "qwerty" WITH COUNT = 3
word = qwerty; offset = 0; hash = 1
TEMP TABLE WORD = "<EMPTY_SYMBOL>" AT INDEX = 1
POSTED WORD = "qwerty" WITH INDEX = 1 AND COUNT = 3 INTO TEMP TABLE
===OLD HASHTABLE CONTENT===
HASHTABLE PRIME SIZE = 2
qwerty count is 3
<END_OF_STRING> count is 0
===END===
===TEMPORARY HASHTABLE CONTENT===
HASHTABLE PRIME SIZE = 3
ZUUU count is 3
<END_OF_STRING> count is 0
count is 0
===END===
COPIED INTO NEW MEMORY LOCATION
===NEW HASHTABLE CONTENT===
HASHTABLE PRIME SIZE = 3
<EMPTY_SYMBOL> count is 0
qwerty count is 3
<END_OF_STRING> count is 0
===END===
SEARCHING...
word = <EMPTY_SYMBOL>; offset = 0; hash = 1
PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "<EMPTY_SYMBOL>"
word = <EMPTY_SYMBOL>; offset = 1; hash = 0
PROBE OF ELEMENT WORD = "ZUUU" WITH INDEX = 0 AND KEY = "<EMPTY_SYMBOL>"
NEW ELEMENT WORD = "ytrewq" FOUND WITH INDEX = 0
OUTPUT INDEX = 0
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
FILLING...
===WORD===
qazwsx
===END===
INSERTING...
SEARCHING...
word = qazwsx; offset = 0; hash = 1
PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "qazwsx"
word = qazwsx; offset = 1; hash = 0
PROBE OF ELEMENT WORD = "ytrewq" WITH INDEX = 0 AND KEY = "qazwsx"
SEARCHING...
word = <EMPTY_SYMBOL>; offset = 0; hash = 1
PROBE OF ELEMENT WORD = "<END_OF_STRING>" WITH INDEX = 1 AND KEY = "<EMPTY_SYMBOL>"
word = <EMPTY_SYMBOL>; offset = 1; hash = 0
PROBE OF ELEMENT WORD = "ytrewq" WITH INDEX = 0 AND KEY = "<EMPTY_SYMBOL>"
STEP 2. EMPTY ELEMENT NOT FOUND.
INCREASING HASHTABLE USING 1 ROUNDS...
===HASHTABLE CONTENT BEFORE RESIZEMENT===
HASHTABLE PRIME SIZE = 2
ytrewq count is 1
<END_OF_STRING> count is 0
===END===
NEW PRIME SIZE = 3
FINDING EMPTY ELEMENT...
FINDING PLACE FOR WORD = "ytrewq" WITH COUNT = 1
word = ytrewq; offset = 0; hash = 0
TEMP TABLE WORD = "<EMPTY_SYMBOL>" AT INDEX = 0
POSTED WORD = "ytrewq" WITH INDEX = 0 AND COUNT = 1 INTO TEMP TABLE
===OLD HASHTABLE CONTENT===
HASHTABLE PRIME SIZE = 2
ytrewq count is 1
<END_OF_STRING> count is 0
===END===
free(): double free detected in tcache 2

Program received signal SIGABRT, Aborted.
__pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351468864) at ./nptl/pthread_kill.c:44
44 ./nptl/pthread_kill.c: No such file or directory.
(gdb) bt
#0 __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351468864) at ./nptl/pthread_kill.c:44
#1 __pthread_kill_internal (signo=6, threadid=140737351468864) at ./nptl/pthread_kill.c:78
#2 __GI___pthread_kill (threadid=140737351468864, signo=signo@entry=6) at ./nptl/pthread_kill.c:89
#3 0x00007ffff7db9476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4 0x00007ffff7d9f7f3 in __GI_abort () at ./stdlib/abort.c:79
#5 0x00007ffff7e006f6 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff7f52b8c "%s\n") at ../sysdeps/posix/libc_fatal.c:155
#6 0x00007ffff7e17d7c in malloc_printerr (str=str@entry=0x7ffff7f55710 "free(): double free detected in tcache 2") at ./malloc/malloc.c:5664
#7 0x00007ffff7e1a12b in _int_free (av=0x7ffff7f90c80 <main_arena>, p=0x55555555a880, have_lock=1) at ./malloc/malloc.c:4473
#8 0x00007ffff7e1bc5b in _int_realloc (av=av@entry=0x7ffff7f90c80 <main_arena>, oldp=oldp@entry=0x55555555a880, oldsize=oldsize@entry=544, nb=nb@entry=800)
at ./malloc/malloc.c:4900
#9 0x00007ffff7e1c989 in __GI___libc_realloc (oldmem=0x55555555a890, bytes=792) at ./malloc/malloc.c:3485
#10 0x0000555555555fb9 in hashtable_size_increase (hashtable=0x55555555a890, rounds=0, hashtable_prime_size=2) at main.c:226
#11 0x0000555555556242 in hashtable_element_insert (hashtable=0x55555555a890, new_element=..., hashtable_prime_size=2) at main.c:287
#12 0x0000555555556507 in hashtable_elements_fill (textfile=0x55555555a2a0, hashtable=0x55555555a890, hashtable_prime_size=2) at main.c:345
#13 0x00005555555568f2 in main (params_count=2, params=0x7fffffffde28) at main.c:421
(gdb) frame 9
#9 0x00007ffff7e1c989 in __GI___libc_realloc (oldmem=0x55555555a890, bytes=792) at ./malloc/malloc.c:3485
3485 ./malloc/malloc.c: No such file or directory.
(gdb) frame 10
#10 0x0000555555555fb9 in hashtable_size_increase (hashtable=0x55555555a890, rounds=0, hashtable_prime_size=2) at main.c:226

Reallocation fails. Why?

**Salem** · 11-18-2022

> Reallocation fails. Why?
Simply put, you trashed memory.

Your 100+ line monster function isn't doing you any favours in terms of s/w understanding.

> element new_hashtable[new_size];
If you're doing this, then you may as well malloc the new size to begin with, and save creating a local array (which risks running out of stack space way before you run out of memory).

Code:

			memset(&new_hashtable, 0, new_size * sizeof(element)); 
			old_size = hashtable_prime_size;
			result.new_size = new_size; 
			printf("NEW PRIME SIZE = %zu\n", new_size);
			new_hashtable[--new_size].count = 0;
			new_hashtable[new_size].word[0] = '\0';
			strcpy(new_hashtable[new_size].word, LAST_ELEMENT);
						
			// cleaning array
			while(--new_size > 0)
			{				
				new_hashtable[new_size].count = 0;
				new_hashtable[new_size].word[0] = '\0';
				strcpy(new_hashtable[new_size].word, EMPTY_ELEMENT);
			}
				
			new_hashtable[new_size].count = 0;
			new_hashtable[new_size].word[0] = '\0';
			strcpy(new_hashtable[new_size].word, EMPTY_ELEMENT);

Much of this "cleaning" is unnecessary, with the memset.

Code:

typedef struct {
    element *table;
    size_t   size;
    // any other members, like your 'rounds', and 'hashtable_prime_size'?
} hashTable;

Pass around a pointer to one of these.

Code:

increase_status hashtable_size_increase(hashTable *hashtable) {
    new_size = get_next_prime_number(hashtable_prime_size);

    hashTable newTable;
    newTable.table = malloc(new_size * sizeof(element) );
    memset( newTable.table, 0, new_size * sizeof(element) );
    for(int i = 0 ; i < new_size-1 ; i++ ) {
        strcpy(newTable[i].word, EMPTY_ELEMENT);
    }
    strcpy(newTable[new_size-1].word, LAST_ELEMENT);

    // rehash all elements of hashtable->table into newTable.table

    // free and reassign
    free( hashtable->table );
    *hashtable = newTable;
}

Thread: Non-whitespace in-file sequences counter using own hashtable

Thread Tools

Search Thread

Display

Non-whitespace in-file sequences counter using own hashtable

Similar Threads

Identify and count word sequences within a text file

Searching for a string in txt file with whitespace!

Page File counter and Private Bytes Counter

Interpreting literal escape sequences from a file...

parsing a FASTA file (protein sequences)

Tags for this Thread