-
implementing a hashmap
Hello,
I want to implement a hashmap and get a segfault again when testing my program. It seems that I cannot access as it throws the segmentation fault error. I suspect that I should use
Code:
Entry* test = (Entry*) malloc(sizeof(Entry));
instead. Why does it not work without malloc()?
Code:
#include <stdio.h>
#define TABLE_SIZE 16
typedef int Data;
typedef struct entry Entry;
struct entry {
Data value;
unsigned int hash;
Entry* next;
};
void insert_into(Entry* table[], Data data){
Entry entry = {data, 0, NULL};
entry.hash = ((unsigned int) &entry) % TABLE_SIZE;
if (table[entry.hash] == NULL){
table[entry.hash] = &entry;
}
}
int main(){
Entry* test = {1,0,NULL};
printf("%p\n", test);
test->hash = (unsigned int) test;
printf("%d\n", test->hash);
Data data_points[5] = {3,5,2,12,9};
Entry* lookup_table[TABLE_SIZE];
for (int i = 0; i < 5; i++){
insert_into(lookup_table, data_points[i]);
}
for (int i = 0; i < TABLE_SIZE; i++)
printf("%i = %s\n", i, lookup_table[i]->value);
return 0;
}
-
a new attempt
Hello,
I now simply set the hash value to the same as the data value and can change that later. The array does not contain duplicates.
When I run the debugger, I get this:
Code:
Breakpoint 1, insert_into (table=0x7fffffffdd70, data=3) at myHashmap.c:16warning: Source file is more recent than executable.
16 Entry* entry = (Entry*)malloc(sizeof(Entry));
(gdb) n
17 entry->hash = data;
(gdb) n
18 entry->value = data;
(gdb) n
19 table[entry->hash] = entry;
(gdb) p entry->hash
$1 = 3
(gdb) p table[entry->hash]
$2 = (Entry *) 0x1
(gdb) n
20 entry->next = NULL;
(gdb) p table[entry->hash]
$3 = (Entry *) 0x5555555592a0
I wonder how the value 0x1 gets determined. This is my code:
Code:
#include <stdio.h>#include <stdlib.h>
#define TABLE_SIZE 16
typedef int Data;
typedef struct entry Entry;
struct entry {
Data value;
unsigned int hash;
Entry* next;
};
void insert_into(Entry* table[], Data data){
Entry* entry = (Entry*)malloc(sizeof(Entry));
entry->hash = data;
entry->value = data;
table[entry->hash] = entry;
entry->next = NULL;
}
int main(){
/**
Entry* test = (Entry*) malloc(sizeof(Entry));
printf("%p\n", test);
test->hash = (unsigned int) test;
printf("%d\n", test->value);
**/
Data data_points[5] = {3,5,2,12,9};
Entry* lookup_table[TABLE_SIZE];
for (int i = 0; i < 5; i++){
insert_into(lookup_table, data_points[i]);
}
for (int i = 0; i < TABLE_SIZE; i++)
printf("%d = %d\n", i, lookup_table[i]->value);
return 0;
}
-
> table[entry->hash] = entry;
This should be
table[entry->hash % TABLE_SIZE] = entry;
And then you need to deal with duplicates.
Also, don't cast the return result of malloc. There is no need to do that in C.
-
changed what you told me
So now I have corrected my mistakes. For some reason I get a segfault when trying to print the contents of the lookup table. Can anyone tell me why?
Code:
#include <stdio.h>
#include <stdlib.h>
#define TABLE_SIZE 16
typedef int Data;
typedef struct entry Entry;
struct entry {
Data value;
unsigned int hash;
Entry* next;
};
void insert_into(Entry* table[], Data data){
Entry* entry = malloc(sizeof(Entry));
entry->hash = data;
entry->value = data;
entry->next = NULL;
if (table[entry->hash % TABLE_SIZE] == NULL) {
table[entry->hash % TABLE_SIZE] = entry;
return;
}
Entry* it = table[entry->hash % TABLE_SIZE];
for (; it != NULL; it = it->next)
;
it->next = entry;
}
int main(){
Data data_points[5] = {3,5,2,12,9};
Entry* lookup_table[TABLE_SIZE];
for (int i = 0; i < TABLE_SIZE; i++){
lookup_table[i] = NULL;
}
for (int i = 0; i < 5; i++){
insert_into(lookup_table, data_points[i]);
}
// a segmentation fault occurs here, why?
for (int i = 0; i < TABLE_SIZE; i++)
printf("%d = %d\n", i, lookup_table[i]->value);
return 0;
}
-
Because not every table entry has an entry.
And some have more than one.
Code:
if ( lookup_table[i] ) {
printf("%d = %d\n", i, lookup_table[i]->value);
// this is now a list, traverse it.
}
-
@Salem's got it right.
Also, in your insert code you should consider just dropping the new entry at the front of the linked list:
Code:
entry->next = lookup_table[i];
That way it doesn't matter whether the existing value is NULL or not. And it makes the most-recently inserted value the fastest to retrieve, if that's worth anything.