Thread: problem with implementing trie data structure with dynamic array

  1. #1
    Registered User
    Join Date
    Jun 2013
    Posts
    3

    problem with implementing trie data structure with dynamic array

    hi friends ,
    i can't implement trie with dynamic array .
    the problem is in this line i think :
    Code:
    childs_size = (node_p -> childs_value_p)[0] + 1;
    here is my code :
    Code:
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    
    struct trie_node {
        unsigned char *childs_value_p;    // pointer to an array of child nodes value
        struct trie_node **childs_ptp;    // pointer to an array of child nodes pointer
        struct trie_node *failure_node_p; // pointer to failure node
    };
    
    struct trie_node *init_trie_node();
    struct trie_node *add_to_trie(struct trie_node *root_p, unsigned char *value_p);
    struct trie_node *search_trie(struct trie_node *root_p, unsigned char *value_p);
    int add_to_childs(struct trie_node *node_p, unsigned char value);
    void shift_childs_to_right(struct trie_node *node_p, int value_index, int childs_size);
    int specific_binary_search(unsigned char *list_p, unsigned char target, int low, int high);
    int read_db(char *db_file_name, struct trie_node *root_p);
    
    int count_nodes = 0;
    
    #define SIGN_SIZE 16
    
    #define V_INIT_SIZE 2
    #define P_INIT_SIZE 1
    
    void print_arr(char *arr)
    {
        int i = 0;
    
        while(i <= arr[0] + 1)
        {
            if(i == 0)
                printf("%d, ", arr[i ++]);
            else
                printf("%c, ", arr[i ++]);
        }
        printf("\n");
    }
    
    int main()
    {
        struct trie_node *root_p = NULL;
    
        root_p = init_trie_node();
    
    //    read_db("my_db.txt", root_p);
    
        root_p = add_to_trie(root_p, "amir");
        root_p = add_to_trie(root_p, "amin");
        root_p = add_to_trie(root_p, "ali");
        root_p = add_to_trie(root_p, "all");
        root_p = add_to_trie(root_p, "mina");
        root_p = add_to_trie(root_p, "mitra");
        root_p = add_to_trie(root_p, "bita");
        root_p = add_to_trie(root_p, "melika");
        root_p = add_to_trie(root_p, "ahmad");
    
        getchar();
    
        printf("%.2f", (count_nodes * (sizeof(struct trie_node) + sizeof(char) + sizeof(struct trie_node *))) / (1000 * 1024 * 1.0));
    
        return 0;
    }
    
    struct trie_node *init_trie_node()
    {
        struct trie_node *new_p = NULL;
    
        new_p = (struct trie_node *) malloc(sizeof(struct trie_node));
        if(new_p == NULL)
        {
            return NULL;
        }
        new_p -> childs_value_p = NULL;
        new_p -> childs_ptp = NULL;
        new_p -> failure_node_p = NULL;
        return new_p;
    }
    
    
    struct trie_node *add_to_trie(struct trie_node *root_p, unsigned char *value_p)
    {
        int i = 0;
        struct trie_node *root_temp_p = root_p;
        int p_index = 0;
        unsigned char c = 0;
    
        while((c = value_p[i]) != '\0')
        {
            p_index = add_to_childs(root_temp_p, c);
    //        if(p_index != -1)
    //        {
    //            if(i == 15)
    //            {
    //                free((root_temp_p -> childs_ptp)[p_index]);
    //                free(root_temp_p -> childs_ptp);
    //            }
    //        }
    //        else
    //        {
    //            continue;
    //        }
    //        print_arr(root_temp_p -> childs_value_p);
            root_temp_p = (root_temp_p -> childs_ptp)[p_index];
            i ++;
        }
        printf("\n");
        return root_p;
    }
    
    int add_to_childs(struct trie_node *node_p, unsigned char value)
    {
        int childs_size = 0;
        int value_index = 0;
        unsigned char *new_childs_value_p = NULL;
        struct trie_node **new_childs_ptp = NULL;
    
        if(node_p -> childs_value_p == NULL)
        {
            new_childs_value_p = (unsigned char *) malloc(V_INIT_SIZE * sizeof(unsigned char));
            new_childs_ptp = (struct trie_node **) malloc(P_INIT_SIZE * sizeof(struct trie_node *));
            if(new_childs_value_p != NULL && new_childs_ptp != NULL)
            {
                count_nodes ++;
                node_p -> childs_value_p = new_childs_value_p;
                node_p -> childs_ptp = new_childs_ptp;
                (node_p -> childs_value_p)[0] = 0;
                (node_p -> childs_value_p)[1] = value;
                (node_p -> childs_ptp)[0] = init_trie_node();
                return 0;
            }
            return -1;
        }
        childs_size = (node_p -> childs_value_p)[0] + 1;
        value_index = specific_binary_search(node_p -> childs_value_p, value, 1, childs_size);
        if(value_index <= childs_size && (node_p -> childs_value_p)[value_index] == value)
        {
            return value_index - 1;
        }
        new_childs_value_p = (unsigned char *) malloc((childs_size + 2) * sizeof(unsigned char));
        new_childs_ptp = (struct trie_node **) malloc((childs_size + 1) * sizeof(struct trie_node *));
        if(new_childs_value_p != NULL && new_childs_ptp != NULL)
        {
            count_nodes ++;
            memcpy(new_childs_value_p, node_p -> childs_value_p, childs_size + 1);
            memcpy(new_childs_ptp, node_p -> childs_ptp, childs_size);
            free(node_p -> childs_value_p);
            free(node_p -> childs_ptp);
            node_p -> childs_value_p = new_childs_value_p;
            node_p -> childs_ptp = new_childs_ptp;
            shift_childs_to_right(node_p, value_index, childs_size);
            (node_p -> childs_value_p)[0] ++;
            (node_p -> childs_value_p)[value_index] = value;
            (node_p -> childs_ptp)[value_index - 1] = init_trie_node();
            return value_index - 1;
        }
        return -1;
    }
    
    void shift_childs_to_right(struct trie_node *node_p, int value_index, int childs_size)
    {
        while(value_index <= childs_size)
        {
            (node_p -> childs_value_p)[childs_size + 1] = (node_p -> childs_value_p)[childs_size];
            (node_p -> childs_ptp)[childs_size] = (node_p -> childs_ptp)[childs_size - 1];
            childs_size --;
        }
    }
    
    int specific_binary_search(unsigned char *list_p, unsigned char target, int low, int high)
    {
        int middle = 0;
    
        while(low <= high)
        {
            middle = (high + low) / 2;
            if(target <= *(list_p + middle))
            {
                high = middle - 1;
            }
            else
            {
                low = middle + 1;
            }
        }
        return low;
    }
    i can't debug it .
    i could write this code with binary tree instead of dynamic array but need a large amount of memory for about 13000000 strings of length 16 .
    is there any better solution with lower memory usage to implement trie ?
    thanks a lot , good luck .

  2. #2
    and the hat of int overfl Salem's Avatar
    Join Date
    Aug 2001
    Location
    The edge of the known universe
    Posts
    39,659
    Here's an example debug session using valgrind and gdb.
    Code:
    $ gcc -g -W -Wall -Wextra bar.c
    $ valgrind --db-attach=yes ./a.out
    ==2975== Memcheck, a memory error detector
    ==2975== Copyright (C) 2002-2010, and GNU GPL'd, by Julian Seward et al.
    ==2975== Using Valgrind-3.6.1-Debian and LibVEX; rerun with -h for copyright info
    ==2975== Command: ./a.out
    ==2975== 
    ==2975== Use of uninitialised value of size 8
    ==2975==    at 0x400935: add_to_childs (bar.c:118)
    ==2975==    by 0x4008B8: add_to_trie (bar.c:90)
    ==2975==    by 0x4007AE: main (bar.c:55)
    ==2975== 
    ==2975== 
    ==2975== ---- Attach to debugger ? --- [Return/N/n/Y/y/C/c] ---- y
    ==2975== starting debugger with cmd: /usr/bin/gdb -nw /proc/2978/fd/1024 2978
    GNU gdb (Ubuntu/Linaro 7.3-0ubuntu2) 7.3-2011.08
    Copyright (C) 2011 Free Software Foundation, Inc.
    License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>
    This is free software: you are free to change and redistribute it.
    There is NO WARRANTY, to the extent permitted by law.  Type "show copying"
    and "show warranty" for details.
    This GDB was configured as "x86_64-linux-gnu".
    For bug reporting instructions, please see:
    <http://bugs.launchpad.net/gdb-linaro/>...
    Reading symbols from /proc/2978/fd/1024...done.
    Attaching to program: /proc/2978/fd/1024, process 2978
    Reading symbols from /usr/lib/valgrind/vgpreload_core-amd64-linux.so...done.
    Loaded symbols for /usr/lib/valgrind/vgpreload_core-amd64-linux.so
    Reading symbols from /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so...done.
    Loaded symbols for /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so
    Reading symbols from /lib/x86_64-linux-gnu/libc.so.6...Reading symbols from /usr/lib/debug/lib/x86_64-linux-gnu/libc-2.13.so...done.
    done.
    Loaded symbols for /lib/x86_64-linux-gnu/libc.so.6
    Reading symbols from /lib64/ld-linux-x86-64.so.2...(no debugging symbols found)...done.
    Loaded symbols for /lib64/ld-linux-x86-64.so.2
    Failed to read a valid object file image from memory.
    0x0000000000400935 in add_to_childs (node_p=0x0, value=101 'e') at bar.c:118
    118     if(node_p -> childs_value_p == NULL)
    (gdb) info locals
    childs_size = 0
    value_index = 0
    new_childs_value_p = 0x0
    new_childs_ptp = 0x0
    (gdb) info args
    node_p = 0x0
    value = 101 'e'
    (gdb) where
    #0  0x0000000000400935 in add_to_childs (node_p=0x0, value=101 'e') at bar.c:118
    #1  0x00000000004008b9 in add_to_trie (root_p=0x51d2040, value_p=0x400d6c "melika") at bar.c:90
    #2  0x00000000004007af in main () at bar.c:55
    (gdb) list
    113     int childs_size = 0;
    114     int value_index = 0;
    115     unsigned char *new_childs_value_p = NULL;
    116     struct trie_node **new_childs_ptp = NULL;
    117 
    118     if(node_p -> childs_value_p == NULL)
    119     {
    120         new_childs_value_p = (unsigned char *) malloc(V_INIT_SIZE * sizeof(unsigned char));
    121         new_childs_ptp = (struct trie_node **) malloc(P_INIT_SIZE * sizeof(struct trie_node *));
    122         if(new_childs_value_p != NULL && new_childs_ptp != NULL)
    (gdb) up
    #1  0x00000000004008b9 in add_to_trie (root_p=0x51d2040, value_p=0x400d6c "melika") at bar.c:90
    90          p_index = add_to_childs(root_temp_p, c);
    (gdb) info locals
    i = 1
    root_temp_p = 0x0
    p_index = 2
    c = 101 'e'
    (gdb)
    Where
    $ gcc -g -W -Wall -Wextra bar.c
    The key flag here is -g, to compile with debug information. The tools make use of this to print out detailed stack trace and variable information.
    You should also fix any warnings that appear.

    $ valgrind --db-attach=yes ./a.out
    Run the program under control of valgrind (search the web for what it does), and launch the debugger when a problem is found.

    The gdb commands used are
    (gdb) info locals - to print all the local variables
    (gdb) info args - to print the function parameters
    (gdb) where - show the current stack trace
    (gdb) list - list the source code around the current instruction
    (gdb) up - navigate the stack (see also 'down' and 'frame n' for other stack navigation)

    Soon after, learn how to set breakpoints, and use step/next commands to step through the code in a controlled manner.

    For example, it all seems to start going wrong when this line is called from main
    > root_p = add_to_trie(root_p, "melika");
    So stopping there and then single-stepping the code, examining your data structure as you go seems like a good bet.
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper.

  3. #3
    young grasshopper jwroblewski44's Avatar
    Join Date
    May 2012
    Location
    Where the sidewalk ends
    Posts
    294
    Could you give us a description of what the code is attempting to do? And also brief descriptions for each function to give us a better idea of what is going on.

    And if you're having memory usage issues, might I suggest using a sorted list to store the strings. Open the sorted file containing the strings (or read it in somehow) and add the strings into the list as needed. That way, you only need to read in the string you are looking to manipulate into memory, saving on memory usage at a given moment.
    Last edited by jwroblewski44; 06-23-2013 at 04:36 PM.
    "Simplicity is the ultimate sophistication." - Leonardo da Vinci

  4. #4
    Registered User
    Join Date
    Jun 2013
    Posts
    3
    thanks a lot for your help ,
    how i can edit my post ?

  5. #5
    Algorithm Dissector iMalc's Avatar
    Join Date
    Dec 2005
    Location
    New Zealand
    Posts
    6,318
    You can't, it's more than 1 hour old. Just post a new update.
    My homepage
    Advice: Take only as directed - If symptoms persist, please see your debugger

    Linus Torvalds: "But it clearly is the only right way. The fact that everybody else does it some other way only means that they are wrong"

  6. #6
    Registered User MutantJohn's Avatar
    Join Date
    Feb 2013
    Posts
    2,665
    Huh, there's an example of this on Wikipedia but their structure takes up 216 bytes total. What does yours take up when it's fully malloc'd? I know it's 24 bytes as you have 3 pointers but how memory does your code use compared to this one: Trie - Wikipedia, the free encyclopedia

  7. #7
    Registered User
    Join Date
    Jun 2013
    Posts
    3
    Quote Originally Posted by MutantJohn View Post
    Huh, there's an example of this on Wikipedia but their structure takes up 216 bytes total. What does yours take up when it's fully malloc'd? I know it's 24 bytes as you have 3 pointers but how memory does your code use compared to this one: Trie - Wikipedia, the free encyclopedia
    That example only save 26 alphabetic characters but I want to save all of the 256 ASCII characters .
    My structures used 24 + (256 * 4) + 257 = 1305 when it is full .
    Thanks a lot .

  8. #8
    Registered User MutantJohn's Avatar
    Join Date
    Feb 2013
    Posts
    2,665
    So, in that Wikipedia example, it looked like this was the sort of thing you could use as a dictionary. Is that what the purpose of creating this trie is?

  9. #9
    young grasshopper jwroblewski44's Avatar
    Join Date
    May 2012
    Location
    Where the sidewalk ends
    Posts
    294
    I drove to Colorado from Wisconsin, hung out for two days, drove home, and you still haven't stated what your code is trying to accomplish. I'm not going to try to guess what you are trying to do. Also, describe what the different data members in your node struct are for and the purpose for each function you've written.

    Once you tell us what you are trying to do, we can begin to help.
    "Simplicity is the ultimate sophistication." - Leonardo da Vinci

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. Replies: 0
    Last Post: 11-23-2010, 01:36 AM
  2. dynamic array and structure problem
    By bluetxxth in forum C Programming
    Replies: 3
    Last Post: 04-13-2010, 06:56 AM
  3. Data Structure to store unlimited dynamic vectors
    By m3rk in forum C++ Programming
    Replies: 8
    Last Post: 04-22-2009, 06:12 AM
  4. Replies: 1
    Last Post: 06-07-2006, 09:42 AM
  5. Dynamic Data Structure -- Which one is better?
    By Yin in forum C++ Programming
    Replies: 0
    Last Post: 04-10-2002, 11:38 PM