Thread: Malloc causes segfault only for certain sizes

  1. #1
    Registered User
    Join Date
    Apr 2020
    Posts
    1

    Malloc causes segfault only for certain sizes

    I have a program that reads sentences from a file. The sentence has a delimiter that separates text and and an integer. Each word in the sentence is saved to a struct along with keeping count of whether the word was used in a positive or negative context.

    In the following code, I define the maximum number of rows to be read in size_t row_size. If row_size = 1024, no problem. If it is 4096, a segfault occurs. If it is 10000, no problem. Why is a segfault occurring only for certain sizes?

    Please note that the realloc in the following code for row_size does not occur since the file being read currently only has ~150 sentences.

    Code:
    // load sentiment data from filename
    int loadDataset(char *filename, struct sentiment_data *sd)
    {
        FILE *fp;
        fp = fopen(filename, "r");
        if(!fp)    //open file
            return 0;
        size_t row_size = 10000;
        char **row_str = NULL;
        if((row_str = malloc(row_size*sizeof(char*)))==NULL) {    //malloc array of row strings (sentences)
            fclose(fp);
            return 0;
        }
        int *row_int = NULL;
        if((row_int = malloc(row_size*sizeof(int)))==NULL) {    //malloc array of ints (each sentence sentiment)
            free(row_str);
            fclose(fp);
            return 0;
        }
        char buf[4096];
        size_t row_count = 0;
        unsigned int field_count = 0;
        sd->pos_sentence = 0;
        sd->neg_sentence = 0;
        while(fgets(buf, 4096, fp)) {    //get line from file
            char *field = strtok(buf, "+");    //delimit line with + symbol
            while(field) {
                if(field_count == 0) {    //handle sentence
                    if((row_str[row_count] = malloc((strlen(field)+1)*sizeof(char)))==NULL) {    //malloc for sentence
                        int j;
                        for(j=0; j<row_count; ++j)
                            free(row_str[j]);
                        free(row_str);
                        free(row_int);
                        fclose(fp);
                        return 0;
                    }
                    strcpy(row_str[row_count], field);    //add sentence to row str
                } else if(field_count == 1) {    //handle sentence sentiment
                    row_int[row_count] = atoi(field);    //add sentiment to row int
                    if(row_int[row_count] == 1) //add to sum of total pos/neg sentences
                        sd->pos_sentence +=1;
                    else
                        sd->neg_sentence +=1;
                }
                field = strtok(NULL, "+");    //delimit next line in file
                ++field_count;
            }
            ++row_count;
            field_count = 0;
            if(row_count >= row_size) {    //check if there is enough space for new sentences, if not, realloc
                row_size += 4096;
                char **row_str_tmp = NULL;
                int *row_int_tmp= NULL;
                if((row_str_tmp = realloc(row_str, row_size * sizeof(char*)))==NULL) {
                    int j;
                    for(j=0; j<row_count; ++j)
                        free(row_str[j]);
                    free(row_str);
                    free(row_int);
                    fclose(fp);
                    return 0;
                }
                if((row_int_tmp = realloc(row_int, row_size * sizeof(int)))==NULL) {
                    int j;
                    for(j=0; j<row_count; ++j)
                        free(row_str[j]);
                    free(row_str);
                    free(row_int);
                    fclose(fp);
                    return 0;
                }
                row_str = row_str_tmp;
                row_int = row_int_tmp;
            }
        }
        sd->word_count = 0;
        sd->word_max = 4096;
        if((sd->words = malloc(sd->word_max * sizeof(struct sentiment_word)))==NULL) {    //malloc sd->words
            int j;
            for(j=0; j<row_count; ++j)
                free(row_str[j]);
            free(row_str);
            free(row_int);
            fclose(fp);
            return 0;    
        }
        field_count = 0;
        unsigned int i;
        initdelims(sd->delims);    //initialize delimeters
        for(i=0; i<row_count; ++i) {
            char *field = strtok(row_str[i], sd->delims);    //delimit each string in row_str, get each word
            while(field) {
                if(strlen(field)>2) {    //only take words greater than 2 chars in size
                    char *p = field;
                    for ( ; *p; ++p) *p = tolower(*p);    //make word lowercase
                    int index;
                    if((index = wordExist(field, sd)) != -1) {    //check if word exists
                    //if((index = binaryWordExist(field, 0, sd->word_count, sd)) != -1) {
                        if(row_int[i] == 1)
                            ++sd->words[index].pos;    //if word is in positive sentence, add pos sentiment to word and vice versa
                        else
                            ++sd->words[index].neg;
                    } else {    //if word doesn't exist
                        unsigned long j = 0, k = 0;
                        while(sd->word_count > j && strcmp(field, sd->words[j++].word)>=0);    //get index j where to insert word
                        unsigned int times_to_shift = sd->word_count - j;
                        for(k=0; k<times_to_shift; ++k)    //move words for insertion
                            sd->words[sd->word_count-k] = sd->words[sd->word_count-k-1];
                        ++(sd->word_count);
                        if((sd->words[j].word = malloc((strlen(field)+1)*sizeof(char)))==NULL) { //malloc for word
                            int x;
                            for(x=0; x<row_count; ++x)
                                free(row_str[j]);
                            for(x=0; x<sd->word_count; ++x)
                                if(x!=j)
                                    free(sd->words[j].word);
                            free(sd->words);
                            free(row_str);
                            free(row_int);
                            fclose(fp);
                            return 0;
                        }
                        strcpy(sd->words[j].word, field);    //add the word
                        if(row_int[i] == 1) {    //set word sentiment
                            sd->words[j].pos = 1;
                            sd->words[j].neg = 0;
                        } else {
                            sd->words[j].pos = 0;
                            sd->words[j].neg = 1;
                        }
                        if(sd->word_count >= sd->word_max) {    //check if sd->words is big enough, if not, realloc
                            sd->word_max += 4096;
                            struct sentiment_word *words_tmp = NULL;
                            if((words_tmp = realloc(sd->words, sd->word_max * sizeof(struct sentiment_word)))==NULL) {
                                int x;
                                for(x=0; x<row_count; ++x)
                                    free(row_str[j]);
                                for(x=0; x<sd->word_count; ++x)
                                        free(sd->words[j].word);
                                free(sd->words);
                                free(row_str);
                                free(row_int);
                                fclose(fp);
                                return 0;    
                            }
                            sd->words = words_tmp;
                        }
                    }
                }
                field = strtok(NULL, sd->delims);    //get next word
            }    
        }
        free(row_int);    //free all malloc'd vars no longer needed
        for(i=0; i<row_count; ++i)
            free(row_str[i]);
        free(row_str);
        fclose(fp);
        return 1;
    }

  2. #2
    C++ Witch laserlight's Avatar
    Join Date
    Oct 2003
    Location
    Singapore
    Posts
    28,413
    I suggest breaking up your rather long function into smaller functions that each does one thing and does it well.

    Also, step through the code with a debugger. Where does the segfault occur? The mistake probably lies in the code that you stepped through before that point.
    Quote Originally Posted by Bjarne Stroustrup (2000-10-14)
    I get maybe two dozen requests for help with some sort of programming or design problem every day. Most have more sense than to send me hundreds of lines of code. If they do, I ask them to find the smallest example that exhibits the problem and send me that. Mostly, they then find the error themselves. "Finding the smallest program that demonstrates the error" is a powerful debugging tool.
    Look up a C++ Reference and learn How To Ask Questions The Smart Way

  3. #3
    and the hat of int overfl Salem's Avatar
    Join Date
    Aug 2001
    Location
    The edge of the known universe
    Posts
    39,659
    > Please note that the realloc in the following code for row_size does not occur since the file being read currently only has ~150 sentences.
    I'm glad you said, because it's broken to begin with.

    Code:
                if((row_str_tmp = realloc(row_str, row_size * sizeof(char*)))==NULL) {
                    int j;
                    for(j=0; j<row_count; ++j)
                        free(row_str[j]);
                    free(row_str);
                    free(row_int);
                    fclose(fp);
                    return 0;
                }
                if((row_int_tmp = realloc(row_int, row_size * sizeof(int)))==NULL) {
                    int j;
                    for(j=0; j<row_count; ++j)
                        free(row_str[j]);  /*!! wrong, it's now in row_str_tmp following the above successful realloc*/
                    free(row_str);
                    free(row_int);
                    fclose(fp);
                    return 0;
                }
    Segfaults are a symptom, not a cause.
    You can stare at that code for hours and not see a problem.

    Your real root cause is quite likely to have happened long before you got to this function.
    The damage was done a long time ago, here is where you finally notice there is something wrong.

    Running it in valgrind might give you a better chance of getting closer to the cause of the corruption and not the symptom.
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. SegFault with malloc
    By Sourabh Verma in forum C Programming
    Replies: 2
    Last Post: 11-02-2013, 12:51 PM
  2. malloc segfault
    By Miryafa in forum C Programming
    Replies: 3
    Last Post: 10-08-2011, 11:57 AM
  3. Segfault when malloc()'ing
    By msh in forum C Programming
    Replies: 2
    Last Post: 01-19-2011, 08:54 AM
  4. Malloc -segfault
    By ganesh bala in forum C Programming
    Replies: 8
    Last Post: 02-17-2009, 08:08 AM
  5. malloc() resulting in a SegFault?!
    By cipher82 in forum C++ Programming
    Replies: 21
    Last Post: 09-18-2008, 11:24 AM

Tags for this Thread