Thread: How can I improve the performance of this code?

  1. #1
    Registered User
    Join Date
    Nov 2015
    Posts
    72

    How can I improve the performance of this code?

    To begin with, I want to say thank you to all people who have helped me with issues that I had with c programming. That is a learning journey that I'm undertaking. I'm trying to be playful and lighthearted because if I took this too seriously I would probably shoot myself in the head as it can be extremely frustrating when things don't work as intended and I'm spending hours after hours looking for the problem. I'm just saying this because some people may seem to find me arrogant which is not really the case.

    A big thanks should go to laserlight for his feedback and support when I was solving the double digest problem in C++. I didn't get to thank him so I might as well do it now. The threads covering this seem to have been deleted though. :'(

    Now here is my program

    Code:
    #include <stdio.h>
    #include <stdbool.h> // For using booleans
    #include <stdlib.h>  // For using malloc
    
    bool **worldArray;
    bool **oceanArray;
    bool **queuedArray;
    int nRows, nCols;
    char *inputRow;
    
    //Faster algorithm?
    int numEdges = 0;
    
    typedef struct {
        int x;
        int y;
    } Pixel;
    
    void allocateMemory();
    void extractInput();
    void resetMask();
    
    // Minimum value: 757
    int MAX_QUEUE_LENGTH = 757;
    Pixel *floodQueue;
    int startQueue;
    int maxQueue;
    int endQueue;
    int queueLength;
    void pushQueue(Pixel element);
    Pixel popQueue();
    
    void primeOceanMask();
    void maskOcean(int row, int col);
    bool isToBeMasked(int row, int col);
    
    int evaluateCoastLength();
    
    int main(int argc, const char * argv[]) {
        if(scanf("%d%d", &nRows, &nCols) == 2) {
            allocateMemory();
            extractInput();
            resetMask();
        } else {
            printf("Invalid map size!\n\n");
            return -1;
        }
        maskOcean(0,0);
    
        printf("%d\n", numEdges);
    
        free(worldArray[0]);
        free(oceanArray[0]);
        free(queuedArray[0]);
        free(floodQueue);
        free(worldArray);
        free(oceanArray);
        free(queuedArray);
    
        return 0;
    }
    
    void allocateMemory() {
        int NROWS = nRows + 2, NCOLS = nCols + 2;
        worldArray = malloc(NROWS * sizeof *worldArray);
        worldArray[0] = malloc(NROWS * NCOLS * sizeof **worldArray);
        oceanArray = malloc(NROWS * sizeof *oceanArray);
        oceanArray[0] = calloc(NROWS * NCOLS, sizeof **oceanArray);
        queuedArray = malloc(NROWS * sizeof *queuedArray);
        queuedArray[0] = calloc(NROWS * NCOLS, sizeof **queuedArray);
        floodQueue = (Pixel *) malloc(MAX_QUEUE_LENGTH * sizeof(Pixel));
        startQueue = 0;
        endQueue = 0;
        maxQueue = MAX_QUEUE_LENGTH - 1;
        for(int i = 1; i < NROWS; ++i) {
            worldArray[i] = worldArray[i - 1] + NCOLS;
            oceanArray[i] = oceanArray[i - 1] + NCOLS;
            queuedArray[i] = queuedArray[i - 1] + NCOLS;
        }
    }
    
    void extractInput() {
        inputRow = (char *) malloc ((nCols + 1) * sizeof(char));
        for(int row = 0; row < nRows; ++row) {
            if(!(scanf("%s", inputRow))) {
                printf("Input error!\n");
            }
            for(int col = 0; col < nCols; ++col) {
                if(inputRow[col] == '0')
                    worldArray[row+1][col+1] = false;
                else
                    worldArray[row+1][col+1] = true;
            }
            // Zeros along vertical borders
            worldArray[row+1][nCols+1] = false;
            worldArray[row+1][0] = false;
        }
        free(inputRow);
        // Zeros along horizontal borders
        for(int col = 0; col <= nCols+1; ++col) {
            worldArray[0][col] = false;
            worldArray[nRows+1][col] = false;
        }
    }
    
    void resetMask() {
        for(int i = 0; i <= nRows+1; ++i)
            for(int j = 0; j <= nCols+1; ++j) {
                oceanArray[i][j] = false;
            }
    }
    
    void pushQueue(Pixel element) {
        if(queuedArray[element.x][element.y])
           return;
        floodQueue[endQueue] = element;
        endQueue = (endQueue + 1) % maxQueue;
        if(queueLength == maxQueue)
           startQueue = (startQueue + 1) % maxQueue;
        else
           ++queueLength;
    }
    
    Pixel popQueue() {
        if(queueLength > 0) {
           --queueLength;
           int popIndex = startQueue;
           startQueue = ++startQueue % maxQueue;
           return floodQueue[popIndex];
        } else {
            printf("Empty queue, popQueue() abuse!\n");
            Pixel nonPixel;
            nonPixel.x = -1;
            nonPixel.y = -1;
            return nonPixel;
        }
    }
    
    void maskOcean(int row, int col) {
        Pixel pushElement;
        pushElement.x = row;
        pushElement.y = col;
        pushQueue(pushElement);
        while(queueLength > 0) {
            Pixel popElement;
            popElement = popQueue();
            oceanArray[popElement.x][popElement.y] = true;
            int offset[4][2] = {{-1,0},{1,0},{0,-1},{0,1}};
            for(int i = 0;i < 4;++i) {
               int maskX = popElement.x + offset[i][0];
               int maskY = popElement.y + offset[i][1];
                if(isToBeMasked(maskX, maskY)) {
                    pushElement.x = maskX;
                    pushElement.y = maskY;
                    pushQueue(pushElement);
                    queuedArray[maskX][maskY] = true;
                }
            }
        }
    }
    bool isToBeMasked(int row, int col) {
        if((row >= 0)&&(row<=nRows+1))
            if((col >= 0)&&(col<=nCols+1)) {
                if(worldArray[row][col]) {
                    ++numEdges;
                } else if(!oceanArray[row][col])
                   return true;
            }
        return false;
    }
    It was clocked to taking 0.05 secs which I'd claim to be an eternity in terms of CPU cycles. I tried more direct declarations without the mallocs (e.g. int worldArray[1002][1002]; ), but I didn't get any noticeable improvement in performance. Moreover, the results got corrupted when doing test runs which kind of baffles me.

    I'm open to suggestions as to how to improve the performance of this code. The best result clocked in at 0.02s which is less than half of what I've got. Perhaps call to functions consumes CPU cycles?
    Last edited by bot; 12-31-2017 at 05:35 PM.

  2. #2
    Programming Wraith GReaper's Avatar
    Join Date
    Apr 2009
    Location
    Greece
    Posts
    2,738
    This is a nice opportunity to use a profiler. Remember, you need to focus your optimization efforts on the parts of the code that consume the most CPU time. Trying to optimize every single thing is pointless.
    Devoted my life to programming...

  3. #3
    and the hat of int overfl Salem's Avatar
    Join Date
    Aug 2001
    Location
    The edge of the known universe
    Posts
    39,665
    What are you using to measure time?

    A couple of hundredths of a second either way can easily be explained by system level noise. Timing code with any form of I/O widens error bars, stdio especially.

    Line 148 can be static const.
    If you dance barefoot on the broken glass of undefined behaviour, you've got to expect the occasional cut.
    If at first you don't succeed, try writing your phone number on the exam paper.

  4. #4
    misoturbutc Hodor's Avatar
    Join Date
    Nov 2013
    Posts
    1,787
    It would be nice to have a text file containing test data to go along with the source code...

  5. #5
    Registered User
    Join Date
    Nov 2015
    Posts
    72
    The time measured comes as reported by the website (kattis.com) so I would assume that they use proper tools to measure the time to execute. Perhaps it is an average value of a larger number of runs.

    I want to optimize the program as a general exercise on how to do it and build an understanding of what part of the program it is that takes most of the execution time. Perhaps an optimization at this stage means reorganizing memory in such a way that it reduces cache-misses, which I do not know how to do.

    I did the compiling on a Mac so I would assume the most viable tools to be valgrind, gdb and dtrace. I see a small problem here, by the looks of it, the callgrind tool (which supposedly is a subtool of the valgrind package) seems to require some kind of user interaction during the profiling process which seems to be impossible considering the short time it executes. Here is what I tried with valgrind:

    Code:
    gcc -g -Wall myProgram.c
    valgrind --tool=callgrind ./a.out < 19.in
    The program executed fine and that "< 19.in" to my grand surprise appeared to parse properly into the executable. The only output that gets generated is some general information about events that were generated and collected, not any useful information about the program itself. There is no room for "callgrind_control -e -b" etc as the program has exited promptly.

    The dtrace appears to inject probes in different components of the system, not to functions inside the particular binary or the code itself.

    Ah, the test data, I was a little confused as to what the meant, but now I think I understand. The input files that I have used to test my program can be found here:

    http://challenge.csc.kth.se/2011/testdata.tar.bz2

    Under the "testdata/coast" directory.
    Last edited by bot; 01-04-2018 at 05:48 PM.

Popular pages Recent additions subscribe to a feed

Similar Threads

  1. Replies: 11
    Last Post: 03-29-2009, 12:27 PM
  2. Improve Performance of String comparison
    By yagmai in forum C++ Programming
    Replies: 5
    Last Post: 03-11-2008, 12:11 PM
  3. Help me improve my code!
    By wise_ron in forum C Programming
    Replies: 11
    Last Post: 09-19-2006, 10:04 AM
  4. why page based I/O can improve performance?
    By George2 in forum C Programming
    Replies: 1
    Last Post: 06-12-2006, 07:42 AM
  5. Replies: 6
    Last Post: 06-09-2006, 12:44 AM

Tags for this Thread