Sorry but it doesn't compile at all on any platform with "worldArray[i] = worldArray[i - 1] + NCOLS;", I hope you are aware that this is an array of booleans, not integers. I had a little hope to gain some performance and lower memory usage by using booleans but perhaps this is not the case eh? The Windows environment is 64-bit but I could launch a 32 bit environment. The Linux environment uses gcc version gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609 with the following flags:
Code:
-g -O2 -std=gnu99 -static {files} -lm
and I cannot change that.
Replacing lines containing "endQueue = ++endQueue % maxQueue;" with "endQueue = (endQueue + 1) % maxQueue;" didn't improve performance one bit.
I'm not sure what's so fishy about queueLength. I have defined startQueue as the index at which the first element in the queue resides and endQueue as the index at which the next element will be added when calling pushQueue(element). That way these indices will know where to be if the queue is empty; startQueue == endQueue either means that the queue is full or is empty. So instead of rearranging the queue to prevent indices from exceeding the limit as shown above, I let the indices spinning round round round like in a ring without really touching the elements in the queue. But invoking the modulus for each index in the uniqueness test seems to hog a lot of CPU cycles.
So the fastest version of my program looks like this:
Code:
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
bool **worldArray;
bool **oceanArray;
int nRows, nCols;
char *inputRow;
typedef struct {
int x;
int y;
} Pixel;
void allocateMemory();
void extractInput();
void resetMask();
int QUEUE_LENGTH = 757;
Pixel *floodQueue;
int startQueue;
int maxQueue;
int endQueue;
void requeue();
void pushQueue(Pixel element);
Pixel popQueue();
void maskOcean(int row, int col);
bool isToBeMasked(int row, int col);
int evaluateCoastLength();
int main(int argc, const char * argv[]) {
if(scanf("%d%d", &nRows, &nCols) == 2) {
allocateMemory();
extractInput();
resetMask();
} else {
printf("Invalid map size!\n\n");
return -1;
}
maskOcean(0,0);
printf("%d\n", evaluateCoastLength());
for (int i = 0; i < nRows + 2; ++i) {
free(worldArray[i]);
free(oceanArray[i]);
}
free(floodQueue);
free(worldArray);
free(oceanArray);
return 0;
}
void allocateMemory() {
worldArray = (bool **) malloc((nRows + 2)* sizeof(bool *));
oceanArray = (bool **) malloc((nRows + 2)* sizeof(bool *));
floodQueue = (Pixel *) malloc(QUEUE_LENGTH * sizeof(Pixel));
startQueue = 0;
endQueue = 0;
maxQueue = QUEUE_LENGTH - 1;
for(int i = 0; i <= nRows + 1; ++i) {
worldArray[i] = (bool *) malloc((nCols + 2) * sizeof(bool));
oceanArray[i] = (bool *) malloc((nCols + 2) * sizeof(bool));
}
}
void extractInput() {
for(int row = 0; row < nRows; ++row) {
inputRow = (char *) malloc ((nCols + 1) * sizeof(char));
if(!(scanf("%s", inputRow))) {
printf("Input error!\n");
}
for(int col = 0; col < nCols; ++col) {
if(inputRow[col] == '0')
worldArray[row+1][col+1] = false;
else
worldArray[row+1][col+1] = true;
}
// Zeros along vertical borders
worldArray[row+1][nCols+1] = false;
worldArray[row+1][0] = false;
free(inputRow);
}
// Zeros along horizontal borders
for(int col = 0; col <= nCols+1; ++col) {
worldArray[0][col] = false;
worldArray[nRows+1][col] = false;
}
}
void resetMask() {
for(int i = 0; i <= nRows+1; ++i)
for(int j = 0; j <= nCols+1; ++j) {
oceanArray[i][j] = false;
}
}
int evaluateCoastLength() {
int numSegs = 0;
for(int row = 0; row < nRows + 1; ++row)
for(int col = 0; col < nCols+1; ++col)
if(oceanArray[row][col] != oceanArray[row][col+1])
++numSegs;
for(int col = 0; col < nCols + 1; ++col)
for(int row = 0; row < nRows+1; ++row)
if(oceanArray[row][col] != oceanArray[row+1][col])
++numSegs;
return numSegs;
}
void requeue() {
if(startQueue > 0) {
int counter = 0;
for (int i = startQueue; i < endQueue; ++i)
floodQueue[counter++] = floodQueue[i];
startQueue = 0;
endQueue = counter;
}
}
void pushQueue(Pixel element) {
for(int q = startQueue; q < endQueue; ++q)
if(element.x == floodQueue[q].x)
if(element.y == floodQueue[q].y)
return;
if(endQueue == maxQueue) {
if(startQueue == 0)
popQueue();
requeue();
}
floodQueue[endQueue] = element;
++endQueue;
}
Pixel popQueue() {
if(startQueue < endQueue)
return floodQueue[startQueue++];
else {
printf("Empty queue, popQueue() abuse!\n");
Pixel nonPixel;
nonPixel.x = -1;
nonPixel.y = -1;
return nonPixel;
}
}
void maskOcean(int row, int col) {
Pixel pushElement;
pushElement.x = row;
pushElement.y = col;
pushQueue(pushElement);
while(startQueue < endQueue) {
Pixel popElement;
popElement = popQueue();
oceanArray[popElement.x][popElement.y] = true;
int offset[4][2] = {{-1,0},{1,0},{0,-1},{0,1}};
for(int i = 0;i < 4;++i) {
if(isToBeMasked(popElement.x + offset[i][0], popElement.y + offset[i][1])) {
pushElement.x = popElement.x + offset[i][0];
pushElement.y = popElement.y + offset[i][1];
pushQueue(pushElement);
}
}
}
}
bool isToBeMasked(int row, int col) {
if((row >= 0)&&(row<=nRows+1))
if((col >= 0)&&(col<=nCols+1))
return !(worldArray[row][col]) && !oceanArray[row][col];
return false;
}
I have made the queue length as small as possible. In order to use my ring buffer instead, remove the functions requeue(), popQueue() and pushQueue() and replace them with the functions in the first textbox in post #11 at the end. You may want to declare an int named queueLength = 0 at the top of the code. If you want to improve the ringbuffer as discussed, then replace the pushQueue() function with the function in the second textbox of post #11.
The new method of allocating memory looks like so:
Code:
...
bool **worldArray;
bool **oceanArray;
int nRows, nCols;
...
int main(int argc, const char * argv[]) {
if(scanf("%d%d", &nRows, &nCols) == 2) {
allocateMemory();
...
printf("%d\n", evaluateCoastLength());
free(worldArray[0]);
free(oceanArray[0]);
free(floodQueue);
free(worldArray);
free(oceanArray);
return 0;
}
void allocateMemory() {
int NROWS = nRows + 2, NCOLS = nCols + 2;
worldArray = malloc(NROWS * sizeof *worldArray);
worldArray[0] = malloc(NROWS * NCOLS, sizeof *worldArray[0]);
oceanArray = malloc(NROWS * sizeof *oceanArray);
oceanArray[0] = calloc(NROWS * NCOLS, sizeof *oceanArray[0]);
startQueue = 0;
endQueue = 0;
maxQueue = QUEUE_LENGTH - 1;
for(int i = 0; i < NROWS; ++i) {
worldArray[i] = worldArray[i - 1 + NCOLS];
oceanArray[i] = oceanArray[i - 1 + NCOLS];
}
}
(no need to calloc the worldArray as it will be filled with data anyway, only the surrounding border indices need to. The oceanArray otoh needs to...)
Some relevant testdata can be found in the following beezipped tarball:
http://challenge.csc.kth.se/2011/testdata.tar.bz2