Hello folks, first post.
I'm fairly new to C so I'm not very familiar with C99 optimization. I thought I could optimize some code by creating an array so that the program wouldn't have to iterate through the stack to find pointers.
Instead it could utilize the consecutive memory locations of the array. (un)fortunately, this is handled extremely well by the compiler. (every time I think I can outsmart the compiler I feel like I'm dissing the C devs, haha)
I've included the code I used for testing. I came to the conclusion that either:
1. The stack isn't iterated but uses a shortcut (very likely);
2. Stack pointer lookup is blazing fast (wouldn't necessarily account for lower clock but maybe it can do multiple at once?);
3. Because of the limited test environment, the structs aren't defragmented, so there is no gain from consecutive memory locations of the array.
Can someone shed a light on what is happening?
code file (better formatting)
google drive
Test results:
Original:
Test 1 of 100 - Size: 20, Cycles: 2227
Test 2 of 100 - Size: 20, Cycles: 2354
Test 3 of 100 - Size: 20, Cycles: 2291
Test 4 of 100 - Size: 20, Cycles: 2353
Test 5 of 100 - Size: 20, Cycles: 2291...
Test 1 of 100 - Size: 1000, Cycles: 7970
Test 2 of 100 - Size: 1000, Cycles: 8053
Test 3 of 100 - Size: 1000, Cycles: 7960
Test 4 of 100 - Size: 1000, Cycles: 7940
Test 5 of 100 - Size: 1000, Cycles: 7904
...
"Optimized"
Test 1 of 100 - Size: 20, Cycles: 2780
Test 2 of 100 - Size: 20, Cycles: 2904
Test 3 of 100 - Size: 20, Cycles: 3000
Test 4 of 100 - Size: 20, Cycles: 3006
Test 5 of 100 - Size: 20, Cycles: 2905
...
Test 1 of 100 - Size: 1000, Cycles: 33837
Test 2 of 100 - Size: 1000, Cycles: 32723
Test 3 of 100 - Size: 1000, Cycles: 33288
Test 4 of 100 - Size: 1000, Cycles: 33116
Test 5 of 100 - Size: 1000, Cycles: 32804
...
Code:
As you can see the optimized function calls an array init function. This is a linear array with every other index being x.
arr[0] = x1; arr[1] = y1; arr[2] = x2; ...
The original (faster code) just iterates the linked list and gets its x and y.
Code:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#define MATRIX_W 10
#define MATRIX_H 24
#define ARRAY_SIZE 20
typedef struct Shape {
struct Block *tail;
} Shape;
typedef struct Block {
int x, y;
struct Block *prev;
} Block;
Block *link (Block *prev, int x, int y)
{
Block *new = calloc(1, sizeof(Block));
new-> x = x;
new-> y = y;
new-> prev = prev;
return new;
}
void *init_shape(int *values)
{
int i;
Block *block = NULL;
Shape *shape = calloc(1, sizeof(Shape));
for (i=0; i<ARRAY_SIZE; i+=2)
{
block = link(block, values[i], values[i+1]);
}
shape->tail = block;
return shape;
}
// create int array
int *init_coord_array(Shape *shape)
{
int x, y, i;
int *coords;
Block *block;
i=0;
coords = calloc(ARRAY_SIZE, sizeof(int));
block = shape->tail;
while (block != NULL) // for block in shape
{
coords[i++] = block->x; // add to array
coords[i++] = block->y;
block = block->prev; // next block in shape
}
return coords;
}
// populate VLA
int *init_test_values()
{
int i,w,h,*values;
i=0;
w = rand() % MATRIX_W; // no srand for testing purposes
h = rand() % MATRIX_H;
values = calloc(ARRAY_SIZE, sizeof(int));
while (i<ARRAY_SIZE)
{
values[i] = w;
values[i+1] = h;
i+=2;
}
return values;
}
int original_function(Shape *shape)
{
int x,y,match;
Block *block = shape->tail;
match = 0;
for (y=MATRIX_H-1; y>=0; y--)
{
for (x = 0; x < MATRIX_W; x++)
{
while (block != NULL)
{
if (block->x == x && block->y == y)
{
match++;
break;
}
block = block->prev;
}
}
}
return match;
}
int optimized_function(Shape *shape)
{
int i,x,y,match,*coords;
i = match = 0;
coords = init_coord_array(shape); // create linear int array
for (y=MATRIX_H-1; y>=0; y--)
{
for (x = 0; x < MATRIX_W; x++)
{
while (i < ARRAY_SIZE)
{
if (coords[i] == x && coords[i + 1] == y)
{
match++;
break;
}
i += 2; // goto next two values
}
}
}
free(coords);
return match;
}
int main()
{
int *values = init_test_values(); // create not so random values (no srand)
Shape *shape = init_shape(values); // populate struct with generated values
free(values);
for (int i=0; i<100; i++)
{
clock_t start = clock();
for (int j=0; j<10000000; j++)
{
original_function(shape); // struct iteration, linked list
//optimized_function(shape); // utilize int array
}
printf("Test %d of 100 - Size: %d, Cycles: %d\n", i+1, ARRAY_SIZE, clock() - start);
}
}