gmp/pthread program leaking memory. Why?
I am writing a program to enumerate the number of unique positions for the game of Go given a board size. I am using the GNU Multiple Precision Arithmetic Library (gmp) to handle the large position hashes that appear once the board size hits 5x5 and the POSIX threads (pthread) library to make it so that I can process multiple position hashes simultaneously.
When I try to run the program for a 5x5 board, my program runs for about five minutes, swallowing RAM every step of the way, until it breaks the 2-gig limit and Windows 7 kills the process. The weird part is that it fails when the position hashes are well within the 32-bit integer representation range (the reported hash value is around 130 million, but is not the same value twice in a row). Furthermore, I've looked over my code, and I cannot see where the leak is happening.
I've tried to find a memory leak tool that I can use with my environment (Eclipse Galileo with CDT, MinGW 4.6.2), but the only tool that has seemed promising requires a debugger, and gdb refuses to run. I don't know how to solve this problem without giving up on my current approach and switching to MSVS.
I've included a listing of my source code. I'd cull some of the lines, but I don't know what is leaking the memory. Can anyone help me pinpoint the problem?
Code:
/*
* main.c
*
* Created on: Feb 29, 2012
* Author: JAC
*/
#include <gmp.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>
///////////////////////////////// DEFINITIONS //////////////////////////////////
#define SIDE_LENGTH 5
#define FORM_CATEGORIES 5
#define THREADS 12
#define TRANSFORMATIONS 16
/////////////////////////////////// STRUCTS ////////////////////////////////////
typedef struct {
clock_t tick;
mpz_t
i,
last,
total,
unique,
forms[FORM_CATEGORIES];
pthread_mutex_t lock;
} state;
///////////////////////////// FUNCTION PROTOTYPES //////////////////////////////
int cleanState(state *);
int getNextPosition(state *, mpz_t *);
int initState(state *, unsigned);
int processHash(
unsigned,
unsigned,
mpz_t *,
mpz_t *,
mpz_t *
);
int registerUnique(state *, int);
void *worker(void *);
////////////////////////// SYNCHRONIZATION INTERFACE ///////////////////////////
int cleanState(state *sync) {
int i;
mpz_clear(sync->i);
mpz_clear(sync->last);
mpz_clear(sync->total);
mpz_clear(sync->unique);
for (i = 0; i < FORM_CATEGORIES; i++) {
mpz_clear(sync->forms[i]);
}
pthread_mutex_destroy(&(sync->lock));
return 0;
}
int getNextPosition(state *sync, mpz_t *out) {
int to_return = 0;
pthread_mutex_lock(&(sync->lock));
if (mpz_cmp(sync->i, sync->last) <= 0) {
mpz_set(*out, sync->i);
mpz_add_ui(sync->i, sync->i, 1);
} else {
to_return = 1;
}
pthread_mutex_unlock(&(sync->lock));
return to_return;
}
int initState(state *sync, unsigned side_length) {
int i;
mpz_t ll, lr, ur;
unsigned size = side_length * side_length;
// Initialize the local variables.
mpz_init(ll);
mpz_init(lr);
mpz_init(ur);
// Initialize all the state variables.
sync->tick = clock();
mpz_init(sync->i);
mpz_init(sync->last);
mpz_init(sync->total);
mpz_init(sync->unique);
for (i = 0; i < FORM_CATEGORIES; i++) {
mpz_init(sync->forms[i]);
}
pthread_mutex_init(&(sync->lock), NULL);
// Calculate the total number of positions.
mpz_ui_pow_ui(sync->total, 3, size);
// Calculate the hash of the last unique position to optimize performance.
// The formula for the last unique position is as follows:
// last =
// total - 2 -
// (3 ** (size - 1))(length > 1) -
// (3 ** (size - length) + 3 ** (length - 1))(length > 2)
//
// This is the position with all its corners containing black stones and all
// other intersections contain white.
mpz_sub_ui(sync->last, sync->total, 2);
if (side_length > 1) {
mpz_ui_pow_ui(ll, 3, size - 1);
mpz_sub(sync->last, sync->last, ll);
}
if (side_length > 2) {
mpz_ui_pow_ui(lr, 3, size - side_length);
mpz_ui_pow_ui(ur, 3, side_length - 1);
mpz_sub(sync->last, sync->last, lr);
mpz_sub(sync->last, sync->last, ur);
}
// Clean up and return.
mpz_clear(ll);
mpz_clear(lr);
mpz_clear(ur);
return 0;
}
int registerUnique(state *sync, int forms) {
char buffer[BUFSIZ];
clock_t current;
int index;
pthread_mutex_lock(&(sync->lock));
mpz_add_ui(sync->unique, sync->unique, 1);
for (index = 0; forms > 1; index++, forms >>= 1);
mpz_add_ui(sync->forms[index], sync->forms[index], 1);
//*
current = clock();
if (current - sync->tick >= 500) {
sync->tick = current;
gmp_sprintf(
buffer,
"%Zd: %Zd (%u)",
sync->i,
sync->unique,
(unsigned)sync->tick
);
printf("%s\n", buffer);
}
//*/
pthread_mutex_unlock(&(sync->lock));
return 0;
}
///////////////////////////////// PROCESS HASH /////////////////////////////////
int processHash(
unsigned side_length,
unsigned board_size,
mpz_t *curr,
mpz_t *digit,
mpz_t *transformations
) {
unsigned long cell, form_count, i, j, coords[7];
// Initialize.
form_count = 1;
mpz_init_set(*curr, transformations[0]);
for (i = 1; i < TRANSFORMATIONS; i++) {
mpz_set_ui(transformations[i], 0);
}
// Transform the board at transformations[0] to its fifteen other positions.
for (i = 0; i < board_size; i++) {
short
col = i % side_length,
row = i / side_length;
// Calculate the value for the current intersection: 0 means empty, 1
// means black stone, and 2 means white stone. If the intersection is
// empty, skip to the next.
cell = mpz_fdiv_q_ui(*curr, *curr, 3);
if (!cell) {
continue;
}
// Determine the cell's coordinate in each of the transformations.
// 0: mirror horizontally
// 1: mirror vertically
// 2: mirror both
// 3: rotate left
// 4: rotate right
// 5: rotate left, mirror horizontally
// 6: rotate left, mirror vertically
coords[0] = row * side_length + (side_length - col - 1);
coords[1] = (side_length - row - 1) * side_length + col;
coords[2] = (side_length - row - 1) * side_length + (side_length - col - 1);
coords[3] = (side_length - col - 1) * side_length + row;
coords[4] = col * side_length + (side_length - row - 1);
coords[5] = (side_length - col - 1) * side_length + (side_length - row - 1);
coords[6] = col * side_length + row;
// Build the hash for each of the transformations using the determined
// cell value and coordinate.
for (j = 0; j < TRANSFORMATIONS - 1; j++) {
short place = j % 8;
mpz_ui_pow_ui(*digit, 3, place == 7 ? i : coords[place]);
if ((j > 6) ^ (cell == 2)) {
mpz_addmul_ui(transformations[j + 1], *digit, 2);
} else {
mpz_add(transformations[j + 1], transformations[j + 1], *digit);
}
}
}
// Determine the lowest hash value, the prototype, for the current board,
// and count the unique positions.
for (i = 1; i < TRANSFORMATIONS; i++) {
int is_hash;
if ((is_hash = mpz_cmp(transformations[i], transformations[0])) == 0) {
continue;
} else if (is_hash < 0) {
form_count = 0;
break;
}
for (
j = 0;
j < i && mpz_cmp(transformations[i], transformations[j]) != 0;
j++
); // Intentionally blank.
if (j == i) {
form_count++;
}
}
// Clean up and return.
return (int) form_count;
}
//////////////////////////////// WORKER THREAD /////////////////////////////////
void *worker(void *args) {
int i;
mpz_t curr, digit, transformations[TRANSFORMATIONS];
state *sync = (state *) ((unsigned *) args)[0];
unsigned
side_length = ((unsigned *) args)[1],
board_size = ((unsigned *) args)[2];
// Initialize.
mpz_init(curr);
mpz_init(digit);
for (i = 0; i < TRANSFORMATIONS; i++) {
mpz_init(transformations[i]);
}
// Continuously get position hashes from the synchronization state and
// evaluate them until there are none left.
while (!getNextPosition(sync, transformations)) {
if (
(i = processHash(
side_length,
board_size,
&curr,
&digit,
transformations
)) > 0
) {
registerUnique(sync, i);
}
}
// Clean up and return.
mpz_clear(curr);
mpz_clear(digit);
for (i = 0; i < TRANSFORMATIONS; i++) {
mpz_init(transformations[i]);
}
return NULL;
}
///////////////////////////////////// MAIN /////////////////////////////////////
int main() {
clock_t start;
int i;
pthread_t threads[THREADS];
state sync;
unsigned args[3] = {
(unsigned) &sync,
SIDE_LENGTH,
SIDE_LENGTH * SIDE_LENGTH
};
initState(&sync, SIDE_LENGTH);
start = clock();
for (i = 0; i < THREADS; i++) {
if (pthread_create(threads + i, NULL, worker, (void *) args) != 0) {
printf("-- THREAD COUNT: %d --\n", i);
break;
}
}
for (i = 0; i < THREADS; i++) {
pthread_join(threads[i], NULL);
}
gmp_printf(
"Size %u - %.15f seconds\nUnique: %Zd\n\tForm-1: %Zd\n\tForm-2: %Zd\n"
"\tForm-4: %Zd\n\tForm-8: %Zd\n\tForm-16: %Zd\nTotal: %Zd\n",
SIDE_LENGTH,
(clock() - start) / 1000.0,
sync.unique,
sync.forms[0],
sync.forms[1],
sync.forms[2],
sync.forms[3],
sync.forms[4],
sync.total
);
cleanState(&sync);
return 0;
}