I'm using MPICH2 1.0.7. I installed and configured it on two machines and started the mpd daemons on both.
The problems begin when I launch my program: the first MPI_Bcast succeeds, but the second one (or any other communication function called after it) hangs. The source code is attached below.

Also, when I call MPI_Bcast, the daemon on the second machine prints the following:
F9Virtual64_mpdman_1 (run282): invalid msg from lhs; expecting ringsize got: {}

Can anyone look at the code and tell me what's wrong?

Code:
#include <mpi.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>

int main(int arg_c, char** arg_v)
{
	if(arg_c<2)
	{
		printf("Input file not specified\n");
		return 0;
	}
	if(MPI_Init(&arg_c, &arg_v)!=MPI_SUCCESS)
	{
		printf("MPI initialization failed\n");
		return 0;
	}
	int ProcNum, ProcRank;
	MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
	MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
	
	int size;
	double e;
	double* A;
	double* b;
	double* x, *xprev;
	
	FILE* f=0;
	if(!ProcRank)
	{
		f=fopen(arg_v[1], "r");
		if(!f)
		{
			printf("Error opening input file\n");
			MPI_Finalize();
			return 0;
		}
		fscanf(f, "%d", &size);
		printf("Enter desired accuracy\n");
		scanf("%lf", &e);
	}
	if(!ProcRank)
		printf("Broadcasting matrix size\n");
	if(MPI_Bcast(&size, 1, MPI_INT, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
		if(!ProcRank)
			printf("Failed to broadcast matrix size\n");
	if(!ProcRank)
		printf("Broadcasting accuracy\n");
	if(MPI_Bcast(&e, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD)!=MPI_SUCCESS)
		if(!ProcRank)
			printf("Failed to broadcast accuracy\n");
	A=new double[size*size];
	b=new double[size];
	x=new double[size];
	xprev=new double[size];
	
	memset(x, 0, size*sizeof(double));
	memset(xprev, 0, size*sizeof(double));
	
	if(!ProcRank)
	{
		for(int i=0; i<size; i++)
			for(int j=0; j<size; j++)
				fscanf(f, "%lf", &(A[i*size+j]));
		for(int i=0; i<size; i++)
			fscanf(f, "%lf", &(b[i]));
		fclose(f);
	}
	
	if(!ProcRank)
		printf("Waiting all processes to be initialized\n");
	MPI_Barrier(MPI_COMM_WORLD);
	
	if(!ProcRank)
		printf("Broadcasting matrix\n");
	MPI_Bcast(A, size*size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	if(!ProcRank)
		printf("Broadcasting b\n");
	MPI_Bcast(b, size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
	
	double cursum, totalsum;
	int fin, itercount=0;
	
	if(!ProcRank)
		printf("Starting calculation\n");
	struct timeval tv1, tv2;
	gettimeofday(&tv1, 0);
	do
	{
		MPI_Bcast(xprev, size, MPI_DOUBLE, 0, MPI_COMM_WORLD);
		itercount++;
		for(int i=0; i<size; i++)
		{
			int snum=(size)/ProcNum;
			cursum=0.0;
			totalsum=0.0;
			for(int n=ProcRank*snum; n<((ProcRank==ProcNum-1)?(size):((ProcRank+1)*snum)); n++)
			{
				if(n<=i-1)
					cursum+=A[i*size+n]*x[n];
				else
					cursum+=A[i*size+n]*xprev[n];
			}
			MPI_Barrier(MPI_COMM_WORLD);
			MPI_Reduce(&cursum, &totalsum, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
			if(!ProcRank)
			{
				x[i]=xprev[i]-1.0/A[i*size+i]*(totalsum-b[i]);
			}
		}
		if(!ProcRank)
		{
			fin=1;
			for(int i=0; i<size; i++)
				if(fabs(x[i]-xprev[i])>e)
					fin=0;
			if(!fin)
				memcpy(xprev, x, size*sizeof(double));
		}
		MPI_Bcast(&fin, 1, MPI_INT, 0, MPI_COMM_WORLD);
	}
	while(!fin);
	
	gettimeofday(&tv2, 0);
	
	if(!ProcRank)
	{
		printf("Matrix A:\n");
		for(int i=0; i<size; i++)
		{
			for(int j=0; j<size; j++)
				printf("%lf ", A[i*size+j]);
			printf("\n");
		}
		printf("\n");
		printf("Line b:\n");
		for(int i=0; i<size; i++)
			printf("%lf ", b[i]);
		printf("\n");
		
		printf("Iteration number: %d\n", itercount);
		printf("Calculation time: %d microseconds\n", (tv2.tv_sec-tv1.tv_sec)*1000000+(tv2.tv_usec-tv1.tv_usec));
		
		printf("Results: \n");
		for(int i=0; i<size; i++)
			printf("x%d=%lf\n", i+1, x[i]);
	}
	
	MPI_Barrier(MPI_COMM_WORLD);
	
	delete [] A;
	delete [] b;
	delete [] x;
	delete [] xprev;
	
	MPI_Finalize();
	
	return 0;
}