Hi, I'm writing an MPI program and part of this program just needs to do a simple dot product. To do this I wrote the following code:

Code:
float innerProduct(float *v1,float *v2){
    ftemp=0.0;
    for(fi=0;fi<mySize;fi++){
        ftemp+=v1[fi]*v2[fi];
    }
    MPI_Allreduce(&ftemp,&ftemp2,1,MPI_FLOAT,MPI_SUM,MPI_COMM_WORLD);
    return ftemp2;
}
However it would not computer the correct dot product so I got rid of the Allreduce and did things more explicitly and put in debug statements and it now looks like this:

Code:
float innerProduct(float *v1,float *v2){
    ftemp=0.0;
    for(fi=0;fi<mySize;fi++){
        ftemp+=v1[fi]*v2[fi];
    }
    //MPI Gather Step
    cout <<"Server " << myRank << " has ftemp = " << ftemp << endl;
    MPI_Gather(&ftemp,1,MPI_FLOAT,dotReceive,1,MPI_FLOAT,0,MPI_COMM_WORLD);
    ftemp2=0.0;
    if(myRank==0){
        cout << "&&&& Server 0 RECEIVED [";
        for(fi=0;fi<nProcs;fi++){
            ftemp2+=dotReceive[fi];
            cout << dotReceive[fi] << " ftemp2="<<ftemp2<<" | ";
        }
        cout << "] sum = "<<ftemp2<< endl;
    }
    MPI_Bcast(&ftemp2,1,MPI_FLOAT,0,MPI_COMM_WORLD);
    //MPI_Allreduce(&ftemp,&ftemp2,1,MPI_FLOAT,MPI_SUM,MPI_COMM_WORLD);
    return ftemp2;
}

Now, for some reason which I can't for the life of me figure out this code outputs:


Server 0 has ftemp = 5625216
&&&& Server 0 RECEIVED [5625216 ftemp2=5625216 | 39245132 ftemp2=44870348 | 106419424 ftemp2=151289776 | 207148304 ftemp2=358438080 | ] sum = 358438080
Server 3 has ftemp = 207148304
Server 1 has ftemp = 39245132
Server 2 has ftemp = 106419424

WTF?? It appears that the root process is getting the correct values but isn't actually summing properly?! Those numbers do not add up to that sum. Does anyone have a clue what is going on here? Any help would be greatly appreciated