I have two matrices stored in two files, and I read each of them into a 1-D array. The result matrix has the same values as the second matrix. Why? Also, the first matrix is printed four times. I checked the code and there is no loop around that output, so what causes the first matrix to be printed repeatedly?
Code:
// ahmed matrix_mul.cpp : Defines the entry point for the console application.
//
#include "mpi.h"
#include <stdio.h>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>
using namespace std;
//#define NRA 3 			/* number of rows in matrix A */
//#define NCA 3			/* number of columns in matrix A */
//#define NCB 3  		/* number of columns in matrix B */
/* Message-passing constants used by the master and worker branches of main. */
#define MASTER 0		/* rank (task id) of the master task */
#define FROM_MASTER 1		/* message tag for master -> worker messages */
#define FROM_WORKER 2		/* message tag for worker -> master messages */
/* z2 is not referenced anywhere else in the visible code. */
int z2=0;
/* Matrix dimensions, held as run-time globals in place of the commented-out
   NRA/NCA/NCB macros above. All of them start at 0. */
/* number of rows in matrix A */
int NRA=0;
/* number of columns in matrix A */
int NCA=0;
/* number of columns in matrix B */
int NCB=0;
//MPI_Status status;
/* Status object handed to the MPI::COMM_WORLD.Recv calls in main. */
MPI::Status status;
void main(int argc, char **argv) 
{
int numtasks,			/* number of tasks in partition */
    taskid,			/* a task identifier */
    numworkers,			/* number of worker tasks */
    source,			/* task id of message source */
    dest,			/* task id of message destination */
    nbytes,			/* number of bytes in message */
    mtype,			/* message type */
    intsize,			/* size of an integer in bytes */
    dbsize,			/* size of a double float in bytes */
    rows,                      	/* rows of matrix A sent to each worker */
    averow, extra, offset,      /* used to determine rows sent to each worker */
    i, j, //k,			/* misc */
    count;
//double a[NRA][NCA], 		/* matrix A to be multiplied */
      // b[NCA][NCB],      	/* matrix B to be multiplied */
     //  c[NRA][NCB];		    /* result matrix C */
	double *a=NULL;
	double *b=NULL;
	double *c=NULL;

//----------------create a&B&C----------------------------

ifstream inFile;
    int  x=0;
    inFile.open("E:\\matrix.txt");
    if (!inFile) 
	{
        cout << "Unable to open file";
        exit(1); // terminate with error
    }
	else
	{
		inFile>>x;
		NRA=x;
	}
	inFile.close();

//-------------------------------------------------------------------------------------
	a=new double[NRA*NRA];
	b=new double[NRA*NRA];
	c=new double[NRA*NRA];
//--------------------------------------------------------------------------------------
	//ifstream inFile;
     x=0;
	int size=0;
	bool var=true;
	int c1=0,c2=0;
    inFile.open("E:\\matrix.txt");
    if (!inFile) 
	{
        cout << "Unable to open file";
        exit(1); // terminate with error
    }
	else
	{
		while (inFile >> x) 
		{
			if(var==true)
			{
			    size= x;
				//NRA=size;
				size=0;
				//array_ptr=new double[NRA]; //creates a new array of pointers to int objects
				var=false;
				break;
				
			}
			
		}
		
		while(inFile >> x)
		{
			
			a[ c2 ]=x;
               if( inFile ) 
			   {
							c2=c2+1;
			   }

			}
		}
		inFile.close();
//------------------------------------------------------------------------
		x=0;
        size=0;
        var=true;
        c1=0,c2=0;
    inFile.open("E:\\ahmed.txt");
    if (!inFile) 
	{
        cout << "Unable to open file";
        exit(1); // terminate with error
    }
	else
	{
		while (inFile >> x) 
		{
			if(var==true)
			{
			    size= x;
				//NRA=size;
				size=0;
				//array_ptr=new double[NRA]; //creates a new array of pointers to int objects
				var=false;
				break;
				
			}
			
		}
		
		while(inFile >> x)
		{
			
			b[ c2 ]=x;
               if( inFile ) 
			   {
							c2=c2+1;
			   }

			}
		}
		inFile.close();




//------------------------------------------------------------------------------------
		
		cout<<endl;
		for(int i=0;i<NRA*NRA;i++)
			cout<<b[i]<<"	";
		cout<<NRA;
		cout<<endl;
		for(int i=0;i<NRA*NRA;i++)
			cout<<a[i]<<"	";
//-----------------------------create end--------------------
intsize = sizeof(int);
dbsize = sizeof(double);
MPI::Init(argc,argv);
//MPI_Init(&argc, &argv);
//MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
taskid=MPI::COMM_WORLD.Get_rank();
//MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
numtasks=MPI::COMM_WORLD.Get_size();
numworkers = numtasks-1;

/**************************** master task ************************************/
if (taskid == MASTER)
{
  printf("Number of worker tasks = %d\n",numworkers);

  /* send matrix data to the worker tasks */
  averow = NRA/numworkers;
  extra = NRA%numworkers;
  offset = 0;
  mtype = FROM_MASTER;
  for (dest=1; dest<=numworkers; dest++) 
  {			
    rows = (dest <= extra) ? averow+1 : averow;   	
    printf("   sending %d rows to task %d\n",rows,dest);
    //MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
	MPI::COMM_WORLD.Send(&offset,1,MPI::INT,dest,mtype);
   // MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD);
	MPI::COMM_WORLD.Send(&rows,1,MPI::INT,dest,mtype);
    count = rows*NCA;
    //MPI_Send(&a[offset][0], count, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
	MPI::COMM_WORLD.Send(&a[offset],count,MPI::DOUBLE,dest,mtype);
    count = NCA*NCB;
   // MPI_Send(&b, count, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD);
	MPI::COMM_WORLD.Send(&b,count,MPI::DOUBLE,dest,mtype);
	//MPI::COMM_WORLD.Send(&c[offset],count,MPI::DOUBLE,dest,mtype);

    offset = offset + rows;
    }

  /* wait for results from all worker tasks */
    mtype = FROM_WORKER;
    for (i=1; i<=numworkers; i++)
	{			
    source = i;
   // MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
	MPI::COMM_WORLD.Recv(&offset,1,MPI::INT,source,mtype,status);
    //MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
	MPI::COMM_WORLD.Recv(&rows,1,MPI::INT,source,mtype,status);
    count = rows*NCB;
   // MPI_Recv(&c[offset][0], count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
	MPI::COMM_WORLD.Recv(&c[offset],count,MPI::DOUBLE,source,mtype,status);
    }

  /* print results */
  printf("Here is the result matrix\n");
   
    printf("\n"); 
    for (j=0; j<NCB*NCB; j++) 
      printf("%f   ", c[j]);
    }
  printf ("\n");

    /* end of master section */



/**************************** worker task ************************************/
if (taskid > MASTER) {
  mtype = FROM_MASTER;
  source = MASTER;
  printf ("Master =%d, mtype=%d\n", source, mtype);
  //MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
  MPI::COMM_WORLD.Recv(&offset,1,MPI::INT,source,mtype,status);
  printf ("offset =%d\n", offset);
  //MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status);
   MPI::COMM_WORLD.Recv(&rows,1,MPI::INT,source,mtype,status);
  printf ("row =%d\n", rows);
  count = rows*NCA;
  //MPI_Recv(&a, count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
  MPI::COMM_WORLD.Recv(&a, count,MPI::DOUBLE,source,mtype,status);
  printf ("a[0] =%e\n", a[0]);
  count = NCA*NCB;
  //MPI_Recv(&b, count, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status);
  MPI::COMM_WORLD.Recv(&b, count,MPI::DOUBLE,source,mtype,status);
  int  counter=0;  int counter1=0; 
  printf ("b=\n");
 
//#pragma omp parallel for shared(a,b,c)
for (long i=0; i<NCA; i++)
{
    for (long j = 0; j < NCA; j++)
    {
        long idx = i * NCA;
        double sum = 0;
        for (long k1 = 0; k1 < NCA; k1++)
        {
            sum =sum+(a[idx + k1]*b[k1 * NCA +j]);
        }
        c[idx + j] = sum;
    }
}

  mtype = FROM_WORKER;
  printf ("after computer\n");
  //MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
  MPI::COMM_WORLD.Send(&offset,1,MPI::INT,MASTER,mtype);
  //MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD);
  MPI::COMM_WORLD.Send(&rows,1,MPI::INT,MASTER,mtype);
  //MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD);
  MPI::COMM_WORLD.Send(&c,rows*NCB,MPI::DOUBLE,MASTER,mtype);

  printf ("after send\n");

  }  /* end of worker */
  MPI::Finalize();
} /* of main */