Hi there,

I cannot for the life of me get dgemm to spit out the correct answer when I call it from C.

I would love to have a function that that was some thing like:

void matmult(float ** A, float ** B, float ** C, int Arows, int innerdim, int Bcols)

where you could pass it three arrays float ** or float * form (I don't care, I just want something to work) the rows of the A matrix, the shared inner dimension, and the columns of the B matrix and have the answer dumped in to the C matrix.

Any thoughts? Figuring out how to correct for the column major order and the leading dimensions is difficult.

Thanks so much,

Mike

Here's what I have so far, and it only works for an NxN times and NxN matrix:

Code:/************************************ INCLUDES ************************************/ #include <stdio.h> #include <stdlib.h> #include <math.h> /************************************ CREATE A 1D ARRAY ************************************/ float* alloc_float_1d(int ni){ int i; float* a = (float*) malloc(ni*sizeof(float)); for(i=0;i<ni;i++) a[i]=0; return a; } /************************************ 1D ARRAY INDEX ************************************/ int aind(int row, int col, int numcols) { return (numcols*row)+col; } void aprint1d(float * A, int rows, int cols) { for(int i=0;i<rows;i++) { for(int j=0;j<cols;j++) printf("%1.4f ", A[aind(i,j,cols)]); printf("\n"); } } /************************************ Multiply 2 2d(float *) ARRAYS ************************************/ void matmult(float * A, float * B, float * C, int arows, int idim, int bcols) { //Perform the matrix multiplication sgemms(A, &arows, "N", B, &bcols, "N", C, &bcols, &arows, &bcols, &idim, 0, 0); float temp; //FLIP UD for(int i=0;i<(int)floor(arows/2.0);i++) { for(int j=0;j<bcols;j++) { temp=C[aind(i,j,bcols)]; C[aind(i,j,bcols)]=C[aind((bcols-1)-i,j,bcols)]; C[aind((bcols-1)-i,j,bcols)]=temp; } } //FLIP LR for(int i=0;i<(int)floor(arows/2.0);i++) { for(int j=0;j<bcols;j++) { temp=C[aind(j,i,bcols)]; C[aind(j,i,bcols)]=C[aind(j,(bcols-1)-i,bcols)]; C[aind(j,(bcols-1)-i,bcols)]=temp; } } } //*********************************** //*********************************** // MAIN //*********************************** //*********************************** void main() { //Square Array size int i, j; int arows=3; int idim=3; int bcols=3; //Allocate the arrays float * A=alloc_float_1d(arows*idim); float * B=alloc_float_1d(idim*bcols); float * C=alloc_float_1d(arows*bcols); int c=1; for(i=0; i<arows; i++){ for(j=0; j<idim; j++){ A[aind(i,j,idim)] = c++; } } c=1; for(i=0; i<idim; i++){ for(j=0; j<bcols; j++){ B[aind(i,j,bcols)] = (idim*bcols+1)-c++; } } matmult(A,B,C,arows,idim,bcols); printf("A:\n"); aprint1d(A,arows,idim); printf("\n"); printf("B:\n"); aprint1d(B,idim,bcols); printf("\n"); printf("C:\n"); aprint1d(C,arows,bcols); }