This might help if you want to create your own matrices - the rotation matrices might be switch but I cannot check them since I'm not on my own computer right now.

I use doubles in my code so that when I use assembly I can easily use the math co-processor. Most compilers will hand off doubles to the math-co processor - check me on this, though, because some say doubles are faster and some say they are not. But all the matrices I've seen (except for those in 3Dfx) use doubles.

Code:

//Make this faster by unrolling the inner k loop
//Did not do so here for clarity's sake
//Concantenates m1[] and m2[] - result in r[]
void MatMult(double m1[3][3],double m2[3][3],double r[3][3])
{
for (int i=0;i<4;i++)
{
for (int j=0;j<4;j++)
{
for (int k=0;k<4;k++)
{
r[i][j]+=m1[i][k]*m2[k][j];
}
}
}
}
//Copies source to target - no loop
void MatCopy(double source[3][3],double target[3][3])
{
target[0][0]=source[0][0];
target[0][1]=source[0][1];
target[0][2]=source[0][2];
target[0][3]=source[0][3];
target[1][0]=source[1][0];
target[1][1]=source[1][1];
target[1][2]=source[1][2];
target[1][3]=source[1][3];
target[2][0]=source[2][0];
target[2][1]=source[2][1];
target[2][2]=source[2][2];
target[2][3]=source[2][3];
target[3][0]=source[3][0];
target[3][1]=source[3][1];
target[3][2]=source[3][2];
target[3][3]=source[3][3];
}
void Translate(double dx,double dy,double dz)
{
double trans[3][3];
double result[3][3];
trans[0][0]=1.0;trans[0][1]=0.0;trans[0][2]=0.0;trans[0][3]=dx;
trans[1][0]=0.0;trans[1][1]=1.0;trans[1][2]=0.0;trans[1][3]=dy;
trans[2][0]=0.0;trans[2][1]=0.0;trans[2][2]=1.0;trans[2][3]=dz;
trans[3][0]=0.0;trans[3][1]=0.0;trans[3][2]=0.0;trans[3][3]=1.0;
MatMult(trans,result);
MatCopy(result,master);
}
void Scale(double sx,double sy,double sz)
{
double s[3][3];
double result[3][3];
s[0][0]=sx;s[0][1]=0.0;s[0][2]=0.0;s[0][3]=0.0;
s[1][0]=0.0;s[1][1]=sy;s[1][2]=0.0;s[1][3]=0.0;
s[2][0]=0.0;s[2][1]=0.0;s[2][2]=sz;s[2][3]=0.0;
s[3][0]=0.0;s[3][1]=0.0;s[3][2]=0.0;s[3][3]=1.0;
MatMult(s,result);
MatCopy(result,master);
}
void Rotate(double ax,double ay,double az)
{
double xmat[3][3];
double ymat[3][3];
double zmat[3][3];
double r1[3][3];
double r2[3][3];
//Order of rotations does matter
//Concatenation of matrices is NOT commutative
//Rotate about x axis
xmat[1][1]=cos(ax);xmat[1][2]=-sin(ax);
xmat[2][1]=sin(ax);xmat[2][2]=cos(ax);
xmat[0][0]=1.0;xmat[0][1]=0.0;xmat[0][2]=0.0;xmat[0][3]=0.0;
xmat[1][0]=0.0;xmat[1][3]=0.0;
xmat[2][0]=0.0;xmat[2][3]=0.0;
xmat[3][0]=0.0;xmat[3][1]=0.0;xmat[3][2]=0.0;xmat[3][3]=1.0;
//Concatenate with master
MatMult(xmat,master,r1);
//Rotate about y axis
ymat[0][0]=cos(ay);ymat[0][2]=-sin(ay);
ymat[2][0]=sin(ay);ymat[2][2]=cos(ay);
ymat[0][1]=0.0;ymat[0][3]=0.0;
ymat[1][0]=0.0;ymat[1][1]=1.0;ymat[1][2]=0.0;ymat[1][3]=0.0;
ymat[2][1]=0.0;ymat[2][3]=0.0;
ymat[3][0]=0.0;ymat[3][1]=0.0;ymat[3][2]=0.0;ymat[3][3]=1.0;
//Concatenate with result of x rotation - r1[]
MatMult(ymat,r2,r1);
//Rotate about z axis
zmat[0][0]=-sin(az);zmat[0][1]=cos(az);
zmat[1][0]=cos(az);zmat[1][1]=sin(az);
zmat[0][2]=0.0;ymat[0][3]=0.0;
zmat[1][2]=0.0;zmat[1][3]=0.0;
zmat[2][0]=0.0;zmat[2][1]=0.0;zmat[2][2]=1.0;zmat[2][3]=0.0;
zmat[3][0]=0.0;zmat[3][1]=0.0;zmat[3][2]=0.0;zmat[3][3]=1.0;
//Concatenate with result of y rotation - r2[] - leave in master
MatMult(zmat,r2,master);
}

Now all you need is to transform these vertices from local space to world space to view space. I'm not familiar with OpenGL but I think it will probably do this for you - it's much faster to have your 3D card process the vertices for you. But here is an old DOS transform that I used to use - before Direct3D that is.

Again Direct3D and OpenGL have their own vertex structures that you can use but here is a simple breakdown of a typical vertex structure.

Code:

struct pt2d
{
int x;
iny y;
};
struct pt3d
{
double x;
double y;
double z;
};
struct vertex
{
pt3d local;
pt3d world;
pt3d view;
pt2d screen;
//pt2d texture; //for texture mapping
//pt3d normal; //pre-computed normal
};
void LocalTransform(vertex *object,long numvertexes)
{
vertex *temp=object;
for (int i=0;i<numvertexes;i++)
{
double lx=temp[i].local.x;
double ly=temp[i].local.y;
double lz=temp[i].local.z;
double wx=lx*master[0][0]+ly*master[0][1]+
lz*master[0][2]+master[0][3];
double wy=lx*master[1][0]+ly*master[1][1]+
lz*master[1][2]+master[1][3];
double wz=lx*master[2][0]+ly*master[2][1]+
lz*master[2][2]+master[2][3];
temp[i].world.x=wx;
temp[i].world.y=wy;
temp[i].world.z=wz;
}
}
void ViewTransform(vertex *object,long numvertexes)
{
vertex *temp=object;
for (int i=0;i<numvertexes;i++)
{
double wx=temp[i].world.x;
double wy=temp[i].world.y;
double wz=temp[i].world.z;
double vx=wx*master[0][0]+wy*master[0][1]+
wz*master[0][2]+master[0][3];
double vy=wx*master[1][0]+wy*master[1][1]+
wz*master[1][2]+ master[1][3];
double vz=wx*master[2][0]+wy*master[2][1]+
wz*master[2][2]+master[2][3];
temp[i].view.x=vx;
temp[i].view.y=vy;
temp[i].view.z=vz;
}
}

Again my rotation matrices might be backwards and some of the structure operations might be wrong - I hand-coded this from memory while sitting here so I'm sure there are some errors. But most of this should work as is. Again this is old school since most APIs hand this off to the video card and they perform it much quicker than this code could.

Hope this helps. It should give you an idea of the matrices and concatenations required for 3D. If you want to know how to concatenate different types of matrices then find a math book or get a book that explains 3D concepts and math.

Only thing left to code for the above is the projection function but I will leave that to you. Now you can create some vertexes and rotate them about any axis. If you do not like using radians then convert them to degrees (radians*PI/180). Not the best to use euler angles but it works to start off with.

If you are using OpenGL like you said then just set up your rotation matrix and let it do the rest.