I've got this one little function that calculates standard deviation. All the separate peices work, but together it blows up. I simply cannot figure out why.
I'm trying to get this formula (its hard to show in ascii): Sx = sqrt( (#SUM OF FOR I TO N# Xi^2 - n(average of X)^2) / N)
Here's the function. FYI I put in a zillion extra ()'s to make sure it did things right...
Code:
double standev( void ) {
//standard deviation
int i;
double top=0;
for (i=0;i<N;i++) {
top += ((x[i]*x[i]) - (((double)(N))*(avgx*avgx)));
if (DEBUG) printf("[+] top += %f %f*%f=%f - %f * %f * %f = %f\n",(x[i]*x[i]) - (N*(avgx*avgx)),x[i],x[i],(x[i]*x[i]),(double)(N),avgx,avgx,(avgx*avgx));
}
if (DEBUG) printf("[+] standard dev: %f\n[+] top: %f, divide by N : %f\n",sqrt(top/(double)(N)),top,(double)(N));
return sqrt(top/(double)(N));
}
See, the output with DEBUG set is as follows:
Code:
[+] top += -3252.785233 3.000000*3.000000=9.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3245.785233 4.000000*4.000000=16.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3236.785233 5.000000*5.000000=25.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3225.785233 6.000000*6.000000=36.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3212.785233 7.000000*7.000000=49.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3197.785233 8.000000*8.000000=64.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3180.785233 9.000000*9.000000=81.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3161.785233 10.000000*10.000000=100.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3140.785233 11.000000*11.000000=121.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3117.785233 12.000000*12.000000=144.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3092.785233 13.000000*13.000000=169.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3065.785233 14.000000*14.000000=196.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3036.785233 15.000000*15.000000=225.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -3005.785233 16.000000*16.000000=256.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -2972.785233 17.000000*17.000000=289.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -2937.785233 18.000000*18.000000=324.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -2900.785233 19.000000*19.000000=361.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -2861.785233 20.000000*20.000000=400.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -2820.785233 21.000000*21.000000=441.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
[+] top += -2777.785233 22.000000*22.000000=484.000000 - 20.000000 * 12.770641 * 12.770641 = 163.089262
See, all the various peices hold the right values, but the final is crazy way off.
Here's the full source:
Code:
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#define N 20
#define DEBUG 1
double x[N],y[N],avgx,avgy,avgxy,r;
void getvals( void ) {
//read from file...
int i;
for (i=0;i<20;i++) {
x[i]=(double)(i) + 3.0;
y[i]=(double)(i);
if (DEBUG) printf("[+] (%f,%f)\n",x[i],y[i]);
}
}
void getavg( void ) {
int i;
double sumx, sumy, sumxy;
//before here, validate so that sizeof(x)=sizeof(y)
for (i=0;i<N;i++) {
sumx += x[i];
sumy += y[i];
sumxy += x[i]*y[i];
}
avgx = sumx/(double)(N);
avgy = sumy/(double)(N);
avgxy = sumxy/(double)(N);
if (DEBUG) printf("[+] avgx %f avgy %f avgxy %f\n",avgx,avgy,avgxy);
}
double standev( void ) {
//standard deviation
int i;
double top=0;
for (i=0;i<N;i++) {
top += ((x[i]*x[i]) - (((double)(N))*(avgx*avgx)));
if (DEBUG) printf("[+] top += %f %f*%f=%f - %f * %f * %f = %f\n",(x[i]*x[i]) - (N*(avgx*avgx)),x[i],x[i],(x[i]*x[i]),(double)(N),avgx,avgx,(avgx*avgx));
}
if (DEBUG) printf("[+] standard dev: %f\n[+] top: %f, divide by N : %f\n",sqrt(top/(double)(N)),top,(double)(N));
return sqrt(top/(double)(N));
}
void findlinear( void ) {
double slope,yint,sx;
//here we need to find the slope, then change the
//y-int so that the line runs through (avgx,avgy)
getvals();
getavg();
sx = standev();
slope = (avgxy - (avgx*avgy))/(sx*sx);
yint = avgy - (slope*avgx);
printf("y = %fx + %f\n");
}
int main(int argc, char *argv[]) {
findlinear();
return 0;
}
Thanks a lot, I'm pulling my hair out.