Code:
/**************************************************************************************************
* ZValues.cpp *
* This file contains functions for working with a standard normal distribution. *
* The data for the distribution table comes from "Finite Mathematics", 9th Edition, *
* Barnett, Ziegler, and Byleen, Prentice Hall, 2002, pg 697 *
* *
* All the rest of the contents of this file are written by James G Flewelling, copyright 2005. *
* You may use this file for personal or educational use free of restraint, but any commercial *
* use whatsoever is prohibited without prior consent from the author. *
* *
* If you wish to sue this code for any personal or educational project, you may do so, but I *
* would be interested in knowing what you're using it for; you may contact me at *
* cobras2 softhome net or cobras2 vcs-inet net (insert an @ symbol and a . in their proper *
* locations). If either of these addresses fail, look me up - my name isn't very common. *
*************************************************************************************************/
#include <cstdio>
#include <cmath>
#include <iostream>
#include <sstream>
#include <list>
#include <climits>
using std::sort;
using std::list;
using std::string;
using std::ostringstream;
using std::numeric_limits;
float ZValues[][10] = {
/*0.00 to 0.09*/
{0.0000, 0.0040, 0.0080, 0.0120, 0.0160, 0.0199, 0.0239, 0.0279, 0.0319, 0.0359},
/*0.10 to 0.19*/
{0.0398, 0.0438, 0.0478, 0.0517, 0.0557, 0.0596, 0.0636, 0.0675, 0.0714, 0.0753},
/*0.20 to 0.29*/
{0.0793, 0.0832, 0.0871, 0.0910, 0.0948, 0.0987, 0.1026, 0.1064, 0.1103, 0.1141},
/*0.30 to 0.39*/
{0.1179, 0.1217, 0.1255, 0.1293, 0.1331, 0.1368, 0.1406, 0.1443, 0.1480, 0.1517},
/*0.40 to 0.49*/
{0.1554, 0.1591, 0.1628, 0.1664, 0.1700, 0.1736, 0.1772, 0.1808, 0.1844, 0.1879},
/*0.50 to 0.59*/
{0.1915, 0.1950, 0.1985, 0.2019, 0.2054, 0.2088, 0.2123, 0.2157, 0.2190, 0.2224},
/*0.60 to 0.69*/
{0.2257, 0.2291, 0.2324, 0.2357, 0.2389, 0.2422, 0.2454, 0.2486, 0.2517, 0.2549},
/*0.70 to 0.79*/
{0.2580, 0.2611, 0.2642, 0.2673, 0.2704, 0.2734, 0.2764, 0.2794, 0.2823, 0.2852},
/*0.80 to 0.89*/
{0.2881, 0.2910, 0.2939, 0.2967, 0.2995, 0.3023, 0.3051, 0.3078, 0.3106, 0.3133},
/*0.90 to 0.99*/
{0.3159, 0.3186, 0.3212, 0.3238, 0.3264, 0.3289, 0.3315, 0.3340, 0.3365, 0.3389},
/*1.00 to 1.09*/
{0.3413, 0.3438, 0.3461, 0.3485, 0.3508, 0.3531, 0.3554, 0.3577, 0.3599, 0.3621},
/*1.10 to 1.19*/
{0.3643, 0.3665, 0.3686, 0.3708, 0.3729, 0.3749, 0.3770, 0.3790, 0.3810, 0.3830},
/*1.20 to 1.29*/
{0.3849, 0.3869, 0.3888, 0.3907, 0.3925, 0.3944, 0.3962, 0.3980, 0.3997, 0.4015},
/*1.30 to 1.39*/
{0.4032, 0.4049, 0.4066, 0.4082, 0.4099, 0.4115, 0.4131, 0.4147, 0.4162, 0.4177},
/*1.40 to 1.49*/
{0.4192, 0.4207, 0.4222, 0.4236, 0.4251, 0.4265, 0.4279, 0.4292, 0.4306, 0.4319},
/*1.50 to 1.59*/
{0.4332, 0.4345, 0.4357, 0.4370, 0.4382, 0.4394, 0.4406, 0.4418, 0.4429, 0.4441},
/*1.60 to 1.69*/
{0.4452, 0.4463, 0.4474, 0.4484, 0.4495, 0.4505, 0.4515, 0.4525, 0.4535, 0.4545},
/*1.70 to 1.79*/
{0.4554, 0.4564, 0.4573, 0.4582, 0.4591, 0.4599, 0.4608, 0.4616, 0.4625, 0.4633},
/*1.80 to 1.89*/
{0.4641, 0.4649, 0.4656, 0.4664, 0.4671, 0.4678, 0.4686, 0.4693, 0.4699, 0.4706},
/*1.90 to 1.99*/
{0.4713, 0.4719, 0.4726, 0.4732, 0.4738, 0.4744, 0.4750, 0.4756, 0.4761, 0.4767},
/*2.00 to 2.09*/
{0.4772, 0.4778, 0.4783, 0.4788, 0.4793, 0.4798, 0.4803, 0.4808, 0.4812, 0.4817},
/*2.10 to 2.19*/
{0.4821, 0.4826, 0.4830, 0.4834, 0.4838, 0.4842, 0.4846, 0.4850, 0.4854, 0.4857},
/*2.20 to 2.29*/
{0.4861, 0.4864, 0.4868, 0.4871, 0.4875, 0.4878, 0.4881, 0.4884, 0.4887, 0.4890},
/*2.30 to 2.39*/
{0.4893, 0.4896, 0.4898, 0.4901, 0.4904, 0.4906, 0.4909, 0.4911, 0.4913, 0.4916},
/*2.40 to 2.49*/
{0.4918, 0.4920, 0.4922, 0.4925, 0.4927, 0.4929, 0.4931, 0.4932, 0.4934, 0.4936},
/*2.50 to 2.59*/
{0.4938, 0.4940, 0.4941, 0.4943, 0.4945, 0.4946, 0.4948, 0.4949, 0.4951, 0.4952},
/*2.60 to 2.69*/
{0.4953, 0.4955, 0.4956, 0.4957, 0.4959, 0.4960, 0.4961, 0.4962, 0.4963, 0.4964},
*2.70 to 2.79*/
{0.4965, 0.4966, 0.4967, 0.4968, 0.4969, 0.4970, 0.4971, 0.4972, 0.4973, 0.4974},
/*2.80 to 2.89*/
{0.4974, 0.4975, 0.4976, 0.4977, 0.4977, 0.4978, 0.4979, 0.4979, 0.4980, 0.4981},
/*2.90 to 2.99*/
{0.4981, 0.4982, 0.4982, 0.4983, 0.4984, 0.4984, 0.4985, 0.4985, 0.4986, 0.4986},
/*3.00 to 3.09*/
{0.4987, 0.4987, 0.4987, 0.4988, 0.4988, 0.4989, 0.4989, 0.4989, 0.4990, 0.4990},
/*3.10 to 3.19*/
{0.4990, 0.4991, 0.4991, 0.4991, 0.4992, 0.4992, 0.4992, 0.4992, 0.4993, 0.4993},
/*3.20 to 3.29*/
{0.4993, 0.4993, 0.4994, 0.4994, 0.4994, 0.4994, 0.4994, 0.4995, 0.4995, 0.4995},
/*3.30 to 3.39*/
{0.4995, 0.4995, 0.4995, 0.4996, 0.4996, 0.4996, 0.4996, 0.4996, 0.4996, 0.4997},
/*3.40 to 3.49*/
{0.4997, 0.4997, 0.4997, 0.4997, 0.4997, 0.4997, 0.4997, 0.4997, 0.4997, 0.4998},
/*3.50 to 3.59*/
{0.4998, 0.4998, 0.4998, 0.4998, 0.4998, 0.4998, 0.4998, 0.4998, 0.4998, 0.4998},
/*3.60 to 3.69*/
{0.4998, 0.4998, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999},
/*3.70 to 3.79*/
{0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999},
/*3.80 to 3.89*/
{0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999, 0.4999},
/*3.90 to 3.99*/
{0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000, 0.5000}
};
//I would use a standard library routine for this, but I haven't been able to find one.. odd,
// considering how important rounding can be.
/************************************************************************************************
* This function simply rounds the float "in" to the place designated by the float "placemask" *
* placemask should be a 1 in the place that you want to round to *
* so i.e. to round to the nearest hundred, placemask is: *
* 100 *
* and to round to the nearest ten thousandth, placemask is: *
* 0.0001 *
* if you pass 4.32779 as in, and 0.0001 as placemask, you will get 4.3278 returned. Sort of.. *
* *
* the reason I convert the floats to strings is because it does not keep track of floats *
* very accurately, when you get into big numbers; once it's in a string I can (should be *
* able to, at least) keep track of it more accurately. *
************************************************************************************************/
float roundtoplace(float in, float placemask) throw(string)
{
float r=0;
ostringstream o;
if(placemask == 0) {
o << "Invalid placemask 0";
throw o.str();
}
if(placemask < 0) placemask = -placemask;
bool positive = true;
if(in < 0) {
positive = false;
in = -in;
}
char temp0[1024];
char* temp2;
try {
//numeric_limits<float> gives information on how many digits of precision you can get into a float number
//this way I am allocating a string big enough to hold a float, even if it's the biggest float you've
//ever seen, on this particular system.
temp2 = new char[(-numeric_limits<float>::min_exponent10)+numeric_limits<float>::max_exponent10+10];
} catch(...) {
string e("Failed allocating memory for temporary string.");
throw(e);
}
int placeSmall=0;
int placeBig=0;
float tmpPlacemask = placemask;
//count how many places are in the place mask
if(placemask < 1) {
for(placeSmall=0; tmpPlacemask < 1; placeSmall++, tmpPlacemask *= 10);
} else if(placemask > 1) {
for(placeBig=0; tmpPlacemask > 1; placeBig++, tmpPlacemask /= 10);
}
if(placeSmall > 0) {
sprintf(temp0, "%%.%if", placeSmall);
sprintf(temp2, temp0, in);
r = atof(temp2);
if(!positive) r = -r;
} else if(placeBig > 0) {
int x = placeBig;
bool add=false;
sprintf(temp2, "%f", in);
char* s;
s=temp2+strlen(temp2); //set s to the end of the string
//the point of this loop is to truncate the floating point number, rather than rounding it to the nearest int;
//unless we are rounding to the ones place, which is done below in the "else" statement, we want to ignore the decimal
//part of the number, because for instance if the number is 44.9 and we want to round to the nearest tens place,
//rounding off to ones first gives us 45 which then rounds to 50, which is incorrect;
//ignoring the decimal part gives us 44 which then rounds to 40, which is correct.
while(s-- > temp2) {
if(*s == '.') {
*s = '\0';
break;
}
}
//this part sets everything after the place we want to round to to a 0, but also checks the digit after
//the one we want to round to, and if it is >= 5 then we will add one to our rounding place, after the loop.
for(s = temp2+strlen(temp2)-1; x > 0; x--,s--) {
if(x == 1) {
if(*s >= '5' && *s <= '9') add = true;
}
if(*s != '0') *s = '0';
}
r = atof(temp2);
//if applicable, add one to the rounding place (for rounding up instead of down)
if(add) r += placemask;
} else /* ones place */{
sprintf(temp2, "%.0f", in);
r = atof(temp2);
}
delete [] temp2;
return r;
}
/*************************************************************************************************
* This function takes as input a start and end Z value, and returns the area under the *
* standard curve bounded by those z values. *
* It only works for values above 0, so if you want to know the distance between -0.37 and *
* 0.12 for instance, then check the distance between 0 and 0.12 and add it to the distance *
* between 0 and 0.37 like so: *
* float x = standardCurve(0, 0.12) + standardCurve(0, 0.37); *
*************************************************************************************************/
float standardCurve(float x1, float x2)
{
if(x1 > 3.99 || x1 < 0 || x2 > 3.99 || x2 < 0) return 0;
float r;
//take the first two digits of the float val
int X1 = (int)(10*x1);
r = x1 - ((float)X1/10);
r *= 100;
int Y1 = (int)r;
int X2 = (int)(10*x1);
r = x2 - ((float)X2/10);
r *= 100;
int Y2 = (int)r;
if(X1 > 39 || X1 < 0 || Y1 > 9 || Y1 < 0) return 0;
if(X2 > 39 || X2 < 0 || Y2 > 9 || Y2 < 0) return 0;
r = ZValues[X2][Y2] - ZValues[X1][Y1];
return r;
}
/***************************************************************************************************
* this function takes a value between -0.5 and 0.5 (after being rounded to four places), *
* which corresponds to an area under the standard normal distribution, and returns the value of *
* the standard deviation that corresponds to the upper (right) or lower (left) edge of that area. *
* *
* to use it, you need a few steps, and probably also a basic understanding of the math involved. *
* Try reading a textbook on statistics if you have no clue :) *
* anyway, here's for if you know what you're doing; *
* float sDev=2.8, mean=69.09; //example, this is the mean and sdev of height for *
* //males in the US - i got it off some website I can't remember :-/ *
* float x = 0.37; //Mr. X is in the 87th percentile for height, i.e. he's taller than *
* //about 87% of the male population of the US (I'm not american, but my *
* // data is ;) ..I couldn't find Canadain height statistics.) *
* //as long as the value of x is between -0.5 (~0th percentile) and 0.5 (~100th percentile) *
* //you will get good results. Don't forget to use a try block in the real thing. *
* float y = (sDev*standardCurveBackwards(x)) + mean; //now, y contains Mr. X's actual height! *
* *
* so you see, this function is useful if you already know what percentil of height Mr X is, *
* but you want to know *HOW TALL* he actually is. (That's why it's called backwards; if you know *
* how tall he is but not what percentile he's in, use standardCurve().) *
* For instance, if you want to generate some people with random height, but you actually want it *
* to be a realistic random value, you can make a random value between -0.5 and 0.5, then feed it *
* through the above computation ((sDev*standardCurveBackwards(((rand()%100)/100)-0.5)) + mean), *
* you get a weighted random height. i.e. generating ten random people should be like picking ten *
* random people in the US and measuring their heights. *
***************************************************************************************************/
float standardCurveBackwards(float x1) throw(string)
{
ostringstream e;
bool neg=false;
float r;
try {
r = roundtoplace(x1, 0.0001);
} catch(string e) {
throw(e);
} catch (...) {
string e("Uknown error caught.");
throw(e);
}
if(r < 0) {
r = -r;
neg = true;
}
if(r > 0.5 || r < 0) {
//is there a friggin formatted printing function for strings ?!
//streams are not as cool as printf. They're (sort of) handy for speed, but their formatting is nowhere near as good.
e << "Invalid input (after rounding): " << r;
throw(e.str());
}
//there's probably a faster way to do this...
//like, binary sorting or something...
//but meh, this is still a rough draft version of the function anyway.
int x,y;
for(x=0; x<39; x++) {
for(y=0; y<10; y++) {
if(r == ZValues[x][y]) {
r = ((float)(x*10 + y)/100);
if(neg) return -r;
else return r;
}
else if(r < ZValues[x][y]) {
if(y == 0) {
x -= 1;
y = 9;
} else {
y -= 1;
}
r = ((float)(x*10 + y)/100);
if(neg) return -r;
else return r;
}
//else continue
}
}
return 1;
}
//minPcnt is the lowest percentile you will accept, max is the highest.
//if you want only the top 2% of the population, for instance, you can do minPcnt as 98 and maxPcnt as 100;
//if you want only above average, then max minPcnt 50 and maxPcnt 100.
//This func can be used to generate a random height like it says in the comment above standardCurveBackwards
// just pass the mean and sDev of heights you want, and then either pass 0 and 100 or pass certain constraints
// for if you want to work with, for instance, above average heights.
float randValue(float mean, float sDev, float minPcnt, float maxPcnt)
{
float r;
float temp;
//if minPcnt is too small change it
if(minPcnt < 0 || minPcnt > 100) minPcnt = 0;
//if maxPcnt is too big change it
if(maxPcnt > 100 || maxPcnt < 0) maxPcnt = 100;
//if minPcnt is bigger than maxPcnt, switch them
if(minPcnt > maxPcnt) {
float x = minPcnt;
minPcnt = maxPcnt;
maxPcnt = x;
}
//generate a random number between 0 and 1000000
temp = rand()%1000000;
//then change it to something between 0.0000 and 99.9999
temp /= 10000;
//then force it into the bracket between minPcnt and maxPcnt
//(this shouldn't change it at all if(minPcnt == 0 && maxPcnt == 100))
temp = ((maxPcnt - minPcnt)/100)*temp;
temp += minPcnt;
//then change it from a percentage into a positive or negative value between 0 and 0.4999
temp = (temp-50)/100;
//translate the percentage from an area to a Z value
try {
temp = standardCurveBackwards(temp);
} catch(string e) {
throw(e);
} catch(...) {
string e("Uknown error caught.");
throw(e);
}
//use the Z value, mean, and standard deviation to find the value of x
//what we do is to add a multiple of the standard deviation to the mean.
//the multiple of the standard deviation to use is, of course, determined by our formerly discovered random value.
r = (sDev*temp) + mean;
return r;
}
int main(int argc, char* argv[])
{
//these are the approximate values for the mean and standard deviation of height for males in the US
//I can't remember where I found the data, somewhere in a web search awhile back, but it'll work for a test
float sDev=2.8;
float mean=69.09;
list<float> array;
int x;
unsigned int loopIterations=100;
srand(time(0));
if(argc > 1) {
loopIterations = (unsigned int)atoi(argv[1]);
if(loopIterations < 10) loopIterations = 10;
}
for(x=0;x<loopIterations;x++) {
try {
array.push_back(randValue(mean, sDev, 0, 100));
} catch(string e) {
} catch(...) {
}
}
array.sort();
list<float>::iterator p = array.begin();
for(x=0;p != array.end();p++,x++) {
if(!(x%10) && x!=0) printf("\n");
printf("%2.0f ", *p);
}
printf("\n");
/*
try {
printf("577.369:\n");
printf("to 100: %f\n", roundtoplace(577.369, 100));
printf("to 10: %f\n", roundtoplace(577.369, 10));
printf("to 1: %f\n", roundtoplace(577.369, 1));
printf("to 0.1: %f\n", roundtoplace(577.369, 0.1));
printf("to 0.01: %f\n", roundtoplace(577.369, 0.01));
} catch(string e) {
printf("%s\n", e.c_str());
} catch(...) {
}
//*/
return 0;
}
It's still a draft version, so I'm not even sure how well it works.. and it's been awhile since I did the statistics course.