>> You didn't test enough, or your tests are broken.
I looked at *all* bucket sizes where the algorithms differ. I did not find any outliers, but perhaps my RAND_MAX or sample size isn't large enough to produce any...
(forgive the C++ on the C forum...)
Code:
#define _CRT_SECURE_NO_WARNINGS
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
int nrand1(int f)
{
// if (f <= 1 || f >= RAND_MAX)
// {
// return(-1); /* throw domain_error(); */
// }
int s = ((RAND_MAX - f + 1) / f + 1);
int r;
do {r = rand() / s;} while(r >= f);
return(r);
}//nrand1
int nrand2(int n)
{
// if (n <= 0 || n > RAND_MAX)
// return -1; // C++: throw domain_error("Argument to nrand is out of range");
const int bucket_size = RAND_MAX / n;
int r;
do r = rand() / bucket_size;
while (r >= n);
return r;
}//nrand2
#include <vector>
bool test(const size_t bucket_sz)
{
static std::vector<int> samples1;
static std::vector<int> samples2;
samples1.resize(bucket_sz);
samples2.resize(bucket_sz);
memset(&samples1[0], 0, sizeof(int) * samples1.size());
memset(&samples2[0], 0, sizeof(int) * samples2.size());
// try to get 50,000 samples per bucket
const size_t n_samples = 50000;
size_t n, i;
srand(1);
for (i = 0; i < n_samples; ++i)
for (n = 0; n < bucket_sz; ++n)
++samples1[nrand1(bucket_sz)];
srand(1);
for (i = 0; i < n_samples; ++i)
for (n = 0; n < bucket_sz; ++n)
++samples2[nrand2(bucket_sz)];
if (samples1 != samples2)
{
int sum1 = 0;
for (n = 0; n < bucket_sz; ++n)
sum1 += samples1[n];
int sum2 = 0;
for (n = 0; n < bucket_sz; ++n)
sum2 += samples2[n];
double avg1 = (double)sum1 / samples1.size();
double avg2 = (double)sum2 / samples2.size();
double maxdiff = 0;
for (n = 0; n < bucket_sz; ++n)
{
double diff = abs(samples1[n] - samples2[n]);
if (diff > maxdiff)
maxdiff = diff;
}//for
printf("#buckets,avg1,avg2,max bucket diff\n");
printf("%u,%f,%f,%f\n\n", bucket_sz, avg1, avg2, maxdiff);
char filename[128];
sprintf(filename, "bucket_%u.csv", bucket_sz);
FILE *f = fopen(filename, "w");
if (!f)
{
fprintf(stderr, "Failed to open %s for writing\n", filename);
return true;
}//if
fprintf(f, "bucket, nrand1, nrand2, diff\n");
for (n = 0; n < bucket_sz; ++n)
fprintf(f, "%u,%d,%d,%d\n", n, samples1[n], samples2[n],
(int)abs(samples1[n] - samples2[n]));
fclose(f);
return true;
}//if
return false;
}//test
int main()
{
size_t bucket_sz;
#if 0
// this results in diffs at 4,8,16,32,etc..
for (bucket_sz = 2; bucket_sz < RAND_MAX; ++bucket_sz)
test(bucket_sz);
#else
// run test only where they differ
for (bucket_sz = 4; bucket_sz < RAND_MAX; bucket_sz *= 2)
test(bucket_sz);
#endif
return 0;
}//main
>> The function `rand' is designed for [0, RAND_MAX]--inclusive
That's what I was missing originally - before Koenig's proposal.
>> as Koenig notes the correction may overflow.
Right, there is no guarantee that UINT_MAX > INT_MX - so it may overflow on some platform.
I'll take yours as the new "best known method for an int within [0, RAND_MAX)"
gg