More thoughts on random floats

**Sir Galahad** · 04-20-2021

I was reading this interesting thread about generating random ranges of floating point values.

What about closed ranges? I was wondering if a simple binary partitioning approach could be used.

Code:

#include "stdbool.h"

double random_within_range_with(bool (*extract_bit)(void*), void* data, double low, double high)
{
 if(low > high)
  return random_within_range_with(extract_bit, data, high, low);
 double range = (high - low) / 2;
 double sum = range;
 for(;;)
 {
  range /= 2;
  if(extract_bit(data))
   sum += range;
  else
   sum -= range;
  if(range == 0)
   break;
 }
 return low + sum;
}

#include "stdlib.h"
#include "time.h"

bool extract_srand_bit(void* unused)
{
 static bool init = true;
 if(init)
 {
  srand(time(NULL));
  init = false;
 }
 return rand() & 1;
}

double random_within_range(double low, double high)
{
 return random_within_range_with(extract_srand_bit, NULL, low, high);
}

#include "stdio.h"

int main(int argc, char** argv)
{
 puts("Random [LOW] [HIGH]");
 double low = -1;
 if(argc > 1)
  low = atof(argv[1]);
 double high = 1;
 if(argc > 2)
  high = atof(argv[2]);
 printf("Low: %g\n", low);
 printf("High: %g\n", high);
 for(;;)
 {
  printf(" %g\n", random_within_range(low, high));
  getchar();
 }
}

I'm trying to avoid platform-specific particularities. Does that look like an acceptable approach?

**hamster_nz** · 04-20-2021

It's pretty inefficient, and I'm not so sure that "range == 0" is a good idea.

If doing this, I would generate random bits for the memory holding the double, then explicitly mask and set the sign and exponent to give numbers between 1.99999... and 1.0.

That could then be scaled / offset to the range I want.

Pretty ugly, but might be good enough.

**Salem** · 04-21-2021

Beware that the LSB of rand() might not be all that it seems.
Question 13.18

**Sir Galahad** · 04-21-2021

Originally Posted by hamster_nz

It's pretty inefficient, and I'm not so sure that "range == 0" is a good idea.

If doing this, I would generate random bits for the memory holding the double, then explicitly mask and set the sign and exponent to give numbers between 1.99999... and 1.0.

That could then be scaled / offset to the range I want.

Pretty ugly, but might be good enough.

Would you mind giving an example of how that might be done in a platform neutral manner? I know my way is rather inefficient, I just couldn't figure how else to do it.

Originally Posted by Salem

Beware that the LSB of rand() might not be all that it seems.
Question 13.18

Good point! It was just an example of course. I would normally use /dev/urandom (or at least a decent PRNG).

**flp1969** · 04-21-2021

Originally Posted by Sir Galahad

Would you mind giving an example of how that might be done in a platform neutral manner? I know my way is rather inefficient, I just couldn't figure how else to do it.

Code:

#define swapd(a,b) ( double t_; t_ = (a); (a) = (b); (b) = t_; }

double rand_range( double min, double max )
{
  double r;

  if ( min > max ) 
    swapd( min, max );

  r = rand() / (double) RAND_MAX;

  // lerp.
  return (1.0 - r)*min + r*max;  
}

**Sir Galahad** · 04-21-2021

Originally Posted by flp1969

Code:

#define swapd(a,b) ( double t_; t_ = (a); (a) = (b); (b) = t_; }

double rand_range( double min, double max )
{
  double r;

  if ( min > max )
    swapd( min, max );

  r = rand() / (double) RAND_MAX;

  // lerp.
  return (1.0 - r)*min + r*max;  
}

Nice! One issue with that though is being somewhat limited by the RAND_MAX term. Maybe constructing the number from a string of bits to fill the largest integral machine type would be better?

I tried this, but the `constant` variable always forces the result to be -1 for some reason. Changing it to RAND_MAX "works" but I really don't want to hard code that into the calculation.

Code:

double real_random(unsigned long (*extract_word)(void*), void* data, double min, double max)
{
 const double constant = ((unsigned long)~0); // This doesn't work!
 if(min > max)
  return real_random(extract_word, data, max, min);
 double r = extract_word(data) / constant;
 return (1 - r) * min + r * max;
}

#include "stdlib.h"
#include "stdio.h"
#include "time.h"

unsigned long extract_srand_word(void* unused)
{
 static bool init = true;
 static size_t bits = 0;
 static size_t loops = 0;
 if(init)
 {
  srand(time(NULL));
  unsigned long shift = RAND_MAX;
  while(shift)
  {
   ++bits;
   shift <<= 1;
  }
  loops = (sizeof(unsigned long) * 8) / bits;
  if(loops == 0) // unlikely but...
   loops = 1;
  init = false;
 }
 unsigned long result = 0;
 size_t counter = loops;
 while(counter--)
 {
  result <<= bits;
  result |= rand();
 }
 return result;
}

int main(int argc, char** argv)
{
 printf("Usage: %s [LOW] [HIGH]\n", argv[0]);
 double low = -1;
 if(argc > 1)
  low = atof(argv[1]);
 double high = 1;
 if(argc > 2)
  high = atof(argv[2]);
 printf("Low: %g\n", low);
 printf("High: %g\n", high);
 for(;;)
 {
  printf(" %g\n", real_random(extract_srand_word, NULL, low, high));
  getchar();
 }
}

**hamster_nz** · 04-21-2021

This isn't perfect and isn't portable, but how I would think about it.

Note I haven't tested it on multiple platforms

Code:

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <time.h>


double double_rand(double min, double max) {
   double new_val = 2.0;
   double range = max - min;
   uint64_t *d = (uint64_t *)&new_val;


   ////////////////////////////////////////////////////////
   // Check that the represntation is at least what we want
   // because the code isn't portable.
   ////////////////////////////////////////////////////////
   if(sizeof(double) != 8 || (*d != 0x4000000000000000L && *d != 0x0000000000000040L)) {
     fprintf(stderr, "double_rand() is broken\n");
   };
   int endian = (*d == 0x4000000000000000)?1:0;


   //////////////////////////////////////////////////////////////
   // Fill the uint64_t it with random bits 16 bits at a time.
   // Makes four calls to rand().
   //////////////////////////////////////////////////////////////
   *d = 0;
   for(int i = 0; i < sizeof(double)/2; i++) {
      *d |=  (rand()&0xFFFFLL)<<(16*i);
   }

   //////////////////////////////////////////////////////////////
   // This assumes we are on IEEE754 doubles, that
   // we check for above.
   //
   // Makes it a number 1.0 <= x < 2.0
   //////////////////////////////////////////////////////////////
   if(endian) {
     *d &= 0x000FFFFFFFFFFFFF;  // Mask off sign and exponent
     *d |= 0x3FF0000000000000;  // Set the exponent
   } else {
     *d &= 0xFFFFFFFFFFFF0F00;  // Mask off sign and exponent
     *d |= 0x000000000000F03F;  // Set the exponent
   }


   //////////////////////////////////////////////////////////////
   // This has so many logical hazards, but should work except for
   // large or small values. It might unrandomize the low order
   // bit of the mantissa.
   //////////////////////////////////////////////////////////////
   new_val *= range;  // Expands it to the desired range of numbers, but from range <= x <= range*2.0;
   new_val -= range;  // Move it to 0 <= x < range
   new_val += min;    // move it to min <= x < (min+range)
   return new_val;
}


int main(int argc, char *argv) {
   srand(time(NULL));
   // Generate 1000 random doubles in the range -1.0 <= x < 1.0
   for(int i = 0; i < 1000; i++) {
      printf("%10.2f\n",double_rand(-1.0,1.0));
   }
}

**flp1969** · 04-21-2021

Originally Posted by Sir Galahad

Nice! One issue with that though is being somewhat limited by the RAND_MAX term

You asked for a "platform neutral" method... rand() returns an uniform distributed random number in the range 0..RAND_MAX, usually through a LCG algorithm. In a 32 bit processor this is, again, usually a 31 bits long integer value.

if you want a better RNG, with more bits, you can use platform specific methods, including using the DTRNG inside your Intel processor with intrinsic functions as _rdrand64_step() [but you need to verify if your processor supports RDRAND instruction via CPUID].

Trying to build your own RNG isn't easy and you probably end up with somethings that isn't random at all.

**flp1969** · 04-21-2021

Using RDRAND:

Code:

// test.c - code for x86-64 (gcc).
#include <stdio.h>
#include <stdlib.h>
#include <x86intrin.h>

static int check_rdrand ( void );
static double rand_ ( double, double );

int main ( int argc, char *argv[] )
{
  double min, max;
  char *p;

  if ( argc != 3 )
  {
    fprintf ( stderr, "Usage: %s <min> <max>\n", argv[0] );
    return EXIT_FAILURE;
  }

  if ( ! check_rdrand() )
  {
    fputs ( "Your processor don't support RDRAND instruction.\n", stderr );
    return EXIT_FAILURE;
  }

  min = strtod ( argv[1], &p );

  if ( *p || ( p == argv[1] ) )
  {
    fprintf ( stderr, "Invalid mininum value: %s\n", argv[1] );
    return EXIT_FAILURE;
  }

  max = strtod ( argv[2], &p );

  if ( *p || ( p == argv[2] ) )
  {
    fprintf ( stderr, "Invalid maximum value: %s\n", argv[2] );
    return EXIT_FAILURE;
  }

  for ( ;; getchar() )
  {
    printf ( "%g", rand_ ( min, max ) );
    fflush ( stdout );
  }

  // never gets here.
  return EXIT_SUCCESS;
}

// Check bit 30 of ECX for RDRAND support (EAX=1)
int check_rdrand ( void )
{
  int c;

  __asm__ __volatile__ (
    "cpuid"
    : "=c" ( c )
    : "a" ( 1 )
    : "rbx", "rdx"
  );

  return c & ( 1 << 30 );
}

#define swapd( a, b ) { double t_ = (a); (a) = (b); (b) = t_; }

// Notice: don't need a random seed!
double rand_ ( double min, double max )
{
  int tries;
  double r;
  unsigned long long int rnd;

  if ( min > max )
    swapd ( min, max );

  // Intel recomends 10 tries before giving up.
  tries = 10;

  while ( tries-- )
  {
    if ( _rdrand64_step ( &rnd ) )
      break;
  }

  if ( tries < 0 )
  {
    fputs ( "Cannot get random value. Aborted.\n", stderr );
    exit ( EXIT_FAILURE );
  }

  // Using long double because of precision.
  r = ( long double ) rnd / ~0ULL;

  // lerp
  return ( 1.0 - r ) * min + r * max;
}

**Sir Galahad** · 04-21-2021

Originally Posted by flp1969

You asked for a "platform neutral" method... rand() returns an uniform distributed random number in the range 0..RAND_MAX, usually through a LCG algorithm. In a 32 bit processor this is, again, usually a 31 bits long integer value.

if you want a better RNG, with more bits, you can use platform specific methods, including using the DTRNG inside your Intel processor with intrinsic functions as _rdrand64_step() [but you need to verify if your processor supports RDRAND instruction via CPUID].

Trying to build your own RNG isn't easy and you probably end up with somethings that isn't random at all.

I wouldn't use rand() for anything other than a toy program, I was just giving an example of the interface I was thinking of.

The platform independant part I was referring to was how the numbers are generated from some "random" sequence of bits. My first attempt was indeed inefficient, at 1075 iterations for each number generated. But it does at least (seem to) work across the board (WRT different floating point types).

So maybe that could just be the fallback case. Otherwise, something like what you guys have suggested could be used.

**husoski** · 04-21-2021

If you're worried about platform-specific problems, one thing to avoid is rand(). Microsoft's implementation is seriously deficient, and their run-time library is used by the various MinGW ports of GNU gcc. It has an acceptable period, but you only get about 15-bits of randomness out of each rand() call. (RAND_MAX is 37267 on MSC/VC++.) GCC implementations for x86/x64 elsewhere usually have 31 bits.

A double is typically IEEE 64-bit binary floating point (this century, anyway) with 52 bits of fraction. To get a random generator where all doubles in an interval are possible generally takes multiple rand() calls per double. If RAND_MAX is not 1 less than a power of 2, you'll need to do some arithmetic to glue those results together. The result depends on two aspects of the exact values of RAND_MAX (from <stdlib.h>) and DBL_MANT_DIGITS (from <float.h>).

As for requiring a closed interval for floating point within bounds, I don't see the use case for it. With that 52 bit fraction field, any endpoint of that range will occur once in 2^52 times, "on average". You need 3x10^15 before you have a 50-50 chance of seeing that at least once. That's often enough to worry about as an edge case, but not enough to be a significant part of any calculation.

Most libraries will provide a uniform random function on the interval [0, 1), including 0 but excluding 1. Here's an example of how to do that, catering two just the two rand() function possibilities (MS VC++ vs. GCC/clang):

Code:

double uniform_dbl() // Returns uniformly-random double in [0.0, 1.0) interval
{
    long long int temp = 0;
    static const double EPS = (1.0 / (1LL << DBL_MANT_DIG));
    int bits_left = DBL_MANT_DIG;

    if (DBL_MANT_DIG%RAND_BITS != 0)
    {
        temp = rand() >> (RAND_BITS - DBL_MANT_DIG%RAND_BITS);
        bits_left = DBL_MANT_DIG - DBL_MANT_DIG%RAND_BITS;
    }

    while (bits_left)
    {
        temp = (temp << RAND_BITS) + rand();
        bits_left -= RAND_BITS;
    }
    return temp * EPS;
}

You can scale that to any interval with:

Code:

double rand_range(double a, double b)
{
    return a + (b - a) * uniform_dbl();
}

That gets you a uniform random between a(inclusive) and b(exclusive). If you insist on a closed interval, I'd suggest something like:

Code:

double rand_range(double a, double b)
{
    double u;
    do {u = 1.000000001 * uniform_dbl(); } while (u > 1.0);
    return a + (b - a) * u;
}

That expands the range of uniform_dbl() to include 1.0 and a small interval above that, and discards the numbers above 1.0 to get a closed [0,1] range. The cost is that extra floating point compare and some loss of randomness in the lowest bits of the result.

**hamster_nz** · 04-22-2021

Originally Posted by husoski

Code:

double uniform_dbl() // Returns uniformly-random double in [0.0, 1.0) interval
{
    long long int temp = 0;
    static const double EPS = (1.0 / (1LL << DBL_MANT_DIG));
    int bits_left = DBL_MANT_DIG;

    if (DBL_MANT_DIG%RAND_BITS != 0)
    {
        temp = rand() >> (RAND_BITS - DBL_MANT_DIG%RAND_BITS);
        bits_left = DBL_MANT_DIG - DBL_MANT_DIG%RAND_BITS;
    }

    while (bits_left)
    {
        temp = (temp << RAND_BITS) + rand();
        bits_left -= RAND_BITS;
    }
    return temp * EPS;
}

Which header file is DBL_MANT_DIG defined in?

Ah, found it - float.h

That is a pretty good solution.

**husoski** · 04-22-2021

Thanks and yes...I forgot to mention that <float.h> and <stdlib.h> are needed.

It's not bad for pure-standard C. You can expand on the conditional compilation ideas to include a wider range of implementations, and perhaps deal with the issues of implementations where RAND_MAX isn't exactly one less than a power of 2, but as-is it does cover almost all of the desktop/notebook use cases.

**Sir Galahad** · 04-23-2021

Thanks for all the useful suggestions everyone! I do wonder if the mantissa bits could somehow be "detected" (computed) without any prior assumptions? If so then these methods could be used as a generic approach "right out of the box" for any given floating point type.

**Sir Galahad** · 04-23-2021

Or maybe this? Repeatedly subdivide into 256 "spaces" (in single byte chunks) until enough "bits of the mantissa" have been processed?

Code:

#include "float.h"
#include "limits.h"

/*
 Generate a random real number from a generic byte-stream callback 
*/
double random_real(unsigned char (*extract_byte)(void*), void* data, double low, double high)
{
 if(low > high)
  return random_real(extract_byte, data, high, low);
 double sum = 0;
 double range = 1;
 int count = DBL_MANT_DIG;
 for(;;)
 {
  double divisions = range / 256;
  sum += divisions * extract_byte(data);
  range = divisions;
  if(count <= 8)
   break;
  count -= 8;
 }
 return (1 - sum) * low + sum * high;
}

#include "stdlib.h"
#include "time.h"

/*
 Trivial rand() implementation 
*/ 
unsigned char extract_srand_byte(void* unused)
{
 static int initial = 1;
 if(initial)
 {
  srand(time(NULL));
  initial = 0;
 }
 return rand() & 0xff;
}

#include "stdio.h"

/*
 Trivial /dev/urandom implementation 
*/
unsigned char extract_urandom_byte(void* unused)
{
 static FILE* entropy = NULL;
 if(entropy == NULL)
 {
  if((entropy = fopen("/dev/urandom", "rb")) == NULL)
  {
   fprintf(stderr, "Error: cannot open /dev/urandom!");
   exit(1);
  }
 }
 unsigned char buffer = 0;
 fread(&buffer, 1, sizeof(unsigned char), entropy);
 return buffer;
}

int main(int argc, char** argv)
{
 puts("Random Reals");
 printf("Usage: %s [LOW] [HIGH]\n", argv[0]);
 double low = -1;
 if(argc > 1)
  low = atof(argv[1]);
 double high = 1;
 if(argc > 2)
  high = atof(argv[2]);
 printf("Low: %g\n", low);
 printf("High: %g\n", high);
 for(;;)
 {
  printf("rand():       %g\n", random_real(extract_srand_byte, NULL, low, high));
  printf("/dev/urandom  %g\n", random_real(extract_urandom_byte, NULL, low, high));
  getchar();
 }
 return 0;
}

It still does rely on a hard-coded value however. Is there a way to detect something like that?

Thread: More thoughts on random floats

Thread Tools

Search Thread

Display

More thoughts on random floats

Similar Threads

Random array generator for floats

More random thoughts on constructing scripting languages.

UML and C++: Your Thoughts

more thoughts....

thoughts

Tags for this Thread