camel-cdr · March 5, 2021 21:48 · camel-cdr · Feb 5, 2021
diff --git a/random.h b/random.h
 /*
 * 5.2 Uniform real distribution -----------------------------------------------
 *
 * Generating uniform random floating-point numbers might seem easy at first
 * glance, just cast the output of a PRNG to the desired float type and divide
 * by the biggest possible output of the PRNG to obtain a random float between
 * 0 and 1 that can now be scaled up.
 * Though as before this straightforward approach is biased.
 *
 * For the next part, I assume you are already acquainted with the IEEE 754
 * floating-point standard <12>, we will not cover other formats.
 *
 * To recap, here is a quick reminder on how 32-bit floats are laid out in
 * memory:
 *
 *     sign  exponent           mantissa
 *     +/-  bias: -126
 *     [X]  [XXXXXXXX]  [XXXXXXXXXXXXXXXXXXXXXXX]
 *
 * The interpretation of the data is dependent on the value of the exponent:
 *
 *     If 0 < exp < 255:
 *         +/- 1.mantissa * 2^{exponent - 126}
 *     If exp = 0 (demormalized):
 *         +/- 0.mantissa * 2^{-126}
 *     If exp = 255:
 *         If mantissa = 0:
 *             >+/- inf
 *         If mantissa != 0:
 *             NaN
 *
 * The varying exponent allows floating-point numbers to represent a huge range
 * of values. What makes random float generation tricky is the fact, that the
 * amount of numbers less than 1 and greater than 1 is the same. This means that
 * we can't just divide a random integer among these values without getting
 * duplicates and omissions <13>:
 *
 *    Integer: |----|----|----|----|----|----|----|----|----|----|----|----|
 *             |    |    /    |   /    /    /    /      \ __/    \ __/     |
 *    Float:   ||||-|-|-|--|--|--|----|----|----|--------|--------|--------|
 *
 * There are multiple ways to subvert this problem.
 *
 * You can use the division method described above with a divider that
 * guarantees equal spacing between generated floating-point numbers.
 * This is less biased, but can't generate all possible representable values
 * between 0 and 1 and can lose equal spacing when scaling up a specific range.
 * Nevertheless, this is a good tradeoff between speed and accuracy.
 */

 static inline float
 dist_uniformf(uint_least32_t x)
 {
 	return (x >> (32 - FLT_MANT_DIG)) *
 	       (1.0f / (UINT32_C(1) << FLT_MANT_DIG));
 }

 static inline double
 dist_uniform(uint_least64_t x)
 {
 	return (x >> (64 - DBL_MANT_DIG)) *
 	       (1.0 / (UINT64_C(1) << DBL_MANT_DIG));
 }

 /*
 * Another solution is to generate every representable floating-point number
 * with a probability proportional to the covered real number range. <14>
 * So obtaining a number in a floating-point subrange [s1,s2] of the output
 * range [r1,r2] has the probability of \frac{s2-s1}{r2-r1}.
 *
 *     r1=5                   r2=25
 *        |||||||||||||||||||||     -->  \frac{15-10}{25-5} = 0.25
 *             |    |
 *         s1=10    s2=15
 *
 * This property is obtained by randomizing the mantissa uniformly and
 * randomizing the exponent, with the probability 2^{-x} for every possible
 * exponent x.
 * This can be achieved programmatically by generating random bits and
 * decrementing the maximal exponent until one of the generated bits is set,
 * thus halving the probability of the next decrement every time,
 * or the exponent is smaller than the minimum exponent,
 * in which case this step is repeated.
 * Finally, we need to reject all values that are outside of the desired range.
 *
 * Before we go into the implementation let's compare the quality of generated
 * floating-point numbers between dist_uniformf and dist_uniformf_dense:
 *
 * The difference in the expected probability and the empirical result of a
 * random floating-point in [r1,r2] also being inside [s1,s2] are plotted in the
 * histograms below.
 * Here [r1,r2] was randomly choosen inside [-10^{-6},10^{-6}] and [s1,s2]
 * randomly choosen inside [r1,r2].
 * Then random floats between 0 and 1 were generated using dist_uniformf and
 * dist_uniformf_dense and rejected if they aren't inside [r1,r2].
 * From those that get though, we count the probability of them also being
 * inside [s1,s2] and plott \frac{P(expected)-P(empirical)}{P(expected)},
 * with the expected probability calculated as described above.
 * The rejection process is necessary to expose the defects in dist_uniformf,
 * as they only show up in the smaller subranges.
 * With larger ranges (e.g. [r1=-0.1,r2=0.1]) the two plots are equally normal
 * distributed.
 *
 *    150 +---------------------------+     100 +---------------------------+
 *        |      +      **     +      |         |    +   +    +    +   +    |
 *        |             **   dist_uni-|         |                  dist_uni-|
 *        |             ** formf_dense|         |                  *   formf|
 *        |             **            |         |                  *        |
 *    100 |-+           **          +-|         |                  *        |
 *        |             **            |         |                  *        |
 *        |             **            |         |                 **        |
 *        |            ****           |      50 |-+               **   *  +-|
 *        |           *****           |         |                 ***  *    |
 *        |           *******         |         |                 ***  *    |
 *     50 |-+         *******       +-|         |                ****  *    |
 *        |           *******         |         |                ****  *    |
 *        |         *********         |         |                ****  *    |
 *        |         **********        |         |               ****** *    |
 *        |   ...***************...   |         |   . . *.  ...*******.*    |
 *      0 +---------------------------+       0 +---------------------------+
 *       -1    -0.5     0     0.5     1        -4   -3  -2   -1    0   1    2
 *
 * We can observe that the dist_uniform plot has a big spike at 1 and deviates
 * further in the negative direction.
 * The spike at 1 can be explained by dist_uniform not being able to generate
 * any float that lies in the range [s1,s2] when s2-s1 is too small, hence:
 *     \frac{P(exp)-P(emp)}{P(exp)} = \frac{P(exp) - 0}{P(exp)} = 1
 * The bigger deviation in the negative x-axis happens because some
 * probabilities are too likely when mapping integers to floating-points
 * directly:
 *
 *    Integer: |----|----|----|----|----|----|----|----|----|----|----|----|
 *             |    |    /    |   /    /    /    /      \ __/    \ __/     |
 *    Float:   ||||-|-|-|--|--|--|----|----|----|--------|--------|--------|
 *                  |   |
 *                 [     ] <-- This range would be too likely
 */

 /* We'll begin by writing a macro that decrements the exponent until one bit is
 * set, using __builtin_ctz if it's available. */
 #if __GNUC__ >= 4 || __clang_major__ >= 2 || \
 	(__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || \
 	(__clang_major__ == 1 && __clang_minor__ >= 5)
 	#if UINT_MAX >= UINT32_MAX
 		#define DIST_UNIFORMF_DENSE_DEC_CTZ(exp, x) \
 			((exp) -= __builtin_ctz(x))
 	#elif ULONG_MAX >= UINT32_MAX
 		#define DIST_UNIFORMF_DENSE_DEC_CTZ(exp, x) \
 			((exp) -= __builtin_ctzl(x))
 	#endif
 	#if UINT_MAX >= UINT64_MAX
 		#define DIST_UNIFORM_DENSE_DEC_CTZ(exp, x) \
 			((exp) -= __builtin_ctz(x))
 	#elif ULONG_MAX >= UINT64_MAX
 		#define DIST_UNIFORM_DENSE_DEC_CTZ(exp, x) \
 			((exp) -= __builtin_ctzl(x))
 	#elif ULLONG_MAX >= UINT64_MAX
 		#define DIST_UNIFORM_DENSE_DEC_CTZ(exp, x) \
 			((exp) -= __builtin_ctzll(x))
 	#endif
 #endif

 /* Otherwise, we'll use a lookup table and a De Bruijn sequence to calculate
 * the number of trailing zeros. <15>
 *
 * Given an initial random number n, we calculate m = n & -n,
 * this will only set the last set bit in n in m.
 * Now we have a distinct value for every possible number of trailing zeros and
 * a perfect hash function can be constructed that maps every potential value of
 * m to the number of trailing zeros.
 *
 * This is done by multiplying the modified number by a De Bruijn constant.
 * A De Bruijn constant contains all possible binary values of n-bits in all
 * subranges of n-bits. E.g. for n = 3:
 *                               0b00011101 = 232
 *                                 000        = 0
 *                                  001       = 1
 *                                   011      = 3
 *                                    111     = 7
 *                                     110    = 6
 *                                      101   = 5
 *                                       010  = 2
 *                                        100 = 4
 *
 * m is a power-of-two, as only one bit is set, hence a multiplication with the
 * De Bruijn constant shifts it by its power.
 * Thus we get a distinct De Bruijn subsequence for every possible power and the
 * index can now be determined via a lookup table. */

 #ifndef DIST_UNIFORMF_DENSE_DEC_CTZ
 #define DIST_UNIFORMF_DENSE_DEC_CTZ(exp, x) do { \
 		static const char deBruijn[32] = { \
 			0, 1, 28,2,29,14,24,3,30,22,20,15,25,17, 4,8, \
 			31,27,13,23,21,19,16,7,26,12,18, 6,11, 5,10,9 }; \
 		(exp) -= deBruijn[(((x) & -(x)) * \
 		                  UINT32_C(0x077CB531)) >> 27]; \
 	} while (0)
 #endif

 #ifndef DIST_UNIFORM_DENSE_DEC_CTZ
 #define DIST_UNIFORM_DENSE_DEC_CTZ(exp, x) do { \
 		static const char deBruijn[64] = { \
 			0,  1, 2,53, 3, 7,54,27, 4,38,41, 8,34,55,48,28, \
 			62, 5,39,46,44,42,22, 9,24,35,59,56,49,18,29,11, \
 			63,52, 6,26,37,40,33,47,61,45,43,21,23,58,17,10, \
 			51,25,36,32,60,20,57,16,50,31,19,15,30,14,13,12 }; \
 		(exp) -= deBruijn[(((x) & -(x)) * \
 		                UINT64_C(0x022FDD63CC95386D)) >> 58]; \
 	} while (0)
 #endif

 #define DIST_UNIFORMF_DENSE_AVAILABLE (!!UINT32_MAX)
 #if DIST_UNIFORMF_DENSE_AVAILABLE

 static float
 dist_uniformf_dense(
 		float a, float b, /* [a,b] */
 		uint32_t (*rand32)(void*), void *rng)
 {
 	union { float f; uint32_t i; } u;
 	enum { SIGN_POS = 0, SIGN_RAND = 1, SIGN_NEG = 2 } sign;
 	uint32_t minexp, minmant, maxexp, maxmant;

 	#define DIST_UNIFORMF_DENSE_MANT \
 			((UINT32_C(1) << (FLT_MANT_DIG - 1)) - 1)

 	/* make sure a is smaller than b */
 	if (a == b) {
 		return a;
 	} else if (a > b) {
 		float tmp = a;
 		a = b;
 		b = tmp;
 	}

 	{
 		/* calculate min and max with respect to signedness */
 		float min, max;
 		switch ((sign = (a < 0.0f) + (b < 0.0f))) {
 		case SIGN_POS: min = a; max = b; break;
 		case SIGN_RAND: min = 0; max = (b > -a) ? b : -a; break;
 		case SIGN_NEG: min = b; max = a; break;
 		}
 		/* extract the minimum and maximum exponent and mantissa */
 		u.f = min;
 		minexp = (u.i << 1) >> (FLT_MANT_DIG);
 		minmant = u.i & DIST_UNIFORMF_DENSE_MANT;
 		u.f = max;
 		maxexp = (u.i << 1) >> (FLT_MANT_DIG);
 		maxmant = u.i & DIST_UNIFORMF_DENSE_MANT;
 	}

 	/* optimize special cases, that could otherwise reject almost all
 	 * possible values of the mantissa. */
 	if (minexp == maxexp) {
 		u.i = (minexp << (FLT_MANT_DIG - 1)) |
 		       (dist_uniform_u32(maxmant - minmant + 1, rand32, rng) +
 		       minmant);

 		/* apply signedness */
 		/**/ if (sign == SIGN_RAND)
 			u.i |= rand32(rng) & (UINT32_C(1) << 31);
 		else if (sign == SIGN_NEG)
 			u.i |= UINT32_C(1) << 31;
 		return u.f;
 	} else if (minexp + 1 == maxexp) {
 		const uint32_t invminmant = DIST_UNIFORMF_DENSE_MANT - minmant;
 		const uint32_t range = invminmant + maxmant + 1;
 		uint32_t mant, exp, x;
 		size_t i = 0;

 		while (1) {
 			/* make sure three bits always set, two for the
 			 * rejection and one for the sign. */
 			if (i <= 3) {
 				x = rand32(rng);
 				i = 32;
 			}

 			/* use maxexp if the first bit is set and minexp
 			 * if the second bit is set, otherwise repeat. */
 			if (x & 1) {
 				i -= 1, x >>= 1;
 				exp = maxexp;
 				mant = dist_uniform_u32(range, rand32, rng);
 				if (mant <= maxmant)
 					break;
 			} else if (x & 3) {
 				i -= 2, x >>= 2;
 				exp = minexp;
 				mant = dist_uniform_u32(range, rand32, rng);
 				if (mant <= invminmant) {
 					mant = DIST_UNIFORMF_DENSE_MANT - mant;
 					break;
 				}
 			} else {
 				i -= 2, x >>= 2;
 			}
 		}

 		/* combine exp with a mantissa */
 		u.i = (exp << (FLT_MANT_DIG - 1)) | mant;

 		/* apply signedness */
 		/**/ if (sign == SIGN_RAND)
 			u.i |= x << 31;
 		else if (sign == SIGN_NEG)
 			u.i |= UINT32_C(1) << 31;
 		return u.f;
 	} else while (1) {
 		uint32_t exp, x;

 		/* decrement exp until at least one bit is set */
 		exp = maxexp;
 		while ((x = rand32(rng)) == 0)
 			exp -= 32;

 		/* decrement exp by the number of trailing zeros */
 		DIST_UNIFORMF_DENSE_DEC_CTZ(exp, x);

 		/* repeat when the exponent is to low and on underflow */
 		if ((exp < minexp) || (exp > maxexp))
 			continue;

 		/* combine exp with a random mantissa */
 		x = rand32(rng);
 		u.i = (exp << (FLT_MANT_DIG - 1)) | (x >> (33 - FLT_MANT_DIG));

 		/* apply signedness */
 		/**/ if (sign == SIGN_RAND)
 			u.i |= x << 31;
 		else if (sign == SIGN_NEG)
 			u.i |= UINT32_C(1) << 31;

 		/* reject if not in range */
 		if (u.f >= a && u.f <= b)
 			return u.f;
 	}
 #undef DIST_UNIFORMF_DENSE_MANT
 }

 #endif /* DIST_UNIFORMF_DENSE_AVAILABLE */

 #define DIST_UNIFORM_DENSE_AVAILABLE (!!UINT64_MAX)
 #if DIST_UNIFORM_DENSE_AVAILABLE

 static double
 dist_uniform_dense(
 		double a, double b, /* [a,b] */
 		uint64_t (*rand64)(void*), void *rng)
 {
 	union { double f; uint64_t i; } u;
 	enum { SIGN_POS = 0, SIGN_RAND = 1, SIGN_NEG = 2 } sign;
 	uint64_t minexp, minmant, maxexp, maxmant;

 	#define DIST_UNIFORMF_DENSE_MANT \
 			((UINT64_C(1) << (DBL_MANT_DIG - 1)) - 1)

 	/* make sure a is smaller than b */
 	if (a == b) {
 		return a;
 	} else if (a > b) {
 		double tmp = a;
 		a = b;
 		b = tmp;
 	}

 	{
 		/* calculate min and max with respect to signedness */
 		double min, max;
 		switch ((sign = (a < 0.0) + (b < 0.0))) {
 		case SIGN_POS: min = a; max = b; break;
 		case SIGN_RAND: min = 0; max = (b > -a) ? b : -a; break;
 		case SIGN_NEG: min = b; max = a; break;
 		}
 		/* extract the minimum and maximum exponent and mantissa */
 		u.f = min;
 		minexp = (u.i << 1) >> (DBL_MANT_DIG);
 		minmant = u.i & DIST_UNIFORMF_DENSE_MANT;
 		u.f = max;
 		maxexp = (u.i << 1) >> (DBL_MANT_DIG);
 		maxmant = u.i & DIST_UNIFORMF_DENSE_MANT;
 	}

 	/* optimize special cases, that could otherwise reject almost all
 	 * possible values of the mantissa. */
 	if (minexp == maxexp) {
 		u.i = (minexp << (DBL_MANT_DIG - 1)) |
 		       (dist_uniform_u64(maxmant - minmant + 1, rand64, rng) +
 		       minmant);

 		/* apply signedness */
 		/**/ if (sign == SIGN_RAND)
 			u.i |= rand64(rng) & (UINT64_C(1) << 63);
 		else if (sign == SIGN_NEG)
 			u.i |= UINT64_C(1) << 63;
 		return u.f;
 	} else if (minexp + 1 == maxexp) {
 		const uint64_t invminmant = DIST_UNIFORMF_DENSE_MANT - minmant;
 		const uint64_t range = invminmant + maxmant + 1;
 		uint64_t mant, exp, x;
 		size_t i = 0;

 		while (1) {
 			/* make sure three bits always set, two for the
 			 * rejection and one for the sign. */
 			if (i <= 3) {
 				x = rand64(rng);
 				i = 64;
 			}

 			/* use maxexp if the first bit is set and minexp
 			 * if the second bit is set, otherwise repeat. */
 			if (x & 1) {
 				i -= 1, x >>= 1;
 				exp = maxexp;
 				mant = dist_uniform_u64(range, rand64, rng);
 				if (mant <= maxmant)
 					break;
 			} else if (x & 3) {
 				i -= 2, x >>= 2;
 				exp = minexp;
 				mant = dist_uniform_u64(range, rand64, rng);
 				if (mant <= invminmant) {
 					mant = DIST_UNIFORMF_DENSE_MANT - mant;
 					break;
 				}
 			} else {
 				i -= 2, x >>= 2;
 			}
 		}

 		/* combine exp with a mantissa */
 		u.i = (exp << (DBL_MANT_DIG - 1)) | mant;

 		/* apply signedness */
 		/**/ if (sign == SIGN_RAND)
 			u.i |= x << 63;
 		else if (sign == SIGN_NEG)
 			u.i |= UINT64_C(1) << 63;
 		return u.f;
 	} else while (1) {
 		uint64_t exp, x;

 		/* decrement exp until at least one bit is set */
 		exp = maxexp;
 		while ((x = rand64(rng)) == 0)
 			exp -= 64;

 		/* decrement exp by the number of trailing zeros */
 		DIST_UNIFORMF_DENSE_DEC_CTZ(exp, x);

 		/* repeat when the exponent is to low and on underflow */
 		if ((exp < minexp) || (exp > maxexp))
 			continue;

 		/* combine exp with a random mantissa */
 		x = rand64(rng);
 		u.i = (exp << (DBL_MANT_DIG - 1)) | (x >> (65 - DBL_MANT_DIG));

 		/* apply signedness */
 		/**/ if (sign == SIGN_RAND)
 			u.i |= x << 63;
 		else if (sign == SIGN_NEG)
 			u.i |= UINT64_C(1) << 63;

 		/* reject if not in range */
 		if (u.f >= a && u.f <= b)
 			return u.f;
 	}
 #undef DIST_UNIFORMF_DENSE_MANT
 }

 #endif /* DIST_UNIFORM_DENSE_AVAILABLE */

 #undef DIST_UNIFORMF_DENSE_DEC_CTZ
 #undef DIST_UNIFORM_DENSE_DEC_CTZ
	/*
	* 5.2 Uniform real distribution -----------------------------------------------
	*
	* Generating uniform random floating-point numbers might seem easy at first
	* glance, just cast the output of a PRNG to the desired float type and divide
	* by the biggest possible output of the PRNG to obtain a random float between
	* 0 and 1 that can now be scaled up.
	* Though as before this straightforward approach is biased.
	*
	* For the next part, I assume you are already acquainted with the IEEE 754
	* floating-point standard <12>, we will not cover other formats.
	*
	* To recap, here is a quick reminder on how 32-bit floats are laid out in
	* memory:
	*
	* sign exponent mantissa
	* +/- bias: -126
	* [X] [XXXXXXXX] [XXXXXXXXXXXXXXXXXXXXXXX]
	*
	* The interpretation of the data is dependent on the value of the exponent:
	*
	* If 0 < exp < 255:
	* +/- 1.mantissa * 2^{exponent - 126}
	* If exp = 0 (demormalized):
	* +/- 0.mantissa * 2^{-126}
	* If exp = 255:
	* If mantissa = 0:
	* >+/- inf
	* If mantissa != 0:
	* NaN
	*
	* The varying exponent allows floating-point numbers to represent a huge range
	* of values. What makes random float generation tricky is the fact, that the
	* amount of numbers less than 1 and greater than 1 is the same. This means that
	* we can't just divide a random integer among these values without getting
	* duplicates and omissions <13>:
	*
	* Integer: \|----\|----\|----\|----\|----\|----\|----\|----\|----\|----\|----\|----\|
	* \| \| / \| / / / / \ __/ \ __/ \|
	* Float: \|\|\|\|-\|-\|-\|--\|--\|--\|----\|----\|----\|--------\|--------\|--------\|
	*
	* There are multiple ways to subvert this problem.
	*
	* You can use the division method described above with a divider that
	* guarantees equal spacing between generated floating-point numbers.
	* This is less biased, but can't generate all possible representable values
	* between 0 and 1 and can lose equal spacing when scaling up a specific range.
	* Nevertheless, this is a good tradeoff between speed and accuracy.
	*/

	static inline float
	dist_uniformf(uint_least32_t x)
	{
	return (x >> (32 - FLT_MANT_DIG)) *
	(1.0f / (UINT32_C(1) << FLT_MANT_DIG));
	}

	static inline double
	dist_uniform(uint_least64_t x)
	{
	return (x >> (64 - DBL_MANT_DIG)) *
	(1.0 / (UINT64_C(1) << DBL_MANT_DIG));
	}

	/*
	* Another solution is to generate every representable floating-point number
	* with a probability proportional to the covered real number range. <14>
	* So obtaining a number in a floating-point subrange [s1,s2] of the output
	* range [r1,r2] has the probability of \frac{s2-s1}{r2-r1}.
	*
	* r1=5 r2=25
	* \|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\|\| --> \frac{15-10}{25-5} = 0.25
	* \| \|
	* s1=10 s2=15
	*
	* This property is obtained by randomizing the mantissa uniformly and
	* randomizing the exponent, with the probability 2^{-x} for every possible
	* exponent x.
	* This can be achieved programmatically by generating random bits and
	* decrementing the maximal exponent until one of the generated bits is set,
	* thus halving the probability of the next decrement every time,
	* or the exponent is smaller than the minimum exponent,
	* in which case this step is repeated.
	* Finally, we need to reject all values that are outside of the desired range.
	*
	* Before we go into the implementation let's compare the quality of generated
	* floating-point numbers between dist_uniformf and dist_uniformf_dense:
	*
	* The difference in the expected probability and the empirical result of a
	* random floating-point in [r1,r2] also being inside [s1,s2] are plotted in the
	* histograms below.
	* Here [r1,r2] was randomly choosen inside [-10^{-6},10^{-6}] and [s1,s2]
	* randomly choosen inside [r1,r2].
	* Then random floats between 0 and 1 were generated using dist_uniformf and
	* dist_uniformf_dense and rejected if they aren't inside [r1,r2].
	* From those that get though, we count the probability of them also being
	* inside [s1,s2] and plott \frac{P(expected)-P(empirical)}{P(expected)},
	* with the expected probability calculated as described above.
	* The rejection process is necessary to expose the defects in dist_uniformf,
	* as they only show up in the smaller subranges.
	* With larger ranges (e.g. [r1=-0.1,r2=0.1]) the two plots are equally normal
	* distributed.
	*
	* 150 +---------------------------+ 100 +---------------------------+
	* \| + ** + \| \| + + + + + \|
	* \| ** dist_uni-\| \| dist_uni-\|
	* \| ** formf_dense\| \| * formf\|
	* \| ** \| \| * \|
	* 100 \|-+ ** +-\| \| * \|
	* \| ** \| \| * \|
	* \| \| \| \|
	* \| ** \| 50 \|-+ * +-\|
	* \| *** \| \| * * \|
	* \| ***** \| \| * * \|
	* 50 \|-+ ***** +-\| \| ** * \|
	* \| ***** \| \| ** * \|
	* \| ******* \| \| ** * \|
	* \| ******** \| \| **** * \|
	* \| ...**************... \| \| . . . ...******. \|
	* 0 +---------------------------+ 0 +---------------------------+
	* -1 -0.5 0 0.5 1 -4 -3 -2 -1 0 1 2
	*
	* We can observe that the dist_uniform plot has a big spike at 1 and deviates
	* further in the negative direction.
	* The spike at 1 can be explained by dist_uniform not being able to generate
	* any float that lies in the range [s1,s2] when s2-s1 is too small, hence:
	* \frac{P(exp)-P(emp)}{P(exp)} = \frac{P(exp) - 0}{P(exp)} = 1
	* The bigger deviation in the negative x-axis happens because some
	* probabilities are too likely when mapping integers to floating-points
	* directly:
	*
	* Integer: \|----\|----\|----\|----\|----\|----\|----\|----\|----\|----\|----\|----\|
	* \| \| / \| / / / / \ __/ \ __/ \|
	* Float: \|\|\|\|-\|-\|-\|--\|--\|--\|----\|----\|----\|--------\|--------\|--------\|
	* \| \|
	* [ ] <-- This range would be too likely
	*/

	/* We'll begin by writing a macro that decrements the exponent until one bit is
	* set, using __builtin_ctz if it's available. */
	#if __GNUC__ >= 4 \|\| __clang_major__ >= 2 \|\| \
	(__GNUC__ == 3 && __GNUC_MINOR__ >= 4) \|\| \
	(__clang_major__ == 1 && __clang_minor__ >= 5)
	#if UINT_MAX >= UINT32_MAX
	#define DIST_UNIFORMF_DENSE_DEC_CTZ(exp, x) \
	((exp) -= __builtin_ctz(x))
	#elif ULONG_MAX >= UINT32_MAX
	#define DIST_UNIFORMF_DENSE_DEC_CTZ(exp, x) \
	((exp) -= __builtin_ctzl(x))
	#endif
	#if UINT_MAX >= UINT64_MAX
	#define DIST_UNIFORM_DENSE_DEC_CTZ(exp, x) \
	((exp) -= __builtin_ctz(x))
	#elif ULONG_MAX >= UINT64_MAX
	#define DIST_UNIFORM_DENSE_DEC_CTZ(exp, x) \
	((exp) -= __builtin_ctzl(x))
	#elif ULLONG_MAX >= UINT64_MAX
	#define DIST_UNIFORM_DENSE_DEC_CTZ(exp, x) \
	((exp) -= __builtin_ctzll(x))
	#endif
	#endif

	/* Otherwise, we'll use a lookup table and a De Bruijn sequence to calculate
	* the number of trailing zeros. <15>
	*
	* Given an initial random number n, we calculate m = n & -n,
	* this will only set the last set bit in n in m.
	* Now we have a distinct value for every possible number of trailing zeros and
	* a perfect hash function can be constructed that maps every potential value of
	* m to the number of trailing zeros.
	*
	* This is done by multiplying the modified number by a De Bruijn constant.
	* A De Bruijn constant contains all possible binary values of n-bits in all
	* subranges of n-bits. E.g. for n = 3:
	* 0b00011101 = 232
	* 000 = 0
	* 001 = 1
	* 011 = 3
	* 111 = 7
	* 110 = 6
	* 101 = 5
	* 010 = 2
	* 100 = 4
	*
	* m is a power-of-two, as only one bit is set, hence a multiplication with the
	* De Bruijn constant shifts it by its power.
	* Thus we get a distinct De Bruijn subsequence for every possible power and the
	* index can now be determined via a lookup table. */

	#ifndef DIST_UNIFORMF_DENSE_DEC_CTZ
	#define DIST_UNIFORMF_DENSE_DEC_CTZ(exp, x) do { \
	static const char deBruijn[32] = { \
	0, 1, 28,2,29,14,24,3,30,22,20,15,25,17, 4,8, \
	31,27,13,23,21,19,16,7,26,12,18, 6,11, 5,10,9 }; \
	(exp) -= deBruijn[(((x) & -(x)) * \
	UINT32_C(0x077CB531)) >> 27]; \
	} while (0)
	#endif

	#ifndef DIST_UNIFORM_DENSE_DEC_CTZ
	#define DIST_UNIFORM_DENSE_DEC_CTZ(exp, x) do { \
	static const char deBruijn[64] = { \
	0, 1, 2,53, 3, 7,54,27, 4,38,41, 8,34,55,48,28, \
	62, 5,39,46,44,42,22, 9,24,35,59,56,49,18,29,11, \
	63,52, 6,26,37,40,33,47,61,45,43,21,23,58,17,10, \
	51,25,36,32,60,20,57,16,50,31,19,15,30,14,13,12 }; \
	(exp) -= deBruijn[(((x) & -(x)) * \
	UINT64_C(0x022FDD63CC95386D)) >> 58]; \
	} while (0)
	#endif

	#define DIST_UNIFORMF_DENSE_AVAILABLE (!!UINT32_MAX)
	#if DIST_UNIFORMF_DENSE_AVAILABLE

	static float
	dist_uniformf_dense(
	float a, float b, /* [a,b] */
	uint32_t (rand32)(void), void *rng)
	{
	union { float f; uint32_t i; } u;
	enum { SIGN_POS = 0, SIGN_RAND = 1, SIGN_NEG = 2 } sign;
	uint32_t minexp, minmant, maxexp, maxmant;

	#define DIST_UNIFORMF_DENSE_MANT \
	((UINT32_C(1) << (FLT_MANT_DIG - 1)) - 1)

	/* make sure a is smaller than b */
	if (a == b) {
	return a;
	} else if (a > b) {
	float tmp = a;
	a = b;
	b = tmp;
	}

	{
	/* calculate min and max with respect to signedness */
	float min, max;
	switch ((sign = (a < 0.0f) + (b < 0.0f))) {
	case SIGN_POS: min = a; max = b; break;
	case SIGN_RAND: min = 0; max = (b > -a) ? b : -a; break;
	case SIGN_NEG: min = b; max = a; break;
	}
	/* extract the minimum and maximum exponent and mantissa */
	u.f = min;
	minexp = (u.i << 1) >> (FLT_MANT_DIG);
	minmant = u.i & DIST_UNIFORMF_DENSE_MANT;
	u.f = max;
	maxexp = (u.i << 1) >> (FLT_MANT_DIG);
	maxmant = u.i & DIST_UNIFORMF_DENSE_MANT;
	}

	/* optimize special cases, that could otherwise reject almost all
	* possible values of the mantissa. */
	if (minexp == maxexp) {
	u.i = (minexp << (FLT_MANT_DIG - 1)) \|
	(dist_uniform_u32(maxmant - minmant + 1, rand32, rng) +
	minmant);

	/* apply signedness */
	/**/ if (sign == SIGN_RAND)
	u.i \|= rand32(rng) & (UINT32_C(1) << 31);
	else if (sign == SIGN_NEG)
	u.i \|= UINT32_C(1) << 31;
	return u.f;
	} else if (minexp + 1 == maxexp) {
	const uint32_t invminmant = DIST_UNIFORMF_DENSE_MANT - minmant;
	const uint32_t range = invminmant + maxmant + 1;
	uint32_t mant, exp, x;
	size_t i = 0;

	while (1) {
	/* make sure three bits always set, two for the
	* rejection and one for the sign. */
	if (i <= 3) {
	x = rand32(rng);
	i = 32;
	}

	/* use maxexp if the first bit is set and minexp
	* if the second bit is set, otherwise repeat. */
	if (x & 1) {
	i -= 1, x >>= 1;
	exp = maxexp;
	mant = dist_uniform_u32(range, rand32, rng);
	if (mant <= maxmant)
	break;
	} else if (x & 3) {
	i -= 2, x >>= 2;
	exp = minexp;
	mant = dist_uniform_u32(range, rand32, rng);
	if (mant <= invminmant) {
	mant = DIST_UNIFORMF_DENSE_MANT - mant;
	break;
	}
	} else {
	i -= 2, x >>= 2;
	}
	}

	/* combine exp with a mantissa */
	u.i = (exp << (FLT_MANT_DIG - 1)) \| mant;

	/* apply signedness */
	/**/ if (sign == SIGN_RAND)
	u.i \|= x << 31;
	else if (sign == SIGN_NEG)
	u.i \|= UINT32_C(1) << 31;
	return u.f;
	} else while (1) {
	uint32_t exp, x;

	/* decrement exp until at least one bit is set */
	exp = maxexp;
	while ((x = rand32(rng)) == 0)
	exp -= 32;

	/* decrement exp by the number of trailing zeros */
	DIST_UNIFORMF_DENSE_DEC_CTZ(exp, x);

	/* repeat when the exponent is to low and on underflow */
	if ((exp < minexp) \|\| (exp > maxexp))
	continue;

	/* combine exp with a random mantissa */
	x = rand32(rng);
	u.i = (exp << (FLT_MANT_DIG - 1)) \| (x >> (33 - FLT_MANT_DIG));

	/* apply signedness */
	/**/ if (sign == SIGN_RAND)
	u.i \|= x << 31;
	else if (sign == SIGN_NEG)
	u.i \|= UINT32_C(1) << 31;

	/* reject if not in range */
	if (u.f >= a && u.f <= b)
	return u.f;
	}
	#undef DIST_UNIFORMF_DENSE_MANT
	}

	#endif /* DIST_UNIFORMF_DENSE_AVAILABLE */

	#define DIST_UNIFORM_DENSE_AVAILABLE (!!UINT64_MAX)
	#if DIST_UNIFORM_DENSE_AVAILABLE

	static double
	dist_uniform_dense(
	double a, double b, /* [a,b] */
	uint64_t (rand64)(void), void *rng)
	{
	union { double f; uint64_t i; } u;
	enum { SIGN_POS = 0, SIGN_RAND = 1, SIGN_NEG = 2 } sign;
	uint64_t minexp, minmant, maxexp, maxmant;

	#define DIST_UNIFORMF_DENSE_MANT \
	((UINT64_C(1) << (DBL_MANT_DIG - 1)) - 1)

	/* make sure a is smaller than b */
	if (a == b) {
	return a;
	} else if (a > b) {
	double tmp = a;
	a = b;
	b = tmp;
	}

	{
	/* calculate min and max with respect to signedness */
	double min, max;
	switch ((sign = (a < 0.0) + (b < 0.0))) {
	case SIGN_POS: min = a; max = b; break;
	case SIGN_RAND: min = 0; max = (b > -a) ? b : -a; break;
	case SIGN_NEG: min = b; max = a; break;
	}
	/* extract the minimum and maximum exponent and mantissa */
	u.f = min;
	minexp = (u.i << 1) >> (DBL_MANT_DIG);
	minmant = u.i & DIST_UNIFORMF_DENSE_MANT;
	u.f = max;
	maxexp = (u.i << 1) >> (DBL_MANT_DIG);
	maxmant = u.i & DIST_UNIFORMF_DENSE_MANT;
	}

	/* optimize special cases, that could otherwise reject almost all
	* possible values of the mantissa. */
	if (minexp == maxexp) {
	u.i = (minexp << (DBL_MANT_DIG - 1)) \|
	(dist_uniform_u64(maxmant - minmant + 1, rand64, rng) +
	minmant);

	/* apply signedness */
	/**/ if (sign == SIGN_RAND)
	u.i \|= rand64(rng) & (UINT64_C(1) << 63);
	else if (sign == SIGN_NEG)
	u.i \|= UINT64_C(1) << 63;
	return u.f;
	} else if (minexp + 1 == maxexp) {
	const uint64_t invminmant = DIST_UNIFORMF_DENSE_MANT - minmant;
	const uint64_t range = invminmant + maxmant + 1;
	uint64_t mant, exp, x;
	size_t i = 0;

	while (1) {
	/* make sure three bits always set, two for the
	* rejection and one for the sign. */
	if (i <= 3) {
	x = rand64(rng);
	i = 64;
	}

	/* use maxexp if the first bit is set and minexp
	* if the second bit is set, otherwise repeat. */
	if (x & 1) {
	i -= 1, x >>= 1;
	exp = maxexp;
	mant = dist_uniform_u64(range, rand64, rng);
	if (mant <= maxmant)
	break;
	} else if (x & 3) {
	i -= 2, x >>= 2;
	exp = minexp;
	mant = dist_uniform_u64(range, rand64, rng);
	if (mant <= invminmant) {
	mant = DIST_UNIFORMF_DENSE_MANT - mant;
	break;
	}
	} else {
	i -= 2, x >>= 2;
	}
	}

	/* combine exp with a mantissa */
	u.i = (exp << (DBL_MANT_DIG - 1)) \| mant;

	/* apply signedness */
	/**/ if (sign == SIGN_RAND)
	u.i \|= x << 63;
	else if (sign == SIGN_NEG)
	u.i \|= UINT64_C(1) << 63;
	return u.f;
	} else while (1) {
	uint64_t exp, x;

	/* decrement exp until at least one bit is set */
	exp = maxexp;
	while ((x = rand64(rng)) == 0)
	exp -= 64;

	/* decrement exp by the number of trailing zeros */
	DIST_UNIFORMF_DENSE_DEC_CTZ(exp, x);

	/* repeat when the exponent is to low and on underflow */
	if ((exp < minexp) \|\| (exp > maxexp))
	continue;

	/* combine exp with a random mantissa */
	x = rand64(rng);
	u.i = (exp << (DBL_MANT_DIG - 1)) \| (x >> (65 - DBL_MANT_DIG));

	/* apply signedness */
	/**/ if (sign == SIGN_RAND)
	u.i \|= x << 63;
	else if (sign == SIGN_NEG)
	u.i \|= UINT64_C(1) << 63;

	/* reject if not in range */
	if (u.f >= a && u.f <= b)
	return u.f;
	}
	#undef DIST_UNIFORMF_DENSE_MANT
	}

	#endif /* DIST_UNIFORM_DENSE_AVAILABLE */

	#undef DIST_UNIFORMF_DENSE_DEC_CTZ
	#undef DIST_UNIFORM_DENSE_DEC_CTZ