Last active
December 30, 2022 19:59
-
-
Save danmoseley/784a25839978425ae15d037f587ac76b to your computer and use it in GitHub Desktop.
test random with chi squared
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using System.Diagnostics; | |
using System.Linq; | |
using System.Numerics; | |
internal class Program | |
{ | |
public static void Main(string[] args) | |
{ | |
// Check for an off-by-one type error causing us to consistently not generate values | |
// across the whole of the requested range | |
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.Next)); | |
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.Next, 0, 50)); | |
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.Next, -25, 25)); | |
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.Next, 0, int.MaxValue)); | |
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.Next, int.MinValue, int.MaxValue)); | |
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.NextInt64)); | |
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.NextInt64, 0, 50L)); | |
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.NextInt64, -25L, 25L)); | |
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.NextInt64, 0, long.MaxValue)); | |
ChiSquaredTest(GetBucketizedBinaryIntegers(Random.Shared.NextInt64, long.MinValue, 0)); | |
ChiSquaredTest(GetBucketizedFloats(Random.Shared.NextSingle)); | |
ChiSquaredTest(GetBucketizedFloats(Random.Shared.NextDouble)); | |
// To illustrate it is sensitive to an off-by-one: | |
// | |
static Func<T, T, T> FilterOneValue<T>(Func<T, T, T> func, T reject) where T : IEqualityOperators<T, T, bool> => | |
(T min, T maxValue) => | |
{ | |
T sample; | |
while (reject == (sample = func(min, maxValue))); | |
return sample; | |
}; | |
Console.Write(ChiSquaredTest(GetBucketizedBinaryIntegers(FilterOneValue((min, maxExclusive) => Random.Shared.Next(min, maxExclusive), reject: 49), 0, 50))); | |
Console.WriteLine("done"); | |
Console.ReadLine(); | |
} | |
static int[] GetBucketizedFloats<T>(Func<T> func) where T : INumber<T> | |
{ | |
int[] buckets = new int[50]; | |
for (int i = 0; i < 100_000; i++) | |
{ | |
T sample = func(); | |
Debug.Assert(T.Zero <= sample && sample < T.CreateSaturating(1)); | |
buckets[(int)(decimal.CreateSaturating(sample) * buckets.Length)]++; | |
} | |
return buckets; | |
} | |
static int[] GetBucketizedBinaryIntegers<T>(Func<T> func) where T : INumber<T>, IMinMaxValue<T>, IModulusOperators<T, T, T> => | |
GetBucketizedBinaryIntegers<T>((_) => func(), T.MaxValue); | |
static int[] GetBucketizedBinaryIntegers<T>(Func<T, T> func, T maxExclusive) where T : INumber<T>, IMinMaxValue<T>, IModulusOperators<T, T, T> => | |
GetBucketizedBinaryIntegers<T>((_, _) => func(maxExclusive), T.Zero, maxExclusive); | |
static int[] GetBucketizedBinaryIntegers<T>(Func<T, T, T> func, T min, T maxExclusive) where T : INumber<T>, IModulusOperators<T, T, T> | |
{ | |
Debug.Assert(min < maxExclusive); | |
int[] buckets = new int[50]; | |
// Range must be a multiple of bucket count, unless it is large enough to not matter | |
decimal range = decimal.CreateSaturating(maxExclusive) - decimal.CreateSaturating(min); | |
Debug.Assert(range > 100_000m || range % buckets.Length == 0); | |
for (int i = 0; i < 100_000; i++) | |
{ | |
T sample = func(min, maxExclusive); | |
int index = (int)((decimal.CreateSaturating(sample) - decimal.CreateSaturating(min)) % buckets.Length); | |
buckets[index]++; | |
} | |
return buckets; | |
} | |
// Provided an array of 50 buckets each containing a count, returns true if there is a less than 1/10**8 | |
// probability that the sample originated from a random distribution. | |
static bool ChiSquaredTest(int[] buckets) | |
{ | |
// Critical value depends on bucket count. | |
Debug.Assert(buckets.Length == 50); | |
// We expect a perfectly uniform distribution. | |
double sum = 0; | |
for (int i = 0; i < buckets.Length; i++) | |
{ | |
sum += buckets[i]; | |
} | |
double expectedPerBucket = sum / buckets.Length; | |
// Calculate chi-square statistic = sum of (observed - expected)**2 / expected | |
double chiSquare = 0; | |
foreach (int bucket in buckets) | |
{ | |
chiSquare += Math.Pow(bucket - expectedPerBucket, 2) / expectedPerBucket; | |
} | |
// This is a "Chi Squared Goodness of fit" problem, so degrees of freedom is (#buckets - 1) which in this case is 49. | |
// We choose a significance level such that the chance of a false positive is 1/10**8. | |
// Plugging these into Excel formula `=CHISQ.INV.RT(0.00000001,49)` gives 126. | |
// If Chi Squared is greater than 126, the sample is very unlikely to be derived from the uniform distribution. | |
Console.WriteLine($"{chiSquare}"); | |
Debug.Assert(chiSquare <= 126); | |
return (chiSquare <= 126); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment