Created
February 7, 2011 14:38
-
-
Save dylan-evans/814453 to your computer and use it in GitHub Desktop.
This snippet generates some reasonably normal statistics. I wrote it as an experiment to find a way to generate this data for a game, which i never wrote, so precision was never a goal. I had trouble finding information about doing this sort of thing, but
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# Generate a (roughly normal) sample population for a given mean & standard deviation. | |
# This was intended for use modelling hypothetical demographics in a sim game. | |
"""\ | |
Demonstration of the StatGen class | |
This script demonstrates the StatGen class which generates a normalized sample | |
from a mean and standard deviation. The algorithm can be adjusted, tweaked and | |
broken in many different ways which can provide some interesting results, but | |
the current form provides an almost bell curve sample. Well it's more of a | |
volcano than a bell curve, use your imagination! | |
If matplotlib is available this script displays a histogram representation, this | |
is recommended. | |
The purpose of the algorithm was to provide sample data for a game | |
""" | |
from math import sqrt | |
from optparse import OptionParser | |
from random import random | |
class StatGen(object): | |
"""\ | |
Statistic generation class. | |
This class generates a statistical population based on a given mean, | |
standard deviation and sample size. While the result is not intended to | |
be perfect the resulting sample should be more or less normalized which | |
is achieved by using both a floating mean and standard deviation, and while | |
i came up with the maths from scratch i have difficulty explaining it. | |
""" | |
limit = 2.6 # Theoretical limit in standard deviations | |
def __init__(self, weight, mean, std_dev, verbose=False): | |
self.verbose = verbose | |
self.weight = weight + 1 | |
self.orig_mean = self.mean = mean | |
self.std_dev = std_dev | |
self.toggle = 1 | |
self.next(); self.next() | |
def next(self): | |
"""Get the next value""" | |
# Create the offset from the current std_dev | |
# The offset is the actual limit of the value | |
offset = self.limit * self.std_dev | |
val = offset * random() # Calculate the raw value | |
#val = (sqrt(random())-self.limit*self.orig_std_dev)*(self.std_dev) | |
# Decide whether the value should be greater or lesser than the mean. | |
# This method simply alternates | |
if self.toggle: | |
self.toggle = 0 | |
val += self.mean | |
else: | |
self.toggle = 1 | |
val = self.mean - val | |
# Alternate method, probably a better sample but slower | |
#if random() > 0.5: | |
# val += self.mean | |
#else: | |
# val = self.mean - val | |
if self.weight > 1: | |
# Recalculate the standard deviation | |
diff = (val - self.mean) ** 2 | |
inter = (self.std_dev ** 2) * self.weight - diff | |
self.weight -= 1 | |
try: | |
self.std_dev = sqrt(inter / self.weight) | |
except: | |
pass | |
# Calculate the new mean | |
mean = (self.mean / self.weight) * (self.weight + 1) - (val / self.weight) | |
self.mean = (mean - self.mean) / 2 | |
val += self.orig_mean # Align the value by adding the original mean | |
if self.verbose: | |
print "value: %f new mean: %f" % (val, self.mean) | |
return val | |
if __name__ == '__main__': | |
# Test program portion | |
# Parse command line | |
parser = OptionParser() | |
parser.add_option('-p', '--population', action='store', type='int', default=1000) | |
# I am aware that population is technically incorrect, however it does start | |
# with a 'P' giving an advantage over 'sample' and 'size'. See -s | |
parser.add_option('-m', '--mean', action='store', type='float', default=0.5) | |
parser.add_option('-s', '--stddev', action='store', type='float', dest='std_dev', default=0.2) | |
parser.add_option('-v', '--verbose', action='store_true', default=False) | |
parser.add_option('-M', '--no-matplotlib', action='store_true', default=False) | |
(options, args) = parser.parse_args() | |
if options.no_matplotlib: | |
plt = False | |
else: | |
try: | |
import matplotlib.pyplot as plt | |
#from pylab import hist | |
except: | |
print "matplotlib not available" | |
plt = False | |
g = StatGen(options.population, options.mean, options.std_dev, options.verbose) | |
values = [] | |
errors = [] | |
rmean = 0 | |
for i in range(0, options.population): | |
# Generate the sample and add it to the values list | |
val = g.next() | |
if val > 1 or val < 0: | |
#Out of bounds values, there are usually a couple | |
errors.append(val) | |
values.append(val) | |
rmean += val | |
print "Summary:" | |
rmean = rmean / options.population | |
diff = rmean - options.mean | |
print "Mean: %f / %f :: %f %f%% difference" % (rmean, options.mean, diff, (diff / rmean) * 100) | |
adev = 0 | |
for val in values: | |
adev += (val - options.mean) ** 2 | |
adev = sqrt(adev / options.population) | |
print "Deviation: %f / %f :: %f %f%% difference" % (adev, options.std_dev, | |
adev - options.std_dev, | |
((adev-options.std_dev)/options.std_dev)*100) | |
if options.verbose: | |
print "Errors:" | |
for v in errors: | |
print v | |
if plt: #show histogram | |
# Display graph | |
#h = hist(values, bins=200) | |
plt.hist(values, bins=options.population*0.2) | |
plt.title("Mean result: %f Difference: %f %f%%" % (rmean, diff, (diff / rmean) * 100)) | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment