@VadimKirilchuk
Created March 18, 2018 12:57
Central limit theorem in action
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import math
# distribution
# In probability theory and statistics, the Laplace distribution is a continuous probability distribution
# named after Pierre-Simon Laplace.
# It is also sometimes called the double exponential distribution,
# because it can be thought of as two exponential distributions
# (with an additional location parameter) spliced together back-to-back,
# although the term is also sometimes used to refer to the Gumbel distribution.
# The difference between two independent identically distributed exponential random variables
# is governed by a Laplace distribution, as is a Brownian motion evaluated at an exponentially distributed random time.
# Increments of Laplace motion or a variance gamma process evaluated over the time scale also have a Laplace distribution.
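# A quick numeric sketch of the claim above (an addition for illustration, not part
# of the original analysis): the difference of two i.i.d. exponential samples should
# have variance close to 2*scale^2, matching a Laplace distribution with that scale.
# The names _expon_scale, _e1, _e2 are introduced here only for this check.
_expon_scale = 2.0
_e1 = stats.expon(scale=_expon_scale).rvs(size=100000)
_e2 = stats.expon(scale=_expon_scale).rvs(size=100000)
print('var(e1 - e2) =', np.var(_e1 - _e2), 'expected ~', 2 * _expon_scale ** 2)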
ex = -1.0
scale = 2.0
distribution = stats.laplace(loc = ex, scale = scale)
# calculate the real dispersion/variance for the given Laplace distribution
# it is 2*scale^2: 2 * 4 = 8
dx = distribution.var()
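# Sanity check (an added sketch): confirm the variance formula above numerically.
# np.isclose is used rather than == to allow for floating point representation.
assert np.isclose(dx, 2 * scale ** 2)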
# generate 1000 values from distribution for the hist vs pdf plot
values = distribution.rvs(size = 1000)
# x axis bounds
left = -10
right = 10
# hist and probability density function
plt.hist(values, 50, density=True)
x = np.linspace(left, right, num=100)
y = distribution.pdf(x)
plt.plot(x, y, linewidth=2, color='r')
plt.ylabel('$f(x)$')
plt.xlabel('$x$')
plt.show()
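# Optional quantitative check of the fit shown above (an added sketch; the original
# gist relies on the visual comparison only): a Kolmogorov-Smirnov test of the
# sampled values against the same Laplace CDF.
ks_stat, p_value = stats.kstest(values, distribution.cdf)
print('KS statistic:', ks_stat, 'p-value:', p_value)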
k = 1000  # number of samples
def sample(n):
    samples = np.zeros(k)
    for index in range(k):
        # for each sample find its average, the result array is our X estimation
        samples[index] = distribution.rvs(size = n).mean()
    return samples
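# An equivalent vectorized form of sample() (an alternative sketch, not the original
# approach): draw a k-by-n matrix in one call and average each row.
def sample_vectorized(n):
    return distribution.rvs(size=(k, n)).mean(axis=1)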
# create a histogram of X estimation
def clt(n):
    plt.title(f"X and Normal Distribution for n={n}")
    samples = sample(n)
    # hist and probability density function
    plt.ylabel('$f(x)$')
    plt.xlabel('$x$')
    plt.hist(samples, 20, density=True, label="experimental")
    # compute parameters of the normal distribution approximating the averages
    # the mean is the same as for the original distribution, so just reuse it
    sigma = math.sqrt(dx / n)
    norm_rv = stats.norm(loc = ex, scale = sigma)
    x = np.linspace(left, right, num=100)
    pdf = norm_rv.pdf(x)
    plt.plot(x, pdf, linewidth=2, color='r', label='theoretical')
    plt.legend(loc = 'upper right')
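# A numeric check of the CLT scaling used above (an added sketch): the standard
# deviation of the sample means should be close to sqrt(dx / n).
check_n = 50
print('empirical std:', sample(check_n).std(), 'theoretical std:', math.sqrt(dx / check_n))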
# for n = 5, 10, 50 show clt(n)
for n in [5, 10, 50]:
    plt.figure()
    clt(n)
    plt.show()
# Conclusion
# The central limit theorem works!
# Even for small n the approximation is very close to the actual values.
# The larger n is, the better the approximation.