Skip to content

Instantly share code, notes, and snippets.

@ageron
Last active May 17, 2017 18:38
Show Gist options
  • Save ageron/694ebadda3bdccdd1b9b95abb228a7d6 to your computer and use it in GitHub Desktop.
Save ageron/694ebadda3bdccdd1b9b95abb228a7d6 to your computer and use it in GitHub Desktop.
from __future__ import division, print_function
import numpy as np
women_ratio = 0.513 # ratio of women in the US
min_ratio = 0.49 # roughly 5% more than women_ratio
max_ratio = 0.54 # roughly 5% less than women_ratio
sample_size = 1000
n_samples = 10000
####################################
# Long version, easy to understand #
####################################
def sample_people(n=sample_size, p=women_ratio):
"""Returns an ndarray of n random people, True for women, False for men"""
return np.random.rand(n) < p
def is_skewed(sample, min_ratio=min_ratio, max_ratio=max_ratio):
"""Returns True if the sample is skewed"""
ratio = np.mean(sample)
return ratio <= min_ratio or ratio > max_ratio
def skewed_ratio(n_samples=n_samples):
"""Creates n_samples samples and returns the ratio of skewed samples"""
n_skewed = 0
for i in range(n_samples):
sample = sample_people()
if is_skewed(sample):
n_skewed += 1
return n_skewed / n_samples
print("Simulation #1: skewed ratio is {:.1f}%".format(100*skewed_ratio()))
print("Measured on", n_samples, "random samples of", sample_size, "people each")
print()
#########################################
# Much shorter version, harder to grasp #
#########################################
def skewed_ratio2(n=sample_size, n_samples=n_samples, p=women_ratio):
women_ratios = (np.random.rand(n, n_samples) < p).mean(axis=0)
return ((women_ratios <= min_ratio) | (women_ratios > max_ratio)).mean()
print("Simulation #2: skewed ratio is {:.1f}%".format(100*skewed_ratio2()))
print("Measured on", n_samples, "random samples of", sample_size, "people each")
print()
#################################################
# Mathematical version, instead of a simulation #
#################################################
from scipy.stats import binom
def skewed_ratio3(n=sample_size, p=women_ratio,
min_ratio=min_ratio, max_ratio=max_ratio):
distrib = binom(n=sample_size, p=women_ratio)
proba_lower_than_min = distrib.cdf(x=min_ratio*sample_size)
proba_higher_than_max = 1.0 - distrib.cdf(x=max_ratio*sample_size)
return proba_lower_than_min + proba_higher_than_max
print("Mathematical solution:")
print("Skewed ratio is {:.1f}%".format(100*skewed_ratio3()))
print("Using the binomial distribution.")
print()
@javabean68
Copy link

Nice example.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment