Last active
May 17, 2017 18:38
-
-
Save ageron/694ebadda3bdccdd1b9b95abb228a7d6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import division, print_function | |
import numpy as np | |
women_ratio = 0.513 # ratio of women in the US | |
min_ratio = 0.49 # roughly 5% more than women_ratio | |
max_ratio = 0.54 # roughly 5% less than women_ratio | |
sample_size = 1000 | |
n_samples = 10000 | |
#################################### | |
# Long version, easy to understand # | |
#################################### | |
def sample_people(n=sample_size, p=women_ratio): | |
"""Returns an ndarray of n random people, True for women, False for men""" | |
return np.random.rand(n) < p | |
def is_skewed(sample, min_ratio=min_ratio, max_ratio=max_ratio): | |
"""Returns True if the sample is skewed""" | |
ratio = np.mean(sample) | |
return ratio <= min_ratio or ratio > max_ratio | |
def skewed_ratio(n_samples=n_samples): | |
"""Creates n_samples samples and returns the ratio of skewed samples""" | |
n_skewed = 0 | |
for i in range(n_samples): | |
sample = sample_people() | |
if is_skewed(sample): | |
n_skewed += 1 | |
return n_skewed / n_samples | |
print("Simulation #1: skewed ratio is {:.1f}%".format(100*skewed_ratio())) | |
print("Measured on", n_samples, "random samples of", sample_size, "people each") | |
print() | |
######################################### | |
# Much shorter version, harder to grasp # | |
######################################### | |
def skewed_ratio2(n=sample_size, n_samples=n_samples, p=women_ratio): | |
women_ratios = (np.random.rand(n, n_samples) < p).mean(axis=0) | |
return ((women_ratios <= min_ratio) | (women_ratios > max_ratio)).mean() | |
print("Simulation #2: skewed ratio is {:.1f}%".format(100*skewed_ratio2())) | |
print("Measured on", n_samples, "random samples of", sample_size, "people each") | |
print() | |
################################################# | |
# Mathematical version, instead of a simulation # | |
################################################# | |
from scipy.stats import binom | |
def skewed_ratio3(n=sample_size, p=women_ratio, | |
min_ratio=min_ratio, max_ratio=max_ratio): | |
distrib = binom(n=sample_size, p=women_ratio) | |
proba_lower_than_min = distrib.cdf(x=min_ratio*sample_size) | |
proba_higher_than_max = 1.0 - distrib.cdf(x=max_ratio*sample_size) | |
return proba_lower_than_min + proba_higher_than_max | |
print("Mathematical solution:") | |
print("Skewed ratio is {:.1f}%".format(100*skewed_ratio3())) | |
print("Using the binomial distribution.") | |
print() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice example.