Created
September 23, 2019 10:25
-
-
Save el-hult/56953ea3fcc6807611c80bb448a95dd3 to your computer and use it in GitHub Desktop.
Some classes for random variables, in a Pythonic way with inheritance. Fun way to make universal random number generation. Might be inefficient, but still very cool.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# standards | |
from abc import ABC | |
from itertools import islice | |
from collections import Counter | |
import types | |
# 3rd party | |
import numpy as np | |
import scipy.stats as stats | |
import matplotlib.pyplot as plt | |
class RandomVariable:
    """Base class for random variables modelled as infinite iterators.

    Subclasses implement ``__next__`` to draw one sample; the iterator
    protocol then provides universal sampling utilities for free.
    """

    def __init__(self, name: str):
        self.name = name
        # symbol mirrors name for now; kept separate so a subclass could
        # supply a shorter plotting label without losing the full name
        self.symbol = name

    def __iter__(self):
        return self

    def __next__(self):
        # Subclasses must provide the actual sampling logic.
        raise NotImplementedError

    def __add__(self, other):
        # X + Y yields a new random variable that samples the sum of both.
        return SumVariable(self, other)

    def sample_descriptive_stats(self, n: int = 100) -> dict:
        """Estimate mean and standard deviation from ``n`` fresh samples.

        Single streaming pass (sum and sum of squares), so memory use is
        O(1). The variance is the biased (population) estimate.

        Returns:
            dict with keys ``mean``, ``std_dev`` and ``n``.
        """
        acc_sum = 0.0
        acc_square = 0.0
        for _ in range(n):  # was 'k': index unused, renamed to _
            sample = next(self)
            acc_sum += sample
            acc_square += sample ** 2
        mean = acc_sum / n
        variance = acc_square / n - mean ** 2  # fixed typo: was 'variace'
        # Floating-point cancellation can make this estimate slightly
        # negative for near-constant samples; clamp before the sqrt.
        std_dev = np.sqrt(max(variance, 0.0))
        return {'mean': mean, 'std_dev': std_dev, 'n': n}
class DiscreteVariable(RandomVariable, ABC):
    """Abstract base for integer-valued random variables."""

    def dist_plot(self, n=100):
        """Draw the empirical pmf of ``n`` samples as a stem plot."""
        # Tally frequencies lazily: Counter consumes the islice generator
        # directly, so no intermediate list or ndarray is materialized.
        tallies = Counter(islice(self, n))
        support, occurrences = zip(*tallies.items())
        occurrences = np.array(occurrences)
        support = np.array(support)
        rel_freq = occurrences / n

        fig = plt.figure()
        axes = fig.gca()
        # vertical stems from the x-axis up to each observed frequency
        stem_y = np.vstack([np.zeros_like(rel_freq), rel_freq])
        stem_x = np.vstack([support, support])
        axes.plot(stem_x, stem_y, color='b')
        # dots on top of the stems mark the observed probabilities
        axes.plot(support, rel_freq, color='b', marker='.', markersize=15,
                  linestyle='none')
        axes.set_xlabel("x")
        axes.set_ylabel("p(x)")
        axes.set_title(f"Observed pdf for {self.name}")
        plt.show()
class ContinousVariable(RandomVariable):
    """Mixin for functions on continous random variables"""

    def kde_plot(self, n=100):
        """Plot a Gaussian KDE of ``n`` samples, with a rug plot underneath."""
        samples = np.array([next(self) for _ in range(n)])
        grid = np.linspace(samples.min(), samples.max())
        density = stats.gaussian_kde(samples).evaluate(grid)

        fig = plt.figure()
        axes = fig.gca()
        axes.plot(grid, density)
        # rug plot: one tick mark on the x-axis per observed sample
        axes.plot(samples, np.zeros(samples.shape), 'b', marker='|',
                  linestyle='none', markersize=5)
        axes.set_title(f"KDE-plot for {self.name}")
        plt.show()
class SumVariable(RandomVariable):
    """Random variable formed as the sum of two other random variables.

    Adding variables should really preserve discreteness or promote to
    continuous, which would need a smarter (dynamic mixin) construction.
    See https://stackoverflow.com/questions/15222085/python-dynamic-multiple-inheritance
    """

    def __init__(self, a: RandomVariable, b: RandomVariable):
        super().__init__(a.name + "+" + b.name)
        self.a = a
        self.b = b

    def __next__(self):
        # one draw from each summand per sample
        return next(self.a) + next(self.b)
class Uniform(ContinousVariable):
    def __init__(self, a, b):
        """A random variable that is uniform on the interval [a,b)"""
        super().__init__(f"Uniform({a},{b})")
        self.a = a
        self.b = b
        # bind the generator function once; avoids an attribute lookup per draw
        self.rand = np.random.random

    def __next__(self):
        # stretch a [0,1) draw onto [a,b)
        width = self.b - self.a
        return self.a + self.rand() * width
class Bernoulli(DiscreteVariable):
    def __init__(self, p=.5):
        """Super-inefficient implementation of a Bernoulli trial"""
        super().__init__(f"Bernoulli({p})")
        self.uniform = Uniform(0, 1)
        self.p = p

    def __next__(self):
        # 1 with probability p, otherwise 0
        draw = next(self.uniform)
        return 1 if draw < self.p else 0
class Binomial(DiscreteVariable):
    def __init__(self, n, p=.5):
        """Number of successes in n independent Bernoulli(p) trials."""
        super().__init__(f"Binomial({n},{p})")
        self.n = n
        self.rand = Bernoulli(p)

    def __next__(self):
        # add up n draws from the underlying Bernoulli stream
        return sum(islice(self.rand, self.n))
class Normal(ContinousVariable):
    """Gaussian random variable with the given mean and variance."""

    def __init__(self, mean, variance):
        super().__init__(f"Normal({mean},{variance})")
        self.mean = mean
        self.variance = variance

    def __next__(self):
        # Scale the standard normal draw by the *standard deviation*
        # (sqrt of the variance). The original multiplied by the variance
        # itself, which produced samples with variance variance**2
        # whenever variance != 1.
        return np.random.standard_normal() * np.sqrt(self.variance) + self.mean
class StandardNormal(Normal):
    """Convenience subclass: the standard normal Normal(0, 1)."""

    def __init__(self):
        super().__init__(0, 1)
if __name__ == "__main__":
    # Smoke-test the sampling classes.
    rv1 = Uniform(2, 3)
    print(list(islice(rv1, 3)))  # three raw draws
    # sum of Uniform(2,3) and Uniform(1,2): mean should be ~4.5
    print((rv1 + Uniform(1, 2)).sample_descriptive_stats(100000))
    print(Bernoulli().sample_descriptive_stats(100000))
    print(Bernoulli(.8).sample_descriptive_stats(100000))
    # number of sixes in 36 die rolls: mean ~6
    print(Binomial(36, 1 / 6).sample_descriptive_stats(100000))
    # Binomial is a DiscreteVariable, so it has dist_plot, not kde_plot;
    # the original called Binomial(10, .2).kde_plot here, which raised
    # AttributeError at runtime.
    Binomial(10, .2).dist_plot(n=2000)
    Uniform(.3, 10).kde_plot(n=200)
    Normal(np.pi, 3.1).kde_plot()
    # removed unused variable: k = int(1e7) was assigned but never read
    Binomial(30, .1).dist_plot()
    StandardNormal().kde_plot()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment