Created
September 23, 2019 10:25
-
-
Save el-hult/56953ea3fcc6807611c80bb448a95dd3 to your computer and use it in GitHub Desktop.
Some classes for random variables, in a Pythonic way with inheritance. Fun way to make universal random number generation. Might be inefficient, but still very cool.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# standards | |
from abc import ABC | |
from itertools import islice | |
from collections import Counter | |
import types | |
# 3rd party | |
import numpy as np | |
import scipy.stats as stats | |
import matplotlib.pyplot as plt | |
class RandomVariable:
    """Base class for random variables modelled as infinite iterators.

    Subclasses implement ``__next__`` to draw one sample; the iterator
    protocol then provides universal sampling utilities for free.
    """

    def __init__(self, name: str):
        self.name = name
        # symbol mirrors name for now; kept separate so a subclass could
        # supply a shorter plotting label without losing the full name
        self.symbol = name

    def __iter__(self):
        return self

    def __next__(self):
        # Subclasses must provide the actual sampling logic.
        raise NotImplementedError

    def __add__(self, other):
        # X + Y yields a new random variable that samples the sum of both.
        return SumVariable(self, other)

    def sample_descriptive_stats(self, n: int = 100) -> dict:
        """Estimate mean and standard deviation from ``n`` fresh samples.

        Single streaming pass (sum and sum of squares), so memory use is
        O(1). The variance is the biased (population) estimate.

        Returns:
            dict with keys ``mean``, ``std_dev`` and ``n``.
        """
        acc_sum = 0.0
        acc_square = 0.0
        for _ in range(n):  # was 'k': index unused, renamed to _
            sample = next(self)
            acc_sum += sample
            acc_square += sample ** 2
        mean = acc_sum / n
        variance = acc_square / n - mean ** 2  # fixed typo: was 'variace'
        # Floating-point cancellation can make this estimate slightly
        # negative for near-constant samples; clamp before the sqrt.
        std_dev = np.sqrt(max(variance, 0.0))
        return {'mean': mean, 'std_dev': std_dev, 'n': n}
class DiscreteVariable(RandomVariable, ABC):
    """Abstract base for integer-valued random variables."""

    def dist_plot(self, n=100):
        """Draw the empirical pmf of ``n`` samples as a stem plot."""
        # Tally frequencies lazily: Counter consumes the islice generator
        # directly, so no intermediate list or ndarray is materialized.
        tallies = Counter(islice(self, n))
        support, occurrences = zip(*tallies.items())
        occurrences = np.array(occurrences)
        support = np.array(support)
        rel_freq = occurrences / n

        fig = plt.figure()
        axes = fig.gca()
        # vertical stems from the x-axis up to each observed frequency
        stem_y = np.vstack([np.zeros_like(rel_freq), rel_freq])
        stem_x = np.vstack([support, support])
        axes.plot(stem_x, stem_y, color='b')
        # dots on top of the stems mark the observed probabilities
        axes.plot(support, rel_freq, color='b', marker='.', markersize=15,
                  linestyle='none')
        axes.set_xlabel("x")
        axes.set_ylabel("p(x)")
        axes.set_title(f"Observed pdf for {self.name}")
        plt.show()
class ContinousVariable(RandomVariable):
    """Mixin for functions on continous random variables"""

    def kde_plot(self, n=100):
        """Plot a Gaussian KDE of ``n`` samples, with a rug plot underneath."""
        samples = np.array([next(self) for _ in range(n)])
        grid = np.linspace(samples.min(), samples.max())
        density = stats.gaussian_kde(samples).evaluate(grid)

        fig = plt.figure()
        axes = fig.gca()
        axes.plot(grid, density)
        # rug plot: one tick mark on the x-axis per observed sample
        axes.plot(samples, np.zeros(samples.shape), 'b', marker='|',
                  linestyle='none', markersize=5)
        axes.set_title(f"KDE-plot for {self.name}")
        plt.show()
class SumVariable(RandomVariable):
    """Random variable formed as the sum of two other random variables.

    Adding variables should really preserve discreteness or promote to
    continuous, which would need a smarter (dynamic mixin) construction.
    See https://stackoverflow.com/questions/15222085/python-dynamic-multiple-inheritance
    """

    def __init__(self, a: RandomVariable, b: RandomVariable):
        super().__init__(a.name + "+" + b.name)
        self.a = a
        self.b = b

    def __next__(self):
        # one draw from each summand per sample
        return next(self.a) + next(self.b)
class Uniform(ContinousVariable):
    def __init__(self, a, b):
        """A random variable that is uniform on the interval [a,b)"""
        super().__init__(f"Uniform({a},{b})")
        self.a = a
        self.b = b
        # bind the generator function once; avoids an attribute lookup per draw
        self.rand = np.random.random

    def __next__(self):
        # stretch a [0,1) draw onto [a,b)
        width = self.b - self.a
        return self.a + self.rand() * width
class Bernoulli(DiscreteVariable):
    def __init__(self, p=.5):
        """Super-inefficient implementation of a Bernoulli trial"""
        super().__init__(f"Bernoulli({p})")
        self.uniform = Uniform(0, 1)
        self.p = p

    def __next__(self):
        # 1 with probability p, otherwise 0
        draw = next(self.uniform)
        return 1 if draw < self.p else 0
class Binomial(DiscreteVariable):
    def __init__(self, n, p=.5):
        """Number of successes in n independent Bernoulli(p) trials."""
        super().__init__(f"Binomial({n},{p})")
        self.n = n
        self.rand = Bernoulli(p)

    def __next__(self):
        # add up n draws from the underlying Bernoulli stream
        return sum(islice(self.rand, self.n))
class Normal(ContinousVariable):
    """Gaussian random variable with the given mean and variance."""

    def __init__(self, mean, variance):
        super().__init__(f"Normal({mean},{variance})")
        self.mean = mean
        self.variance = variance

    def __next__(self):
        # Scale the standard normal draw by the *standard deviation*
        # (sqrt of the variance). The original multiplied by the variance
        # itself, which produced samples with variance variance**2
        # whenever variance != 1.
        return np.random.standard_normal() * np.sqrt(self.variance) + self.mean
class StandardNormal(Normal):
    """Convenience subclass: the standard normal Normal(0, 1)."""

    def __init__(self):
        super().__init__(0, 1)
if __name__ == "__main__":
    # Smoke-test the sampling classes.
    rv1 = Uniform(2, 3)
    print(list(islice(rv1, 3)))  # three raw draws
    # sum of Uniform(2,3) and Uniform(1,2): mean should be ~4.5
    print((rv1 + Uniform(1, 2)).sample_descriptive_stats(100000))
    print(Bernoulli().sample_descriptive_stats(100000))
    print(Bernoulli(.8).sample_descriptive_stats(100000))
    # number of sixes in 36 die rolls: mean ~6
    print(Binomial(36, 1 / 6).sample_descriptive_stats(100000))
    # Binomial is a DiscreteVariable, so it has dist_plot, not kde_plot;
    # the original called Binomial(10, .2).kde_plot here, which raised
    # AttributeError at runtime.
    Binomial(10, .2).dist_plot(n=2000)
    Uniform(.3, 10).kde_plot(n=200)
    Normal(np.pi, 3.1).kde_plot()
    # removed unused variable: k = int(1e7) was assigned but never read
    Binomial(30, .1).dist_plot()
    StandardNormal().kde_plot()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment