[
    [SL('P'), SL('B'), DL('S'), SL('L')],
    [TL('R'), SL('A'), SL('H'), SL('D')],
    [SL('B'), SL('E'), SL('C'), TW('P')],
    [SL('L'), DW('S'), SL('E'), SL('K')]
]
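The `SL`/`DL`/`TL`/`DW`/`TW` constructors are not defined in this snippet. A minimal sketch of what they might be, assuming the usual word-game reading (single/double/triple letter score, double/triple word score) — the `Tile` class and its multiplier fields are my invention, not the original code:

```python
from dataclasses import dataclass

@dataclass
class Tile:
    # Hypothetical tile type: a letter plus letter/word score multipliers.
    letter: str
    letter_mult: int = 1
    word_mult: int = 1

def SL(c): return Tile(c)                  # single letter (no bonus)
def DL(c): return Tile(c, letter_mult=2)   # double letter score
def TL(c): return Tile(c, letter_mult=3)   # triple letter score
def DW(c): return Tile(c, word_mult=2)     # double word score
def TW(c): return Tile(c, word_mult=3)     # triple word score

board = [
    [SL('P'), SL('B'), DL('S'), SL('L')],
    [TL('R'), SL('A'), SL('H'), SL('D')],
    [SL('B'), SL('E'), SL('C'), TW('P')],
    [SL('L'), DW('S'), SL('E'), SL('K')]
]
```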
# These are the "true" slopes and intercepts that I made up.
set_parameters = (
    (('no degree', 'CA', 'Riverside County'), (5000, 1250)),
    (('no degree', 'IL', 'Cook County'), (6500, 1150)),
    (('no degree', 'IL', 'Lake County'), (7000, 1350)),
    (('degree', 'CA', 'Riverside County'), (6000, 1250)),
    (('degree', 'IL', 'Cook County'), (7500, 1150)),
    (('degree', 'IL', 'Lake County'), (8000, 1350))
)
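One way these "true" parameters might be used to simulate a `salary_df` for the model to recover. Note the `(intercept, slope)` ordering per group, the noise scale, and the `experience` covariate are all my assumptions — the source doesn't show the simulation step:

```python
import numpy as np
import pandas as pd

set_parameters = (
    (('no degree', 'CA', 'Riverside County'), (5000, 1250)),
    (('no degree', 'IL', 'Cook County'), (6500, 1150)),
    (('no degree', 'IL', 'Lake County'), (7000, 1350)),
    (('degree', 'CA', 'Riverside County'), (6000, 1250)),
    (('degree', 'IL', 'Cook County'), (7500, 1150)),
    (('degree', 'IL', 'Lake County'), (8000, 1350)),
)

rng = np.random.default_rng(42)
rows = []
for (degree, state, county), (intercept, slope) in set_parameters:
    # Assumed: salary is linear in an 'experience' covariate plus noise.
    x = rng.uniform(0, 20, size=50)
    y = intercept + slope * x + rng.normal(0, 500, size=50)
    rows += [{'degree': degree, 'state': state, 'county': county,
              'experience': xi, 'salary': yi} for xi, yi in zip(x, y)]
salary_df = pd.DataFrame(rows)
```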
# For use in Jupyter Notebook include the next line
%matplotlib inline
import pymc3 as pm

degree_indexes = degree_index['index'].values
degree_count = len(degree_indexes)
degree_state_indexes = degree_state_indexes_df['index_d'].values
degree_state_count = len(degree_state_indexes)
degree_state_county_indexes = degree_state_county_indexes_df['index_ds'].values
degree_state_county_count = len(degree_state_county_indexes)
import pandas as pd

# Index each of the unique variable values
degree_index = salary_df.groupby('degree').all().reset_index().reset_index()[['index', 'degree']]
degree_state_index = salary_df.groupby(['degree', 'state']).all().reset_index().reset_index()[['index', 'degree', 'state']]
degree_state_county_index = salary_df.groupby(['degree', 'state', 'county']).all().reset_index().reset_index()[['index', 'degree', 'state', 'county']]
degree_state_indexes_df = pd.merge(degree_index, degree_state_index, how='inner', on='degree', suffixes=('_d', '_ds'))
degree_state_county_indexes_df = pd.merge(degree_state_indexes_df, degree_state_county_index, how='inner', on=['degree', 'state'])
indexed_salary_df = pd.merge(salary_df, degree_state_county_indexes_df, how='inner', on=['degree', 'state', 'county']).reset_index()
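The chained `groupby(...).all().reset_index().reset_index()` idiom is dense; here is what it does on a toy frame:

```python
import pandas as pd

# groupby(...).all() collapses to one row per unique key, the first
# reset_index() turns the group key back into a column, and the second
# adds a fresh 0..n-1 'index' column we can use as an integer group id.
df = pd.DataFrame({'degree': ['no degree', 'no degree', 'degree'],
                   'state': ['CA', 'IL', 'IL'],
                   'salary': [40000, 45000, 60000]})
degree_index = df.groupby('degree').all().reset_index().reset_index()[['index', 'degree']]
# degree_index now maps 'degree' -> 0 and 'no degree' -> 1 (sorted group order).
```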
import pymc3 as pm

with pm.Model() as model:
    global_m = pm.Normal('global_m', mu=0, sd=100**2)
    global_m_sd = pm.Uniform('global_m_sd', lower=0, upper=1000)
    global_b = pm.Normal('global_b', mu=0, sd=100**2)
    global_b_sd = pm.Uniform('global_b_sd', lower=0, upper=1000)
    degree_m = pm.Normal('degree_m', mu=global_m, sd=global_m_sd, shape=degree_count)
    degree_m_sd = pm.Uniform('degree_m_sd', lower=0, upper=1000, shape=degree_count)
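The model is cut off before the likelihood, but the role the integer index arrays will play there can be illustrated without PyMC3: each data row picks up its group's slope and intercept by fancy indexing. A plain-NumPy sketch with made-up numbers (not the original model code):

```python
import numpy as np

# One slope/intercept per (degree, state, county) group...
group_m = np.array([1250.0, 1150.0, 1350.0])
group_b = np.array([5000.0, 6500.0, 7000.0])
# ...and, per data row, the index of the group it belongs to.
row_group = np.array([0, 0, 1, 2, 2])
x = np.array([1.0, 2.0, 3.0, 1.0, 4.0])   # e.g. years of experience

# Fancy indexing broadcasts each row's group parameters into the mean.
mu = group_m[row_group] * x + group_b[row_group]
```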
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pymc3 as pm
import pandas as pd

data = pd.read_csv('./radon.csv')
state_names = data.state.unique()
county_names = data.county.unique()
import numpy as np
import scipy.stats as ss

hypotheses = [[1/3, 1/3, 1/3],
              [.8, .1, .1],
              [.1, .8, .1],
              [.1, .1, .8]]
# Updated probabilities
pdf_score = np.array([ss.dirichlet.pdf(hypothesis, [1+1+1, 1+5+4, 1+2+4]) for hypothesis in hypotheses])
probabilities = pdf_score/pdf_score.sum()
list(zip(hypotheses, probabilities))
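As a sanity check on those concentration parameters: a Dirichlet's mean is its concentration vector normalized, so prior-plus-counts `[3, 10, 7]` implies the posterior mass centers on roughly a 15/50/35 mix:

```python
import numpy as np

# Posterior concentration = uniform prior (1 each) plus observed counts.
alpha = np.array([1 + 1 + 1, 1 + 5 + 4, 1 + 2 + 4])
posterior_mean = alpha / alpha.sum()   # [0.15, 0.5, 0.35]
```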
# Each hypothesis in our list has a third
# element now to represent cashews.
hypotheses = [[1/3, 1/3, 1/3],
              [.8, .1, .1],
              [.1, .8, .1],
              [.1, .1, .8]]
# When we evaluate our pdf for the Dirichlet
# we include a third element here to represent
# the additional nut type we observe.
import numpy as np
import scipy.stats as ss

# The walnut/almond mix ratios we have hypothesized
hypotheses = [[.8, .2],
              [.5, .5],
              [.2, .8]]
# Evaluate the pdf for each hypothesis.
# Note that we only evaluate the hypothesis
# for one nut: if it's 80% we know the
# other must be 20%.
pdf_score = np.array([ss.beta.pdf(hypothesis[0], 1+1, 1+5) for hypothesis in hypotheses])
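The snippet stops at the raw pdf scores; a self-contained continuation that normalizes them into posterior probabilities over the three hypotheses:

```python
import numpy as np
import scipy.stats as ss

hypotheses = [[.8, .2], [.5, .5], [.2, .8]]
# Beta(1+1, 1+5): a uniform prior updated with 1 walnut and 5 almonds.
pdf_score = np.array([ss.beta.pdf(h[0], 1 + 1, 1 + 5) for h in hypotheses])
probabilities = pdf_score / pdf_score.sum()
# Having seen mostly almonds, the 20/80 walnut/almond mix dominates.
```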
import numpy as np
import scipy.stats as ss

hypotheses = [[.8, .2],
              [.5, .5],
              [.2, .8]]
# Notice how we swapped out the Beta for
# a Dirichlet. The only difference is we
# now pass a list of counts to the pdf
# function. We'll get to why in a bit.
pdf_score = np.array([ss.dirichlet.pdf(hypothesis, [1+1+2, 1+5+3]) for hypothesis in hypotheses])
probabilities = pdf_score/pdf_score.sum()
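The "why" the comment hints at: a two-category Dirichlet is exactly a Beta, so the swap changes nothing. A quick numerical check with the same counts (my own sketch):

```python
import numpy as np
import scipy.stats as ss

# Dirichlet(alpha=[a, b]) evaluated at [p, 1-p] equals Beta(a, b) at p.
p, a, b = 0.8, 1 + 1 + 2, 1 + 5 + 3
beta_val = ss.beta.pdf(p, a, b)
dir_val = ss.dirichlet.pdf([p, 1 - p], [a, b])
```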