Greg Lamp glamp

268 followers · 28 following

Boulder, CO

View GitHub Profile

Recently created

Least recently created

Recently updated

Least recently updated

glamp / pandasql_readme.py

Created February 18, 2013 02:32


	from sklearn.datasets import load_iris
	import pandas as pd
	from pandasql import sqldf
	import re


	# Load the iris dataset and wrap its measurements in a DataFrame whose
	# columns are named after the dataset's feature names.
	iris = load_iris()
	iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
	# pd.Factor no longer exists in pandas; Categorical.from_codes is the
	# replacement. iris.target is used as the integer codes and
	# iris.target_names as the categories those codes index into.
	iris_df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)

glamp / pandasql_plot_births_and_babies.py

Last active April 2, 2018 12:52

	import matplotlib.pyplot as plt
	from pandasql import *
	import pandas as pd

	def pysqldf(q):
		"""Run query *q* through sqldf, passing this module's globals() as the environment."""
		# globals() is evaluated on every call, so names bound after this
		# definition are still included.
		return sqldf(q, globals())

	q = """
	SELECT
	m.date
	, m.beef

glamp / logistic_load_data.py

Last active December 14, 2015 10:39

	import pandas as pd
	import statsmodels.api as sm
	import pylab as pl
	import numpy as np

	# read the data in (fetched over HTTP into a DataFrame)
	df = pd.read_csv("http://www.ats.ucla.edu/stat/data/binary.csv")

	# take a look at the dataset
	# print(...) with a single argument behaves the same on Python 2 and 3
	print(df.head())

glamp / logistic_looking_at_the_data.py

Last active December 14, 2015 10:39

	# summarize the data
	# print(...) with a single argument behaves the same on Python 2 and 3
	print(df.describe())
	# admit gre gpa prestige
	# count 400.000000 400.000000 400.000000 400.00000
	# mean 0.317500 587.700000 3.389900 2.48500
	# std 0.466087 115.516536 0.380567 0.94446
	# min 0.000000 220.000000 2.260000 1.00000
	# 25% 0.000000 520.000000 3.130000 2.00000
	# 50% 0.000000 580.000000 3.395000 2.00000
	# 75% 1.000000 660.000000 3.670000 3.00000

glamp / logistic_prepping.py

Last active December 14, 2015 10:39

	# dummify rank: one indicator column per distinct value of df['prestige'],
	# each column name prefixed with 'prestige'
	dummy_ranks = pd.get_dummies(df['prestige'], prefix='prestige')
	# print(...) with a single argument behaves the same on Python 2 and 3
	print(dummy_ranks.head())
	# prestige_1 prestige_2 prestige_3 prestige_4
	# 0 0 0 1 0
	# 1 0 0 1 0
	# 2 1 0 0 0
	# 3 0 0 0 1
	# 4 0 0 0 1

glamp / logistic_do_regression.py

Last active December 25, 2018 03:25

	# Predictor columns: every column of `data` after the first one.
	# NOTE(review): presumably the first column is the response `admit` — confirm.
	train_cols = data.columns[1:]
	# Index([gre, gpa, prestige_2, prestige_3, prestige_4], dtype=object)

	# Set up a Logit model of data['admit'] on the predictor columns.
	logit = sm.Logit(data['admit'], data[train_cols])

	# fit the model
	result = logit.fit()

glamp / logistic_results.py

Last active December 14, 2015 10:39

	# cool enough to deserve its own gist
	# print(...) with a single argument behaves the same on Python 2 and 3
	print(result.summary())

glamp / logistic_conf_int.py

Last active December 14, 2015 10:39

	# look at the confidence interval of each coefficient
	# print(...) with a single argument behaves the same on Python 2 and 3
	print(result.conf_int())
	# 0 1
	# gre 0.000120 0.004409
	# gpa 0.153684 1.454391
	# prestige_2 -1.295751 -0.055135
	# prestige_3 -2.016992 -0.663416
	# prestige_4 -2.370399 -0.732529
	# intercept -6.224242 -1.755716

glamp / logistic_odds_ratio.py

Last active December 14, 2015 10:39

	# odds ratios only: exponentiate the fitted parameters
	# print(...) with a single argument behaves the same on Python 2 and 3
	print(np.exp(result.params))
	# gre 1.002267
	# gpa 2.234545
	# prestige_2 0.508931
	# prestige_3 0.261792
	# prestige_4 0.211938
	# intercept 0.018500

glamp / logistic_ci_and_est.py

Last active December 14, 2015 10:39

	# odds ratios and 95% CI
	params = result.params
	conf = result.conf_int()
	# Append the point estimates as a third column next to the interval bounds,
	# then label the three columns.
	conf['OR'] = params
	conf.columns = ['2.5%', '97.5%', 'OR']
	# Exponentiate bounds and estimates together.
	# print(...) with a single argument behaves the same on Python 2 and 3
	print(np.exp(conf))
	# 2.5% 97.5% OR
	# gre 1.000120 1.004418 1.002267
	# gpa 1.166122 4.281877 2.234545
	# prestige_2 0.273692 0.946358 0.508931