joseph-allen’s gists

joseph-allen / UKDSBrandedSeabornPlot.py

Created November 12, 2020 17:42

Branded Seaborn plot

joseph-allen / train_polynomial.py

Last active April 29, 2019 11:00

Quickly train a polynomial

	import pandas as pd
	import numpy as np

	from sklearn.linear_model import LinearRegression
	from sklearn.preprocessing import PolynomialFeatures
	from sklearn.metrics import mean_absolute_error

	def train_polynomial(df, train_upto,train_degree):
	# build Polynomial features up to degree train_degree
	p = PolynomialFeatures(degree=train_degree).fit(df[['dv','psi','temp1','temp2']])

joseph-allen / load_pickle.py

Created April 26, 2019 16:05

Demo of loading a pickle

	import pickle
	import pandas as pd
	import numpy as np

	# Create sample data: a single row with columns A, B and C.
	df = pd.DataFrame(columns=['A','B','C'])
	df.loc[0] = [12,42,'test']

	# Load the stored model. The context manager closes the file handle even
	# when unpickling raises, instead of leaving it open.
	# NOTE: pickle.load can execute arbitrary code while deserialising -- only
	# load pickle files that come from a trusted source.
	with open('Pickled_Model.pkl', 'rb') as model_file:
	    loaded_model = pickle.load(model_file)

joseph-allen / Polynomial_preprocessing.py

Created April 17, 2019 12:26

generates polynomial features out of any

	from sklearn.preprocessing import PolynomialFeatures

	# Columns to expand, named once so the fit, transform and naming steps
	# cannot drift apart. `df` and `pd` are expected to exist already
	# (this snippet does not define them).
	cols = ['feature1', 'feature2']

	# Fit the degree-2 polynomial expansion on the selected columns.
	p = PolynomialFeatures(degree=2).fit(df[cols])

	# Newer scikit-learn releases expose get_feature_names_out and no longer
	# provide get_feature_names; use whichever this installation offers.
	name_features = getattr(p, 'get_feature_names_out', None) or p.get_feature_names
	features = pd.DataFrame(p.transform(df[cols]), columns=name_features(cols))

	features.head()

joseph-allen / read_multiple

Created April 10, 2019 18:39

Read multiple files in the same directory

	import glob

	path = r'path to file' # use your path
	all_files = glob.glob(path + "/*.csv")

	li = []

	for filename in all_files:
	    # Fields may be separated by either ',' or ';'. A regular-expression
	    # separator is only handled by the python parser engine, so request it
	    # explicitly rather than relying on a fallback with a ParserWarning.
	    df = pd.read_csv(filename, index_col=None, header=0, sep=',|;', engine='python')
	    print(filename)

joseph-allen / plot_multiple_ylabels

Created December 4, 2018 14:00

plots multiple y axes on one graph

	def plot_multi(data, cols=None, spacing=.1, **kwargs):

	from pandas import plotting

	# Get default color style from pandas - can be changed to any other color list
	if cols is None: cols = data.columns
	if len(cols) == 0: return
	colors = getattr(getattr(plotting, '_style'), '_get_standard_colors')(num_colors=len(cols))

	# First axis

joseph-allen / plotly line plot

Created November 27, 2018 14:48

	import plotly as py
	py.tools.set_credentials_file(username='YOURE_USERNAME', api_key='YOUR API KEY')
	import cufflinks as cf
	import pandas as pd

	df = pd.read_csv('data.csv')

	py.plotly.iplot([{
	'x': df.var0,
	'y': df[col],

joseph-allen / plot_dates.py

Created November 26, 2018 12:47

Plot a pandas dataframe of x over some datetime

	import pandas as pd

	# Visualisation
	import matplotlib as mpl
	import matplotlib.pyplot as plt
	import matplotlib.dates as mdates

	# Configure visualisations
	%matplotlib inline
	mpl.style.use( 'ggplot' )

joseph-allen / sklearn example params

Created March 9, 2018 10:43

joseph-allen / laerning_curve

Created December 29, 2017 14:31

learning curve, kfold and gridsearch

	import matplotlib.pyplot as plt
	import numpy as np
	from sklearn.model_selection import GridSearchCV, StratifiedKFold, learning_curve
	from sklearn.ensemble import GradientBoostingClassifier


	def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
	n_jobs=-1, train_sizes=np.linspace(.1, 1.0, 5)):
	"""Generate a simple plot of the test and training learning curve"""
	plt.figure()

Joseph Allen joseph-allen