Rahul Agarwal MLWhiz

🤓

Focusing

Autodidact Data Scientist, Amateur Guitarist, Pessimistic Go-getter.

MLWhiz / associations.py

Created April 19, 2019 15:33

	# Replace missing values with 0 before computing the associations.
	player_df = player_df.fillna(0)
	# Columns named in catcols are passed as the nominal ones; the returned
	# value is kept in `results`.
	results = associations(
	    player_df,
	    nominal_columns=catcols,
	    return_results=True,
	)

MLWhiz / pairplot.py

Created April 19, 2019 15:34

	# Keep only rows whose Club AND Nationality are both in the lists below.
	club_mask = player_df['Club'].isin([
	    'FC Barcelona', 'Paris Saint-Germain', 'Manchester United',
	    'Manchester City', 'Chelsea', 'Real Madrid', 'FC Porto',
	    'FC Bayern München',
	])
	# NOTE(review): 'Brazil' is listed twice; isin() treats that the same as once.
	nationality_mask = player_df['Nationality'].isin([
	    'England', 'Brazil', 'Argentina',
	    'Brazil', 'Italy', 'Spain', 'Germany',
	])
	filtered_player_df = player_df[club_mask & nationality_mask]
	# Single call to create the pairplot over the four selected columns.
	g = sns.pairplot(filtered_player_df[['Value', 'SprintSpeed', 'Potential', 'Wage']])

MLWhiz / pairplot_hue.py

Created April 19, 2019 15:35

# Pairplot over the selected columns, using 'Club' as the hue variable.
pairplot_columns = ['Value', 'SprintSpeed', 'Potential', 'Wage', 'Club']
g = sns.pairplot(filtered_player_df[pairplot_columns], hue='Club')

MLWhiz / swarmplot_simple.py

Created April 19, 2019 15:35

	# Swarm plot of Wage (x axis) against Club (y axis).
	g = sns.swarmplot(
	    x='Wage',
	    y="Club",
	    data=filtered_player_df,
	    size=7,  # Decrease the size of the points to avoid crowding
	)
	# Remove the top and right lines of the graph.
	sns.despine()
	# Set the figure size to 14 x 10 inches, then display it.
	g.figure.set_size_inches(14, 10)
	plt.show()

MLWhiz / swarm_box.py

Created April 19, 2019 15:36

	# Box plot of Wage per Club, drawn first.
	g = sns.boxplot(
	    x='Wage',
	    y="Club",
	    data=filtered_player_df,
	    whis=np.inf,
	)
	# Swarm plot of the same data in black; `g` is rebound to its return value.
	g = sns.swarmplot(
	    x='Wage',
	    y="Club",
	    data=filtered_player_df,
	    size=7,  # Decrease the size of the points to avoid crowding
	    color='black',
	)
	# Remove the top and right lines of the graph.
	sns.despine()

MLWhiz / annotate_swarm_box.py

Created April 19, 2019 15:36

	# Highest wage among the filtered players and the name of a player earning it.
	max_wage = filtered_player_df['Wage'].max()
	# Build the mask from filtered_player_df itself. A mask taken from
	# player_df has a different index than the frame being indexed, which
	# makes pandas reindex the mask (and warn) before selecting rows.
	is_max_wage = filtered_player_df['Wage'] == max_wage
	# If several players share the maximum wage, the first one is used.
	max_wage_player = filtered_player_df[is_max_wage]['Name'].values[0]
	# Box plot of Wage per Club, drawn first.
	g = sns.boxplot(
	    y="Club",
	    x='Wage',
	    data=filtered_player_df,
	    whis=np.inf,
	)
	# Swarm plot of the same data in black; `g` is rebound to its return value.
	g = sns.swarmplot(
	    y="Club",
	    x='Wage',
	    data=filtered_player_df,
	    size=7,  # Decrease the size of the points to avoid crowding
	    color='black',
	)

MLWhiz / add_features.py

Created May 2, 2019 14:26

	import random
	import pandas as pd
	import numpy as np
	from multiprocessing import Pool

	def add_features(df):
	df['question_text'] = df['question_text'].apply(lambda x:str(x))
	df["lower_question_text"] = df["question_text"].apply(lambda x: x.lower())
	df['total_length'] = df['question_text'].apply(len)
	df['capitals'] = df['question_text'].apply(lambda comment: sum(1 for c in comment if c.isupper()))

MLWhiz / target_encoding.py

Last active July 28, 2019 18:46

	# taken from https://medium.com/@pouryaayria/k-fold-target-encoding-dfe9a594874b
	from sklearn import base
	from sklearn.model_selection import KFold

	class KFoldTargetEncoderTrain(base.BaseEstimator,
	base.TransformerMixin):
	def __init__(self,colnames,targetName,
	n_fold=5, verbosity=True,
	discardOriginal_col=False):
	self.colnames = colnames

MLWhiz / beta_sampler.py

Created June 3, 2019 17:46

	import random
	# Let's define our Beta function to generate s for any particular state. We don't care about the normalizing constant here.
	def beta_s(w,a,b):
	    """Return the unnormalized Beta(a, b) density evaluated at w.

	    Computes ``w**(a-1) * (1-w)**(b-1)``; the normalizing constant is
	    deliberately left out.

	    Args:
	        w: Point at which to evaluate the density.
	        a: First shape parameter of the Beta distribution.
	        b: Second shape parameter of the Beta distribution.

	    Returns:
	        The unnormalized density value at ``w``.
	    """
	    # The earlier form `w*(a-1)(1-w)**(b-1)` tried to call the number
	    # (a-1) and raised TypeError; the exponent and product operators
	    # are restored here.
	    return w**(a-1) * (1-w)**(b-1)

	# This function returns True if a coin with probability p of heads comes up heads when flipped.
	def random_coin(p):
	unif = random.uniform(0,1)
	if unif>=p:
	return False

MLWhiz / plot.py

Created June 3, 2019 17:47