This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replace missing values with 0 before computing associations.
player_df = player_df.fillna(0)

# Compute the associations between the columns of player_df, treating the
# columns named in `catcols` as nominal (categorical), and keep the result.
# NOTE(review): `associations` is presumably dython's nominal.associations;
# confirm against the imports of the notebook this snippet came from.
results = associations(player_df, nominal_columns=catcols, return_results=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Keep only the players whose club is one of the selected clubs AND whose
# nationality is one of the selected nations.
filtered_player_df = player_df[
    player_df['Club'].isin([
        'FC Barcelona', 'Paris Saint-Germain', 'Manchester United',
        'Manchester City', 'Chelsea', 'Real Madrid', 'FC Porto',
        'FC Bayern München',
    ])
    & player_df['Nationality'].isin([
        # 'Brazil' was listed twice originally; once is enough for isin().
        'England', 'Brazil', 'Argentina', 'Italy', 'Spain', 'Germany',
    ])
]

# Single line to create a pairplot of the four numeric columns of interest.
g = sns.pairplot(filtered_player_df[['Value', 'SprintSpeed', 'Potential', 'Wage']])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Same pairplot as before, but 'Club' is included in the selection so it can
# be passed as `hue` to colour the points by club.
g = sns.pairplot(
    filtered_player_df[['Value', 'SprintSpeed', 'Potential', 'Wage', 'Club']],
    hue='Club',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Swarm plot of wage per club: one point per player, clubs on the y axis.
g = sns.swarmplot(
    y="Club",
    x='Wage',
    data=filtered_player_df,
    # Decrease the size of the points to avoid crowding
    size=7,
)

# Remove the top and right lines (spines) of the graph.
sns.despine()

# Enlarge the figure that owns the axes returned by swarmplot, then render.
g.figure.set_size_inches(14, 10)
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Box plot of wage per club. whis=np.inf stretches the whiskers over the
# full data range, so no point is drawn as an outlier.
g = sns.boxplot(
    y="Club",
    x='Wage',
    data=filtered_player_df,
    whis=np.inf,
)

# Overlay the individual players as black points on top of the boxes.
g = sns.swarmplot(
    y="Club",
    x='Wage',
    data=filtered_player_df,
    # Decrease the size of the points to avoid crowding
    size=7,
    color='black',
)

# Remove the top and right lines (spines) of the graph.
sns.despine()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Highest wage among the filtered players, and the name of the (first)
# player who earns it.
max_wage = filtered_player_df.Wage.max()
# Build the mask from filtered_player_df itself. The original used a mask
# computed on player_df, a different (larger) frame, which only works through
# implicit index realignment.
max_wage_player = filtered_player_df[
    filtered_player_df['Wage'] == max_wage
]['Name'].values[0]

# Box plot of wage per club. whis=np.inf stretches the whiskers over the
# full data range, so no point is drawn as an outlier.
g = sns.boxplot(
    y="Club",
    x='Wage',
    data=filtered_player_df,
    whis=np.inf,
)

# Overlay the individual players as black points on top of the boxes.
g = sns.swarmplot(
    y="Club",
    x='Wage',
    data=filtered_player_df,
    # Decrease the size of the points to avoid crowding
    size=7,
    color='black',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
import pandas as pd | |
import numpy as np | |
from multiprocessing import Pool | |
def add_features(df): | |
df['question_text'] = df['question_text'].apply(lambda x:str(x)) | |
df["lower_question_text"] = df["question_text"].apply(lambda x: x.lower()) | |
df['total_length'] = df['question_text'].apply(len) | |
df['capitals'] = df['question_text'].apply(lambda comment: sum(1 for c in comment if c.isupper())) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# taken from https://medium.com/@pouryaayria/k-fold-target-encoding-dfe9a594874b | |
from sklearn import base | |
from sklearn.model_selection import KFold | |
class KFoldTargetEncoderTrain(base.BaseEstimator, | |
base.TransformerMixin): | |
def __init__(self,colnames,targetName, | |
n_fold=5, verbosity=True, | |
discardOriginal_col=False): | |
self.colnames = colnames |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import random | |
# Let's define our Beta function to generate s for any particular state.
# We don't care about the normalizing constant here.
def beta_s(w, a, b):
    """Return the unnormalized Beta(a, b) density evaluated at w.

    Computes ``w**(a-1) * (1-w)**(b-1)``; the normalizing constant of the
    Beta distribution is deliberately left out.

    Args:
        w: Point at which to evaluate the density.
        a: First shape parameter of the Beta distribution.
        b: Second shape parameter of the Beta distribution.

    Returns:
        The value of ``w**(a-1) * (1-w)**(b-1)``.
    """
    return w**(a-1)*(1-w)**(b-1)
# Return True if a coin whose probability of heads is p comes up heads when flipped.
def random_coin(p): | |
unif = random.uniform(0,1) | |
if unif>=p: | |
return False |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pylab as pl | |
import scipy.special as ss | |
%matplotlib inline | |
pl.rcParams['figure.figsize'] = (17.0, 4.0) | |
# Actual Beta PDF. | |
def beta(a, b, i): | |
e1 = ss.gamma(a + b) | |
e2 = ss.gamma(a) |