This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_rfe_feature_selection(df, target_feature, n_features=3):
    """Select predictors with recursive feature elimination (RFE).

    Fits an ``RFE`` selector wrapped around a ``LogisticRegression``
    (``lbfgs`` solver), using every column of ``df`` except
    ``target_feature`` as a predictor. Prints the selector's support mask
    and ranking, then returns the names of the retained predictors.

    Args:
        df: DataFrame holding both the predictor columns and the target.
        target_feature: Name of the target column; excluded from the
            predictors.
        n_features: Number of features RFE is asked to keep.

    Returns:
        List of predictor column names flagged in ``rfe.support_``.

    Raises:
        ValueError: If ``target_feature`` is not among ``df.columns``.
    """
    predictor_columns = list(df.columns)
    predictor_columns.remove(target_feature)

    logreg = LogisticRegression(solver='lbfgs')
    # Pass the feature count by keyword: recent scikit-learn releases no
    # longer accept it as a positional argument.
    rfe = RFE(logreg, n_features_to_select=n_features)
    rfe = rfe.fit(df[predictor_columns], df[target_feature].values.ravel())

    # support_ has one entry per fitted predictor, so it must be paired with
    # predictor_columns; pairing it with df.columns (which still contains the
    # target) shifts the names whenever the target is not the last column.
    filtered_rfe_list = [
        column for column, keep in zip(predictor_columns, rfe.support_) if keep
    ]
    print(rfe.support_)
    print(rfe.ranking_)
    return filtered_rfe_list
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_dummies_from_categorical_column(df, categorical_features):
    """Replace categorical string columns with prefixed indicator columns.

    For each name in ``categorical_features`` the column is expanded with
    ``Series.str.get_dummies()``; every resulting indicator column is renamed
    to ``"<feature>_<value>"`` and joined onto a copy of ``df``. The original
    categorical columns are dropped from the copy afterwards.

    Args:
        df: Source DataFrame. It is copied, never modified.
        categorical_features: Iterable of column names to expand. The columns
            must support the ``.str`` accessor.

    Returns:
        A new DataFrame with the categorical columns removed and their
        indicator columns appended.
    """
    # Materialize once: the names are needed for the message, the loop and
    # the final drop, and a one-shot iterable would be empty after first use.
    features = list(categorical_features)
    print('changing {}'.format(features))

    df_copy = df.copy()
    for feature in features:
        df_temp = df_copy[feature].str.get_dummies()
        df_temp.columns = [
            '{}_{}'.format(feature, column) for column in df_temp.columns
        ]
        # rsuffix only takes effect if an indicator name collides with an
        # existing column name.
        df_copy = df_copy.join(df_temp, rsuffix='_dept')
    df_copy.drop(features, axis=1, inplace=True)
    return df_copy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def stack_plot(title, ylabel, dates, val_pairs, ymax=None, legend=None):
    """Set up a 20x10 inch figure with a titled, gridded axis.

    The x-axis is limited to ``[0, len(dates)]``, minor ticks are enabled and
    a solid black major grid is drawn along the y-axis.

    NOTE(review): ``val_pairs``, ``ymax`` and ``legend`` are not used by the
    code visible here; the snippet looks cut off before any series are
    drawn - confirm against the full source.
    """
    # get series values
    figure, axis = plt.subplots(1, 1)
    figure.set_size_inches(20, 10)

    axis.set_title(title, size=20)
    axis.set_ylabel(ylabel, size=20)
    axis.set_xlim(0, len(dates))
    axis.minorticks_on()
    axis.grid(which='major', axis='y', linestyle='-', linewidth=2, color='k')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pairwise relationship grid over the columns of `iris`, colored by the
# "species" column.
sns.pairplot(data=iris, hue="species")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_box_and_scatter_plot(df, feature_1, feature_2, target_feature, figsize=(10, 5)):
    """Draw two box plots side by side plus a separate scatter plot.

    The first figure holds one box plot per feature, each grouped by
    ``target_feature`` and titled with the feature name. The second figure
    plots ``feature_1`` against ``feature_2`` with points colored by
    ``target_feature``. Both figures use ``figsize``.
    """
    _, box_axes = plt.subplots(1, 2, figsize=figsize)
    for box_ax, feature in zip(box_axes, (feature_1, feature_2)):
        chart = sns.boxplot(x=target_feature, y=feature, data=df, ax=box_ax)
        chart.set_title(feature)

    _, scatter_ax = plt.subplots(1, 1, figsize=figsize)
    sns.scatterplot(
        x=feature_1,
        y=feature_2,
        hue=target_feature,
        data=df,
        ax=scatter_ax,
    )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_heatmap(df, figsize=(10, 10)):
    """Plot the correlation matrix of ``df`` as an annotated heatmap.

    Args:
        df: DataFrame whose ``corr()`` matrix is plotted.
        figsize: Size passed to ``plt.subplots`` for the figure.
    """
    correlations = df.corr()

    _, heatmap_ax = plt.subplots(figsize=figsize)
    heatmap_ax.set_title("Heatmap")
    sns.heatmap(correlations, ax=heatmap_ax, annot=True, linewidths=.5)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count of rows per 'sales' value, one facet per value of the 'left' column.
# Uses catplot/height: seaborn 0.9 renamed factorplot -> catplot and
# size -> height, and later releases removed the old names.
sns.catplot(x='sales', data=salary_df, col='left', kind='count', aspect=.8, height=10)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_layered_boxplot(df, features, target_feature, subtitle='layered boxplot', kind='count', figsize=(20, 7)):
    """Draw one box plot per feature, side by side, grouped by the target.

    Args:
        df: DataFrame containing ``features`` and ``target_feature``.
        features: Sequence of column names; each gets its own subplot, in
            order from left to right.
        target_feature: Column used for the x-axis grouping of every plot.
        subtitle: Title placed above the whole figure.
        kind: Unused; kept so existing callers that pass it keep working.
        figsize: Size passed to ``plt.subplots`` for the figure.
    """
    n_charts = len(features)
    # squeeze=False always yields a 2-D array of axes, so a single feature
    # no longer fails when the lone Axes object is indexed.
    fig, axes = plt.subplots(ncols=n_charts, figsize=figsize, squeeze=False)
    fig.suptitle(subtitle, fontsize=16)
    for feature, ax in zip(features, axes[0]):
        sns.boxplot(x=target_feature, y=feature, data=df, ax=ax)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def draw_plot(df,features,subtitle,figsize=(20,3),type='distplot'): | |
n_charts = len(features) | |
fig, axes = plt.subplots(ncols=n_charts,figsize=figsize) | |
fig.suptitle(subtitle, fontsize=16) | |
for i in range(n_charts): | |
feature = features[i-1] | |
if len(features) >1: | |
ax=axes[i-1] | |
else: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_pie(df, target_variable, figsize=(10, 10)):
    """Print the value counts of a column and show them as a pie chart.

    Args:
        df: DataFrame containing ``target_variable``.
        target_variable: Column whose value counts are printed and plotted;
            also used as the chart title.
        figsize: Size passed to ``plt.subplots`` for the figure.
    """
    counts = df[target_variable].value_counts()
    print(counts)

    _, pie_ax = plt.subplots(figsize=figsize)
    pie_ax.pie(
        counts.values,
        labels=counts.index,
        autopct='%1.2f%%',
        textprops={'fontsize': 20},
    )
    # Equal aspect ratio so the pie is drawn as a circle.
    pie_ax.axis('equal')
    plt.title(target_variable)
    plt.show()