Skip to content

Instantly share code, notes, and snippets.

View VincentTatan's full-sized avatar

vincentkernn VincentTatan

View GitHub Profile
def create_rfe_feature_selection(df, target_feature, n_features=3):
    """Select predictors with recursive feature elimination (RFE).

    Fits an ``RFE`` selector wrapped around an lbfgs ``LogisticRegression``
    on every column of ``df`` except ``target_feature``, prints the
    selector's support mask and ranking, and returns the selected names.

    Args:
        df: DataFrame holding the predictor columns and the target column.
        target_feature: Name of the target column; it is removed from the
            candidate predictors before fitting.
        n_features: Number of features the selector should keep.

    Returns:
        List of the predictor column names flagged in ``rfe.support_``.

    Raises:
        ValueError: If ``target_feature`` is not a column of ``df``.
    """
    columns_list = list(df.columns)
    columns_list.remove(target_feature)

    logreg = LogisticRegression(solver='lbfgs')
    # Passed by keyword: recent scikit-learn releases no longer accept
    # n_features_to_select as a positional argument.
    rfe = RFE(logreg, n_features_to_select=n_features)
    rfe = rfe.fit(df[columns_list], df[target_feature].values.ravel())

    # support_ has one entry per fitted predictor, so it must be paired with
    # columns_list. Pairing it with df.columns (which still contains the
    # target) shifts the mask whenever the target is not the last column.
    filtered_rfe_list = [
        name for name, keep in zip(columns_list, rfe.support_) if keep
    ]
    print(rfe.support_)
    print(rfe.ranking_)
    return filtered_rfe_list
def create_dummies_from_categorical_column(df, categorical_features):
    """Replace string categorical columns with prefixed indicator columns.

    For each name in ``categorical_features`` the column is expanded with
    ``Series.str.get_dummies()``, the resulting columns are renamed to
    ``"<feature>_<value>"`` and joined onto a copy of ``df``. The original
    categorical columns are dropped from the copy at the end.

    Args:
        df: Source DataFrame; it is not modified.
        categorical_features: Iterable of column names to expand. It is
            materialized once, so one-shot iterators are handled correctly.

    Returns:
        A new DataFrame with the indicator columns appended and the
        categorical columns removed.
    """
    df_copy = df.copy()
    # Materialize once: the names are printed, iterated and dropped, and a
    # generator would be exhausted after the first of those uses.
    features = list(categorical_features)
    print('changing {}'.format(features))
    for feature in features:
        df_temp = df_copy[feature].str.get_dummies()
        df_temp.columns = [
            '{}_{}'.format(feature, column) for column in df_temp.columns
        ]
        # rsuffix only applies if an indicator name collides with an
        # existing column of df_copy.
        df_copy = df_copy.join(df_temp, rsuffix='_dept')
    df_copy.drop(features, axis=1, inplace=True)
    return df_copy
def stack_plot(title, ylabel, dates, val_pairs, ymax=None, legend=None):
    """Prepare a 20 x 10 inch figure and axes spanning ``dates``.

    Sets the title and y-axis label (both at size 20), limits the x-axis to
    ``0 .. len(dates)``, enables minor ticks and draws a solid black major
    grid along the y-axis.

    Args:
        title: Text for the axes title.
        ylabel: Text for the y-axis label.
        dates: Sized collection; only its length is used here.
        val_pairs: Not used by the visible portion of this function.
        ymax: Not used by the visible portion of this function.
        legend: Not used by the visible portion of this function.
    """
    figure, axis = plt.subplots(1, 1)
    figure.set_size_inches(20, 10)

    axis.set_title(title, size=20)
    axis.set_ylabel(ylabel, size=20)
    axis.set_xlim(0, len(dates))

    axis.minorticks_on()
    axis.grid(which='major', axis='y', linestyle='-', linewidth=2, color='k')
# Pairwise-relationship grid for `iris`, colored by its "species" column.
# NOTE(review): `iris` is defined outside this snippet.
sns.pairplot(data=iris, hue="species")
def create_box_and_scatter_plot(df, feature_1, feature_2, target_feature, figsize=(10, 5)):
    """Plot two features against a target as box plots plus one scatter plot.

    The first figure holds two side-by-side box plots, one per feature,
    grouped by ``target_feature`` and titled with the feature name. A second
    figure holds a scatter plot of ``feature_1`` against ``feature_2`` with
    points colored by ``target_feature``.

    Args:
        df: DataFrame containing all three columns.
        feature_1: Column for the left box plot and the scatter x-axis.
        feature_2: Column for the right box plot and the scatter y-axis.
        target_feature: Column used for box-plot grouping and scatter hue.
        figsize: Size passed to both figures.
    """
    _, box_axes = plt.subplots(1, 2, figsize=figsize)
    for box_axis, feature in zip(box_axes, (feature_1, feature_2)):
        boxes = sns.boxplot(x=target_feature, y=feature, data=df, ax=box_axis)
        boxes.set_title(feature)

    _, scatter_axis = plt.subplots(1, 1, figsize=figsize)
    sns.scatterplot(
        x=feature_1,
        y=feature_2,
        hue=target_feature,
        data=df,
        ax=scatter_axis,
    )
def create_heatmap(df, figsize=(10, 10)):
    """Draw an annotated heatmap of the correlation matrix of ``df``.

    Args:
        df: DataFrame whose ``corr()`` matrix is plotted.
        figsize: Size of the figure that hosts the heatmap.
    """
    correlations = df.corr()
    _, axis = plt.subplots(figsize=figsize)
    axis.set_title("Heatmap")
    sns.heatmap(
        correlations,
        ax=axis,
        annot=True,  # write each coefficient inside its cell
        linewidths=0.5,
    )
# Count plot of the 'sales' column, one facet per value of the 'left' column.
# Uses catplot/height, the current names for the deprecated factorplot/size.
# NOTE(review): `salary_df` is defined outside this snippet.
sns.catplot(x='sales', data=salary_df, col='left', kind='count', aspect=.8, height=10)
def create_layered_boxplot(df, features, target_feature, subtitle='layered boxplot', kind='count', figsize=(20, 7)):
    """Draw one box plot per feature, side by side, grouped by a target.

    Args:
        df: DataFrame containing ``features`` and ``target_feature``.
        features: Sized sequence of column names; each gets its own subplot.
        target_feature: Column used as the x-axis grouping of every box plot.
        subtitle: Text placed above the row of subplots.
        kind: Unused; kept so existing callers remain valid.
        figsize: Size of the whole figure.
    """
    # squeeze=False always returns a 2-D array of axes, so a single feature
    # works too (the default would hand back a bare Axes that cannot be
    # indexed).
    fig, axes = plt.subplots(ncols=len(features), figsize=figsize, squeeze=False)
    fig.suptitle(subtitle, fontsize=16)
    for feature, ax in zip(features, axes[0]):
        sns.boxplot(x=target_feature, y=feature, data=df, ax=ax)
def draw_plot(df,features,subtitle,figsize=(20,3),type='distplot'):
# Creates a single row of subplots with one column per entry of `features`
# and puts `subtitle` above it.
# NOTE(review): this snippet is cut off after the `else:` below and its
# indentation has been lost; the else-branch body and the plotting calls
# (including any use of `df` and `type`) are not visible here.
n_charts = len(features)
fig, axes = plt.subplots(ncols=n_charts,figsize=figsize)
fig.suptitle(subtitle, fontsize=16)
for i in range(n_charts):
# i-1 is -1 on the first pass, so iteration starts at the last feature;
# axes[i-1] below uses the same offset, keeping feature and axes aligned.
feature = features[i-1]
# With a single column plt.subplots returns a bare Axes rather than an
# array, so indexing is only done when there is more than one feature.
if len(features) >1:
ax=axes[i-1]
else:
def create_pie(df, target_variable, figsize=(10, 10)):
    """Print and plot the value distribution of one column as a pie chart.

    Args:
        df: DataFrame containing ``target_variable``.
        target_variable: Column whose value counts are printed and plotted;
            also used as the chart title.
        figsize: Size of the figure.
    """
    # Compute the counts once and reuse them for printing, sizes and labels.
    counts = df[target_variable].value_counts()
    print(counts)

    _, ax = plt.subplots(figsize=figsize)
    ax.pie(
        counts.values,
        labels=counts.index,
        autopct='%1.2f%%',
        textprops={'fontsize': 20},
    )
    # Equal aspect ratio so the pie is drawn as a circle.
    ax.axis('equal')
    plt.title(target_variable)
    plt.show()