Skip to content

Instantly share code, notes, and snippets.

View gvyshnya's full-sized avatar

George Vyshnya gvyshnya

View GitHub Profile
@gvyshnya
gvyshnya / fw_binning_continual_vars.py
Created April 19, 2021 21:50
An example demonstrating how to bin continuous variables with featurewiz
# Bin (discretize) the selected numeric columns using Gaussian mixture models.
# Keys of `bin_these` are the column names to bin; the values are presumably
# the number of bins requested for each column -- confirm against the
# featurewiz documentation.
bin_these = {'cont0': 4, 'cont1': 5, 'cont3': 2, 'cont4': 2, 'cont6': 3, 'cont8': 3, 'cont10': 10}
# NOTE(review): `FW`, `train` and `test` are not defined in this snippet; `FW`
# is assumed to be the imported featurewiz module. Both `train` and `test` are
# rebound to the results returned by the call.
train, test = FW.FE_discretize_numeric_variables(train,bin_these,test=test, strategy='gaussian')
@gvyshnya
gvyshnya / fw_add_interaction_Features.py
Created April 19, 2021 21:47
FW Add Interaction feature snippet
# Pairs of column names to build interaction variables from; each tuple holds
# one pair. Per the docstring of FE_create_interaction_vars, every listed
# variable must be numeric.
intxn_vars = [('cont3', 'cont7'),('cont3', 'cont8'),('cont3', 'cont9'),('cont3', 'cont10'),('cont4', 'cont5'),
('cont4', 'cont6'),('cont4', 'cont9'),('cont4', 'cont10')]
def FE_create_interaction_vars(df, intxn_vars):
"""
This handy function creates interaction variables among pairs of numeric vars you send in.
Your input must be a dataframe and a list of tuples. Each tuple must contain a pair of variables.
All variables must be numeric. Double check your input before sending them in.
"""
df = df.copy(deep=True)
@gvyshnya
gvyshnya / featurewiz.py
Created April 18, 2021 20:01
Featurewiz Snippet
from featurewiz import featurewiz
# featurewiz always returns a tuple containing two objects.
# In our case, these are the "train" and "test" dataframes, reduced to the important features selected by featurewiz.
trainm, testm = featurewiz(
df_train,
target='target',
corr_limit=0.7,
verbose=2,
sep=",",
@gvyshnya
gvyshnya / AutoViz Feb-21 Tab Contest
Created April 7, 2021 19:01
AutoViz Feb-21 Tab Contest
from autoviz.AutoViz_Class import AutoViz_Class
AV = AutoViz_Class()
dftc = AV.AutoViz(
filename='',
sep='' ,
depVar='target',
dfte=df_train,
header=0,
verbose=2,
@gvyshnya
gvyshnya / autoviz.py
Created October 18, 2020 20:54
AutoViz Invocation
from autoviz.AutoViz_Class import AutoViz_Class
AV = AutoViz_Class()
dft = AV.AutoViz(filename='',
sep='' ,
depVar='cp_type',
dfte=training_gcols,
header=0,
verbose=2,
lowess=False,
@gvyshnya
gvyshnya / sweetviz_compare_two_datasets.py
Created October 18, 2020 19:11
SweetViz - Comparing Two Datasets
import sweetviz as sv
# Compare the two datasets `df_tr` and `df_cntr` with SweetViz, with pairwise
# analysis switched on.
# NOTE(review): despite its name, `df1` holds whatever `sv.compare` returns
# (presumably a SweetViz report object, not a DataFrame) -- confirm in the docs.
df1 = sv.compare(df_tr, df_cntr, pairwise_analysis='on')
# Render the comparison as HTML; the argument is presumably the output file
# path (it is defined outside this snippet).
df1.show_html(cells_tr_cntr_comparison_path)
@gvyshnya
gvyshnya / sweetviz_feature_to_target_analysis.py
Created October 18, 2020 19:01
SweetViz Target-to-Feature Analysis
import sweetviz as sv
# Analyze `final_training_df` with SweetViz, passing `target_label` as the
# second argument (presumably the target feature the other columns are
# analyzed against -- confirm in the docs), with pairwise analysis switched on.
# NOTE(review): despite its name, `df1` holds whatever `sv.analyze` returns
# (presumably a SweetViz report object, not a DataFrame).
df1 = sv.analyze(final_training_df, target_label, pairwise_analysis='on')
# Render the report as HTML; the argument is presumably the output file path
# (it is defined outside this snippet).
df1.show_html(sv_report_path)
@gvyshnya
gvyshnya / plotly_facet_plot_with_violin_subplots.py
Created October 18, 2020 18:53
Plotly Facet Plot with Violin Subplots
def visualize_features_vs_target_label(df_data, label, feature_list, n_cols=3):
if len(feature_list) % n_cols == 0:
number_of_rows = int(len(feature_list)/n_cols)
else:
number_of_rows = int(len(feature_list)/n_cols) +1
fig = make_subplots(rows=number_of_rows, cols=n_cols)
@gvyshnya
gvyshnya / AutoViz_Minor_Issue
Last active October 7, 2020 21:36
AutoViz Minor Issue: crash on a dataset without any significant/important variable
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import datetime as dt
from typing import Tuple
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
@gvyshnya
gvyshnya / AutoViz_Issues
Created October 3, 2020 20:21
AutoViz Issues
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import datetime as dt
from typing import Tuple
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px