Skip to content

Instantly share code, notes, and snippets.

@jrzaurin
jrzaurin / datasets_basic_stats.csv
Created June 12, 2021 17:33
datasets basic stats
Dataset n_rows n_cols objective neg_pos_ratio
adult 45222 15 binary_classification 0.3295
bank_marketing 41188 20 binary_classification 0.127
nyc_taxi 1458644 26 regression NA
facebook_comments_vol 199029 54 regression NA
@jrzaurin
jrzaurin / train_tabtransformer.py
Created February 19, 2021 23:31
Train TabTransformer
# Trainer for a binary objective that reports accuracy.
trainer = Trainer(
    model,
    objective="binary",
    metrics=[Accuracy],
)

# Only the tabular input (X_tab) is passed to fit.
trainer.fit(
    X_tab=X_tab,
    target=target,
    n_epochs=5,
    batch_size=256,
    val_split=0.2,
)
@jrzaurin
jrzaurin / tabtransformer.py
Created February 19, 2021 23:23
TabTransformer
# WideDeep is imported alongside TabTransformer so this snippet runs on its
# own; it is used below to wrap the transformer.
from pytorch_widedeep.models import TabTransformer, WideDeep

# Build the TabTransformer from the column index and embedding setup exposed
# by the already-fitted `tab_preprocessor`, plus the continuous columns.
tabtransformer = TabTransformer(
    column_idx=tab_preprocessor.column_idx,
    embed_input=tab_preprocessor.embeddings_input,
    continuous_cols=cont_cols,
    shared_embed=True,
    num_blocks=3,
)

# Use the transformer as the `deeptabular` component of the WideDeep model.
model = WideDeep(deeptabular=tabtransformer)
@jrzaurin
jrzaurin / tabtransformer_preprocessing.py
Last active February 22, 2021 08:57
TabTransformer preprocessing
# Column names handed to the preprocessor as `embed_cols`.
embed_cols = [
    "workclass",
    "education",
    "marital_status",
    "occupation",
    "relationship",
    "race",
]
tab_preprocessor = TabPreprocessor(
embed_cols=embed_cols,
@jrzaurin
jrzaurin / train_tabresnet.py
Created February 19, 2021 22:59
Train TabResnet
# Trainer for a binary objective that reports accuracy.
trainer = Trainer(
    model,
    objective="binary",
    metrics=[Accuracy],
)

# Only the tabular input (X_tab) is passed to fit.
trainer.fit(
    X_tab=X_tab,
    target=target,
    n_epochs=5,
    batch_size=256,
    val_split=0.2,
)
@jrzaurin
jrzaurin / tabresnet.py
Created February 19, 2021 22:54
TabResnet
from pytorch_widedeep.models import TabResnet
tabresnet = TabResnet(
    # Inputs: taken from the already-fitted `tab_preprocessor`, plus the
    # continuous columns.
    column_idx=tab_preprocessor.column_idx,
    embed_input=tab_preprocessor.embeddings_input,
    continuous_cols=cont_cols,
    # Layer sizes for the blocks and for the MLP.
    blocks_dims=[200, 100, 100],
    mlp_hidden_dims=[100, 50],
    # Batch-normalisation flag for the continuous columns.
    batchnorm_cont=True,
)
@jrzaurin
jrzaurin / train_tabmlp.py
Last active February 19, 2021 22:54
Train a TabMlp model
from pytorch_widedeep import Trainer
from pytorch_widedeep.metrics import Accuracy
# Trainer for a binary objective that reports accuracy.
trainer = Trainer(
    model,
    objective="binary",
    metrics=[Accuracy],
)

# Only the tabular input (X_tab) is passed to fit.
trainer.fit(
    X_tab=X_tab,
    target=target,
    n_epochs=5,
    batch_size=256,
    val_split=0.2,
)
@jrzaurin
jrzaurin / tabmlp.py
Last active February 19, 2021 22:54
TabMlp
from pytorch_widedeep.models import TabMlp, WideDeep
# Build the MLP from the column index and embedding setup exposed by the
# already-fitted `tab_preprocessor`, plus the continuous columns.
tabmlp = TabMlp(
    column_idx=tab_preprocessor.column_idx,
    embed_input=tab_preprocessor.embeddings_input,
    continuous_cols=cont_cols,
    mlp_hidden_dims=[200, 100],
    batchnorm_cont=True,
)

# Use the MLP as the `deeptabular` component of the WideDeep model.
model = WideDeep(deeptabular=tabmlp)
@jrzaurin
jrzaurin / tab_preprocessing.py
Last active February 22, 2021 08:19
tabular data preparation
from pytorch_widedeep.preprocessing import TabPreprocessor
# Define the embedding and continuous columns, and the target.
# Each entry pairs a column name with an integer (presumably the embedding
# dimension for that column -- confirm against the TabPreprocessor docs).
embed_cols = [
    ("workclass", 6),
    ("education", 8),
    ("marital_status", 6),
    ("occupation", 8),
    ("relationship", 6),
    ("race", 6),
]
@jrzaurin
jrzaurin / adult_census_data_preparation.py
Last active February 19, 2021 22:55
Adult census data preparation
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Load the zipped Adult census CSV.
adult = pd.read_csv("data/adult/adult.csv.zip")

# Replace hyphens with underscores in every column name.
adult.columns = [name.replace("-", "_") for name in adult.columns]

# Binary target: 1 when the income string contains ">50K", 0 otherwise.
adult["income_label"] = (
    adult["income"]
    .apply(lambda income: ">50K" in income)
    .astype(int)
)

# The raw income column is dropped once the label has been derived from it.
adult = adult.drop(columns="income")
for c in adult.columns: