Skip to content

Instantly share code, notes, and snippets.

@kyoto-cheng
Last active September 17, 2021 03:10
Show Gist options
  • Save kyoto-cheng/23472127a85fb845a509bde6233f3532 to your computer and use it in GitHub Desktop.
Save kyoto-cheng/23472127a85fb845a509bde6233f3532 to your computer and use it in GitHub Desktop.
# pip install sdv
# importing the necessary libraries
import numpy as np
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# import all 4 sdv models under the single table scenario
from sdv.tabular import GaussianCopula
from sdv.tabular import CTGAN
from sdv.tabular import CopulaGAN
from sdv.tabular import TVAE
# NOTE(review): `data` is not defined anywhere in this snippet; presumably it
# is the training table (with a CLIENTNUM column) loaded beforehand -- confirm.

# instantiate the four single-table synthesizers, all keyed on CLIENTNUM
model_Gaussian = GaussianCopula(primary_key='CLIENTNUM')
model_CTGAN = CTGAN(primary_key='CLIENTNUM')
model_CopulaGAN = CopulaGAN(primary_key='CLIENTNUM')
model_TVAE = TVAE(primary_key='CLIENTNUM')

# keep the models in one fixed order so every phase below walks them
# in the same sequence: Gaussian, CTGAN, CopulaGAN, TVAE
_models = (model_Gaussian, model_CTGAN, model_CopulaGAN, model_TVAE)

# phase 1: train every model on the same input table
for _model in _models:
    _model.fit(data)

# phase 2: ask each trained model for 10000 synthetic rows
(
    new_data_model_Gaussian,
    new_data_model_CTGAN,
    new_data_model_CopulaGAN,
    new_data_model_TVAE,
) = [_model.sample(10000) for _model in _models]

# phase 3: persist each trained model to its own pkl file
_pkl_paths = (
    'model_Gaussian.pkl',
    'model_CTGAN.pkl',
    'model_CopulaGAN.pkl',
    'model_TVAE.pkl',
)
for _model, _pkl_path in zip(_models, _pkl_paths):
    _model.save(_pkl_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment