Created
July 12, 2023 01:11
-
-
Save abhijeet-talaulikar/60c612ffd381b559d046a57fe05ca829 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import csv | |
import random | |
import string | |
### Create full dataset of 5 million credit card customers ### | |
def generate_cust_id(length: int = 7) -> str:
    """Return a random customer ID made of uppercase ASCII letters and digits.

    Args:
        length: Number of characters in the ID (default 7).

    Note:
        Characters are drawn independently on every call, so IDs are not
        guaranteed to be unique across calls.
    """
    alphabet = string.ascii_uppercase + string.digits
    return ''.join(random.choices(alphabet, k=length))
def generate_age(low: int = 25, high: int = 35) -> int:
    """Return a random integer age in the inclusive range [low, high].

    Args:
        low: Smallest value that can be returned (default 25).
        high: Largest value that can be returned (default 35).
    """
    return random.randint(low, high)
def generate_num_credit_lines(low: int = 1, high: int = 3) -> int:
    """Return a random integer number of credit lines in [low, high].

    Args:
        low: Smallest value that can be returned (default 1).
        high: Largest value that can be returned (default 3).
    """
    return random.randint(low, high)
def generate_income_tier(low: int = 1, high: int = 3) -> int:
    """Return a random integer income tier in the inclusive range [low, high].

    Args:
        low: Smallest tier that can be returned (default 1).
        high: Largest tier that can be returned (default 3).
    """
    return random.randint(low, high)
def generate_spend_grocery(low: float = 0.1, high: float = 0.6) -> float:
    """Return a random grocery spend value, rounded to 2 decimals.

    The value is drawn uniformly between ``low`` and ``high``.

    Args:
        low: Lower bound of the draw (default 0.1).
        high: Upper bound of the draw (default 0.6).
    """
    return round(random.uniform(low, high), 2)
def generate_spend_gas(low: float = 0.1, high: float = 0.6) -> float:
    """Return a random gas spend value, rounded to 2 decimals.

    The value is drawn uniformly between ``low`` and ``high``.

    Args:
        low: Lower bound of the draw (default 0.1).
        high: Upper bound of the draw (default 0.6).
    """
    return round(random.uniform(low, high), 2)
def generate_spend_dineout(low: float = 0.1, high: float = 0.6) -> float:
    """Return a random dine-out spend value, rounded to 2 decimals.

    The value is drawn uniformly between ``low`` and ``high``.

    Args:
        low: Lower bound of the draw (default 0.1).
        high: Upper bound of the draw (default 0.6).
    """
    return round(random.uniform(low, high), 2)
def generate_spend_travel(low: float = 0.0, high: float = 0.2) -> float:
    """Return a random travel spend value, rounded to 2 decimals.

    The value is drawn uniformly between ``low`` and ``high``; with the
    defaults it can be exactly 0.

    Args:
        low: Lower bound of the draw (default 0.0).
        high: Upper bound of the draw (default 0.2).
    """
    return round(random.uniform(low, high), 2)
def generate_balance(low: float = 500.0, high: float = 5000.0) -> float:
    """Return a random account balance, rounded to 2 decimals.

    The value is drawn uniformly between ``low`` and ``high``.

    Args:
        low: Lower bound of the draw (default 500).
        high: Upper bound of the draw (default 5000).
    """
    return round(random.uniform(low, high), 2)
def generate_balance_frequency(low: float = 0.2, high: float = 0.8) -> float:
    """Return a random balance-frequency value, rounded to 2 decimals.

    The value is drawn uniformly between ``low`` and ``high``.

    Args:
        low: Lower bound of the draw (default 0.2).
        high: Upper bound of the draw (default 0.8).
    """
    return round(random.uniform(low, high), 2)
def generate_avg_trn_value(low: int = 10, high: int = 30) -> int:
    """Return a random integer average transaction value in [low, high].

    Args:
        low: Smallest value that can be returned (default 10).
        high: Largest value that can be returned (default 30).
    """
    return random.randint(low, high)
def generate_purchases(low: float = 1.0, high: float = 100.0) -> float:
    """Return a random purchases figure, rounded to 2 decimals.

    The value is drawn uniformly between ``low`` and ``high``.

    Args:
        low: Lower bound of the draw (default 1).
        high: Upper bound of the draw (default 100).
    """
    return round(random.uniform(low, high), 2)
def generate_oneoff_purchases(low: float = 0.0, high: float = 1.0) -> float:
    """Return a random one-off purchases fraction, rounded to 2 decimals.

    The value is drawn uniformly between ``low`` and ``high``. With the
    defaults it lies in [0, 1], which is what
    ``generate_installments_purchases`` needs to produce a complement.

    Args:
        low: Lower bound of the draw (default 0.0).
        high: Upper bound of the draw (default 1.0).
    """
    return round(random.uniform(low, high), 2)
def generate_installments_purchases(oneoff_purchases: float) -> float:
    """Return the complement of the one-off purchases fraction.

    Args:
        oneoff_purchases: One-off purchases fraction for the same customer.

    Returns:
        ``1 - oneoff_purchases`` rounded to 2 decimals, so the two fractions
        add up to 1 (up to rounding).
    """
    return round(1 - oneoff_purchases, 2)
def generate_cash_advances(low: int = 0, high: int = 5) -> int:
    """Return a random integer cash-advances value in [low, high].

    Args:
        low: Smallest value that can be returned (default 0).
        high: Largest value that can be returned (default 5).
    """
    return random.randint(low, high)
def generate_forex_trn(low: int = 0, high: int = 10) -> int:
    """Return a random integer forex-transactions value in [low, high].

    Args:
        low: Smallest value that can be returned (default 0).
        high: Largest value that can be returned (default 10).
    """
    return random.randint(low, high)
def calculate_credit_limit(balance: float, purchases: float, avg_trn_value: float) -> float:
    """Derive a credit limit from the balance and purchase activity.

    Args:
        balance: Account balance.
        purchases: Purchases figure for the customer.
        avg_trn_value: Average transaction value.

    Returns:
        ``balance + purchases * avg_trn_value`` rounded to 2 decimals. The
        rounding keeps binary floating-point noise (for example
        0.30000000000000004) out of the written dataset, matching the
        2-decimal precision of the other generated amounts.
    """
    return round(balance + purchases * avg_trn_value, 2)
def generate_payments(low: int = 6, high: int = 12) -> int:
    """Return a random integer payments value in the inclusive range [low, high].

    Args:
        low: Smallest value that can be returned (default 6).
        high: Largest value that can be returned (default 12).
    """
    return random.randint(low, high)
def generate_prcfullpayment(low: float = 0.5, high: float = 1.0) -> float:
    """Return a random full-payment fraction, rounded to 2 decimals.

    The value is drawn uniformly between ``low`` and ``high``.

    Args:
        low: Lower bound of the draw (default 0.5).
        high: Upper bound of the draw (default 1.0).
    """
    return round(random.uniform(low, high), 2)
# Stream the synthetic customer table to disk one row at a time, so the
# 5 million records never have to be held in memory together.
with open('synthetic_credit_card_dataset.csv', mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow([
        'CUST_ID', 'AGE', 'NUM_CREDIT_LINES', 'INCOME_TIER',
        'SPEND_GROCERY', 'SPEND_GAS', 'SPEND_DINEOUT', 'SPEND_TRAVEL',
        'BALANCE', 'BALANCE_FREQUENCY', 'AVG_TRN_VALUE', 'PURCHASES',
        'ONEOFF_PURCHASES', 'INSTALLMENTS_PURCHASES', 'CASH_ADVANCES',
        'FOREX_TRN', 'CREDIT_LIMIT', 'PAYMENTS', 'PRCFULLPAYMENT', 'TENURE',
    ])

    for _ in range(5000000):
        # Values that feed more than one column are drawn once up front.
        oneoff_purchases = generate_oneoff_purchases()
        balance = generate_balance()
        balance_frequency = generate_balance_frequency()
        avg_trn_value = generate_avg_trn_value()
        # Drawn once so the PURCHASES column and CREDIT_LIMIT use the same
        # figure; previously CREDIT_LIMIT was computed from
        # balance_frequency by mistake.
        purchases = generate_purchases()

        spend_grocery = generate_spend_grocery()
        spend_gas = generate_spend_gas()
        spend_dineout = generate_spend_dineout()
        spend_travel = generate_spend_travel()

        # Normalise the four spend values into shares that sum to 1. The
        # total must be computed once, before any value is overwritten,
        # otherwise later shares are divided by a partly normalised sum.
        # The total is never zero: three of the draws are at least 0.1.
        spend_total = spend_grocery + spend_gas + spend_dineout + spend_travel
        spend_grocery /= spend_total
        spend_gas /= spend_total
        spend_dineout /= spend_total
        spend_travel /= spend_total

        writer.writerow([
            generate_cust_id(),
            generate_age(),
            generate_num_credit_lines(),
            generate_income_tier(),
            spend_grocery,
            spend_gas,
            spend_dineout,
            spend_travel,
            balance,
            balance_frequency,
            avg_trn_value,
            purchases,
            oneoff_purchases,
            generate_installments_purchases(oneoff_purchases),
            generate_cash_advances(),
            generate_forex_trn(),
            calculate_credit_limit(balance, purchases, avg_trn_value),
            generate_payments(),
            generate_prcfullpayment(),
            10,  # TENURE is the same constant for every customer
        ])
### Create pilot dataset using 1% of the customers ###
data = pd.read_csv('synthetic_credit_card_dataset.csv')
pilot = data.sample(frac=0.01, random_state=1)

# Randomly assign each pilot customer to treatment (1) or control (0).
# This flag is drawn exactly once: re-drawing it after 'Treatment' has been
# derived from it would leave the two columns contradicting each other.
pilot['Treated'] = np.random.choice([0, 1], pilot.shape[0])

# Split customers into three equal-width bins of PURCHASES * AVG_TRN_VALUE
# and map each bin to a discounted-fee label. Control customers are then
# overwritten with the full-fee label.
fee_tier = pd.cut(
    pilot['PURCHASES'] * pilot['AVG_TRN_VALUE'],
    3,
    labels=["Annual Fee x 30%", "Annual Fee x 50%", "Annual Fee x 70%"],
)
pilot['Treatment'] = np.where(pilot['Treated'], fee_tier, "Full Annual Fee")

# Renewal probability per fee label. Because control customers always carry
# the full-fee label, looking up by label reproduces the treated / control
# split of the original nested np.where.
# NOTE(review): all three discounted tiers share 0.7, as in the original
# code - confirm whether distinct probabilities per tier were intended.
renewal_prob = {
    "Annual Fee x 30%": 0.7,
    "Annual Fee x 50%": 0.7,
    "Annual Fee x 70%": 0.7,
    "Full Annual Fee": 0.4,
}
pilot['Renewed'] = np.random.binomial(
    1, pilot['Treatment'].map(renewal_prob).to_numpy()
)

pilot.to_csv('pilot_credit_card.csv', index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment