Skip to content

Instantly share code, notes, and snippets.

@ocoyawale
Forked from DhavalThkkar/CustomerChurn.py
Created January 30, 2018 00:44
Show Gist options
  • Save ocoyawale/778ca87a630978a74e0a406adc0497f6 to your computer and use it in GitHub Desktop.
Save ocoyawale/778ca87a630978a74e0a406adc0497f6 to your computer and use it in GitHub Desktop.
Test
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun May 7 11:52:37 2017
@author: thakkar_
"""
# Importing Libraries
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Sklearn
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import roc_curve,auc,roc_auc_score
# Keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from sklearn.model_selection import StratifiedKFold
# Importing the Dataset
data = pd.read_csv('churn.csv')
#Removing 'Unnamed: 0' and 'Phone' columns
data.drop('Unnamed: 0',axis = 1, inplace = True)
data.drop('Phone',axis = 1, inplace = True)
data.drop('Area Code', axis = 1, inplace = True) # Area code doesn't appear to be useful
# Replace ? so label encoding works
data['VMail Plan'].replace('?','no',inplace=True)
data['Int\'l Plan'].replace('?','no',inplace=True)
le = LabelEncoder()
data['VMail Plan'] = le.fit_transform(data['VMail Plan'])
data['Int\'l Plan'] = le.fit_transform(data['Int\'l Plan'])
# Check for ?, replace with NaN (float value)
data.replace('?', 0, inplace=True)
# There are some large values which could be concerning
col_names = ['VMail Message', 'Account Length', 'Day Mins', 'Day Calls', 'Day Charge', 'Eve Mins', 'Eve Calls', 'Eve Charge', 'Night Mins', 'Night Calls', 'Night Charge', 'Intl Mins', 'Intl Calls', 'Intl Charge']
data[col_names] = data[col_names].astype(float)
for col in col_names:
data[col] = (data[col] - data[col].mean()) / data[col].std()
# Create your train and test data sets
data['Churn?'] = data['Churn?'].apply(lambda x: x.split('.')[0])
y = le.fit_transform(data['Churn?'])
data.drop('Churn?', axis=1, inplace=True)
state = pd.get_dummies(data['State'])
data.drop('State', axis=1, inplace=True)
X = pd.concat([data, state], axis=1).as_matrix()
from tqdm import tqdm
ypred = np.zeros_like(y,dtype=float)
ypredc = np.zeros_like(y,dtype=float)
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
for train, test in tqdm(kfold.split(X, y)):
# Initial tests appear to indicate no overfitting, dropout layer unneccesary
ann = Sequential()
ann.add(Dense(500, activation='tanh', kernel_initializer='random_normal', input_shape=(X[train].shape[1],)))
ann.add(Dropout(rate=0.5))
ann.add(Dense(1, activation='sigmoid', kernel_initializer='random_normal'))
ann.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
ann.fit(X[train], y[train], batch_size=100, epochs=150, verbose=0)
fpr, tpr, _ = roc_curve(y[test],ann.predict_proba(X[test])) # If I add [:,1] error for index out of bounds
ypred[test] = ann.predict_proba(X[test])
auc_score = roc_auc_score(y,ypred)
print('AUC Score: {:.3f}%'.format(auc_score))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment