Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from datetime import datetime, timedelta,date | |
import pandas as pd | |
%matplotlib inline | |
import matplotlib.pyplot as plt | |
import numpy as np | |
from __future__ import division | |
import warnings | |
warnings.filterwarnings("ignore") |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Train & test split.
# 'NextPurchaseDay' is removed from tx_class first; 'NextPurchaseDayRange' is
# then used as the label and every remaining column as a feature.
tx_class = tx_class.drop('NextPurchaseDay', axis=1)
y = tx_class['NextPurchaseDayRange']
X = tx_class.drop('NextPurchaseDayRange', axis=1)

# Hold out 20% of the rows for testing, with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=44,
)

# Candidate classifiers as (short label, estimator) pairs, each constructed
# with no arguments.
models = [
    ("LR", LogisticRegression()),
    ("NB", GaussianNB()),
    ("RF", RandomForestClassifier()),
]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Latest purchase date per customer in tx_6m, as a two-column dataframe.
tx_max_purchase = (
    tx_6m
    .groupby('CustomerID')['InvoiceDate']
    .max()
    .reset_index()
)
tx_max_purchase.columns = ['CustomerID', 'MaxPurchaseDate']

# Recency = whole days between the most recent purchase across all customers
# and each customer's own latest purchase.
# NOTE(review): relies on InvoiceDate being datetime-like (.dt accessor) - confirm upstream.
most_recent_purchase = tx_max_purchase['MaxPurchaseDate'].max()
days_since_purchase = most_recent_purchase - tx_max_purchase['MaxPurchaseDate']
tx_max_purchase['Recency'] = days_since_purchase.dt.days

# Attach Recency to tx_user, matching rows on CustomerID.
tx_user = tx_user.merge(
    tx_max_purchase[['CustomerID', 'Recency']],
    on='CustomerID',
)
#plot recency | |
plot_data = [ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Last purchase date per customer in tx_6m.
tx_last_purchase = (
    tx_6m
    .groupby('CustomerID')['InvoiceDate']
    .max()
    .reset_index()
)
tx_last_purchase.columns = ['CustomerID', 'MaxPurchaseDate']

# First purchase date per customer in tx_next.
tx_next_first_purchase = (
    tx_next
    .groupby('CustomerID')['InvoiceDate']
    .min()
    .reset_index()
)
tx_next_first_purchase.columns = ['CustomerID', 'MinPurchaseDate']

# Left merge on CustomerID: every customer from tx_last_purchase is kept, and
# those without a row in tx_next_first_purchase get a missing MinPurchaseDate.
tx_purchase_dates = tx_last_purchase.merge(
    tx_next_first_purchase,
    on='CustomerID',
    how='left',
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import libraries | |
from datetime import datetime, timedelta,date | |
import pandas as pd | |
%matplotlib inline | |
from sklearn.metrics import classification_report,confusion_matrix | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import seaborn as sns | |
from __future__ import division | |
from sklearn.cluster import KMeans |
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Preparing column names for the model: spaces, parentheses and hyphens in
# every df_data column name are replaced with underscores.
all_columns = [
    name.replace(" ", "_").replace("(", "_").replace(")", "_").replace("-", "_")
    for name in df_data.columns
]
df_data.columns = all_columns
glm_columns = 'gender' |