karamanbk’s gists

karamanbk / g_series_6.ipynb

Created June 9, 2019 08:23

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

karamanbk / g6_intro.py

Created June 9, 2019 06:59

	from datetime import datetime, timedelta,date
	import pandas as pd
	%matplotlib inline
	import matplotlib.pyplot as plt
	import numpy as np
	from __future__ import division

	import warnings
	# Install a blanket "ignore" filter so no warning output is shown.
	warnings.simplefilter("ignore")

karamanbk / g_series_5.ipynb

Created June 3, 2019 05:54

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

karamanbk / g5_notebook.ipynb

Last active June 3, 2019 05:27

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

karamanbk / g5_selecting_model.py

Created June 2, 2019 09:38

	# Remove the NextPurchaseDay column, then separate the NextPurchaseDayRange
	# column (used as y) from the remaining columns (used as X).
	tx_class = tx_class.drop(columns='NextPurchaseDay')
	y = tx_class['NextPurchaseDayRange']
	X = tx_class.drop(columns='NextPurchaseDayRange')

	# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
	X_train, X_test, y_train, y_test = train_test_split(
		X,
		y,
		test_size=0.2,
		random_state=44,
	)

	# Candidate classifiers as (short label, estimator instance) pairs,
	# each built with its default settings.
	models = [
		("LR", LogisticRegression()),
		("NB", GaussianNB()),
		("RF", RandomForestClassifier()),
	]

karamanbk / g5_rfm.py

Created June 2, 2019 08:31

	# Latest InvoiceDate for each CustomerID in tx_6m, as a two-column frame.
	tx_max_purchase = (
		tx_6m.groupby('CustomerID')['InvoiceDate']
		.max()
		.reset_index()
	)
	tx_max_purchase.columns = ['CustomerID', 'MaxPurchaseDate']

	# Recency = days between each customer's latest purchase and the most
	# recent purchase date found across all customers.
	tx_max_purchase['Recency'] = (
		tx_max_purchase['MaxPurchaseDate'].max()
		- tx_max_purchase['MaxPurchaseDate']
	).dt.days

	# Attach Recency to tx_user by CustomerID (pd.merge's default join type).
	tx_user = pd.merge(
		tx_user,
		tx_max_purchase[['CustomerID', 'Recency']],
		on='CustomerID',
	)

	#plot recency
	plot_data = [

karamanbk / g5_adding_label.py

Created June 2, 2019 08:09

	# Earliest InvoiceDate per CustomerID in tx_next.
	tx_next_first_purchase = (
		tx_next.groupby('CustomerID')['InvoiceDate']
		.min()
		.reset_index()
	)
	tx_next_first_purchase.columns = ['CustomerID', 'MinPurchaseDate']

	# Latest InvoiceDate per CustomerID in tx_6m.
	tx_last_purchase = (
		tx_6m.groupby('CustomerID')['InvoiceDate']
		.max()
		.reset_index()
	)
	tx_last_purchase.columns = ['CustomerID', 'MaxPurchaseDate']

	# Left join keeps every customer from tx_last_purchase; customers with no
	# row in tx_next_first_purchase get a missing MinPurchaseDate.
	tx_purchase_dates = tx_last_purchase.merge(
		tx_next_first_purchase,
		on='CustomerID',
		how='left',
	)

karamanbk / g5_intro.py

Created June 2, 2019 06:51

	#import libraries
	from datetime import datetime, timedelta,date
	import pandas as pd
	%matplotlib inline
	from sklearn.metrics import classification_report,confusion_matrix
	import matplotlib.pyplot as plt
	import numpy as np
	import seaborn as sns
	from __future__ import division
	from sklearn.cluster import KMeans

karamanbk / g_series_4.ipynb

Created May 25, 2019 12:51

Sorry, something went wrong. Reload?

Sorry, we cannot display this file.

Sorry, this file is invalid so it cannot be displayed.

karamanbk / g4_logistic_regression.py

Created May 25, 2019 11:49

	# Prepare column names for the model: every space, parenthesis and hyphen
	# in a column name is replaced with an underscore.
	# NOTE(review): presumably needed so the names are valid terms in the model
	# formula assembled later in the notebook - confirm against the downstream code.
	#
	# The loop body had lost its indentation (an IndentationError as written);
	# the same transformation is expressed here as a single comprehension.
	all_columns = [
		column.replace(" ", "_").replace("(", "_").replace(")", "_").replace("-", "_")
		for column in df_data.columns
	]

	# Write the cleaned names back in the original column order.
	df_data.columns = all_columns

	glm_columns = 'gender'