Created
August 20, 2018 10:49
-
-
Save Madhivarman/e9e0ab7f5ef03b7f93ffbd62f6afb848 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
# Build a synthetic RFM (Recency / Frequency / Monetary) dataset of
# 10,000 customers.  Each metric is an integer drawn uniformly from [1, 10).
user_id = list(range(10000))
recency = np.random.randint(low=1, high=10, size=10000)
monetary = np.random.randint(low=1, high=10, size=10000)
frequency = np.random.randint(low=1, high=10, size=10000)

# Assemble the raw columns into a single DataFrame.
dummy_data = pd.DataFrame({
    'user_id': user_id,
    'Recency': recency,
    'Monetary': monetary,
    'Frequency': frequency,
})
dummy_data.shape
dummy_data.head()

# Simple RFM score: element-wise sum of the three metrics (range 3..27).
total_value = recency + monetary + frequency
total_value
dummy_data["total_value"] = total_value
dummy_data.head()
# Map each customer's total RFM score onto a named segment bucket.
def _bucket(score):
    """Return the segment name for a single total_value score."""
    if score < 5:
        return "lost"
    if score < 10:
        return "abouttosleep"
    if score < 17:
        return "recentcustomer"
    if score < 25:
        return "loyalcustomer"
    return "champions"

segment = [_bucket(v) for v in total_value]
dummy_data["segment"] = segment
dummy_data.head()

# Encode the segment names as ordinal floats for the clustering step.
# NOTE(review): "recentcustomer" (4.0) outranks "loyalcustomer" (3.0), so
# the codes are not monotone in total_value — confirm this is intentional.
_segment_codes = {
    "recentcustomer": 4.0,
    "champions": 5.0,
    "abouttosleep": 2.0,
    "lost": 1.0,
    "loyalcustomer": 3.0,
}
dummy_data["segment"] = dummy_data["segment"].apply(_segment_codes.get)
# In[11]:
dummy_data.head()
# In[12]:
# Cast the three RFM feature columns to float for TensorFlow consumption.
_feature_cols = ["Frequency", "Monetary", "Recency"]
dummy_data[_feature_cols] = dummy_data[_feature_cols].astype(float)
dummy_data.head()
# In[13]:
# Features are {Frequency, Monetary, Recency}; the target is the encoded
# segment code produced above.
input_data = dummy_data[_feature_cols]
output_data = dummy_data["segment"]
input_data.head()
# In[14]:
output_data.head()
# ## Split the dataset into Train and Test dataset
# In[15]:
# First 7,000 rows train, remaining 3,000 test (no shuffling).
train_X, train_Y = input_data.iloc[:7000], output_data.iloc[:7000]
test_X, test_Y = input_data.iloc[7000:], output_data.iloc[7000:]
print(train_X.shape, train_Y.shape, test_X.shape, test_Y.shape)
# Work with raw NumPy arrays from here on.
train_X, train_Y = train_X.values, train_Y.values
test_X, test_Y = test_X.values, test_Y.values
import tensorflow as tf | |
# In[26]: | |
from tensorflow.contrib.factorization import KMeans | |
# --- Graph inputs -------------------------------------------------------
with tf.name_scope("input_variables"):
    # Placeholder for the three float RFM features, one row per customer.
    data = tf.placeholder(tf.float32, shape=[None, 3],name="data")
    # Placeholder for the encoded segment code — a single value per row,
    # NOT a one-hot vector (shape [None, 1]).
    target = tf.placeholder(tf.float32, shape=[None, 1],name="target")
# Training hyper-parameters.
epochs = 500
num_classes = 5    # five segment codes (1.0 .. 5.0)
num_clusters = 5
batch_size = 64    # NOTE(review): defined but never used below — confirm
# Define the mini-batch K-Means op with cosine distance.
with tf.name_scope("KMeans_Architecture"):
    Kmeans = KMeans(inputs = data,
                    num_clusters = num_clusters,
                    distance_metric = 'cosine',
                    use_mini_batch = True)
# Build the training graph.  Depending on the TensorFlow version,
# training_graph() returns 6 or 7 elements (newer versions insert the
# cluster-centers variable), so unpack accordingly.
training_graph = Kmeans.training_graph()
if len(training_graph) > 6:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     cluster_center_var, init_op, train_op) = training_graph
else:
    (all_scores, cluster_idx, scores, cluster_centers_initialized,
     init_op, train_op) = training_graph
# cluster_idx comes back wrapped in a tuple; keep only the index tensor.
cluster_idx = cluster_idx[0]
# Mean distance of every point to its assigned centroid (progress metric).
avg_distance = tf.reduce_mean(scores)
# Initialize all graph variables.
init_vars = tf.global_variables_initializer()
# Start a tensorflow session.
sess = tf.Session()
# Run the initializers (the K-Means init op needs the data placeholder fed).
sess.run(init_vars, feed_dict={data: train_X})
sess.run(init_op, feed_dict={data: train_X})
# Add ops to save the tensorflow model.
saver = tf.train.Saver()
# --- Training loop ------------------------------------------------------
# Run the K-Means update `epochs` times over the full training set,
# logging the mean point-to-centroid distance every 100 steps.
for i in range(1, epochs+1):
    _, d, idx = sess.run([train_op, avg_distance, cluster_idx],
                         feed_dict={data: train_X})
    if i % 100 == 0 or i == 1:
        print("step:{}, Avg-Distance:{}".format(i, d))

# Persist the trained centroids.
save_path = saver.save(sess, "customer_segmentation_saved_model/model.ckpt")
print("Model saved in the path:{dir}".format(dir=save_path))

# --- Assign a class label to each centroid ------------------------------
# counts[c, k] = number of training rows assigned to cluster c whose true
# segment code is k+1 (codes are the floats 1.0 .. 5.0).
# BUGFIX: the original did `counts[idx[i]] += train_Y[i]`, which broadcast
# the label *value* across the entire row, so every column of a row was
# equal and np.argmax always returned 0.  Count occurrences per
# (cluster, class) cell instead.
counts = np.zeros(shape=(num_clusters, num_classes))
for i in range(len(idx)):
    counts[idx[i], int(train_Y[i]) - 1] += 1

# Most frequent class index (0-based) per centroid.
lables_map = [np.argmax(c) for c in counts]
lables_map = tf.convert_to_tensor(lables_map)

# Lookup: cluster id -> predicted 0-based class index.
cluster_label = tf.nn.embedding_lookup(lables_map, cluster_idx)

# --- Accuracy op --------------------------------------------------------
# BUGFIX: `target` holds the raw segment code (1..5) in a single column,
# not a one-hot row, so the original `tf.argmax(target, 1)` was always 0.
# Compare the predicted class index against the 0-based true code instead.
true_class = tf.cast(tf.reshape(target, [-1]), tf.int32) - 1
correct_prediction = tf.equal(cluster_label, true_class)
accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
print(correct_prediction)
# In[27]:
# Reshape the test labels into the [None, 1] column shape that the
# `target` placeholder expects.
dup = test_Y.reshape(-1,1)
dup
# In[28]:
# Report clustering accuracy on the held-out 3,000 rows.
print("Test Accuracy:{}".format(sess.run(accuracy_op, feed_dict={data:test_X, target:test_Y.reshape(-1,1)})))
# ## Inspect what all variables are stored in the check point ##
# In[30]:
# import the inspect_checkpoint library
from tensorflow.python.tools import inspect_checkpoint as chkp
# Print all tensors stored in the checkpoint file.
chkp.print_tensors_in_checkpoint_file("customer_segmentation_saved_model/model.ckpt", tensor_name='', all_tensors=True)
# In[31]:
# Print only the tensor named 'data' (if present) from the checkpoint.
chkp.print_tensors_in_checkpoint_file("customer_segmentation_saved_model/model.ckpt", tensor_name='data', all_tensors=False)
# ## Do prediction on fresh, unlabeled data ##
# Generate 500 random RFM rows to score.
test_recency = np.random.randint(low=1, high=10, size=500)
test_monetary = np.random.randint(low=1, high=10, size=500)
test_frequency = np.random.randint(low=1, high=10, size=500)
test_df = pd.DataFrame({'Recency':test_recency, 'Monetary':test_monetary, 'Frequency':test_frequency})
test_df.head()
# In[33]:
test_df.values
# In[37]:
## Restore the saved model into a fresh session.
t_sess = tf.Session()
saver = tf.train.Saver()
saver.restore(t_sess, 'customer_segmentation_saved_model/model.ckpt')

# BUGFIX 1: the original fetched tensors by the fragile scope names
# "input_variables_3/data:0" / "...target:0" — the "_3" suffix depends on
# how many times the graph was rebuilt in the notebook.  Since the graph
# objects are still in scope here, reuse the `data` placeholder directly.
# BUGFIX 2: the original ran `accuracy_op`, which requires the `target`
# placeholder — impossible to supply for unlabeled data and would raise.
# Run `cluster_label` to get the predicted segment index per row instead.
# BUGFIX 3: feed the feature columns in the SAME order used at training
# time (Frequency, Monetary, Recency); test_df's natural column order is
# (Recency, Monetary, Frequency), which would silently swap features.
feed_dict_testing = {data: test_df[["Frequency", "Monetary", "Recency"]].values}
result = t_sess.run(cluster_label, feed_dict=feed_dict_testing)
print(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment