Last active
August 3, 2021 06:40
-
-
Save narphorium/d06b7ed234287e319f18 to your computer and use it in GitHub Desktop.
Good tutorial,
We can simplify the code that calculates the means by using tf.boolean_mask
instead of tf.reshape(tf.where(..)):
# For each cluster index c: keep only the rows of `vectors` whose entry in
# `assignments` equals c, average those rows over axis 0, and pack the
# per-cluster results into a single tensor.
means = tf.pack([
tf.reduce_mean(
tf.boolean_mask(
vectors, tf.equal(assignments, c)
), 0)
for c in xrange(num_clusters)])
I think it's more intuitive
Hello,
when I input values of shape (1000,1), I'm getting a lot of NaNs in the centroid list.
array([[-0.0615779 ],
[ 0. ],
[-0.01855482],
[ nan],
[ nan],
[ nan],
[ nan],
[-0.03768255],
[ 0.01288017],
[ 0.01535422],
[ 0.04958867],
[ nan],
[-0.01960552],
[ 0.09472825],
[-0.09461572],
[ nan]]
Basically I want to do the same as this MATLAB code does:
>> load fisheriris
>> X = meas(:,3);
>> [idx,C] = kmeans(X,3);
>> size(X) => [150,1]
>> size(idx) => [150,1]
>> size(C) => [3,1]
I think there's a problem with the calculation of means, because this is where the assignment for centroids is coming from, but I'm not sure where the NaN is coming from. Can somebody please give me a hint on how to fix it? :)
tf.sub
needs to be changed to tf.subtract,
and
# Older call form: the concatenation axis (0) is the first positional argument
# and the list of per-cluster mean tensors comes second.
means = tf.concat(0, [
tf.reduce_mean(
tf.gather(vectors,
tf.reshape(
tf.where(
tf.equal(assignments, c)
),[1,-1])
),reduction_indices=[1])
for c in xrange(num_clusters)])
to
# Newer call form: the list of per-cluster mean tensors comes first and the
# concatenation axis (0) is passed last.
means = tf.concat([
tf.reduce_mean(
tf.gather(vectors,
tf.reshape(
tf.where(
tf.equal(assignments, c)
),[1,-1])
),reduction_indices=[1])
for c in xrange(num_clusters)], 0)
Thank you!!
In Python 3, I think it works!
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
# --- Synthetic data -------------------------------------------------------
# Draw `num_points` 2-D points; each point comes from one of two Gaussian
# blobs, chosen by a fair coin flip.
num_points = 2000
vectors_set = []
for _ in range(num_points):
    if np.random.random() > 0.5:
        vectors_set.append([np.random.normal(0.0, 0.9), np.random.normal(0.0, 0.9)])
    else:
        vectors_set.append([np.random.normal(3.0, 0.5), np.random.normal(1.0, 0.5)])

# Scatter plot of the raw points (no regression line).
df = pd.DataFrame({"x": [v[0] for v in vectors_set], "y": [v[1] for v in vectors_set]})
# x/y are passed by keyword and `height` replaces the deprecated `size`.
sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=6)
plt.show()

# --- k-means algorithm ----------------------------------------------------
vectors = tf.constant(vectors_set)
num_clusters = 4
# Seed the centroids with `num_clusters` rows taken from a shuffled copy of
# the data. (Previously sliced with an undefined name `k` -> NameError.)
centroides = tf.Variable(
    tf.slice(tf.random_shuffle(vectors), [0, 0], [num_clusters, -1])
)

# Add singleton axes so the subtraction broadcasts every centroid against
# every point: (1, points, dims) - (clusters, 1, dims).
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroides = tf.expand_dims(centroides, 1)

# Squared Euclidean distance summed over the coordinate axis (2), then the
# index of the nearest centroid for each point (argmin over axis 0).
assignments = tf.argmin(
    tf.reduce_sum(
        tf.square(tf.subtract(expanded_vectors, expanded_centroides)), 2
    ),
    0,
)

# New centroid for cluster c = mean of the points currently assigned to c.
# NOTE(review): a cluster that receives no points averages an empty tensor,
# which presumably yields a NaN centroid -- confirm and guard if needed.
means = tf.concat(axis=0, values=[
    tf.reduce_mean(
        tf.gather(
            vectors,
            tf.reshape(tf.where(tf.equal(assignments, c)), [1, -1]),
        ),
        axis=[1],
    )
    for c in range(num_clusters)
])
update_centroides = tf.assign(centroides, means)

# `tf.initialize_all_variables` is deprecated; use the current initializer.
init_op = tf.global_variables_initializer()

# Context manager releases the session's resources once training is done.
with tf.Session() as sess:
    sess.run(init_op)
    # Fixed number of update steps; each run re-assigns points and moves
    # the centroids to the new means.
    for step in range(100):
        _, centroid_values, assignment_values = sess.run(
            [update_centroides, centroides, assignments]
        )

# --- Plot the clustered points, coloured by cluster index ------------------
data = {"x": [], "y": [], "cluster": []}
for point, cluster in zip(vectors_set, assignment_values):
    data["x"].append(point[0])
    data["y"].append(point[1])
    data["cluster"].append(cluster)
df = pd.DataFrame(data)
sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=6, hue="cluster", legend=False)
plt.show()
In Python 3.6, it still works well. Thank you :D
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
# --- Synthetic data -------------------------------------------------------
# Generate `num_points` 2-D samples, each drawn from one of two Gaussian
# blobs selected by a fair coin flip.
num_points = 2000
vectors_set = []
for _ in range(num_points):
    if np.random.random() > 0.5:
        vectors_set.append([np.random.normal(0.0, 0.9), np.random.normal(0.0, 0.9)])
    else:
        vectors_set.append([np.random.normal(3.0, 0.5), np.random.normal(1.0, 0.5)])

# Scatter plot of the raw points (regression fit disabled).
df = pd.DataFrame({"x": [v[0] for v in vectors_set], "y": [v[1] for v in vectors_set]})
# x/y are passed by keyword: newer seaborn releases reject positional x/y.
sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=6)
plt.show()

# --- k-means algorithm ----------------------------------------------------
vectors = tf.constant(vectors_set)
num_clusters = 4
# Initial centroids: the first `num_clusters` rows of a shuffled copy of the data.
centroides = tf.Variable(
    tf.slice(tf.random_shuffle(vectors), [0, 0], [num_clusters, -1])
)

# Singleton axes let the subtraction broadcast to
# (clusters, points, dims): every centroid against every point.
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroides = tf.expand_dims(centroides, 1)

# Squared distances summed over the coordinate axis (2); argmin over the
# centroid axis (0) gives each point's nearest-centroid index.
squared_distances = tf.reduce_sum(
    tf.square(tf.subtract(expanded_vectors, expanded_centroides)), 2
)
assignments = tf.argmin(squared_distances, 0)

# Updated centroid for cluster c = mean of the points assigned to c.
# NOTE(review): a cluster with no assigned points averages an empty tensor,
# which presumably produces a NaN centroid -- confirm and guard if needed.
means = tf.concat(axis=0, values=[
    tf.reduce_mean(
        tf.gather(
            vectors,
            tf.reshape(tf.where(tf.equal(assignments, c)), [1, -1]),
        ),
        axis=[1],
    )
    for c in range(num_clusters)
])
update_centroides = tf.assign(centroides, means)
init_op = tf.global_variables_initializer()

# Context manager releases the session's resources once training is done.
with tf.Session() as sess:
    sess.run(init_op)
    # Fixed number of update steps; the values from the last step are kept.
    for step in range(100):
        _, centroid_values, assignment_values = sess.run(
            [update_centroides, centroides, assignments]
        )

# --- Plot the clustered points, coloured by cluster index ------------------
# Build the frame directly instead of appending row by row; the first
# len(assignment_values) points are paired with their assignments.
clustered_points = vectors_set[:len(assignment_values)]
data = {
    "x": [point[0] for point in clustered_points],
    "y": [point[1] for point in clustered_points],
    "cluster": list(assignment_values),
}
df = pd.DataFrame(data)
sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=6, hue="cluster", legend=False)
plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
That's a good point @vlad17. You can do iteration in TF with tf.while_loop but it is a bit more advanced.