Skip to content

Instantly share code, notes, and snippets.

@narphorium
Last active August 3, 2021 06:40
Show Gist options
  • Save narphorium/d06b7ed234287e319f18 to your computer and use it in GitHub Desktop.
Save narphorium/d06b7ed234287e319f18 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
@ghdcjs14
Copy link

ghdcjs14 commented Nov 12, 2018

Thank you!!
In Python 3, I think it works!

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

# Build a synthetic 2-D data set: every point is drawn, with roughly equal
# probability, from one of two Gaussian blobs.
num_points = 2000
vectors_set = []

for _ in range(num_points):
    if np.random.random() > 0.5:
        # Blob centred on (0, 0), standard deviation 0.9 on both axes.
        vectors_set.append([np.random.normal(0.0, 0.9), np.random.normal(0.0, 0.9)])
    else:
        # Blob centred on (3, 1), standard deviation 0.5 on both axes.
        vectors_set.append([np.random.normal(3.0, 0.5), np.random.normal(1.0, 0.5)])

# Scatter plot of the raw points; fit_reg=False suppresses the regression line.
# `height=` replaces the old `size=` keyword of lmplot, and x/y are passed by
# keyword because newer seaborn releases no longer accept them positionally.
df = pd.DataFrame(vectors_set, columns=["x", "y"])
sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=6)
plt.show()

# k-means algorithm, written against the TensorFlow 1.x graph/session API
# (tf.Session, tf.assign, tf.random_shuffle).
vectors = tf.constant(vectors_set)
num_clusters = 4

# Initial centroids: the first `num_clusters` rows of a random shuffle of the
# data. (The slice size previously used an undefined name `k`, which raised
# NameError before the graph could even be built.)
centroides = tf.Variable(
    tf.slice(tf.random_shuffle(vectors), [0, 0], [num_clusters, -1]))

# Insert broadcast axes so that subtracting yields one difference vector per
# (centroid, point) pair: vectors -> (1, N, 2), centroids -> (K, 1, 2).
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroides = tf.expand_dims(centroides, 1)

# Squared Euclidean distance per (centroid, point), then for every point the
# index of the closest centroid.
assignments = tf.argmin(
    tf.reduce_sum(
        tf.square(tf.subtract(expanded_vectors, expanded_centroides)), 2),
    0)

# New centroid for each cluster = mean of the points currently assigned to it.
# tf.where gives the indices of the members, reshaped to (1, m) so the gather
# result is (1, m, 2) and the mean over axis 1 is a (1, 2) row; the rows are
# concatenated into a (K, 2) tensor.
# NOTE(review): a cluster that receives no points averages over an empty
# selection, which presumably produces NaN for that centroid -- confirm.
means = tf.concat(axis=0, values=[
    tf.reduce_mean(
        tf.gather(vectors,
                  tf.reshape(
                      tf.where(
                          tf.equal(assignments, c)
                      ), [1, -1])
                  ), axis=[1])
    for c in range(num_clusters)])

update_centroides = tf.assign(centroides, means)

# tf.global_variables_initializer() is the replacement for the deprecated
# tf.initialize_all_variables().
init_op = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init_op)

# Run a fixed number of update steps; the values fetched on the last step are
# what the plotting code below uses.
for step in range(100):
    _, centroid_values, assignment_values = sess.run(
        [update_centroides, centroides, assignments])
  
# Pair every input point with the cluster index it was assigned on the final
# k-means step and collect the result column-wise for plotting.
data = {"x": [], "y": [], "cluster": []}

for point, cluster in zip(vectors_set, assignment_values):
    data["x"].append(point[0])
    data["y"].append(point[1])
    data["cluster"].append(cluster)

# Scatter plot coloured by cluster. `height=` replaces the old `size=` keyword
# of lmplot, and x/y are passed by keyword because newer seaborn releases no
# longer accept them positionally.
df = pd.DataFrame(data)
sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=6, hue="cluster", legend=False)
plt.show()

@yusinshin
Copy link

In Python 3.6, it still works well. Thank you :D

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

# Synthetic 2-D sample: each point comes, with roughly equal probability,
# from one of two Gaussian blobs.
num_points = 2000
vectors_set = []

for _ in range(num_points):
    if np.random.random() > 0.5:
        # Wide blob around the origin (standard deviation 0.9 per axis).
        point = [np.random.normal(0.0, 0.9), np.random.normal(0.0, 0.9)]
    else:
        # Tighter blob around (3, 1) (standard deviation 0.5 per axis).
        point = [np.random.normal(3.0, 0.5), np.random.normal(1.0, 0.5)]
    vectors_set.append(point)

# Plot the raw points without a regression line. x and y are passed by keyword
# because newer seaborn releases reject positional x/y arguments to lmplot.
df = pd.DataFrame({"x": [v[0] for v in vectors_set], "y": [v[1] for v in vectors_set]})
sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=6)
plt.show()

# k-means algorithm
vectors = tf.constant(vectors_set)
num_clusters = 4

# Seed the centroids with the first num_clusters rows of a shuffled copy of
# the data.
shuffled_vectors = tf.random_shuffle(vectors)
centroides = tf.Variable(tf.slice(shuffled_vectors, [0, 0], [num_clusters, -1]))

# Broadcastable views: points along axis 1, centroids along axis 0.
expanded_vectors = tf.expand_dims(vectors, 0)
expanded_centroides = tf.expand_dims(centroides, 1)

# Squared distance of every point to every centroid, then the index of the
# nearest centroid for each point.
squared_differences = tf.square(tf.subtract(expanded_vectors, expanded_centroides))
squared_distances = tf.reduce_sum(squared_differences, 2)
assignments = tf.argmin(squared_distances, 0)


def _cluster_mean(cluster_index):
    """Return a one-row tensor holding the mean of the points in one cluster."""
    is_member = tf.equal(assignments, cluster_index)
    member_indices = tf.reshape(tf.where(is_member), [1, -1])
    members = tf.gather(vectors, member_indices)
    return tf.reduce_mean(members, axis=[1])


# Stack the per-cluster means into the next set of centroids.
means = tf.concat(axis=0, values=[_cluster_mean(c) for c in range(num_clusters)])

update_centroides = tf.assign(centroides, means)

init_op = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init_op)

# Fixed number of update steps; the values from the final step are kept.
fetches = [update_centroides, centroides, assignments]
for step in range(100):
    _, centroid_values, assignment_values = sess.run(fetches)

# Column-wise table of every input point together with the cluster index it
# received on the final k-means step.
data = {
    "x": [point[0] for point, _ in zip(vectors_set, assignment_values)],
    "y": [point[1] for point, _ in zip(vectors_set, assignment_values)],
    "cluster": [cluster for _, cluster in zip(vectors_set, assignment_values)],
}

# Scatter plot coloured by cluster. x and y are passed by keyword because
# newer seaborn releases reject positional x/y arguments to lmplot.
df = pd.DataFrame(data)
sns.lmplot(x="x", y="y", data=df, fit_reg=False, height=6, hue="cluster", legend=False)
plt.show()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment