import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
/**
* Created by taiwoadetiloye on 2016-04-01.
*/
object KMeansExample extends App {

  val config = new SparkConf().setAppName("My Kmeans Sample App").setMaster("local")
  val sc = new SparkContext(config)
  val sqlContext = new SQLContext(sc) // created here but not used below

  // Load and parse the input data: one space-separated numeric vector per line
  val lines = sc.textFile("****/kmeans_data.txt") // change this to your local file path
  val data = lines.filter(_.nonEmpty)
  val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))).cache()
  // Cluster the data into two classes using KMeans
  val numClusters = 2
  val numIterations = 20
  val clusters = KMeans.train(parsedData, numClusters, numIterations)

  // Print the learned cluster centers, the input points, and the predicted cluster index for each point
  clusters.clusterCenters.foreach(println)
  parsedData.foreach(println)
  clusters.predict(parsedData).foreach(println)
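  // A small illustrative sketch (not part of the original gist): pairing each point with its
  // predicted cluster index makes the printed output easier to correlate than the two
  // separate foreach(println) calls above.
  parsedData.map(point => (point, clusters.predict(point))).collect().foreach {
    case (point, cluster) => println(s"$point -> cluster $cluster")
  }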
  // Evaluate clustering by computing Within Set Sum of Squared Errors (WSSSE)
  val WSSSE = clusters.computeCost(parsedData)
  println("Within Set Sum of Squared Errors = " + WSSSE)
  // Save and load the model (uncomment and point the path at a writable directory)
  //clusters.save(sc, "/Users/taiwoadetiloye/IdeaProjects/simpleGraph/src/main/resources/")
  //val sameModel = KMeansModel.load(sc, "/Users/taiwoadetiloye/IdeaProjects/simpleGraph/src/main/resources/")
}
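For reference, the parser above expects one vector per line with space-separated numeric values. A plausible kmeans_data.txt (this sample mirrors the small example file shipped with Spark; substitute your own data and path) could look like:

0.0 0.0 0.0
0.1 0.1 0.1
0.2 0.2 0.2
9.0 9.0 9.0
9.1 9.1 9.1
9.2 9.2 9.2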