Last active
August 29, 2015 14:07
-
-
Save piyo7/8014c909888e2e912365 to your computer and use it in GitHub Desktop.
Spark / MLlib の K-means を Scala から利用してみる ref: http://qiita.com/piyo7/items/77cc4350bfeab75a29e7
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Version of sbt used to build this project.
sbt.version=0.13.6
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// sbt build definition for the KMeansIris demo.
// NOTE: sbt versions before 0.13.7 require a blank line between settings in
// .sbt files, so the blank lines below must be kept while sbt 0.13.6 is used.
name := "KMeansIris"

version := "1.0"

scalaVersion := "2.10.4"

// `%%` appends the Scala binary version (2.10 here) to each artifact name.
// spark-core and spark-mllib are kept on the same Spark release.
libraryDependencies ++= Seq(
  "org.apache.spark" %% "spark-core" % "1.1.0",
  "org.apache.spark" %% "spark-mllib" % "1.1.0"
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.spark.SparkContext | |
import org.apache.spark.mllib.clustering.KMeans | |
import org.apache.spark.mllib.linalg.Vectors | |
/**
 * Clusters the Iris data set with Spark MLlib's K-means and prints the result.
 *
 * The input file `src/main/resources/iris.data` is read as comma-separated
 * lines in which the last field is the label and every preceding field is
 * parsed as a `Double` feature. Empty lines are skipped.
 *
 * An explicit `main` is used instead of `extends App`: Spark's documentation
 * advises against `scala.App`, whose delayed initialisation can leave fields
 * captured by closures uninitialised when they run on executors.
 */
object KMeansIris {

  /** Renders feature values as `[v1, v2, ...]`. */
  private def render(values: Array[Double]): String =
    values.mkString("[", ", ", "]")

  /**
   * Runs the demo on a local Spark master.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val context = new SparkContext("local", "demo")
    try {
      // Pairs of (label, feature vector). Cached because the RDD is traversed
      // repeatedly by the iterative K-means training and again for the report.
      val data = context.
        textFile("src/main/resources/iris.data").
        filter(_.nonEmpty).
        map { line =>
          val elems = line.split(",")
          (elems.last, Vectors.dense(elems.init.map(_.toDouble)))
        }.
        cache()

      val k = 3 // number of clusters
      val maxIterations = 100 // upper bound on K-means iterations
      val clusters = KMeans.train(data.map(_._2), k, maxIterations)

      // Show the centre of each cluster.
      println("## クラスタの中心")
      clusters.clusterCenters.foreach { center =>
        println(render(center.toArray))
      }

      // Show which cluster each record was assigned to. The records are
      // collected first so that printing happens on the driver rather than
      // inside an RDD action; the data set is small enough to fit in memory.
      println("## 各データのクラスタリング結果")
      data.collect().foreach { case (label, features) =>
        println(s"${render(features.toArray)} ($label) : cluster = ${clusters.predict(features)}")
      }
    } finally {
      // Always release Spark resources, even if training or I/O fails.
      context.stop()
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment