Last active
March 14, 2018 18:48
-
-
Save martinsotir/d16a26b975d90bfd10716fd9a0e0e769 to your computer and use it in GitHub Desktop.
patchwork_test_for_valera_1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Instructions (requires docker):
#   docker build -t patchwork .
#   docker run -it --rm patchwork
FROM openjdk:8

# -y is required: docker build is non-interactive and would otherwise
# stall forever at apt's "Do you want to continue?" prompt.
RUN apt-get update
RUN apt-get install -y apt-transport-https

# Install sbt from the sbt Debian repository
RUN echo "deb https://dl.bintray.com/sbt/debian /" | tee -a /etc/apt/sources.list.d/sbt.list
RUN apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 2EE0EA64E40A89B84B2DF73499E82A75642AC823
RUN apt-get update
RUN apt-get install -y sbt

# Build patchwork (URL fixed: the original was a garbled redirect of github.com)
RUN git clone https://github.com/crim-ca/patchwork
WORKDIR patchwork
RUN sbt compile

# Add the test class and recompile so it is included in the build
COPY test1.scala ./src/main/scala/example/test1.scala
RUN sbt compile

# Run test file locally on container start
CMD sbt "runMain Test1"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ca.crim.spark.mllib.clustering._ | |
import org.apache.spark.rdd.RDD | |
import org.apache.spark.{ SparkContext, SparkConf } | |
/**
 * Smoke test for the PatchWork grid-based clustering implementation.
 *
 * Builds a tiny 5-point, 10-dimensional dataset on a local 2-core Spark
 * context, trains a PatchWork model, prints the predicted cluster id for
 * each point, then prints summary statistics about the clusters.
 *
 * Launched via `sbt "runMain Test1"` (see the accompanying Dockerfile).
 */
object Test1 {
  // Explicit main instead of `extends App`: the App trait's delayed
  // initialization has well-known ordering pitfalls and is discouraged
  // for non-trivial entry points.
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("Test1").setMaster("local[2]"))
    try {
      // Five 10-dimensional points; Int literals widen to Double via the
      // expected RDD[Array[Double]] element type.
      val dataRDD: RDD[Array[Double]] = sc.parallelize(List(
        Array(10, 10, 1000, 1, 1, 1, 1, 1, 1, 1000),
        Array(20, 20, 1, 1, 1, 1, 1, 1, 1, 1),
        Array(20, 30, 1, 1, 1, 1, 1, 1, 1, 1),
        Array(30, 330, 1, 1, 1, 1, 1, 1, 1, 1),
        Array(330, 30, 444, 1, 1, 1, 1, 1, 1, 1)))

      // PatchWork parameters: one epsilon (cell width) per dimension,
      // plus density thresholds for cells and clusters.
      val epsilon = Array(10.1, 10.1, 1, 1, 1, 1, 1, 1, 1, 1)
      val minPts = 1
      val minCellInCluster = 1
      val ratio = 0.0

      // Training a model with the data; Utils.time returns (result, elapsed ms).
      val (patchworkModel, execTime) = Utils.time(
        new PatchWork(epsilon, minPts, ratio, minCellInCluster).run(dataRDD)
      )

      // Display the cluster id assigned to each data point.
      dataRDD.collect().foreach { point =>
        println(point.mkString("\t") + "\t" + patchworkModel.predict(point).getID)
      }

      // Per-cluster cell counts, built as an expression instead of a mutable var.
      val cs = Range(0, patchworkModel.clusters.size).map { i =>
        s" cluster ${patchworkModel.clusters(i).getID} has ${patchworkModel.clusters(i).cellsList.size} cells \n"
      }.mkString

      println("\n----------------------------------------- \n" +
        s"number of points : ${dataRDD.count()}\n" +
        s"number of clusters : ${patchworkModel.clusters.size}\n" +
        "----------------------------------------- \n" +
        cs +
        "----------------------------------------- \n" +
        s"size of epsilon : [${epsilon.mkString(",")}] \n" +
        s"min pts in each cell : $minPts\n" +
        s"time of training : $execTime ms" + "\n----------------------------------------- \n")
    } finally {
      // Always release the SparkContext, even if training throws.
      sc.stop()
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment