Last active
November 25, 2020 21:37
-
-
Save tyrcho/5884241 to your computer and use it in GitHub Desktop.
Principal Component Analysis with Breeze (Scala)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// For an unknown reason, this works when pasted into the Ammonite REPL but not when run as a script with `amm pca.sc`.
import $ivy.`org.scalanlp::breeze-natives:0.13.2` | |
import $ivy.`org.scalanlp::breeze-viz:0.13.2` | |
import $ivy.`org.scalanlp::breeze:0.13.2` | |
import breeze.linalg._ | |
import breeze.linalg.svd._ | |
import breeze.plot._ | |
import scala.util.Random._ | |
// Number of columns (features) in the generated data matrix.
val dimensions: Int = 50
// Number of rows (samples) in the generated data matrix.
val values: Int = 200
/** Arithmetic mean of the entries of `v`: the sum of its values divided by `v.size`. */
def mean(v: Vector[Double]): Double = {
  val total = v.valuesIterator.sum
  total / v.size
}
/**
 * Returns a copy of `m` in which every column has had its own mean subtracted.
 *
 * The input matrix is left untouched; all in-place updates happen on the copy.
 *
 * @param m matrix whose columns are to be centred
 * @return a new matrix of the same shape with zero-mean columns
 */
def zeroMean(m: DenseMatrix[Double]): DenseMatrix[Double] = {
  val centred = m.copy
  (0 until m.cols).foreach { j =>
    // `centred(::, j)` is used as a view: the in-place `-=` below writes into `centred`.
    val column = centred(::, j)
    column -= mean(column)
  }
  // Debug aid: println("data \n" + m); println("mean \n" + centred)
  centred
}
/**
 * Centres the columns of `data`, decomposes the transposed centred matrix with `svd`,
 * keeps the first `components` rows of the third SVD factor, and multiplies the centred
 * data by the projector built from those rows.
 *
 * NOTE(review): Breeze's `svd` is presumably returning the right singular vectors as the
 * rows of its third factor - confirm against the Breeze version in use.
 * NOTE(review): the result has the same shape as `data` (it looks like a low-rank
 * reconstruction of the centred data), not a `rows x components` score matrix - confirm
 * this is what callers expect.
 *
 * @param data       input matrix; columns are centred via `zeroMean` before decomposition
 * @param components number of leading rows of the third SVD factor to keep
 * @return the centred data multiplied on the left by `basis.t * basis`
 */
def pca(data: DenseMatrix[Double], components: Int): DenseMatrix[Double] = {
  val centred = zeroMean(data)
  // Only the third factor of the decomposition is needed.
  val SVD(_, _, thirdFactor) = svd(centred.t)
  // Leading `components` rows form the basis that is kept.
  val basis = thirdFactor(0 until components, ::)
  val projector = basis.t * basis
  projector * centred
}
//val data= csvread(new File("data.csv"), skipLines = 1).t | |
// val data = DenseMatrix( | |
// (2.0, 4.0, 5.1), | |
// (1.0, 2.5, 3.5), | |
// (8.0, 3.0, 6.4), | |
// (8.0, 5.0, 6.5), | |
// (4.3, 4.5, 6.4)) | |
/**
 * Builds a random `values` x `dimensions` matrix made of two groups of rows.
 *
 * For every column, the first `values / 2` rows are drawn as `c1 + r1 * u` and the
 * remaining rows as `c2 + r2 * u`, where `u` is a fresh `nextDouble()` draw, `c1` is
 * centred around 2, `c2` around -2, and `r1`, `r2` are `nextDouble() * 2`.
 *
 * @return the freshly generated data matrix
 */
def generateData: DenseMatrix[Double] = {
  val samples = DenseMatrix.zeros[Double](values, dimensions)
  val half = values / 2
  (0 until dimensions).foreach { dim =>
    // Per-column parameters; the draw order (c1, r1, c2, r2) matches the random stream
    // consumption of the row loops that follow.
    val upperCentre = 2 + 2 * (nextDouble() - 0.5)
    val upperSpread = nextDouble() * 2
    val lowerCentre = -2 + 2 * (nextDouble() - 0.5)
    val lowerSpread = nextDouble() * 2
    // First group of rows.
    (0 until half).foreach { row =>
      samples(row, dim) = upperCentre + upperSpread * nextDouble()
    }
    // Second group of rows.
    (half until values).foreach { row =>
      samples(row, dim) = lowerCentre + lowerSpread * nextDouble()
    }
  }
  samples
}
// Generate the synthetic data set and run the PCA routine keeping two components.
val data = generateData
val pcaRes = pca(data, 2)
// Alternatively, newer Breeze versions ship a built-in `princomp`, added in
// https://github.com/scalanlp/breeze/commit/913e1229cb3572b43062ee1f756858ac793bb8b8#diff-64378cd6c871f77715faf60c46568a8e
// val pca = princomp(data)
// val pcaRes = pca.scores
println(s"result pca \n$pcaRes")

// One window per plot: the raw data and the PCA output.
val f1 = Figure("data")
val f2 = Figure("pca")
// Raw data: column 0 against column 3, constant marker size.
f1.subplot(0) += scatter(data(::, 0), data(::, 3), _ => 0.1)
// PCA output: column 0 against column 1, constant marker size.
f2.subplot(0) += scatter(pcaRes(::, 0), pcaRes(::, 1), _ => 0.1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
As per Breeze 1.0-RC2, line 22 should be: