Last active
August 29, 2015 14:00
-
-
Save jasonbaldridge/11089963 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* R functions to create and manipulate Breeze matrices. This should go into | |
* Breeze or a sub-project of Breeze eventually. | |
*/ | |
object RFunc { | |
import breeze.linalg._ | |
import breeze.stats.distributions._ | |
import breeze.stats.DescriptiveStats._ | |
/** | |
* Produces sample quantiles corresponding to the given probabilities. | |
*/ | |
def quantile( | |
values: TraversableOnce[Double], | |
probs: Seq[Double] = Seq(0.0, 0.25,.5,.75,1.0) | |
) = { | |
// Not efficient, but maximally reuses existing code and is a fine | |
// start. | |
probs.map(p=>percentile(values,p)) | |
} | |
/** | |
* Create a matrix of Doubles given a Seq of Double Seqs. | |
*/ | |
def matrix(values: Seq[Seq[Double]]): DenseMatrix[Double] = | |
matrix[Double](values.flatten.toArray, values.length, values.head.length) | |
/** | |
* Create a matrix of Strings given a Seq of String Seqs. This should | |
* be one method with the above for Doubles, but there was some type | |
* compile error and I couldn't be bothered to sort it out. I.e., we | |
* want matrix[T](values: Seq[Seq[T]]): DenseMatrix[T]. | |
*/ | |
def matrixString(values: Seq[Seq[String]]): DenseMatrix[String] = | |
matrix[String](values.flatten.toArray, values.length, values.head.length) | |
/** | |
* Create a Breeze matrix of the given proportions. | |
*/ | |
def matrix[T](values: Array[T], nrow: Int, ncol: Int): DenseMatrix[T] = | |
new DenseMatrix[T](nrow, ncol, values,0,ncol,true) | |
/** | |
* Create a Breeze matrix initialized with the given value for all positions. | |
*/ | |
def matrix(value: Double, nrow: Int, ncol: Int): DenseMatrix[Double] = | |
if (value == 0) | |
DenseMatrix.zeros[Double](nrow,ncol) | |
else | |
matrix(Array.fill(nrow*ncol)(value), nrow, ncol) | |
/** | |
* Create a vector repeating the given value. | |
*/ | |
def rep(value: Double, length: Int) = | |
DenseVector.fill(length)(value) | |
/** | |
* Show the dimensions of the matrix. | |
*/ | |
def dim[T](mat: DenseMatrix[T]) = (mat.rows, mat.cols) | |
/** | |
* This is an impoverished implementation of R's model.matrix function. | |
* It's also not particularly efficient. | |
*/ | |
def modelMatrix(columnNames: Seq[String], data: DenseMatrix[String]) = { | |
val info = (for (j <- 0 until data.cols) yield { | |
val name = columnNames(j) | |
val values = data(::,j).toDenseVector.toArray.toSet | |
values.toSeq.map(v=>name+"_"+v) | |
}).flatten.toIndexedSeq | |
val infoMap = info.zipWithIndex.toMap | |
val infoLength = info.length | |
val binarizedMatrix = (for (i <- 0 until data.rows) yield { | |
val demoBinary = Array.fill(info.length)(0.0) | |
for (j <- 0 until data.cols) { | |
val jstring = columnNames(j)+"_"+data(i,j) | |
demoBinary(infoMap(jstring)) = 1.0 | |
} | |
demoBinary.toSeq | |
}).toSeq | |
(info,matrix(binarizedMatrix)) | |
} | |
/** | |
* Draw values from a Gaussian with given mean and standard deviation. | |
*/ | |
def rnorm(size: Int, mean: Double = 0.0, stdev: Double = 1.0) = | |
new Gaussian(mean,stdev).sample(size).toArray | |
/** | |
* Get the max of the given value and the value at each position in the | |
* given vector. | |
*/ | |
def pmax(value: Double, vec: DenseVector[Double]) = | |
vec.map(x => math.max(value,x)) | |
/** | |
* Get the absolute value of every value in the given vector. | |
*/ | |
def abs(vec: DenseVector[Double]) = | |
vec.map(math.abs) | |
} | |
// This is code I did a while ago that goes further in allowing R operators and functions to be used with | |
// Breeze, but it was for Breeze version over one year ago (when broadcasting wasn't supported, which would | |
// have simplified some of the implementations). Would be nice to fold some of this in to the RFunc object above, | |
// and have tests and such. | |
object BreezeMatrixUtil { | |
import breeze.linalg.{DenseMatrix,DenseVector,LinearAlgebra} | |
implicit def breezeMatrixToRMatrix(mat: DenseMatrix[Double]) = new RMatrix(mat) | |
class RMatrix (mat: DenseMatrix[Double]) { | |
def %*% (that: DenseMatrix[Double]) = mat * that | |
def * (multiplier: Double) = mat.map(_*multiplier) | |
//def * (that: DenseMatrix[Double]) = { | |
// val result = DenseMatrix.zeros[Double](mat.rows,mat.cols) | |
// for (r <- 0 until mat.rows; c <- 0 until mat.cols) | |
// result(r,c) = mat(r,c) * that(r,c) | |
// result | |
//} | |
def multiplyEachColumnByVector (that: DenseVector[Double]) = { | |
val result = DenseMatrix.zeros[Double](mat.rows,mat.cols) | |
for (r <- 0 until mat.rows; c <- 0 until mat.cols) | |
result(r,c) = mat(r,c) * that(r) | |
result | |
} | |
def divideElements (divider: Double) = mat.map(_/divider) | |
def col(id: Int) = mat(::, id) | |
def row(id: Int): DenseVector[Double] = mat.t.apply(::, id) | |
def set(row: Int, col: Int, value: Double) { mat(row,col) = value } | |
} | |
implicit def breezeVectorToRVector(vec: DenseVector[Double]) = new RVector(vec) | |
class RVector (vec: DenseVector[Double]) { | |
def / (value: Double) = vec.map(_/value) | |
} | |
implicit def doubleToRDouble(x: Double) = new RDouble(x) | |
class RDouble (x: Double) { | |
def * (mat: DenseMatrix[Double]) = mat * x | |
def * (vec: DenseVector[Double]) = vec * x | |
def / (mat: DenseMatrix[Double]) = mat.map(v => if (v == 0.0) 0.0 else x/v) | |
def / (vec: DenseVector[Double]) = vec.map(v => if (v == 0.0) 0.0 else x/v) | |
def + (vec: DenseVector[Double]) = vec.map(v => x+v) | |
} | |
def multVecMat(vec: DenseVector[Double], mat: DenseMatrix[Double]) = { | |
val result = DenseMatrix.zeros[Double](mat.rows,mat.cols) | |
for (r <- 0 until mat.rows; c <- 0 until mat.cols) | |
result(r,c) = mat(r,c) * vec(r) | |
result | |
} | |
def solve(mat: DenseMatrix[Double]) = LinearAlgebra.inv(mat) | |
def t(mat: DenseMatrix[Double]) = mat.t | |
def cbind(mat: DenseMatrix[Double], newColumnValue: Double) = | |
DenseMatrix.horzcat(mat, DenseMatrix.fill[Double](mat.rows,1)(newColumnValue)) | |
def cbind(mat: DenseMatrix[Double], vec: DenseVector[Double]) = | |
DenseMatrix.horzcat(mat, vec.t.t) | |
def rbind(mat: DenseMatrix[Double], newRowValue: Double) = | |
DenseMatrix.vertcat(mat, DenseMatrix.fill[Double](1,mat.cols)(newRowValue)) | |
def outer(vec1: DenseVector[Double], vec2: DenseMatrix[Double]) = { | |
assert(vec2.rows == 1) | |
val result = DenseMatrix.zeros[Double](vec1.length,vec2.cols) | |
for (r <- 0 until vec1.length; | |
v1val = vec1(r); | |
c <- 0 until vec2.cols) | |
result(r,c) = v1val * vec2(0,c) | |
result | |
} | |
def columnAbsSums (mat: DenseMatrix[Double], columnAddValue: Double = 0.0) = { | |
val sums = (0 until mat.cols).map { j => | |
mat(::,j).map(math.abs).sum + 2 | |
}.toArray | |
DenseVector(sums) | |
} | |
def scan(filename: String) = { | |
val values = io.Source.fromFile(filename).getLines.map(_.toDouble) | |
new DenseVector(values.toArray) | |
} | |
def readTableAsMatrix(filename: String) = { | |
val values = io.Source.fromFile(filename).getLines.map { line => | |
line.split(" ").map(_.toDouble).toArray | |
}.toArray | |
createMatrixFromArray(values) | |
} | |
def readTableAsMatrix(lines: Iterator[String], blockSize: Int) = { | |
val values = lines.take(blockSize).map { line => | |
line.split(" ").map(_.toDouble).toArray | |
}.toArray | |
createMatrixFromArray(values) | |
} | |
def createMatrixFromArray(values: Array[Array[Double]]) = | |
DenseMatrix.create[Double](values.head.length, values.length, values.flatten).t | |
def array(value: Double, numRows: Int, numCols: Int, numRepetitions: Int) = | |
(1 to numRepetitions).map(i => matrix(value, numRows, numCols)).toArray | |
def array(matrix: DenseMatrix[Double], | |
numRows: Int, numCols: Int, numRepetitions: Int) = | |
(1 to numRepetitions).map(i => matrix).toArray | |
def matrix(numRows: Int, numCols: Int): DenseMatrix[Double] = | |
DenseMatrix.zeros[Double](numRows, numCols) | |
def matrix(numRows: Int, numCols: Int, values: Array[Double]): DenseMatrix[Double] = | |
DenseMatrix.create[Double](numRows, numCols, values) | |
def matrix(value: Double, numRows: Int, numCols: Int): DenseMatrix[Double] = | |
if (value == 0.0) DenseMatrix.zeros[Double](numRows, numCols) | |
else DenseMatrix.fill[Double](numRows,numCols)(value) | |
def matrix(values: Seq[Double], numRows: Int, numCols: Int): DenseMatrix[Double] = | |
DenseMatrix.create[Double](numRows,numCols, values.toArray) | |
def rep(value: Double, size: Int) = DenseVector.fill(size)(value) | |
def diag(value: Double, size: Int): DenseMatrix[Double] = diag(rep(value, size)) | |
def diag(vec: DenseVector[Double]) = breeze.linalg.diag(vec) | |
// WARNING: this modifies the input matrix | |
def setDiagonal(value: Double, mat: DenseMatrix[Double]) = { | |
(0 until mat.cols).foreach(index => mat(index, index) = value) | |
mat | |
} | |
// do this slowly for now | |
def tcrossprod(mat: DenseMatrix[Double]): DenseMatrix[Double] = tcrossprod(mat, mat) | |
def tcrossprod(matA: DenseMatrix[Double], matB: DenseMatrix[Double]): DenseMatrix[Double] = | |
matA * matB.t | |
def tcrossprod(vec: DenseVector[Double]) = vec * vec.t | |
def mean(matrices: Array[DenseMatrix[Double]]) = | |
matrices.reduce(_ :+ _).map(_/matrices.length) | |
def weightedMean(matrices: Array[DenseMatrix[Double]], weights: Array[Double]) = | |
matrices.zip(weights) | |
.map { case(mat,mult) => mat*mult } | |
.reduce(_ :+ _) | |
.map(_/weights.sum) | |
def submatrix(mat: DenseMatrix[Double], rows: Seq[Int], cols: Seq[Int]) = { | |
val data = for (i <- rows; j <- cols) yield mat(i,j) | |
new DenseMatrix(rows.length, data.toArray) | |
} | |
} | |
// Finally, here is code to allow R functions to be used to create and manipulate Colt Matrices. There | |
// is more that could be brought into RFunc (though substituting in Breeze objects), and perhaps it | |
// would be useful for anyone who wants to work with Colt instead of Breeze. | |
object ColtMatrixUtil { | |
import tempest.data._ | |
import cern.colt.matrix.tdouble.{DoubleMatrix1D=>DoubleVector,DoubleMatrix2D=>DoubleMatrix} | |
import cern.colt.matrix.tdouble.impl.{DenseDoubleMatrix1D,DenseDoubleMatrix2D} | |
import cern.colt.matrix.tdouble.algo._ | |
import cern.colt.function.tdouble._ | |
lazy val linalg = new DenseDoubleAlgebra | |
lazy val util1D = cern.colt.matrix.tdouble.DoubleFactory1D.dense | |
lazy val util2D = cern.colt.matrix.tdouble.DoubleFactory2D.dense | |
implicit def coltMatrixToRMatrix(mat: DoubleMatrix) = new RMatrix(mat) | |
implicit def coltVectorToRVector(vec: DoubleVector) = new RVector(vec) | |
implicit def doubleToRDouble(x: Double) = new RDouble(x) | |
implicit def colt1Dto2D(vec: DoubleVector) = util2D.make(vec.toArray, vec.size.toInt) | |
//implicit def arrayTo1D(a: Array[Double]) = util1D.make(a) | |
type ConcatType = ::.type | |
//implicit def concatToEmptyRange(x: ::.type) = 0 to -1 | |
//implicit def intToRange(x: Int) = x to x | |
class MultFnc(multiplier: Double) extends IntIntDoubleFunction { | |
override def apply(r: Int, c: Int, d: Double) = d*multiplier | |
} | |
class VecMultFnc(multiplier: Double) extends DoubleFunction { | |
override def apply(d: Double) = d*multiplier | |
} | |
class DivideByElementsFnc(numerator: Double) extends IntIntDoubleFunction { | |
override def apply(r: Int, c: Int, d: Double) = numerator/d | |
} | |
object PairwiseDivideFnc extends DoubleDoubleFunction { | |
override def apply(x: Double, y: Double) = x/y | |
} | |
class VecDivideByElementsFnc(numerator: Double) extends DoubleFunction { | |
override def apply(d: Double) = numerator/d | |
} | |
class AddFnc(value: Double) extends DoubleFunction { | |
override def apply(d: Double) = d + value | |
} | |
object SqrtFnc extends DoubleFunction { | |
override def apply(d: Double) = math.sqrt(d) | |
} | |
object TanhFnc extends DoubleFunction { | |
override def apply(d: Double) = math.tanh(d) | |
} | |
class MatrixAddFnc extends DoubleDoubleFunction { | |
override def apply(x: Double, y: Double) = x+y | |
} | |
class RMatrix (mat: DoubleMatrix) { | |
def %*% (that: DoubleMatrix) = mat.zMult(that, null) | |
def %*% (that: DoubleVector) = that %*% t(mat) | |
def * (value: Double) = mat.copy.forEachNonZero(new MultFnc(value)) | |
def * (vec: DoubleVector) = mat.copy.multiplyEachColumnByVector(vec) | |
def / (value: Double) = mat.copy.forEachNonZero(new MultFnc(1/value)) | |
def + (that: DoubleMatrix) = { | |
val result = mat.like | |
for (r <- 0 until mat.rows; c <- 0 until mat.columns) | |
result.setQuick(r,c, mat.getQuick(r,c) + that.getQuick(r,c)) | |
result | |
} | |
def - (that: DoubleMatrix) = { | |
val result = mat.like | |
for (r <- 0 until mat.rows; c <- 0 until mat.columns) | |
result.setQuick(r,c, mat.getQuick(r,c) - that.getQuick(r,c)) | |
result | |
} | |
def cols = mat.columns | |
def multiplyEachColumnByVector (that: DoubleVector) = { | |
val result = mat.like | |
for (r <- 0 until mat.rows; c <- 0 until mat.columns) | |
result.setQuick(r,c, mat.getQuick(r,c) * that.getQuick(r)) | |
result | |
} | |
def divideElements (divider: Double) = mat.copy.forEachNonZero(new MultFnc(1/divider)) | |
def getRow(i: Int) = mat.viewRow(i).copy | |
def getRows(rowRange: Range) = | |
mat.viewSelection(rowRange.toArray, (0 until mat.columns).toArray).copy | |
def getCol(i: Int) = mat.viewColumn(i).copy | |
def getCols(colRange: Range) = | |
mat.viewSelection((0 until mat.rows).toArray, colRange.toArray).copy | |
def apply(rowR: Range, colR: Range) = | |
mat.viewSelection(rowR.toArray, colR.toArray).copy | |
def update(r: Int, x: ConcatType, vec: DoubleVector) = | |
for (c <- 0 until mat.columns) mat.setQuick(r, c, vec.getQuick(c)) | |
def update(x: ConcatType, c: Int, vec: DoubleVector) = | |
for (r <- 0 until mat.rows) mat.setQuick(r, c, vec.getQuick(r)) | |
} | |
class RVector (vec: DoubleVector) { | |
def %*% (that: DoubleVector) = linalg.mult(vec,that) | |
def %*% (that: DoubleMatrix) = | |
util1D.make((0 until that.columns).map(c => linalg.mult(vec,that.viewColumn(c))).toArray) | |
def * (value: Double) = vec.copy.assign(new VecMultFnc(value)) | |
def * (that: DoubleVector) = | |
util1D.make(vec.toArray.zip(that.toArray).map(x=>x._1*x._2)) | |
def / (value: Double) = vec.copy.assign(new VecMultFnc(1/value)) | |
def / (that: DoubleVector) = vec.copy.assign(that, PairwiseDivideFnc) | |
def + (that: DoubleVector) = { | |
val result = vec.like | |
for (r <- 0 until vec.size.toInt) | |
result.setQuick(r, vec.getQuick(r) + that.getQuick(r)) | |
result | |
} | |
def - (value: Double) = vec.copy.assign(new AddFnc(-value)) | |
def - (that: DoubleVector) = { | |
val result = vec.like | |
for (r <- 0 until vec.size.toInt) | |
result.setQuick(r, vec.getQuick(r) - that.getQuick(r)) | |
result | |
} | |
} | |
class RDouble (x: Double) { | |
def * (mat: DoubleMatrix) = mat * x | |
def * (vec: DoubleVector) = vec * x | |
def / (mat: DoubleMatrix) = mat.copy.forEachNonZero(new DivideByElementsFnc(x)) | |
def / (vec: DoubleVector) = vec.copy.assign(new VecDivideByElementsFnc(x)) | |
def + (vec: DoubleVector) = vec.copy.assign(new AddFnc(x)) | |
} | |
def multVecMat(vec: DoubleVector, mat: DoubleMatrix) = mat.multiplyEachColumnByVector(vec) | |
def solve(mat: DoubleMatrix) = linalg.inverse(mat) | |
def t(mat: DoubleMatrix) = linalg.transpose(mat) | |
def cbind(mat: DoubleMatrix, newColumnValue: Double) = | |
util2D.appendColumn(mat, util1D.make(Array.fill(mat.rows)(newColumnValue))) | |
def cbind(mat: DoubleMatrix, vec: DoubleVector) = | |
util2D.appendColumn(mat, vec) | |
def rbind(mat: DoubleMatrix, newRowValue: Double) = | |
util2D.appendRow(mat, util1D.make(Array.fill(mat.columns)(newRowValue))) | |
def rbind(upper: DoubleMatrix, lower: DoubleMatrix) = | |
util2D.appendRows(upper, lower) | |
def outer(vec1: DoubleVector, vec2: DoubleVector) = | |
linalg.xmultOuter(vec1,vec2) | |
def scan(filename: String) = { | |
val values = io.Source.fromFile(filename).getLines.map(_.toDouble) | |
util1D.make(values.toArray) | |
} | |
def readTableAsMatrix(filename: String) = { | |
val values = io.Source.fromFile(filename).getLines.map { line => | |
line.split(" ").map(_.toDouble).toArray | |
}.toArray | |
createMatrixFromArray(values) | |
} | |
def readTableAsMatrix(lines: Iterator[String], blockSize: Int) = { | |
val values = lines.take(blockSize).map { line => | |
line.split(" ").map(_.toDouble).toArray | |
}.toArray | |
createMatrixFromArray(values) | |
} | |
def readTableAsMatrix(lines: DocumentSource, blockSize: Int) = | |
createMatrixFromArray(lines.take(blockSize).toArray) | |
def createMatrixFromArray(values: Array[Array[Double]]) = util2D.make(values) | |
def array(value: Double, numRows: Int, numCols: Int, numRepetitions: Int) = { | |
val mat = util2D.make(numRows, numCols,value) | |
(1 to numRepetitions).map(i => mat.copy).toArray | |
} | |
def array(mat: DoubleMatrix, numRows: Int, numCols: Int, numRepetitions: Int) = | |
(1 to numRepetitions).map(i => mat.copy).toArray | |
def matrix(numRows: Int, numCols: Int): DoubleMatrix = | |
util2D.make(numRows, numCols) | |
def matrix(numRows: Int, numCols: Int, values: Array[Double]): DoubleMatrix = | |
util2D.make(values, numRows) | |
def matrix(value: Double, numRows: Int, numCols: Int): DoubleMatrix = | |
util2D.make(numRows, numCols, value) | |
def matrix(values: Seq[Double], numRows: Int, numCols: Int): DoubleMatrix = | |
util2D.make(values.toArray, numRows) | |
def vector(values: Array[Double]) = util1D.make(values) | |
def rep(value: Double, size: Int) = util1D.make(size, value) | |
def diag(value: Double, size: Int): DoubleMatrix = diag(rep(value, size)) | |
def diag(vec: DoubleVector) = util2D.diagonal(vec) | |
def diag(mat: DoubleMatrix) = util2D.diagonal(mat) | |
// do this slowly for now | |
def tcrossprod(mat: DoubleMatrix): DoubleMatrix = tcrossprod(mat, mat) | |
def tcrossprod(matA: DoubleMatrix, matB: DoubleMatrix): DoubleMatrix = | |
linalg.mult(matA, t(matB)) | |
def tcrossprod(vec: DoubleVector) = linalg.xmultOuter(vec,vec) | |
def mean(matrices: Array[DoubleMatrix]) = { | |
val zeros = matrix(matrices.head.rows,matrices.head.columns) | |
val added = | |
matrices.foldLeft(zeros)((accum, next) => accum.assign(next, new MatrixAddFnc)) | |
added / matrices.length | |
} | |
def weightedMean(matrices: Array[DoubleMatrix], weights: Array[Double]) = { | |
val zeros = matrix(matrices.head.rows,matrices.head.columns) | |
val added = | |
matrices.zip(weights) | |
.map { case(mat,mult) => mat*mult } | |
.foldLeft(zeros)((accum, next) => accum.assign(next, new MatrixAddFnc)) | |
added / weights.sum | |
} | |
def submatrix(mat: DoubleMatrix, rows: Seq[Int], cols: Seq[Int]) = { | |
val data = for (i <- rows; j <- cols) yield mat.getQuick(i,j) | |
util2D.make(data.toArray,rows.length) | |
} | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment