Created
August 21, 2014 22:51
-
-
Save erikerlandson/f4b9b9a5c9469f2d9006 to your computer and use it in GitHub Desktop.
Suggestion for simple distance metric (and measure) design
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import breeze.linalg.{Vector => BV, DenseVector => DBV} | |
import org.apache.spark.annotation.Experimental | |
import org.apache.spark.mllib.linalg.{Vector, DenseVector} | |
/**
 * A function of two vectors that yields a `Double` distance value.
 *
 * Implementations supply the Breeze-vector form of `apply`; the Spark-vector
 * overload is derived from it. The trait is `Serializable`.
 */
trait DistanceMeasure extends Function2[BV[Double], BV[Double], Double] with Serializable {

  /**
   * Distance between two Breeze vectors. Left abstract here: every concrete
   * measure or metric provides its own definition.
   *
   * @param v1 first vector
   * @param v2 second vector
   * @return the distance value for the pair
   */
  override def apply(v1: BV[Double], v2: BV[Double]): Double

  /**
   * Catch-all overload for Spark vectors: both arguments are converted with
   * `toBreeze` and handed to the Breeze-vector `apply`. A subclass may
   * override this when it can do better than convert-and-delegate.
   *
   * NOTE(review): `toBreeze` visibility depends on the Spark version and the
   * package this file lives in -- confirm it is accessible here.
   *
   * @param v1 first Spark vector
   * @param v2 second Spark vector
   * @return the distance value for the pair
   */
  def apply(v1: Vector, v2: Vector): Double = {
    val left: BV[Double] = v1.toBreeze
    val right: BV[Double] = v2.toBreeze
    apply(left, right)
  }
}
/**
 * Marker sub-trait of [[DistanceMeasure]]; it declares no members of its own.
 *
 * Presumably reserved for measures that are true metrics in the mathematical
 * sense -- nothing in the trait itself enforces this, so implementors must
 * guarantee it.
 */
trait DistanceMetric extends DistanceMeasure
/** Euclidean distance: the square root of the difference vector dotted with itself. */
class EuclideanDistance extends DistanceMetric {

  /**
   * @param v1 first vector
   * @param v2 second vector
   * @return `sqrt((v1 - v2) dot (v1 - v2))`
   */
  override def apply(v1: BV[Double], v2: BV[Double]): Double = {
    val delta = v1 - v2
    val squaredLength = delta dot delta
    Math.sqrt(squaredLength)
  }
}
/**
 * Euclidean distance in which each squared component difference is scaled by
 * the matching entry of `weights` before summation.
 *
 * @param weights per-component weights. Presumably non-negative and of the
 *                same length as the vectors being compared (not checked
 *                here); a negative weighted sum would make `sqrt` return NaN.
 */
class WeightedEuclideanDistance(val weights: BV[Double]) extends DistanceMetric {

  /**
   * @param v1 first vector
   * @param v2 second vector
   * @return `sqrt(d dot (weights :* d))` where `d = v1 - v2`
   */
  override def apply(v1: BV[Double], v2: BV[Double]): Double = {
    val delta = v1 - v2
    // Element-wise product: scales each component difference by its weight.
    val weightedDelta = weights :* delta
    Math.sqrt(delta dot weightedDelta)
  }
}
/**
 * Cosine distance: one minus the cosine of the angle between two vectors.
 *
 * This is a measure, not a metric, so it extends [[DistanceMeasure]] directly.
 */
class CosineDistance extends DistanceMeasure {

  /**
   * @param v1 first vector
   * @param v2 second vector
   * @return `1 - cos(v1, v2)`, confined to `[0.0, 2.0]`. If either vector has
   *         a zero self dot product the quotient is `0.0 / 0.0`, so the
   *         result is NaN.
   */
  override def apply(v1: BV[Double], v2: BV[Double]): Double = {
    val cosine = (v1 dot v2) / Math.sqrt((v1 dot v1) * (v2 dot v2))
    // Rounding can push the quotient marginally outside [-1, 1] (for example
    // 1.0000000000000002 for parallel vectors), which would produce a small
    // negative distance. Clamp it; Math.min/Math.max propagate NaN, so the
    // zero-vector result is unchanged.
    val bounded = Math.max(-1.0, Math.min(1.0, cosine))
    1.0 - bounded
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment