Created
November 6, 2014 16:20
-
-
Save ldacosta/89c1710af055be1e9b16 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * A representation of a value of type `T`.
 *
 * It declares nothing of its own: it is `wrappers.Base.Wrapper[T]` under a
 * domain-specific name, giving the different ways of holding a `T` in this
 * file one common supertype.
 *
 * NOTE(review): `Wrapper` is defined elsewhere; the implementations in this
 * file all expose the wrapped value as `value`, so presumably that is the
 * member it declares - confirm.
 *
 * @tparam T type of the wrapped value
 */
trait Repr[T] extends wrappers.Base.Wrapper[T]
/**
 * The simplest representation: the value is stored as-is and handed back
 * unchanged through `value`.
 *
 * @param value the wrapped value
 * @tparam T type of the wrapped value
 */
case class Eval[T](value: T) extends Repr[T]
/**
 * Alternative to `Eval` with the same shape: it holds the value itself and
 * adds no behaviour on access.
 *
 * @param value the wrapped value
 * @tparam T type of the wrapped value
 */
case class Identity[T](value: T) extends Repr[T]
/**
 * A representation that traces every read: each call to `value` prints the
 * wrapped value's `toString` to standard output and then returns it.
 *
 * @param raw the wrapped value, returned untouched by `value`
 * @tparam T type of the wrapped value
 */
case class Debug[T](raw: T) extends Repr[T] {

  /** Prints `raw.toString`, then yields `raw`. Prints again on every call. */
  def value: T = {
    val rendered = raw.toString
    println(rendered)
    raw
  }
}
// etc. | |
/**
 * A `String` wrapper whose `Ordered` comparison is driven by
 * `LevenshteinMetric.compare`.
 *
 * NOTE(review): if `LevenshteinMetric.compare` yields an edit distance (a
 * non-negative number), `compare` can never report "less than", so sorting
 * with this ordering would not behave like a regular total order. Other
 * definitions in this file read the result as a distance - confirm that this
 * is the intended use.
 *
 * @param value the wrapped string
 */
case class Lev(value: String) extends wrappers.Base.Wrapper[String] with Ordered[Lev] {
  import com.rockymadden.stringmetric.similarity.LevenshteinMetric

  /**
   * Result of `LevenshteinMetric.compare` on the two wrapped strings.
   *
   * @param that the other wrapped string
   * @return the metric's result, or 0 when the metric yields none
   */
  def compare(that: Lev): Int = {
    val outcome = LevenshteinMetric.compare(value, that.value)
    // If the two cannot be compared, suppose they are equal.
    // (Alternative considered earlier: fall back to Int.MaxValue instead.)
    outcome.getOrElse(0)
  }
}
/**
 * Type class answering: "how far apart are two values of type `T` when both
 * are held under the representation `repr`?"
 *
 * @tparam T    type of the values being measured
 * @tparam repr representation (a one-argument type constructor) wrapping both
 *              the inputs and the resulting distance
 */
trait Metric[T, repr[_]] {

  /**
   * Distance between `a` and `b`.
   *
   * @param a first represented value
   * @param b second represented value
   * @return the distance, itself held under `repr`
   */
  def metric(a: repr[T], b: repr[T]): repr[Double]
}
/**
 * `Metric` instance for `Lev` strings held under the `Identity`
 * representation.
 *
 * The distance is whatever `Lev.compare` returns for the two wrapped values,
 * converted to `Double`.
 */
implicit object XX extends Metric[Lev, Identity] {

  /**
   * @param a first wrapped `Lev`
   * @param b second wrapped `Lev`
   * @return `a.value compare b.value` as a `Double`, wrapped in `Identity`
   */
  def metric(a: Identity[Lev], b: Identity[Lev]): Identity[Double] =
    Identity((a.value compare b.value).toDouble)
}
/**
 * A matcher deciding whether two values of type `T` are "close enough",
 * with every input and output held under the representation `repr`.
 *
 * @tparam T    type of the values being matched
 * @tparam repr representation (a one-argument type constructor) wrapping the
 *              threshold, the inputs and the verdict
 */
trait BinaryMatcher[T, repr[_]] {
  // def stopWords: repr[Set[T]] // TODO: here???

  /** The cut-off used by the matcher, held under `repr`. */
  def threshold: repr[Double]

  /** Curried predicate: first value, then second value, then the verdict. */
  def isCloseEnough: repr[T] => repr[T] => repr[Boolean]
}
/**
 * Variant of `BinaryMatcher` that derives `isCloseEnough` from an implicit
 * `Metric` and does not leave it abstract: two values match when their
 * metric does not exceed `threshold`.
 *
 * NOTE(review): two things to confirm before relying on this trait.
 *  - The bound `repr[_] <: Repr[_]` does not tie the wrapped type to the
 *    argument of `repr`, so `.value` may not be seen as a `Double` here;
 *    `repr[X] <: Repr[X]` looks like what was meant.
 *  - The comparison below looks like it produces a plain `Boolean`, while
 *    the declared result is `repr[Boolean]`; the trait has no way to lift a
 *    value into `repr`, so one probably has to be added.
 *
 * @tparam T    type of the values being matched
 * @tparam repr representation wrapping the threshold, inputs and verdict
 */
trait BinaryMatcher2[T, repr[_] <: Repr[_]] {
  // def stopWords: repr[Set[T]] // TODO: here???

  /** The cut-off used by the matcher, held under `repr`. */
  def threshold: repr[Double]

  /**
   * Curried predicate built on the implicit metric.
   *
   * The metric is evaluated exactly once per pair, and `threshold` is read
   * once, so a side-effecting representation such as `Debug` is not
   * triggered more often than needed.
   *
   * @param ops metric used to measure the two values
   */
  def isCloseEnough(implicit ops: Metric[T, repr]): repr[T] => repr[T] => repr[Boolean] =
    x => y => {
      val distance = ops.metric(x, y)
      distance.value <= threshold.value
    }
}
/**
 * `BinaryMatcher` for `Lev` strings under the `Identity` representation.
 *
 * Two words match when the result of `Lev.compare` on them is less than or
 * equal to `threshold`.
 *
 * @param threshold largest comparison result still counted as a match
 */
class WordMatcher(val threshold: Identity[Double]) extends BinaryMatcher[Lev, Identity] {

  /** Curried predicate: first word, then second word, then the verdict. */
  def isCloseEnough: Identity[Lev] => Identity[Lev] => Identity[Boolean] =
    left => right => {
      val distance = left.value compare right.value
      Identity(distance <= threshold.value)
    }
}
/**
 * `BinaryMatcher` for `Lev` strings under the `Debug` representation.
 *
 * Same rule as `WordMatcher` (match when `Lev.compare` is less than or equal
 * to `threshold`). Because every `Debug.value` read prints, evaluating the
 * predicate prints the first word, the second word and the threshold, in
 * that order.
 *
 * @param threshold largest comparison result still counted as a match
 */
class WordMatcherDebug(val threshold: Debug[Double]) extends BinaryMatcher[Lev, Debug] {

  /** Curried predicate: first word, then second word, then the verdict. */
  def isCloseEnough: Debug[Lev] => Debug[Lev] => Debug[Boolean] =
    left => right => {
      // Read order (left, right, threshold) is kept: each read prints.
      val distance = left.value compare right.value
      val limit = threshold.value
      Debug(distance <= limit)
    }
}
// and create a String matcher using all of the above: | |
// implicit class WordMatcher_Lev_Eval(val stopWords: Eval[Set[Lev]]) extends WordMatcher[Lev, Eval] { | |
// val th | |
// } | |
/**
 * Alternative design (thinking out loud): do not make the wrapper `Ordered`;
 * give a wrapped `String` an explicit Levenshtein comparison method.
 *
 * @param value the wrapped string
 */
case class LevWrap(value: String) extends wrappers.Base.Wrapper[String] {
  import com.rockymadden.stringmetric.similarity.LevenshteinMetric

  /**
   * Result of `LevenshteinMetric.compare` between the wrapped string and
   * `that`.
   *
   * `that` is already a `String`, so it is handed to the metric directly;
   * there is no need to go through a `LevWrap` and unwrap it again.
   *
   * @param that the plain string to compare against
   * @return the metric's result, or `Int.MaxValue` when the metric yields none
   */
  def compareWithLev(that: String): Int =
    LevenshteinMetric.compare(value, that).getOrElse(Int.MaxValue)
}
/**
 * Implicit view from a plain `String` to `LevWrap`, so that `compareWithLev`
 * can be called directly on strings.
 *
 * @param s the string to wrap
 * @return `s` wrapped in a `LevWrap`
 */
implicit def stringToLev(s: String): LevWrap = new LevWrap(s)
/**
 * `Metric` instance for plain `String`s held under the `Identity`
 * representation, built on `LevWrap.compareWithLev` (reached through the
 * implicit `String` to `LevWrap` view).
 */
implicit object XX2 extends Metric[String, Identity] {

  /**
   * @param a first wrapped string
   * @param b second wrapped string
   * @return `a.value compareWithLev b.value` as a `Double`, wrapped in
   *         `Identity`; the Int-to-Double conversion is spelled out, not
   *         left to implicit numeric widening
   */
  def metric(a: Identity[String], b: Identity[String]): Identity[Double] =
    Identity((a.value compareWithLev b.value).toDouble)
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment