Skip to content

Instantly share code, notes, and snippets.

// Conditional probability of each n-gram given its (n-1)-token prefix:
//   P(last token | prefix) = count(n-gram) / total count of all n-grams sharing that prefix.
// The per-prefix totals are computed once up front instead of rescanning the whole
// count table for every entry, which keeps the work linear in the number of distinct n-grams.
val ngramWithProbability = {
  // Total occurrences of all n-grams that start with the same n-1 tokens.
  val prefixTotals = ngramWithCount
    .groupBy { case (gram, _) => gram.take(n - 1) }
    .map { case (prefix, group) => prefix -> group.iterator.map(_._2).sum }

  ngramWithCount.map { case (gram, count) =>
    // Every gram's prefix is a key of prefixTotals by construction, so the lookup cannot fail.
    (gram, count.toDouble / prefixTotals(gram.take(n - 1)))
  }
}
// Map(List(koala, near) -> 1.0, List(blue, sky) -> 1.0, List(red, koala) -> 1.0, List(near, the) -> 1.0, List(the, blue) -> 0.5, List(The, blue) -> 1.0, List(sky, is) -> 1.0, List(the, red) -> 0.5, List(is, near) -> 1.0)
// Count how often each distinct n-gram occurs.
// A strict `map` is used rather than `mapValues`: on Scala 2.12 `mapValues` returns a lazy view
// that re-applies the function on every access, and on 2.13 it is deprecated and returns a MapView.
val ngramWithCount = ngram.groupBy(identity).map { case (gram, occurrences) => gram -> occurrences.size }
// Map(List(koala, near) -> 1, List(blue, sky) -> 2, List(red, koala) -> 1, List(near, the) -> 2, List(the, blue) -> 1, List(The, blue) -> 1, List(sky, is) -> 1, List(the, red) -> 1, List(is, near) -> 1)
// Order of the model: 2 = bigrams, 3 = trigrams, ...
val n = 2
// Every run of n consecutive tokens, in text order.
// `sliding` emits a single truncated window when there are fewer than n tokens
// (e.g. List("a").sliding(2) yields List(List("a"))); such a window is not an n-gram, so it is dropped.
val ngram = tokens.sliding(n).filter(_.size == n).toList
// List(List(The, blue), List(blue, sky), List(sky, is), List(is, near), List(near, the), List(the, red), List(red, koala), List(koala, near), List(near, the), List(the, blue), List(blue, sky))
// For bigrams
// sliding(2) advances one token at a time and yields each pair of adjacent tokens.
tokens.sliding(2).toList
// List(List(The, blue), List(blue, sky), List(sky, is), List(is, near), List(near, the), List(the, red), List(red, koala), List(koala, near), List(near, the), List(the, blue), List(blue, sky))
// For trigrams
// Same traversal with a window of three tokens, so there is one window fewer than for bigrams.
tokens.sliding(3).toList
// List(List(The, blue, sky), List(blue, sky, is), List(sky, is, near), List(is, near, the), List(near, the, red), List(the, red, koala), List(red, koala, near), List(koala, near, the), List(near, the, blue), List(the, blue, sky))
// For n-gram
// Placeholder: `...` stands for the window size to choose (2 and 3 are the cases shown above);
// it is not valid Scala as written.
val n = ...
// Sample text used throughout the walkthrough.
val source = "The blue sky is near the red koala near the blue sky"
// Tokenize on runs of whitespace rather than on a single space, so repeated spaces, tabs or
// newlines in the text do not produce empty tokens. The filter drops the empty string that
// `split` yields for leading whitespace or an empty source.
val tokens = source.split("\\s+").filter(_.nonEmpty).toList
// List(The, blue, sky, is, near, the, red, koala, near, the, blue, sky)
@Cowa
Cowa / ngrams.scala
Last active October 20, 2015 18:05
N-gram models in Scala - Step by step
// Tokenized text
val words = List("bonjour", "je", "suis", "ici", ".", "bonjour", "je", "suis", "là")
// 2-grams
val n = 2
// Every run of n consecutive tokens. `sliding` emits one truncated window when there are fewer
// than n tokens; that window is not an n-gram, so it is filtered out.
val ngram = words.sliding(n).filter(_.size == n).toList
// ngram: List[List[String]] = List(List(bonjour, je), List(je, suis), List(suis, ici), List(ici, .), List(., bonjour), List(bonjour, je), List(je, suis), List(suis, là))
// Occurrence count per distinct n-gram. A strict `map` replaces `mapValues`, which is a lazy view
// on Scala 2.12 (re-evaluated on every access) and deprecated on 2.13.
val ngramWithCount = ngram.groupBy(identity).map { case (gram, occurrences) => gram -> occurrences.size }
// ngramWithCount: Map[List[String],Int] = Map(List(bonjour, je) -> 2, List(je, suis) -> 2, List(suis, ici) -> 1, List(suis, là) -> 1, List(ici, .) -> 1, List(., bonjour) -> 1)
@Cowa
Cowa / testES6.js
Last active August 29, 2015 14:27
class Car {
constructor(make, wheels) {
this.make = make;
this.wheels = wheels;
this.currentSpeed = 25;
}
printCurrentSpeed() {
console.log(this.make + ' is going ' + this.currentSpeed + ' mph.');
}