Created
August 9, 2014 09:21
-
-
Save CaffeinatedDave/8df38e1726ad62836fc6 to your computer and use it in GitHub Desktop.
West London Hack Night - Markov Chains
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scala.io.Source | |
import java.io.File | |
import java.util.Arrays | |
import scala.collection.mutable.ArrayBuffer | |
import scala.collection.immutable.HashMap | |
import scala.annotation.tailrec | |
/** | |
* Alice in Markov Chains for West London Hack Night | |
* | |
* Developed in 1.5 hours by Scala team. | |
* Then hacked a bit more by Dave. | |
*/ | |
object HackNight extends Application { | |
val map = HashMap[String, Node]() | |
val filename = new File("/Users/dchaston/Downloads/eclipse/Eclipse.app/Contents/MacOS/workspace/aliceInMarkovChains/lyrics.txt") | |
val regex = """[^a-z]""".r | |
val lines = Source.fromFile(filename).getLines.map(line => Left('start) :: line.split(" ").map(y => Right(regex.replaceAllIn(y.toLowerCase(), ""))).toList) | |
val words = lines.foldLeft(List[Either[Symbol, String]]())((acc, line) => | |
acc ++ (line ::: List(Left('end))) | |
) | |
val chains = somekindofparsingfunction(words.toList, map) | |
println(chains) | |
println(chains("START")) | |
val startNode = chains("START") | |
println(findNext(startNode, "")) | |
println(findNext(startNode, "")) | |
println(findNext(startNode, "")) | |
println(findNext(startNode, "")) | |
println(findNext(startNode, "")) | |
def somekindofparsingfunction(list: List[Either[Symbol, String]], nodemap: Map[String, Node]): Map[String, Node] = { | |
list match { | |
case Nil => nodemap | |
case h :: Nil => nodemap | |
case Left(symbol) :: Left(eh) :: t => { | |
// If end follows start, or start follows start, or start follows end, and we're here... we have other problems, fix them somewhere else.... | |
somekindofparsingfunction(t, nodemap) | |
} | |
case Left(symbol) :: Right(token) :: t => { | |
nodemap get "START" match { | |
case None => somekindofparsingfunction(Right(token) :: t, nodemap + ("START" -> Node("START", Map(token -> 1)))) | |
case Some(x) => { | |
x.incr(Right(token)) | |
somekindofparsingfunction(Right(token) :: t, nodemap) | |
} | |
} | |
} | |
case Right(token) :: Left(next) :: t => { | |
nodemap get token match { | |
case None => somekindofparsingfunction(t, nodemap + (token -> Node(token, Map("END" -> 1)))) | |
case Some(x) => { | |
x.incr(Left('end)) | |
somekindofparsingfunction(t, nodemap) | |
} | |
} | |
} | |
case Right(token) :: Right(next) :: t => { | |
nodemap get token match { | |
case None => somekindofparsingfunction(Right(next) :: t, nodemap + (token -> Node(token, Map(next -> 1)))) | |
case Some(x) => { | |
x.incr(Right(next)) | |
somekindofparsingfunction(Right(next) :: t, nodemap) | |
} | |
} | |
} | |
} | |
} | |
@tailrec | |
def findNext(node: Node, str: String): String = { | |
val len = node.meta.foldLeft(0)((acc, x) => acc + x._2) | |
val pos = Math.floor(Math.random() * len).toInt | |
val newStr = node.findNext | |
if (newStr != "END") { | |
val newNode = chains(newStr) | |
findNext(newNode, str + newStr + " ") | |
} else { | |
str.trim() | |
} | |
} | |
} | |
case class Node(value: String, var meta: Map[String, Int]) { | |
def incr(str: Either[Symbol, String]) = { | |
str match { | |
case Left(x) if x == 'end => meta += ("END" -> (meta.getOrElse("END", 0) + 1)) | |
case Right(x) => meta += (x -> (meta.getOrElse(x, 0) + 1)) | |
case _ => throw new Exception | |
} | |
} | |
def findNext(): String = { | |
val list = meta.flatMap(x => List.fill(x._2)(x._1)) | |
val startPos = Math.floor(Math.random() * list.size).toInt | |
list.drop(startPos).head | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment