Created
February 23, 2017 11:40
-
-
Save lukashaertel/e3d337ab13c9a900c7f72c2336075b3d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package ktsandbox | |
import javax.xml.crypto.Data | |
/** | |
* A data point, where first is the hidden state, and second is the observed | |
* state. | |
*/ | |
typealias DataPoint = Pair<Int, Int> | |
/** | |
* Learns a HMM by grid searching the state matrix. | |
* @param emission The emission probabilities | |
* @param trainingSet The set of training sequences, as given by lists of | |
* pairs of hidden state to observed state | |
*/ | |
fun learnHMM( | |
emission: Matrix, | |
trainingSet: Set<List<DataPoint>>, | |
resolution: Int): HMM { | |
// Get hidden state count | |
val hs = emission.columns | |
// Prepare training sequences | |
val prepared = trainingSet.map { | |
it.map { it.first }.toIntArray() to it.map { it.second }.toIntArray() | |
} | |
// Return from the grid searched components the matrix with the highest | |
// likeliness to fit all training data | |
return markovMatrices(1, hs, resolution).flatMap { start -> | |
markovMatrices(hs, hs, resolution).flatMap { transition -> | |
markovMatrices(1, hs, resolution).map { end -> | |
hmm(start, transition, emission, end.trn) | |
} | |
} | |
}.minBy { hmm -> | |
prepared.sumByDouble { | |
it.first.sumByDouble { s -> | |
// XXX Implement with matrices instead of indices | |
// Predict the state by F/B algorithm | |
val predicted = hmm.evalState(s, *it.second) | |
.values | |
.withIndex() | |
.maxBy { it.value }!! | |
.index | |
// If prediction correct, return zero error, one otherwise | |
if (predicted == s) 0.0 else 1.0 | |
} | |
} | |
}!! | |
} | |
/** | |
* Enumerates all [n] lists that sum up to [s]. | |
* @param n The length of the resulting lists | |
* @param s The sum that is to be reached | |
* @return Returns an iterable of lists of integers | |
*/ | |
fun listsOfSum(n: Int, s: Int): Iterable<List<Int>> { | |
if (n == 1) | |
return listOf(listOf(s)) | |
else | |
return (0..s).flatMap { i -> | |
listsOfSum(n - 1, s - i).map { j -> | |
j + i | |
} | |
} | |
} | |
/** | |
* Returns all Markov matrices, that is, matrices where rows sum up to a | |
* probability of 1. | |
*/ | |
fun markovMatrices(rows: Int, columns: Int, resolution: Int): Iterable<Matrix> { | |
/** | |
* Enumerates all possible value combinations in the given search domain. | |
*/ | |
fun rowSpace() = | |
listsOfSum(columns, resolution).map { | |
it.map { it.toDouble() / resolution } | |
} | |
/** | |
* Enumerates all matrix value combinations in the given search domain. | |
*/ | |
fun valueSpace(remainingRows: Int): Iterable<List<Double>> { | |
if (remainingRows == 1) | |
return rowSpace() | |
else | |
return rowSpace().flatMap { l -> | |
valueSpace(remainingRows - 1).map { r -> | |
l + r | |
} | |
} | |
} | |
// Iterate value space and transform into matrices | |
return valueSpace(rows).map { Matrix(columns, it.toDoubleArray()) } | |
} | |
/** | |
* Assigner for matrix creation | |
*/ | |
class Assigner<T, U> { | |
// Backing for the assigner | |
private val backing = mutableMapOf<Pair<T, U>, Double>() | |
/** | |
* Assigns column [t] row [u] the [value]. | |
* @param t The column | |
* @param u The row | |
* @param value The value to assign | |
*/ | |
fun assign(t: T, u: U, value: Double) { | |
backing[t to u] = value | |
} | |
/** | |
* Builds a matrix using the given mapping. | |
* @param discretized The discretized collection | |
*/ | |
fun finalize(discretized: Discretized<T, U>): Matrix { | |
error("Not implemented") | |
} | |
} | |
/** | |
* Discretized arbitrary list of data points. | |
* @property list The original list | |
* @property discretized The discretized list | |
* @property firstMap Resolution of first to discrete | |
* @property secondMap Resolution of second to discrete | |
*/ | |
data class Discretized<T, U>( | |
val list: List<Pair<T, U>>, | |
val discretized: List<DataPoint>, | |
val firstMap: Map<T, Int>, | |
val secondMap: Map<U, Int>) { | |
/** | |
* Builds the matrix usign an assigner. | |
*/ | |
fun buildMatrix(assignBlock: Assigner<T, U>.() -> Unit): Matrix { | |
val assigner = Assigner<T, U>() | |
assigner.assignBlock() | |
return assigner.finalize(this) | |
} | |
/** | |
* Applies the discretization on another list | |
*/ | |
fun apply(list: List<Pair<T, U>>) = list.map { | |
firstMap[it.first]!! to secondMap[it.second]!! | |
} | |
} | |
fun <T, U> List<Pair<T, U>>.discretize(): Discretized<T, U> { | |
val firstMap = mutableMapOf<T, Int>() | |
val secondMap = mutableMapOf<U, Int>() | |
val result = mutableListOf<DataPoint>() | |
var tRun = 0 | |
var uRun = 0 | |
for ((t, u) in this) { | |
val ti = firstMap.computeIfAbsent(t, { tRun++ }) | |
val ui = secondMap.computeIfAbsent(u, { uRun++ }) | |
result += ti to ui | |
} | |
return Discretized(this, | |
result.toList(), | |
firstMap.toMap(), | |
secondMap.toMap()) | |
} | |
fun main(args: Array<String>) { | |
// First example | |
val a = listOf( | |
"rainy" to "umbrella", | |
"sunny" to "umbrella", | |
"sunny" to "no umbrella", | |
"sunny" to "no umbrella", | |
"sunny" to "no umbrella", | |
"sunny" to "no umbrella", | |
"rainy" to "no umbrella", | |
"rainy" to "umbrella") | |
// Second example | |
val b = listOf( | |
"rainy" to "umbrella", | |
"sunny" to "no umbrella", | |
"sunny" to "no umbrella", | |
"sunny" to "no umbrella", | |
"sunny" to "no umbrella", | |
"rainy" to "umbrella") | |
// Discretization for all data | |
val x = (a + b).discretize() | |
// Emission for all data | |
val e = x.buildMatrix { | |
assign("rainy", "umbrella", 1.0) | |
assign("sunny", "umbrella", 0.0) | |
assign("rainy", "no umbrella", 0.0) | |
assign("sunny", "no umbrella", 1.0) | |
} | |
// Learn HMM on discretized individual examples | |
val h = learnHMM(e, setOf(x.apply(a), x.apply(b)), 10) | |
println(h) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment