Skip to content

Instantly share code, notes, and snippets.

@MishaelRosenthal
Created April 5, 2018 22:29
Show Gist options
  • Select an option

  • Save MishaelRosenthal/3d3870eea3a8d14aa960e41bd9e46637 to your computer and use it in GitHub Desktop.

Select an option

Save MishaelRosenthal/3d3870eea3a8d14aa960e41bd9e46637 to your computer and use it in GitHub Desktop.
package com.twitter.mrosenthal.timelines.adhoc.dataset_transform
import scala.util.Random
object OrderingCTRProbabilities extends App {
val rand = new Random(17)
def insertionSort[T](seq: Seq[T])(insertLocationFunc: (T, Seq[T]) => Int)(implicit ord: Ordering[T]): Seq[T] = {
def insert(seq: Seq[T], location: Int, elem: T): Seq[T] = {
val (prefix, suffix) = seq.splitAt(location)
// TODO: Consider the efficiency of the bellow
val res = (prefix :+ elem) ++ suffix
res
}
def sortUsingRIMRec(acc: Seq[T], remaining: Seq[T]): Seq[T] = {
remaining.headOption match {
case Some(elem) =>
val insertLocation = insertLocationFunc(elem, acc)
sortUsingRIMRec(insert(acc, insertLocation, elem), remaining.tail)
case _ => acc
}
}
val (headSeq, tail) = seq.sorted(ord).splitAt(1)
sortUsingRIMRec(headSeq, tail)
}
def probebalisticInsertionLocation(prob: Double, probs: Seq[Double]): Int = {
def isSucc(prob: Double): Boolean = rand.nextDouble() < prob
val numOfSuccesses = probs.count(isSucc)
val numFailures = probs.size - numOfSuccesses
if(isSucc(prob)) {
numFailures + rand.nextInt(numOfSuccesses + 1)
} else {
rand.nextInt(numFailures + 1)
}
}
/**
* http://www.cs.utoronto.ca/~cebly/Papers/LuBoutilier_icml11.pdf
* https://www.researchgate.net/publication/24063477_The_repeated_insertion_model_for_rankings_Missing_link_between_two_subset_choice_models
*/
def sortUsingRIM[T](seq: Seq[T])(extractProb: T => Double): Seq[T] = {
insertionSort(seq){
case (elem, prefix) => probebalisticInsertionLocation(extractProb(elem), prefix.map(extractProb))
}(Ordering.by(extractProb))
}
/**
* https://cs.stackexchange.com/questions/73795/sorting-in-a-probabilistic-order
*/
def mallowInsertionLocation(phi: Double, size: Int): Int = {
val phiPowSize = math.pow(phi, size)
val sampledTimesDenominator = rand.nextDouble() * (1.0 - phiPowSize)
val index = Iterator.from(0).take(size)
.map(i => 1 - math.pow(phi, i + 1))
.indexWhere(sampledTimesDenominator < _)
size - index - 1
}
def sortUsingMallow[T : Ordering](seq: Seq[T])(phi: Double): Seq[T] = {
insertionSort(seq) { case (_, prefix) => mallowInsertionLocation(phi, prefix.size + 1) }
}
val list = (0 to 100) ++ List.fill(5)(20)
println(list.sorted.mkString("\n"))
/*println("------------------ sortUsingRIM")
println(sortUsingRIM(list)(_._2).mkString("\n"))*/
val phi = 0.7
println(s"------------------ sortUsingMallow $phi")
println(sortUsingMallow(list)(phi).mkString("\n"))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment