Forward-Backward Algorithm for Part-of-Speech Tagging
// Example: distinguishing between vowels and consonants
val corpus =
  Seq( // I swear these are all English words
    "antidisestablishmentarian", "ablutophobia", "automatonophobia", "autotonsorialist",
    "arachibutyrophobia", "automysophobia", "batrachophagous", "ballistocardiograph",
    "blandiloquence", "brachydactylous", "bathythermograph", "cacodemomania",
    "caesaropapism", "catapedamania", "cheiloproclitic", "chronosynchronicity",
    "dendrochronology", "deorsumversion", "dermatoglyphics", "dolichocephalic",
    "dysmorphophobia", "eellogofusciouhipoppokunurious", "electroencephalograph", "epiphenomenalism",
    "electrodynamometer", "ephemeromorph", "floccinaucinihilipilification", "forisfamiliate",
    "flagelliferous", "fibriophobia", "frumentaceous", "flibbertigibbet",
    "gynotikolobomassophile", "germanophilia", "gluconeogenesis", "graminivorous",
    "grammaticaster", "haematogenesis", "haussmannize", "helioseismology",
    "honorificabilitudinity", "hydrometeorology", "hypercatalectic", "iatromathematics",
    "ichthyophagous", "immunopathology", "interramification", "incomprehensibleness",
    "juglandaceous", "japanophobia", "juglandaceous", "japanophilia",
    "jaculiferous", "kakorrhaphiophobia", "kephalonomancy", "katathermometer",
    "kosmikophobia", "keraunophobia", "logizomechanophobia", "libanophorous",
    "lepidopterology", "lautenclavicymbel", "latitudinarianism", "medomalacuphobia",
    "macrocephalous", "margaritomancy", "maschalephidrosis", "micropalaeontology",
    "nucleomituphobia", "neopharmaphobia", "nyctohylophobia", "nigroglobulate",
    "neurophysiology", "oneirogmophobia", "ophthalmophobia", "otorhinolaryngology",
    "orphanotrophism", "ophthalmoscope", "paraskavedekatriaphobia", "palaeoanthropology",
    "penecontemporaneous", "pneumatophilosophy", "podobromhidrosis", "quadragesimarian",
    "quomodocunquize", "quoddamodotative", "quinquagenarian", "quasquicentennial",
    "rhabdophobia", "radiometeorograph", "rhinotillexomania", "representationalism",
    "radappertization", "siderodromophobia", "spermatophobia", "sacramentarianism",
    "spectroheliokinematograph", "sphygmomanometer", "telephonophobia", "theologicophobia",
    "triskaidekaphobia", "thanatognomonic", "theophilanthropism", "triboluminescence",
    "ultramicroscope", "umbraculiform", "ubiquitarianism", "unconsentaneous",
    "utilitarianism", "venustraphobia", "verminophobia", "vitricophobic",
    "volumenometer", "voicespondence", "walloonphobia", "weltanschauung",
    "whigmaleerie", "weatherometer", "whippoorwill", "xenodocheionology",
    "xanthophobia", "xenoglossophobia", "xeroradiography", "xanthocyanopsy",
    "yogibogeybox", "yarborough", "yarnwindle", "yeomanette",
    "yttriferous", "zemmiphobia", "zalambdodont", "zeusophobia", "zenzizenzizenzic"
  ).map(_.iterator.map(_.toString).toIndexedSeq)

val nTag = 2 // 2 tags: vowel or consonant
val (model, prob) = computeForwardBackward(corpus, nTag)

println("Best corpus probability = 2^" + prob)

model.b.zipWithIndex.drop(1).foreach({
  case (words, i) => {
    println("P(Letter|Tag" + i + "):")
    words.zipWithIndex.map({
      case (logP, wordIdx) => (logP, model.words(wordIdx))
    }).sortBy(-_._1).foreach({
      case (logP, word) =>
        println(word + ":\t" + math.pow(2, logP))
    })
    println()
  }
})

/*
Output:
P(Letter|Tag1):
n: 0.09837982985409412
r: 0.09747466863106026
h: 0.08963206738930135
p: 0.0811569848839846
t: 0.07857387790975762
l: 0.07634134515642844
...
P(Letter|Tag2):
o: 0.284764667819875
i: 0.21201862830500298
a: 0.17999825418893892
e: 0.16713940843565922
u: 0.05067696417951677
y: 0.04177186447424547
...
*/

corpus.foreach(x => {
  println(x.mkString)
  println(Model.computePosTags(x, model)._2.mkString)
})

/*
Output:
antidisestablishmentarian
2112121211211211121121221
ablutophobia
211212112121
automatonophobia
1212121212112121
autotonsorialist
1212121121221211
arachibutyrophobia
212112121212112121
...
*/
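Judging from the emission probabilities printed above, in this run Tag2 puts almost all of its mass on o, i, a, e, u and y, while Tag1 favors consonants such as n, r, h, p, t and l: the two hidden states recover the vowel/consonant distinction with no labeled data (which state plays which role can swap between runs, since the parameters are initialized randomly). As a small follow-up sketch (not part of the original gist), the trained model can also tag a word that never occurs in the corpus, as long as every letter in it does:

// Hypothetical held-out word: "photograph" is not in the corpus above, but each
// of its letters is, so model.wordToIndex can map every character.
val heldOut = "photograph".iterator.map(_.toString).toIndexedSeq
println(heldOut.mkString)
println(Model.computePosTags(heldOut, model)._2.mkString)

The implementation of computeForwardBackward, the Model class and the forward-backward routines follows.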
import collection.mutable

/**
 * Compute the base-2 logarithm
 */
def log2(d: Double) = math.log(d) / math.log(2.0)
/**
 * Given l1 = log2(x) and l2 = log2(y), compute an approximation of log2(x + y)
 * without leaving log space
 */
def logPlus(l1: Double, l2: Double) = {
  assert(!l1.isNaN && !l2.isNaN)
  val l_larger = math.max(l1, l2)
  val l_smaller = math.min(l1, l2)
  if (l_smaller.isNegInfinity) l_larger
  // If the smaller term is more than 32 bits below the larger one,
  // its contribution is negligible and the correction term is skipped
  else if (l_larger - l_smaller > 32) l_larger
  else l_larger + log2(1 + math.pow(2, l_smaller - l_larger))
}
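// Illustrative example (not in the original gist): with base-2 logs,
// logPlus(log2(0.5), log2(0.25)) = -1 + log2(1 + 2^(-1)) = log2(0.75),
// i.e. the two probabilities are summed without ever leaving log space.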
/**
 * Normalize the given vector of base-2 log values, so that 2 raised to its elements sums to 1
 */
def normalizeLog(log_vec: IndexedSeq[Double]) = {
  val log_sum = log_vec.reduce(logPlus)
  log_vec.map(_ - log_sum)
}

/**
 * Create a random log-probability vector: 2 raised to its elements is positive and sums to 1
 */
def randomLogProbVector(size: Int) =
  normalizeLog(IndexedSeq.fill(size)(util.Random.nextDouble()))

/**
 * Create a random matrix of log probabilities, each row of which sums to 1 in probability space
 */
def randomProbLogMatrix(nRows: Int, nColumns: Int) =
  IndexedSeq.fill(nRows)(randomLogProbVector(nColumns))
/**
 * Parameters for part-of-speech tagging
 * @param words The vocabulary.
 *              The index of each word corresponds to the word index in b
 * @param nTag Number of distinct tags
 * @param t t(tag1)(tag2) = log2 P(tag2 | tag1), i.e. a tag bi-gram model
 * @param b b(tag)(word) = log2 P(word | tag)
 */
case class Model(words: IndexedSeq[String], nTag: Int,
                 t: IndexedSeq[IndexedSeq[Double]],
                 b: IndexedSeq[IndexedSeq[Double]]) {
  assert(words.distinct.size == words.size)

  lazy val wordToIndex = words.zipWithIndex.toMap

  /**
   * Tags are indexed from 1 to nTag.
   * 0 represents the start and the end of a tag sequence.
   */
  val tags = 1 to nTag

  def params = (words, nTag, t, b)
}
object Model {
  /**
   * Create a model with random parameters for random restarts.
   */
  def newRandomModel(words: IndexedSeq[String], nTag: Int) =
    Model(words, nTag,
      (Double.NaN +: randomLogProbVector(nTag)) +: randomProbLogMatrix(nTag, nTag + 1),
      mutable.IndexedSeq.fill(words.size)(Double.NaN) +: randomProbLogMatrix(nTag, words.size))
  /**
   * Construct a lattice and compute the best part-of-speech tagging using Viterbi decoding
   * @return The lattice and the best path
   */
  def computePosTags(example: IndexedSeq[Int], model: Model) = {
    val (_, nTag, t, b) = model.params

    def newLattice[T](v: T) = IndexedSeq.fill(example.size)(
      mutable.IndexedSeq.fill(nTag + 1)(v)
    )
    val p = newLattice(Double.NaN)
    val track = newLattice(0)

    for (i <- 0 until example.size; tag <- model.tags) {
      if (i == 0) {
        p(i)(tag) = t(0)(tag) + b(tag)(example(i))
      }
      else {
        val (prevP, prevTag) = model.tags.map(prev_tag =>
          p(i - 1)(prev_tag) + t(prev_tag)(tag)
        ).zip(model.tags).maxBy(_._1)
        p(i)(tag) = prevP + b(tag)(example(i))
        track(i)(tag) = prevTag
      }
    }

    val path = {
      // Note: the transition to the end marker, t(tag)(0), is not scored here
      val (_, lastTag) = p(example.size - 1).zipWithIndex.drop(1).maxBy(_._1)
      def buildPath(i: Int, tag: Int, accu: List[Int] = Nil): IndexedSeq[Int] =
        if (i == 0) (tag :: accu).toIndexedSeq
        else buildPath(i - 1, track(i)(tag), tag :: accu)
      buildPath(example.size - 1, lastTag)
    }

    (p.map(_.toIndexedSeq), path)
  }
  /**
   * Same as above, but takes the example as a sequence of word strings.
   * The dummy ClassManifest type parameter only serves to give this overload a
   * signature distinct from the Int-based one after type erasure.
   * @return The lattice and the best path
   */
  def computePosTags[X: ClassManifest](example: IndexedSeq[String],
                                       model: Model): (IndexedSeq[IndexedSeq[Double]], IndexedSeq[Int]) =
    computePosTags(example.map(model.wordToIndex), model)
  /**
   * Construct a lattice and compute the α values (forward),
   * where α(i)(tag) = log2 P(w(0) .. w(i), tag(i) = tag)
   * @return α and the log probability of the example
   */
  def computeΑ(example: IndexedSeq[Int], model: Model) = {
    val (_, nTag, t, b) = model.params
    val α = IndexedSeq.fill(example.size)(
      mutable.IndexedSeq.fill(nTag + 1)(Double.NaN)
    )
    for (i <- 0 until example.size; tag <- model.tags) {
      α(i)(tag) =
        if (i == 0)
          t(0)(tag) + b(tag)(example(i))
        else
          model.tags.map(prev_tag =>
            α(i - 1)(prev_tag) + t(prev_tag)(tag)
          ).reduce(logPlus) + b(tag)(example(i))
    }
    val α_end = model.tags.map(tag => α(example.size - 1)(tag) + t(tag)(0)).reduce(logPlus)
    (α.map(_.toIndexedSeq), α_end)
  }
  /**
   * Construct a lattice and compute the β values (backward),
   * where β(i)(tag) = log2 P(w(i+1) .. w(n-1), end | tag(i) = tag)
   * @return β and the log probability of the example
   */
  def computeΒ(example: IndexedSeq[Int], model: Model) = {
    val (_, nTag, t, b) = model.params
    val β = IndexedSeq.fill(example.size)(
      mutable.IndexedSeq.fill(nTag + 1)(Double.NaN)
    )
    for (i <- (example.size - 1) to 0 by -1; tag <- model.tags) {
      β(i)(tag) =
        if (i == example.size - 1)
          t(tag)(0)
        else model.tags.map(next_tag =>
          t(tag)(next_tag) + b(next_tag)(example(i + 1)) + β(i + 1)(next_tag)
        ).reduce(logPlus)
    }
    val β_start = model.tags.map(tag => t(0)(tag) + b(tag)(example(0)) + β(0)(tag)).reduce(logPlus)
    (β.map(_.toIndexedSeq), β_start)
  }
}
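/*
 * Exposition added here (not in the original gist): the partial counts accumulated
 * in computeForwardBackwardImpl below are the usual Baum-Welch posteriors, kept in
 * log2 space and divided (i.e. subtracted) by each example's total log probability:
 *
 *   transition posterior  ξ(i)(tag1, tag2) = α(i)(tag1) + t(tag1)(tag2) + b(tag2)(w(i+1)) + β(i+1)(tag2) - logProb
 *   state posterior       γ(i)(tag)        = α(i)(tag) + β(i)(tag) - logProb
 *
 * c_t log-sums the transition posteriors (expected transition counts, including the
 * start and end transitions), and c_b log-sums the state posteriors per emitted word
 * (expected emission counts); normalizing each row yields the re-estimated t and b.
 */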
/**
 * The recursive helper that runs EM (Baum-Welch) iterations for a single random restart,
 * re-estimating the model until isConverged returns true
 */
@annotation.tailrec
def computeForwardBackwardImpl(indexedCorpus: Seq[IndexedSeq[Int]],
                               model: Model, nIter: Int,
                               prevLogJointProb: Double,
                               isConverged: (Int, Double, Double) => Boolean): (Model, Double) = {
  import Model._
  val (words, nTag, t, b) = model.params

  val ex_α_β_logProb = indexedCorpus.map(ex => {
    val (α, α_end) = computeΑ(ex, model)
    val (β, β_start) = computeΒ(ex, model)
    // The forward and backward passes compute the same total log probability
    assert(math.abs(α_end - β_start) < 1e-6)
    (ex, α, β, α_end)
  })
  val logJointProb = ex_α_β_logProb.map(_._4).reduce(_ + _)

  if (nIter != 0 && isConverged(nIter, prevLogJointProb, logJointProb))
    (model, logJointProb)
  else {
    val new_model = {
      // Container for collecting partial counts for t
      val c_t = IndexedSeq.fill(nTag + 1)(mutable.IndexedSeq.fill(nTag + 1)(Double.NegativeInfinity))
      // Container for collecting partial counts for b
      val c_b =
        mutable.IndexedSeq.fill(words.size)(Double.NaN) +:
          IndexedSeq.fill(nTag)(mutable.IndexedSeq.fill(words.size)(Double.NegativeInfinity))

      ex_α_β_logProb.foreach({
        case (ex, α, β, logProb) =>
          // For each example, collect partial counts for t
          for (tag1 <- model.tags) {
            c_t(0)(tag1) =
              logPlus(c_t(0)(tag1),
                t(0)(tag1) + b(tag1)(ex(0)) + β(0)(tag1) - logProb)
            c_t(tag1)(0) =
              logPlus(c_t(tag1)(0),
                α(ex.size - 1)(tag1) + t(tag1)(0) - logProb)
            for (i <- 0 to ex.size - 2; tag2 <- model.tags) {
              c_t(tag1)(tag2) =
                logPlus(c_t(tag1)(tag2),
                  α(i)(tag1) + t(tag1)(tag2) + b(tag2)(ex(i + 1)) + β(i + 1)(tag2) - logProb)
            }
          }
          // For each example, collect partial counts for b
          for (tag <- model.tags; i <- 0 until ex.size) {
            c_b(tag)(ex(i)) =
              logPlus(c_b(tag)(ex(i)),
                α(i)(tag) + β(i)(tag) - logProb)
          }
      })

      // Compute the revised t and b by normalizing the partial counts.
      // The 0.0 slot for t(0)(0) and the NaN row for b(0) are never read.
      val new_t = (0.0 +: normalizeLog(c_t.head.drop(1))) +: c_t.tail.map(normalizeLog)
      val new_b = mutable.IndexedSeq.fill(words.size)(Double.NaN) +: c_b.tail.map(normalizeLog)
      Model(words, nTag, new_t, new_b)
    }
    computeForwardBackwardImpl(indexedCorpus, new_model, nIter + 1, logJointProb, isConverged)
  }
}
/**
 * Compute parameters for unsupervised POS tagging
 * @param corpus A collection of training examples
 * @param nTag Number of tags
 * @param isConverged A function for testing convergence.
 *                    Its three parameters are the current iteration number,
 *                    the log probability of the corpus from the previous iteration,
 *                    and the log probability from the current iteration
 * @param nRandomRestart Number of random restarts
 * @param onEachRestartCompleted Called asynchronously after each random restart completes.
 *                               Its three parameters are the current restart round,
 *                               the resulting model, and the log probability of the corpus
 * @return The trained model and the log probability it assigns to the corpus
 */
def computeForwardBackward(corpus: Seq[IndexedSeq[String]],
                           nTag: Int,
                           isConverged: (Int, Double, Double) => Boolean = (i, l0, l1) => l1 < l0 && (l1 / l0) > 0.99999,
                           nRandomRestart: Int = 10,
                           onEachRestartCompleted: (Int, Model, Double) => Unit = null) = {
  import scala.actors.Futures._

  val words = corpus.flatten.distinct.toIndexedSeq
  val wordToIndex = words.zipWithIndex.toMap
  val indexedCorpus = corpus.map(_.map(wordToIndex).toIndexedSeq)

  val (bestModel, bestLogJointProb) = (1 to nRandomRestart).map(currRound => {
    val initModel = Model.newRandomModel(words, nTag)
    val (model, logJointProb) =
      computeForwardBackwardImpl(
        indexedCorpus, initModel, 0, Double.NegativeInfinity, isConverged)
    if (onEachRestartCompleted != null) {
      // Report this restart's result asynchronously on a scala.actors future
      future {
        onEachRestartCompleted(currRound, model, logJointProb)
      }
    }
    (model, logJointProb)
  }).maxBy(_._2)

  (bestModel, bestLogJointProb)
} |
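The example at the top only uses the default arguments, so here is a minimal usage sketch (not part of the original gist) showing how the optional parameters documented above might be exercised; the corpus value is assumed to be the one defined in the example file, and the other names are invented for illustration:

val (sketchModel, sketchLogProb) = computeForwardBackward(
  corpus,
  nTag = 2,
  // Hypothetical convergence test: stop after 50 iterations, or once the
  // gain in log joint probability drops below 0.01 bits
  isConverged = (iter: Int, prev: Double, curr: Double) =>
    iter >= 50 || math.abs(curr - prev) < 0.01,
  nRandomRestart = 5,
  // Report each restart as it finishes (invoked asynchronously on a scala.actors future)
  onEachRestartCompleted = (round: Int, m: Model, logP: Double) =>
    println("Restart " + round + " finished, corpus probability = 2^" + logP)
)
println("Best corpus probability in the sketch run = 2^" + sketchLogProb)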