dispalt · November 14, 2017 23:29
diff --git a/step1.scala b/step1.scala
 import $ivy.`org.clulab::processors-main:6.1.3`
 import $ivy.`org.clulab::processors-corenlp:6.1.3`
 import $ivy.`org.clulab::processors-odin:6.1.3`
 import $ivy.`org.clulab::processors-modelsmain:6.1.3`
 import $ivy.`org.clulab::processors-modelscorenlp:6.1.3`
 import $ivy.`com.lihaoyi::pprint:0.5.3`

 import org.clulab.odin.ExtractorEngine
 import org.clulab.processors.Processor
 import org.clulab.processors.clu.CluProcessor
 import org.clulab.processors.corenlp.CoreNLPProcessor
 import org.clulab.processors.fastnlp.FastNLPProcessor
 import org.clulab.processors.shallownlp.ShallowNLPProcessor
 import pprint.pprintln
diff --git a/step2.scala b/step2.scala
 // Stanford CoreNLP-backed processor.
 val proc: Processor = new CoreNLPProcessor()
 // Also Stanford, but neural-network based (faster).
 // val proc: Processor = new FastNLPProcessor()
 // CLU lab implementation (faster, but with fewer features).
 // val proc: Processor = new CluProcessor()
 val testPhrase = "Hello the world is not flat."
 // The boolean flag is passed by name so the call site says what `true` means.
 val doc = proc.annotate(testPhrase, keepText = true)
diff --git a/step3.scala b/step3.scala

 import org.clulab.odin.{EventMention, Mention, RelationMention, TextBoundMention}
 import org.clulab.processors.{Document, Sentence}

 /** Console pretty-printer for Odin mentions, grouped by the sentence they were found in. */
 object NLPPrinter {

   /** Prints every sentence of `doc` followed by the mentions extracted from it.
     *
     * For each sentence this prints its index, its text, its tokens, its syntactic
     * dependencies (when present), then the mentions that do not match the label
     * "Event" under "entities:" and the ones that do under "events:".
     *
     * @param mentions mentions to display; each is attributed to a sentence through
     *                 its `sentence` index
     * @param doc      document whose sentences are walked in order
     */
   def displayMentions(mentions: Seq[Mention], doc: Document): Unit = {
     // Sentences without any mention fall back to an empty list instead of throwing.
     val mentionsBySentence =
       mentions.groupBy(_.sentence).mapValues(_.sortBy(_.start)).withDefaultValue(Nil)

     for ((s, i) <- doc.sentences.zipWithIndex) {
       println(s"sentence #$i")
       println(s.getSentenceText)
       println("Tokens: " + describeTokens(s))
       printSyntacticDependencies(s)
       println()

       // Primary order is the label; the earlier sort by start position is kept
       // for mentions that share a label.
       val sortedMentions     = mentionsBySentence(i).sortBy(_.label)
       val (events, entities) = sortedMentions.partition(_ matches "Event")
       // Text-bound mentions are listed before the remaining (relation-like) ones.
       val (tbs, rels)        = entities.partition(_.isInstanceOf[TextBoundMention])
       val sortedEntities     = tbs ++ rels.sortBy(_.label)
       println("entities:")
       sortedEntities foreach displayMention

       println()
       println("events:")
       events foreach displayMention
       println("=" * 50)
     }
   }

   /** Renders the tokens of `s` as a comma-separated list of `(index, word, tag)` tuples.
     *
     * When the sentence carries no tags the tuples are reduced to `(index, word)`
     * rather than failing on the missing value.
     */
   private def describeTokens(s: Sentence): String = s.tags match {
     case Some(tags) => (s.words.indices, s.words, tags).zipped.mkString(", ")
     case None       => s.words.indices.map(i => (i, s.words(i))).mkString(", ")
   }

   /** Prints the dependency graph of `s`; prints nothing when the sentence has none. */
   def printSyntacticDependencies(s: Sentence): Unit =
     s.dependencies.foreach(deps => println(deps.toString))

   /** Prints one mention: its labels and text, the rule that found it, its runtime
     * class name, and type-specific details (trigger and/or arguments).
     */
   def displayMention(mention: Mention): Unit = {
     val boundary = s"\t${"-" * 30}"
     println(s"${mention.labels} => ${mention.text}")
     println(boundary)
     println(s"\tRule => ${mention.foundBy}")
     // Keep only the part after the last '.' of the runtime class description.
     val mentionType = mention.getClass.toString.split("""\.""").last
     println(s"\tType => $mentionType")
     println(boundary)
     mention match {
       case tb: TextBoundMention =>
         println(s"\t${tb.labels.mkString(", ")} => ${tb.text}")
       case em: EventMention =>
         println(s"\ttrigger => ${em.trigger.text}")
         displayArguments(em)
       case rel: RelationMention =>
         displayArguments(rel)
       case _ => ()
     }
     println(s"$boundary\n")
   }

   /** Prints every argument of `b`, one line per argument mention, as
     * `name (labels) => text`.
     */
   def displayArguments(b: Mention): Unit = {
     b.arguments foreach {
       case (argName, ms) =>
         ms foreach { v =>
           println(s"\t$argName ${v.labels.mkString("(", ", ", ")")} => ${v.text}")
         }
     }
   }
 }
diff --git a/step4.scala b/step4.scala
 // Odin grammar written as YAML: a single rule named "obj" whose matches are labelled `Object`.
 // NOTE(review): no line of the literal begins with a `|` margin, so `.stripMargin` appears
 // to leave the text unchanged; the YAML nesting relies on the literal's own indentation.
 val rules = """
     rules:
       - name: "obj"
         label: Object
         priority: 1
         type: token
         unit: "tag"
         pattern: |
           /^N/* [incoming=/dobj/] /^N/*
     
     
     
 """.stripMargin
 // Build the extraction engine from the YAML rules above.
 val engine = ExtractorEngine(rules)
 // Try the rule on a sample question.
 val phrase = "where do I go to file a claim?"

 // Annotate the phrase, run the rule over the result and print what was found.
 val doc = proc.annotate(phrase)
 val mentions = engine.extractFrom(doc)
 NLPPrinter.displayMentions(mentions, doc)
	import $ivy.`org.clulab::processors-main:6.1.3`
	import $ivy.`org.clulab::processors-corenlp:6.1.3`
	import $ivy.`org.clulab::processors-odin:6.1.3`
	import $ivy.`org.clulab::processors-modelsmain:6.1.3`
	import $ivy.`org.clulab::processors-modelscorenlp:6.1.3`
	import $ivy.`com.lihaoyi::pprint:0.5.3`

	import org.clulab.odin.ExtractorEngine
	import org.clulab.processors.Processor
	import org.clulab.processors.clu.CluProcessor
	import org.clulab.processors.corenlp.CoreNLPProcessor
	import org.clulab.processors.fastnlp.FastNLPProcessor
	import org.clulab.processors.shallownlp.ShallowNLPProcessor
	import pprint.pprintln
	// Stanford CoreNLP-backed processor.
	val proc: Processor = new CoreNLPProcessor()
	// Also Stanford, but neural-network based (faster).
	// val proc: Processor = new FastNLPProcessor()
	// CLU lab implementation (faster, but with fewer features).
	// val proc: Processor = new CluProcessor()
	val testPhrase = "Hello the world is not flat."
	// The boolean flag is passed by name so the call site says what `true` means.
	val doc = proc.annotate(testPhrase, keepText = true)

	import org.clulab.odin.{EventMention, Mention, RelationMention, TextBoundMention}
	import org.clulab.processors.{Document, Sentence}

	/** Console pretty-printer for Odin mentions, grouped by the sentence they were found in. */
	object NLPPrinter {

	  /** Prints every sentence of `doc` followed by the mentions extracted from it.
	    *
	    * For each sentence this prints its index, its text, its tokens, its syntactic
	    * dependencies (when present), then the mentions that do not match the label
	    * "Event" under "entities:" and the ones that do under "events:".
	    *
	    * @param mentions mentions to display; each is attributed to a sentence through
	    *                 its `sentence` index
	    * @param doc      document whose sentences are walked in order
	    */
	  def displayMentions(mentions: Seq[Mention], doc: Document): Unit = {
	    // Sentences without any mention fall back to an empty list instead of throwing.
	    val mentionsBySentence =
	      mentions.groupBy(_.sentence).mapValues(_.sortBy(_.start)).withDefaultValue(Nil)

	    for ((s, i) <- doc.sentences.zipWithIndex) {
	      println(s"sentence #$i")
	      println(s.getSentenceText)
	      println("Tokens: " + describeTokens(s))
	      printSyntacticDependencies(s)
	      println()

	      // Primary order is the label; the earlier sort by start position is kept
	      // for mentions that share a label.
	      val sortedMentions     = mentionsBySentence(i).sortBy(_.label)
	      val (events, entities) = sortedMentions.partition(_ matches "Event")
	      // Text-bound mentions are listed before the remaining (relation-like) ones.
	      val (tbs, rels)        = entities.partition(_.isInstanceOf[TextBoundMention])
	      val sortedEntities     = tbs ++ rels.sortBy(_.label)
	      println("entities:")
	      sortedEntities foreach displayMention

	      println()
	      println("events:")
	      events foreach displayMention
	      println("=" * 50)
	    }
	  }

	  /** Renders the tokens of `s` as a comma-separated list of `(index, word, tag)` tuples.
	    *
	    * When the sentence carries no tags the tuples are reduced to `(index, word)`
	    * rather than failing on the missing value.
	    */
	  private def describeTokens(s: Sentence): String = s.tags match {
	    case Some(tags) => (s.words.indices, s.words, tags).zipped.mkString(", ")
	    case None       => s.words.indices.map(i => (i, s.words(i))).mkString(", ")
	  }

	  /** Prints the dependency graph of `s`; prints nothing when the sentence has none. */
	  def printSyntacticDependencies(s: Sentence): Unit =
	    s.dependencies.foreach(deps => println(deps.toString))

	  /** Prints one mention: its labels and text, the rule that found it, its runtime
	    * class name, and type-specific details (trigger and/or arguments).
	    */
	  def displayMention(mention: Mention): Unit = {
	    val boundary = s"\t${"-" * 30}"
	    println(s"${mention.labels} => ${mention.text}")
	    println(boundary)
	    println(s"\tRule => ${mention.foundBy}")
	    // Keep only the part after the last '.' of the runtime class description.
	    val mentionType = mention.getClass.toString.split("""\.""").last
	    println(s"\tType => $mentionType")
	    println(boundary)
	    mention match {
	      case tb: TextBoundMention =>
	        println(s"\t${tb.labels.mkString(", ")} => ${tb.text}")
	      case em: EventMention =>
	        println(s"\ttrigger => ${em.trigger.text}")
	        displayArguments(em)
	      case rel: RelationMention =>
	        displayArguments(rel)
	      case _ => ()
	    }
	    println(s"$boundary\n")
	  }

	  /** Prints every argument of `b`, one line per argument mention, as
	    * `name (labels) => text`.
	    */
	  def displayArguments(b: Mention): Unit = {
	    b.arguments foreach {
	      case (argName, ms) =>
	        ms foreach { v =>
	          println(s"\t$argName ${v.labels.mkString("(", ", ", ")")} => ${v.text}")
	        }
	    }
	  }
	}
	// Odin grammar written as YAML: a single rule named "obj" whose matches are labelled `Object`.
	// Each line carries an explicit `|` margin that `stripMargin` removes, so the YAML nesting
	// is fixed by the text after the margin and survives re-indentation of this source file.
	// (The last `|` on the `pattern:` line is YAML's block-scalar marker, not a margin.)
	val rules =
	  """|rules:
	     |  - name: "obj"
	     |    label: Object
	     |    priority: 1
	     |    type: token
	     |    unit: "tag"
	     |    pattern: |
	     |      /^N/* [incoming=/dobj/] /^N/*
	     |""".stripMargin
	// Build the extraction engine from the YAML rules above.
	val engine = ExtractorEngine(rules)
	// Let's see if this works
	val phrase = "where do I go to file a claim?"

	// Uses its own name because a top-level `doc` is already defined earlier in this script;
	// a second `val doc` would clash when the whole listing is run as one script.
	val phraseDoc = proc.annotate(phrase)
	val mentions = engine.extractFrom(phraseDoc)
	NLPPrinter.displayMentions(mentions, phraseDoc)