Skip to content

Instantly share code, notes, and snippets.

@dispalt
Last active November 14, 2017 23:29
Show Gist options
  • Save dispalt/e309bdf7579a2162c84f675f4d6c264b to your computer and use it in GitHub Desktop.
Save dispalt/e309bdf7579a2162c84f675f4d6c264b to your computer and use it in GitHub Desktop.
import $ivy.`org.clulab::processors-main:6.1.3`
import $ivy.`org.clulab::processors-corenlp:6.1.3`
import $ivy.`org.clulab::processors-odin:6.1.3`
import $ivy.`org.clulab::processors-modelsmain:6.1.3`
import $ivy.`org.clulab::processors-modelscorenlp:6.1.3`
import $ivy.`com.lihaoyi::pprint:0.5.3`
import org.clulab.odin.ExtractorEngine
import org.clulab.processors.Processor
import org.clulab.processors.clu.CluProcessor
import org.clulab.processors.corenlp.CoreNLPProcessor
import org.clulab.processors.fastnlp.FastNLPProcessor
import org.clulab.processors.shallownlp.ShallowNLPProcessor
import pprint.pprintln
// Processor used for every annotation in this script. Exactly one `proc` definition may be
// active at a time; swap the commented alternatives in to try another implementation.
// Stanford one
val proc: Processor = new CoreNLPProcessor()
// Also Stanford but Neural Net based (faster).
// val proc: Processor = new FastNLPProcessor()
// CLU lab implementation (faster but less featureful)
// val proc: Processor = new CluProcessor()

// Smoke test: annotate a short sentence to make sure the processor and its models load.
val testPhrase = "Hello the world is not flat."
// Named `testDoc` (not `doc`) so it does not collide with the `doc` defined further down;
// two top-level `val doc` definitions do not compile when this is run as a single script.
val testDoc = proc.annotate(testPhrase, keepText = true)
import org.clulab.odin.{EventMention, Mention, RelationMention, TextBoundMention}
import org.clulab.processors.{Document, Sentence}
/** Console pretty-printer for Odin mentions, grouped by the sentence they belong to. */
object NLPPrinter {

  /**
    * Prints every sentence of `doc` followed by the mentions found in that sentence.
    *
    * For each sentence this prints its index, its text, its tokens, its syntactic dependencies
    * (when present), then the mentions that do not match "Event" under "entities:" and the
    * ones that do under "events:".
    *
    * @param mentions mentions to display; they are grouped by their `sentence` index
    * @param doc      document whose sentences are walked in order
    */
  def displayMentions(mentions: Seq[Mention], doc: Document): Unit = {
    // Sentences that produced no mention resolve to Nil instead of failing the lookup.
    val mentionsBySentence =
      mentions.groupBy(_.sentence).mapValues(_.sortBy(_.start)).withDefaultValue(Nil)

    for ((s, i) <- doc.sentences.zipWithIndex) {
      println(s"sentence #$i")
      println(s.getSentenceText)
      println("Tokens: " + tokenSummary(s))
      printSyntacticDependencies(s)
      println()

      val sortedMentions = mentionsBySentence(i).sortBy(_.label)
      val (events, entities) = sortedMentions.partition(_.matches("Event"))
      // Text-bound mentions are listed first, everything else afterwards.
      val (tbs, rels) = entities.partition {
        case _: TextBoundMention => true
        case _                   => false
      }
      val sortedEntities = tbs ++ rels.sortBy(_.label)

      println("entities:")
      sortedEntities.foreach(displayMention)
      println()
      println("events:")
      events.foreach(displayMention)
      println("=" * 50)
    }
  }

  /**
    * Renders the tokens of a sentence as a comma-separated list of tuples.
    *
    * With tags available each entry is `(index,word,tag)`; when the sentence carries no tags
    * the entries degrade to `(index,word)` instead of throwing on `Option.get`.
    */
  private def tokenSummary(s: Sentence): String = s.tags match {
    case Some(tags) => (s.words.indices, s.words, tags).zipped.mkString(", ")
    case None       => s.words.indices.zip(s.words).mkString(", ")
  }

  /** Prints the sentence's dependencies, or nothing when the sentence has none. */
  def printSyntacticDependencies(s: Sentence): Unit =
    s.dependencies.foreach(deps => println(deps.toString))

  /**
    * Prints one mention: its labels and text, the rule that found it, its runtime type, and
    * type-specific details (trigger and arguments for events, arguments for relations).
    *
    * @param mention the mention to print
    */
  def displayMention(mention: Mention): Unit = {
    val boundary = "\t" + ("-" * 30)
    println(s"${mention.labels} => ${mention.text}")
    println(boundary)
    println(s"\tRule => ${mention.foundBy}")
    // Keep only the last dot-separated segment of the class description.
    val mentionType = mention.getClass.toString.split("""\.""").last
    println(s"\tType => $mentionType")
    println(boundary)
    mention match {
      case tb: TextBoundMention =>
        val labels = tb.labels.mkString(", ")
        println(s"\t$labels => ${tb.text}")
      case em: EventMention =>
        println(s"\ttrigger => ${em.trigger.text}")
        displayArguments(em)
      case rel: RelationMention =>
        displayArguments(rel)
      case _ => ()
    }
    println(s"$boundary\n")
  }

  /**
    * Prints every argument of `b`, one line per argument mention, as
    * `<argName> (<labels>) => <text>`.
    *
    * @param b mention whose `arguments` are listed
    */
  def displayArguments(b: Mention): Unit =
    b.arguments.foreach { case (argName, ms) =>
      ms.foreach { v =>
        val labels = v.labels.mkString("(", ", ", ")")
        println(s"\t$argName $labels => ${v.text}")
      }
    }
}
// Odin grammar, written as YAML. Nesting is significant: every field of a rule must be
// indented under its `- name:` entry, and the pattern text must be indented under `pattern: |`.
// The leading `|` on each line is the stripMargin marker and is removed from the final string;
// the trailing `|` on the `pattern:` line is YAML's block-scalar indicator and is kept.
val rules =
  """
    |rules:
    |  - name: "obj"
    |    label: Object
    |    priority: 1
    |    type: token
    |    unit: "tag"
    |    pattern: |
    |      /^N/* [incoming=/dobj/] /^N/*
    |""".stripMargin
// Build the extraction engine from the YAML grammar held in `rules`.
val engine: ExtractorEngine = ExtractorEngine(rules)

// Try the grammar on a sample question: annotate it, extract mentions, print the result.
val phrase: String = "where do I go to file a claim?"
val doc: Document = proc.annotate(phrase)
val mentions: Seq[Mention] = engine.extractFrom(doc)
NLPPrinter.displayMentions(mentions, doc)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment