Last active
November 14, 2017 23:29
-
-
Save dispalt/e309bdf7579a2162c84f675f4d6c264b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import $ivy.`org.clulab::processors-main:6.1.3` | |
import $ivy.`org.clulab::processors-corenlp:6.1.3` | |
import $ivy.`org.clulab::processors-odin:6.1.3` | |
import $ivy.`org.clulab::processors-modelsmain:6.1.3` | |
import $ivy.`org.clulab::processors-modelscorenlp:6.1.3` | |
import $ivy.`com.lihaoyi::pprint:0.5.3` | |
import org.clulab.odin.ExtractorEngine | |
import org.clulab.processors.Processor | |
import org.clulab.processors.clu.CluProcessor | |
import org.clulab.processors.corenlp.CoreNLPProcessor | |
import org.clulab.processors.fastnlp.FastNLPProcessor | |
import org.clulab.processors.shallownlp.ShallowNLPProcessor | |
import pprint.pprintln |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Processor selection: exactly one `proc` definition should be active at a time.
//
// Stanford CoreNLP-backed processor.
val proc: Processor = new CoreNLPProcessor()

// Alternative: also Stanford-based, but uses the neural-net parser (faster).
// val proc: Processor = new FastNLPProcessor()

// Alternative: the CLU lab implementation (faster, but with fewer features).
// val proc: Processor = new CluProcessor()

val testPhrase = "Hello the world is not flat."

// Run the full annotation pipeline; `keepText = true` asks the processor to
// retain the original text on the resulting document.
val doc = proc.annotate(testPhrase, keepText = true)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.clulab.odin.{EventMention, Mention, RelationMention, TextBoundMention} | |
import org.clulab.processors.{Document, Sentence} | |
/** Console pretty-printer for Odin mentions and the sentences they were found in. */
object NLPPrinter {

  /**
    * Prints every sentence of `doc` (text, tokens, dependencies when available)
    * followed by the mentions found in that sentence, split into "entities"
    * and "events".
    *
    * @param mentions mentions to display; they are grouped by their `sentence` index
    * @param doc      document whose sentences are printed in order
    */
  def displayMentions(mentions: Seq[Mention], doc: Document): Unit = {
    // A strict `map` is used instead of `mapValues`: `mapValues` yields a lazy
    // view that re-sorts on every lookup and is deprecated in newer Scala
    // versions. Sentences without mentions fall back to `Nil`.
    val mentionsBySentence = mentions
      .groupBy(_.sentence)
      .map { case (sentenceIdx, ms) => sentenceIdx -> ms.sortBy(_.start) }
      .withDefaultValue(Nil)

    for ((s, i) <- doc.sentences.zipWithIndex) {
      println(s"sentence #$i")
      println(s.getSentenceText)
      println("Tokens: " + describeTokens(s))
      printSyntacticDependencies(s)
      println()

      // Stable sort: mentions with the same label keep their start order.
      val sortedMentions = mentionsBySentence(i).sortBy(_.label)
      val (events, entities) = sortedMentions.partition(_ matches "Event")
      val (tbs, rels) = entities.partition(_.isInstanceOf[TextBoundMention])
      // `rels` is a sub-sequence of `sortedMentions`, so it is already ordered
      // by label; no second sort is needed.
      val sortedEntities = tbs ++ rels

      println("entities:")
      sortedEntities foreach displayMention
      println()

      println("events:")
      events foreach displayMention
      println("=" * 50)
    }
  }

  /**
    * Renders the tokens of a sentence as a comma-separated list of tuples:
    * `(index,word,tag)` when tags are present, `(index,word)` otherwise.
    * Handling the untagged case avoids the exception an unconditional
    * `tags.get` would throw.
    */
  private def describeTokens(s: Sentence): String = s.tags match {
    case Some(tags) => (s.words.indices, s.words, tags).zipped.mkString(", ")
    case None       => s.words.indices.zip(s.words).mkString(", ")
  }

  /** Prints the sentence's dependency graph, or nothing when it has none. */
  def printSyntacticDependencies(s: Sentence): Unit =
    s.dependencies.foreach(deps => println(deps.toString))

  /**
    * Prints a single mention: its labels and text, the rule that found it, its
    * concrete class name, and type-specific details (trigger and/or arguments).
    *
    * @param mention the mention to print
    */
  def displayMention(mention: Mention): Unit = {
    val boundary = s"\t${"-" * 30}"
    println(s"${mention.labels} => ${mention.text}")
    println(boundary)
    println(s"\tRule => ${mention.foundBy}")
    // Last dot-separated segment of the class description, e.g. "TextBoundMention".
    val mentionType = mention.getClass.toString.split("""\.""").last
    println(s"\tType => $mentionType")
    println(boundary)
    mention match {
      case tb: TextBoundMention =>
        println(s"\t${tb.labels.mkString(", ")} => ${tb.text}")
      case em: EventMention =>
        println(s"\ttrigger => ${em.trigger.text}")
        displayArguments(em)
      case rel: RelationMention =>
        displayArguments(rel)
      case _ => () // other mention kinds get no extra detail
    }
    println(s"$boundary\n")
  }

  /**
    * Prints one line per argument mention of `b`, prefixed by the argument name
    * and followed by the argument's labels and text.
    *
    * @param b mention whose `arguments` map is printed
    */
  def displayArguments(b: Mention): Unit =
    for {
      (argName, ms) <- b.arguments
      v             <- ms
    } println(s"\t$argName ${v.labels.mkString("(", ", ", ")")} => ${v.text}")
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Odin rule grammar (YAML). YAML is indentation-sensitive, so the nesting is
// written explicitly behind `|` margins, which `stripMargin` removes: every
// field below belongs to the single "obj" rule, and the pattern is the body of
// the `pattern: |` block scalar.
val rules =
  """
    |rules:
    |  - name: "obj"
    |    label: Object
    |    priority: 1
    |    type: token
    |    unit: "tag"
    |    pattern: |
    |      /^N/* [incoming=/dobj/] /^N/*
    |""".stripMargin

// Build the extractor engine from the YAML rules above.
val engine = ExtractorEngine(rules)

// Let's see if this works.
val phrase = "where do I go to file a claim?"
// Re-binds `doc` for this phrase. That is fine when the snippets are evaluated
// one after another in a REPL session; a single compiled script that also
// defines `doc` earlier would need a distinct name here.
val doc = proc.annotate(phrase)
val mentions = engine.extractFrom(doc)
NLPPrinter.displayMentions(mentions, doc)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment