Last active
February 20, 2019 17:41
-
-
Save reyman/91d528ca292c777701c7605e84550411 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import gate._ | |
import gate.creole._ | |
import gate.util.persistence._ | |
import gate.corpora._ | |
import scala.collection.JavaConverters._ | |
import scala.collection.mutable.HashSet | |
/**
 * Prototype that loads the English-only TwitIE application from the GATE
 * `twitter` plugin, prints the names of its processing resources, runs it over
 * one sample tweet and prints the entity-like annotations that were produced.
 *
 * All work happens in the object body, which runs when the program starts
 * (the object extends `App`).
 */
object TwitterPrototype extends App {

  // Initialise the GATE library first; everything below uses it.
  Gate.init()

  // Maven coordinates of the GATE plugins this prototype knows about.
  val anniePlugin = new Plugin.Maven("uk.ac.gate.plugins", "annie", "8.6-SNAPSHOT")
  val annieFrenchPlugin = new Plugin.Maven("uk.ac.gate.plugins", "lang-french", "8.6-SNAPSHOT")
  val twitterJsonPlugin = new Plugin.Maven("uk.ac.gate.plugins", "format-twitter", "8.6-SNAPSHOT")
  val twitterPlugin = new Plugin.Maven("uk.ac.gate.plugins", "twitter", "8.6-SNAPSHOT")

  // Only the two Twitter plugins are registered; the ANNIE registrations are left disabled.
  //Gate.getCreoleRegister.registerPlugin(anniePlugin)
  //Gate.getCreoleRegister.registerPlugin(annieFrenchPlugin)
  Gate.getCreoleRegister.registerPlugin(twitterJsonPlugin)
  Gate.getCreoleRegister.registerPlugin(twitterPlugin)

  // Saved application shipped inside the twitter plugin.
  val resourceDef = new ResourceReference(twitterPlugin, "resources/twitie-english-only.gapp").toURL

  // loadObjectFromUrl is untyped, hence the cast to the controller type the rest of the code uses.
  val controller = PersistenceManager.loadObjectFromUrl(resourceDef).asInstanceOf[ConditionalSerialAnalyserController]
  //val annieController= Factory.createResource("gate.creole.SerialAnalyserController", Factory.newFeatureMap(), Factory.newFeatureMap(), "ANNIE").asInstanceOf[SerialAnalyserController]

  /**
   * Prints the names of the controller's processing resources on one line,
   * separated by commas.
   *
   * @param controller the controller whose processing resources are listed
   */
  def infoPRS(controller: CorpusController): Unit = {
    val prNames = controller.getPRs().asScala.map(_.getName())
    println(prNames.mkString(","))
  }

  infoPRS(controller)

  /**
   * Runs `controller` over `text` and prints the resulting annotations.
   *
   * A temporary corpus holding a single document is created for the run. Both
   * are handed back to `Factory.deleteResource` (document first, then corpus)
   * even when execution or result processing throws, so a failed run does not
   * leave GATE resources behind.
   *
   * @param controller the application to execute
   * @param text       the raw text to analyse
   */
  def processString(controller: CorpusController, text: String): Unit = {
    val corpus = Factory.newCorpus("TwitIE Corpus")
    try {
      val doc = Factory.newDocument(text)
      try {
        corpus.add(doc)
        controller.setCorpus(corpus)
        controller.execute()
        processResults(doc, corpus)
      } finally {
        Factory.deleteResource(doc)
      }
    } finally {
      Factory.deleteResource(corpus)
    }
  }

  /**
   * Prints every annotation of the required types found in the documents of
   * `corpus`, one line per annotation, as `Type = <type> = <covered text>`.
   *
   * The covered text is read from the document that owns the annotation, so
   * the output stays correct when the corpus holds more than one document.
   *
   * @param doc    retained for source compatibility with existing callers; the
   *               text of each annotation is taken from its own corpus document
   * @param corpus the corpus whose documents are inspected
   */
  def processResults(doc: Document, corpus: Corpus): Unit = {
    val annotationTypesRequired =
      Set("Person", "Location", "Organization", "UserID", "Emoticon", "Hashtag").asJava

    // Pattern match instead of an unchecked cast on the corpus members.
    val documents = corpus.iterator().asScala.collect { case document: Document => document }

    documents.foreach { document =>
      // NOTE(review): set after the pipeline has already run, as in the original — confirm it is still needed.
      document.setPreserveOriginalContent(true)
      val selected = document.getAnnotations().get(annotationTypesRequired)
      selected.asScala.foreach { annotation =>
        println(s"Type = ${annotation.getType()} = ${Utils.stringFor(document, annotation)}")
      }
    }
  }

  processString(controller, "Hi @seb :) , i'm really happy to see you with @paul and @axt in #paris #eiffeltower ! ")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Nice, just a comment on lines 43-44 - don't use `new DocumentImpl` and `setContent`; instead use `Factory.newDocument(text)`, and then make sure you pass both the document and the corpus to `Factory.deleteResource` before the method returns.