Last active
August 29, 2015 14:15
-
-
Save muschneider/ff3b06ddfe6e2b71a097 to your computer and use it in GitHub Desktop.
OpenNLP Portuguese Example
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Runs a short Portuguese text through Apache OpenNLP: sentence detection,
// tokenization, then part-of-speech tagging, printing each token with its tag.
// The three model files are referenced by relative path.
@Grapes([
    @Grab('org.apache.opennlp:opennlp-tools:1.5.2-incubating'),
    @GrabExclude('jwnl:jwnl')
])
import opennlp.tools.postag.*
import opennlp.tools.sentdetect.*
import opennlp.tools.tokenize.*

def texto = "Pense bem. Quem casa quer casa."

// Each model is built inside the closure; withInputStream closes the file
// afterwards, so no stream is left open once the models are loaded.
def sentenceModel = new File("pt-sent.bin").withInputStream { InputStream input ->
    new SentenceModel(input)
}
def tokenModel = new File("pt-token.bin").withInputStream { InputStream input ->
    new TokenizerModel(input)
}
def posModel = new File("pt-pos-perceptron.bin").withInputStream { InputStream input ->
    new POSModel(input)
}

def sentenceDetector = new SentenceDetectorME(sentenceModel)
def tokenizer = new TokenizerME(tokenModel)
def posTagger = new POSTaggerME(posModel)

sentenceDetector.sentDetect(texto).each { sentence ->
    println "sentença : ${sentence} "

    def tokens = tokenizer.tokenize(sentence)
    def tags = posTagger.tag(tokens)

    // tags[idx] is paired with tokens[idx], so the two are walked by index
    tokens.eachWithIndex { token, idx ->
        println " ${token} -> ${tags[idx]}"
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment