Created
September 5, 2013 04:26
-
-
Save prassee/6446101 to your computer and use it in GitHub Desktop.
chalk nlp
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package nlpapp | |
import chalk.tools.sentdetect.SentenceDetectorME | |
import chalk.tools.sentdetect.SentenceModel | |
import java.io.FileInputStream | |
import chalk.tools.postag.POSModel | |
import chalk.tools.postag.POSTaggerME | |
import chalk.tools.tokenize.TokenizerModel | |
import chalk.tools.tokenize.TokenizerME | |
/**
 * Small demo that splits a hard-coded text into sentences and prints the
 * part-of-speech tagging of its second sentence.
 *
 * The sentence and POS models are read from the classpath resources
 * `/en-sent.bin` and `/en-pos-maxent.bin`.
 */
object WordTagger extends App {
  val tweetString = "CSSCorp is a global company. " +
    "The company's labs entity is providing services in Cloud and BigData Technologies."

  // Factory functions: each wraps one constructor so the loading pipeline
  // below can be written as plain function application.
  val fis = (x: String) => new FileInputStream(x)
  val model = (x: FileInputStream) => new SentenceModel(x)
  val detector = (x: SentenceModel) => new SentenceDetectorME(x)
  // The tokenizer factories are defined but not used anywhere in this object.
  val tokenModel = (x: FileInputStream) => new TokenizerModel(x)
  val tokenDetector = (x: TokenizerModel) => new TokenizerME(x)
  val posmodel = (x: FileInputStream) => new POSModel(x)
  val posdetector = (x: POSModel) => new POSTaggerME(x)

  /**
   * Resolves a classpath resource to a filesystem path.
   *
   * Going through `toURI` decodes percent-escapes (e.g. `%20` for a space)
   * that `URL.getPath` would leave in place and that `FileInputStream`
   * cannot open. Only `file:` resources are supported, as before.
   *
   * @param name absolute resource name, e.g. `/en-sent.bin`
   * @throws IllegalArgumentException if the resource is not on the classpath
   */
  private def resourcePath(name: String): String = {
    val url = Option(this.getClass().getResource(name)).getOrElse(
      throw new IllegalArgumentException("Model resource not found on classpath: " + name))
    new java.io.File(url.toURI).getPath
  }

  /**
   * Opens the named resource, hands the stream to `load`, and always closes
   * the stream afterwards so the file handle is not leaked.
   *
   * NOTE(review): assumes the model constructors read the stream eagerly,
   * as in the OpenNLP loading pattern Chalk was ported from.
   */
  private def loadModel[M](name: String)(load: FileInputStream => M): M = {
    val in = fis(resourcePath(name))
    try load(in) finally in.close()
  }

  /** Builds a sentence detector and returns a function from text to its sentences. */
  def splitSentence = {
    val senDet = detector(loadModel("/en-sent.bin")(model))
    def findSentences(sme: SentenceDetectorME) = (x: String) => sme.sentDetect(x)
    findSentences(senDet)
  }

  val sentences = splitSentence(tweetString)
  // Index 1 is the second sentence of tweetString ("The company's labs entity ...").
  val postagr = posdetector(loadModel("/en-pos-maxent.bin")(posmodel)).tag(sentences(1))
  println(postagr)
  // output
  // The/DT company's/NNS labs/NNS entity/NN is/VBZ providing/VBG services/NNS in/IN Cloud/NNP and/CC BigData/NNP Technologies./NNP
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment