Created
October 6, 2014 10:47
-
-
Save madaan/0e8983e9fa4380a00ac1 to your computer and use it in GitHub Desktop.
Stanford CoreNLP Dependency parsing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File; | |
import java.io.IOException; | |
import java.util.Collection; | |
import java.util.Iterator; | |
import java.util.List; | |
import java.util.Properties; | |
import org.apache.commons.io.FileUtils; | |
import edu.stanford.nlp.ling.CoreAnnotations.SentencesAnnotation; | |
import edu.stanford.nlp.pipeline.Annotation; | |
import edu.stanford.nlp.pipeline.StanfordCoreNLP; | |
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation; | |
import edu.stanford.nlp.trees.GrammaticalStructure; | |
import edu.stanford.nlp.trees.GrammaticalStructureFactory; | |
import edu.stanford.nlp.trees.PennTreebankLanguagePack; | |
import edu.stanford.nlp.trees.Tree; | |
import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation; | |
import edu.stanford.nlp.trees.TreebankLanguagePack; | |
import edu.stanford.nlp.trees.TypedDependency; | |
import edu.stanford.nlp.util.CoreMap; | |
//sg | |
public class DepParser { | |
Properties prop; | |
StanfordCoreNLP pipeline; | |
DepParser() { | |
prop = new Properties(); | |
prop.put("annotators", "tokenize, ssplit, pos, lemma , parse"); | |
pipeline = new StanfordCoreNLP(prop); | |
} | |
public static void main(String args[]) throws IOException { | |
DepParser dprsr = new DepParser(); | |
String fileString = FileUtils.readFileToString(new File("sampleInput")); | |
Annotation doc = new Annotation(fileString); | |
dprsr.pipeline.annotate(doc); | |
List<CoreMap> sentences = doc.get(SentencesAnnotation.class); | |
for (CoreMap sentence : sentences) { | |
Tree tree = sentence.get(TreeAnnotation.class); | |
// Get dependency tree | |
TreebankLanguagePack tlp = new PennTreebankLanguagePack(); | |
GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(); | |
GrammaticalStructure gs = gsf.newGrammaticalStructure(tree); | |
Collection<TypedDependency> td = gs.typedDependenciesCollapsed(); | |
Iterator<TypedDependency> tdi = td.iterator(); | |
while (tdi.hasNext()) { | |
System.out.println(tdi.next()); | |
} | |
System.out.println(sentence.get(CollapsedCCProcessedDependenciesAnnotation.class)); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment