Created
September 28, 2015 18:39
-
-
Save igorbrigadir/77f94859788db94d8cb1 to your computer and use it in GitHub Desktop.
Turn 1 document per line text file into CoNLL-X formatted sentences. There's probably a much better way of doing this though...
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.FileUtils;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.nndep.DependencyParser;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.util.CoreMap;
| public class RunDependencyParser { | |
| static final String taggerPath = "/stanford-parser-full-2015-04-20/pos-tagger/english-left3words/english-left3words-distsim.tagger"; | |
| static MaxentTagger tagger = new MaxentTagger(taggerPath); | |
| static final String modelPath = "/stanford-parser-full-2015-04-20/models/parser/nndep/english_UD.gz"; | |
| static DependencyParser parser = DependencyParser.loadFromModelFile(modelPath); | |
| /* | |
| * Input file is 1 document (a few sentences) per line. | |
| */ | |
| public static void main(String[] args) throws IOException { | |
| File input = new File("/input.txt"); | |
| File output = new File("/output.txt"); | |
| for (String document : FileUtils.readLines(input)) { | |
| // Append output file | |
| FileUtils.writeStringToFile(output, getConllXString(document), true); | |
| } | |
| } | |
| /* | |
| * Get a chunk of text in CoNLL-X Format: | |
| */ | |
| public static String getConllXString(String text) { | |
| StringBuilder sb = new StringBuilder(); | |
| // Split text into sentences: | |
| DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text)); | |
| for (List<HasWord> sentence : tokenizer) { | |
| // POS Tag & Parse each sentence | |
| List<TaggedWord> tagged = tagger.tagSentence(sentence); | |
| GrammaticalStructure gs = parser.predict(tagged); | |
| // CoNLL-X Output format: | |
| String s = GrammaticalStructure.dependenciesToCoNLLXString(gs, getCoreMap(tagged)); | |
| sb.append(s); | |
| sb.append("\n\n"); | |
| } | |
| return sb.toString(); | |
| } | |
| /* | |
| * Turn a sentence (List of annotated tokens) into a CoreMap object: | |
| */ | |
| public static CoreMap getCoreMap(List<? extends HasWord> sentence) { | |
| CoreLabel sentenceLabel = new CoreLabel(); | |
| List<CoreLabel> tokens = new ArrayList<>(); | |
| int i = 1; | |
| for (HasWord wd : sentence) { | |
| CoreLabel label; | |
| if (wd instanceof CoreLabel) { | |
| label = (CoreLabel) wd; | |
| if (label.tag() == null) { | |
| throw new IllegalArgumentException("Parser requires words " + "with part-of-speech tag annotations"); | |
| } | |
| } else { | |
| label = new CoreLabel(); | |
| label.setValue(wd.word()); | |
| label.setWord(wd.word()); | |
| if (!(wd instanceof HasTag)) { | |
| throw new IllegalArgumentException("Parser requires words " + "with part-of-speech tag annotations"); | |
| } | |
| label.setTag(((HasTag) wd).tag()); | |
| } | |
| label.setIndex(i); | |
| i++; | |
| tokens.add(label); | |
| } | |
| sentenceLabel.set(CoreAnnotations.TokensAnnotation.class, tokens); | |
| return sentenceLabel; | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment