Last active
August 15, 2019 20:11
-
-
Save johnmiedema/e12e7359bcb17b03b8a0 to your computer and use it in GitHub Desktop.
Extract noun phrases from a single sentence using OpenNLP
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package demoParseNounPhrases; | |
import java.io.FileInputStream; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.util.HashSet; | |
import java.util.Set; | |
import opennlp.tools.cmdline.parser.ParserTool; | |
import opennlp.tools.parser.Parse; | |
import opennlp.tools.parser.Parser; | |
import opennlp.tools.parser.ParserFactory; | |
import opennlp.tools.parser.ParserModel; | |
//extract noun phrases from a single sentence using OpenNLP | |
public class Main { | |
static String sentence = "Who is the author of The Call of the Wild?"; | |
static Set<String> nounPhrases = new HashSet<>(); | |
public static void main(String[] args) { | |
InputStream modelInParse = null; | |
try { | |
//load chunking model | |
modelInParse = new FileInputStream("en-parser-chunking.bin"); //from http://opennlp.sourceforge.net/models-1.5/ | |
ParserModel model = new ParserModel(modelInParse); | |
//create parse tree | |
Parser parser = ParserFactory.create(model); | |
Parse topParses[] = ParserTool.parseLine(sentence, parser, 1); | |
//call subroutine to extract noun phrases | |
for (Parse p : topParses) | |
getNounPhrases(p); | |
//print noun phrases | |
for (String s : nounPhrases) | |
System.out.println(s); | |
//The Call | |
//the Wild? | |
//The Call of the Wild? //punctuation remains on the end of sentence | |
//the author of The Call of the Wild? | |
//the author | |
} | |
catch (IOException e) { | |
e.printStackTrace(); | |
} | |
finally { | |
if (modelInParse != null) { | |
try { | |
modelInParse.close(); | |
} | |
catch (IOException e) { | |
} | |
} | |
} | |
} | |
//recursively loop through tree, extracting noun phrases | |
public static void getNounPhrases(Parse p) { | |
if (p.getType().equals("NP")) { //NP=noun phrase | |
nounPhrases.add(p.getCoveredText()); | |
} | |
for (Parse child : p.getChildren()) | |
getNounPhrases(child); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi John - nice example, and it works great. A couple of questions: the ParserTool.parseLine() call takes about 50ms to 100ms in my data processing on a normal Linux server with adequate memory. Do you get a similar delay in your processing? If so, how would you speed it up in a single Java process if you have thousands of sentences? I see some discussion about OpenNLP not being thread-safe, so you can't use a single parser in multiple threads. Thanks.