Last active
August 9, 2018 08:42
-
-
Save johnmiedema/312819de5bd80ca3438a to your computer and use it in GitHub Desktop.
Recognize names using OpenNLP NameFinder
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package demoNameFind; | |
import java.io.FileInputStream; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import opennlp.tools.namefind.NameFinderME; | |
import opennlp.tools.namefind.TokenNameFinderModel; | |
import opennlp.tools.tokenize.Tokenizer; | |
import opennlp.tools.tokenize.TokenizerME; | |
import opennlp.tools.tokenize.TokenizerModel; | |
import opennlp.tools.util.Span; | |
public class Main { | |
static String sentence = "Jack London is the author of what novel?"; | |
public static void main(String[] args) { | |
InputStream modelInToken = null; | |
InputStream modelIn = null; | |
try { | |
//1. convert sentence into tokens | |
modelInToken = new FileInputStream("en-token.bin"); | |
TokenizerModel modelToken = new TokenizerModel(modelInToken); | |
Tokenizer tokenizer = new TokenizerME(modelToken); | |
String tokens[] = tokenizer.tokenize(sentence); | |
//2. find names | |
modelIn = new FileInputStream("en-ner-person.bin"); | |
TokenNameFinderModel model = new TokenNameFinderModel(modelIn); | |
NameFinderME nameFinder = new NameFinderME(model); | |
Span nameSpans[] = nameFinder.find(tokens); | |
//find probabilities for names | |
double[] spanProbs = nameFinder.probs(nameSpans); | |
//3. print names | |
for( int i = 0; i<nameSpans.length; i++) { | |
System.out.println("Span: "+nameSpans[i].toString()); | |
System.out.println("Covered text is: "+tokens[nameSpans[i].getStart()] + " " + tokens[nameSpans[i].getStart()+1]); | |
System.out.println("Probability is: "+spanProbs[i]); | |
} | |
//Span: [0..2) person | |
//Covered text is: Jack London | |
//Probability is: 0.7081556539712883 | |
} | |
catch (Exception ex) {} | |
finally { | |
try { if (modelInToken != null) modelInToken.close(); } catch (IOException e){}; | |
try { if (modelIn != null) modelIn.close(); } catch (IOException e){}; | |
} | |
} | |
} | |
The find method is from the NameFinderME class of Apache OpenNLP. See the associated Javadoc: https://opennlp.apache.org/docs/1.9.0/apidocs/opennlp-tools/opennlp/tools/namefind/NameFinderME.html#find-java.lang.String:A-
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hey, could you provide the code for the find method?