Last active
August 29, 2015 14:04
-
-
Save madaan/1e7a784604206a3d3d7c to your computer and use it in GitHub Desktop.
Preprocessing + Feature generation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package edu.washington.multir.experiment; | |
import java.io.IOException; | |
import java.util.List; | |
import edu.stanford.nlp.ling.CoreAnnotations; | |
import edu.stanford.nlp.ling.CoreLabel; | |
import edu.stanford.nlp.pipeline.Annotation; | |
import edu.stanford.nlp.util.CoreMap; | |
import edu.stanford.nlp.util.Pair; | |
import edu.washington.multir.preprocess.CorpusPreprocessing; | |
import edu.washington.multirframework.argumentidentification.ArgumentIdentification; | |
import edu.washington.multirframework.argumentidentification.DefaultSententialInstanceGeneration; | |
import edu.washington.multirframework.argumentidentification.NERArgumentIdentification; | |
import edu.washington.multirframework.argumentidentification.SententialInstanceGeneration; | |
import edu.washington.multirframework.data.Argument; | |
import edu.washington.multirframework.data.KBArgument; | |
import edu.washington.multirframework.featuregeneration.DefaultFeatureGenerator; | |
import edu.washington.multirframework.featuregeneration.FeatureGenerator; | |
public class FeatureGenFromRawText { | |
public static void getFeatures(String sentence, String docName) { | |
try { | |
Annotation doc = CorpusPreprocessing.getTestDocumentFromRawString(sentence, docName); | |
//Do argument identification and generate features | |
//do argument identification | |
ArgumentIdentification ai = NERArgumentIdentification.getInstance(); | |
SententialInstanceGeneration sig = DefaultSententialInstanceGeneration.getInstance(); | |
FeatureGenerator fg = new DefaultFeatureGenerator(); | |
List<CoreMap> docSentences = doc.get(CoreAnnotations.SentencesAnnotation.class); | |
int count =0; | |
for(CoreMap sent: docSentences){ | |
System.out.println("Sentence " + count); | |
List<Argument> arguments = ai.identifyArguments(doc, sent); | |
System.out.println("Arguments"); | |
for(Argument arg : arguments){ | |
System.out.println(arg.getArgName()); | |
} | |
List<CoreLabel> tokens = sent.get(CoreAnnotations.TokensAnnotation.class); | |
System.out.println("Token size = " + tokens.size()); | |
System.out.println("TOKENS"); | |
for(CoreLabel t: tokens){ | |
System.out.print(t + " "); | |
} | |
List<Pair<Argument,Argument>> sententialInstances = sig.generateSententialInstances(arguments, sent); | |
for(Pair<Argument,Argument> argPair : sententialInstances){ | |
Argument arg1 = argPair.first; | |
Argument arg2 = argPair.second; | |
String arg1ID = null; | |
String arg2ID = null; | |
if(arg1 instanceof KBArgument){ | |
arg1ID = ((KBArgument)arg1).getKbId(); | |
} | |
if(arg2 instanceof KBArgument){ | |
arg2ID = ((KBArgument)arg2).getKbId(); | |
} | |
List<String> features =fg.generateFeatures(arg1.getStartOffset(), | |
arg1.getEndOffset(), | |
arg2.getStartOffset(), | |
arg2.getEndOffset(), | |
arg1ID,arg2ID, | |
sent, doc); | |
System.out.print(arg1.getArgName() + "\t" + arg2.getArgName()); | |
for(String feature: features){ | |
System.out.print("\t" + feature); | |
} | |
System.out.println(); | |
} | |
count++; | |
} | |
} catch (IOException | InterruptedException e) { | |
// TODO Auto-generated catch block | |
e.printStackTrace(); | |
} | |
} | |
public static void main(String args[]) { | |
FeatureGenFromRawText.getFeatures("Mongolia's trade with Russia has increased a lot", "doc"); | |
} | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Sentence 0 | |
Arguments | |
Mongolia | |
Russia | |
Token size = 9 | |
TOKENS | |
Mongolia 's trade with Russia has increased a lot here I am generating features!!!!! | |
Mongolia Russia inverse_false|LOCATION|'s trade with|LOCATION inverse_false|B_-1|LOCATION|'s trade with|LOCATION|has inverse_false|B_-2 B_-1|LOCATION|'s trade with|LOCATION|has increased str:'s[possessive]->|LOCATION|[poss]->trade[nsubj]<-with[prep]<-|LOCATION dep:[possessive]->|LOCATION|[poss]->[nsubj]<-[prep]<-|LOCATION dir:->|LOCATION|-><-<-|LOCATION str:LOCATION|[poss]->trade[nsubj]<-with[prep]<-|LOCATION | |
here I am generating features!!!!! | |
Russia Mongolia inverse_true|LOCATION|'s trade with|LOCATION inverse_true|B_-1|LOCATION|'s trade with|LOCATION|has inverse_true|B_-2 B_-1|LOCATION|'s trade with|LOCATION|has increased str:LOCATION|[pobj]->with[prep]->trade[nsubj]<-|LOCATION|[possessive]->'s dep:LOCATION|[pobj]->[prep]->[nsubj]<-|LOCATION|[possessive]-> dir:LOCATION|->-><-|LOCATION|-> str:LOCATION|[pobj]->with[prep]->trade[nsubj]<-|LOCATION |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment