Skip to content

Instantly share code, notes, and snippets.

@soldni
Created June 3, 2015 20:38
Show Gist options
  • Select an option

  • Save soldni/198359f85280e87176e3 to your computer and use it in GitHub Desktop.

Select an option

Save soldni/198359f85280e87176e3 to your computer and use it in GitHub Desktop.
Slightly modified version of tester for metamap apis that returns a JSON compatible object (requires [json-simple](https://code.google.com/p/json-simple/))
package gov.nih.nlm.nls.metamap;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import java.io.InputStream;
import java.io.PrintStream;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.File;
import java.util.List;
import java.util.ArrayList;
import org.json.simple.parser.JSONParser;
import se.sics.prologbeans.PrologSession;
/**
* MetaMapApiTest: An implementation of MetaMapApi Test Class.
*
* <p>
* Created: Wed May 20 15:54:08 2009
*
* @author <a href="mailto:[email protected]">Willie Rogers</a>
* @version 1.0
*/
public class MetaMapApiTest {
/** MetaMap api instance */
MetaMapApi api;
/**
* Creates a new <code>MetaMapApiTest</code> instance.
*
*/
public MetaMapApiTest() {
this.api = new MetaMapApiImpl();
}
/**
* Creates a new <code>MetaMapApiTest</code> instance using specified host and port.
*
* @param serverHostname hostname of MetaMap server.
* @param serverPort listening port used by MetaMap server.
*/
public MetaMapApiTest(String serverHostname, int serverPort) {
this.api = new MetaMapApiImpl();
this.api.setHost(serverHostname);
this.api.setPort(serverPort);
}
void setTimeout(int interval) {
this.api.setTimeout(interval);
}
/**
* Process terms using MetaMap API and display result to standard output.
*
* @param terms input terms
* @param out output printer
* @param serverOptions options to pass to metamap server before processing input text.
*/
void process(String terms, PrintStream out, List<String> serverOptions)
throws Exception
{
if (serverOptions.size() > 0) {
api.setOptions(serverOptions);
}
List<Result> resultList = api.processCitationsFromString(terms);
JSONObject outputJson = new JSONObject();
JSONParser parser = new JSONParser();
for (Result result: resultList) {
if (result != null) {
outputJson.put("input-text", result.getInputText());
List<AcronymsAbbrevs> aaList = result.getAcronymsAbbrevsList();
JSONArray aaListJson = new JSONArray();
if (aaList.size() > 0) {
out.println("Acronyms and Abbreviations:");
for (AcronymsAbbrevs e : aaList) {
JSONObject abbrJson = new JSONObject();
abbrJson.put("acronym", e.getAcronym());
abbrJson.put("expansion", e.getExpansion());
abbrJson.put("count-list", e.getCountList());
abbrJson.put("cui-list", e.getCUIList());
aaListJson.add(abbrJson);
}
outputJson.put("acronym-abbreviation", aaListJson);
}
List<Negation> negList = result.getNegationList();
if (negList.size() > 0) {
JSONArray negListJson = new JSONArray();
for (Negation e: negList) {
JSONObject negJson = new JSONObject();
negJson.put("type", e.getType());
negJson.put("trigger", e.getTrigger());
String negationPos = e.getConceptPositionList()
.toString().replaceAll("\\((\\d+, \\d+)\\)", "\\[$1\\]");
negJson.put("pos", parser.parse(negationPos));
String conceptPair = e.getConceptPairList()
.toString().replaceAll("\\((\\S+),(\\S+)\\)", "\\[\"$1\",\"$2\"\\]");
negJson.put("concept-pairs", parser.parse(conceptPair));
String negationConceptPos = e.getConceptPositionList()
.toString().replaceAll("\\((\\d+, \\d+)\\)", "\\[$1\\]");
negJson.put("concept-pairs-pos", parser.parse(negationConceptPos));
negListJson.add(negJson);
}
outputJson.put("negations", negListJson);
}
JSONArray utteranceListJSON = new JSONArray();
outputJson.put("utterances", utteranceListJSON);
for (Utterance utterance: result.getUtteranceList()) {
JSONObject utteranceJSON = new JSONObject();
utteranceListJSON.add(utteranceJSON);
utteranceJSON.put("id", utterance.getId());
utteranceJSON.put("text", utterance.getString());
// String pos = utterance.getPosition().toString().replaceAll("\\[\\((\\d+, \\d+)\\)\\]", "\\[$1\\]");
String utterancePos = utterance.getPosition()
.toString().replaceAll("\\((\\d+, \\d+)\\)", "\\[$1\\]");
utteranceJSON.put("pos", parser.parse(utterancePos));
JSONArray phrasesListJSON = new JSONArray();
utteranceJSON.put("phrases", phrasesListJSON);
for (PCM pcm: utterance.getPCMList()) {
JSONObject phraseJSON = new JSONObject();
phrasesListJSON.add(phraseJSON);
phraseJSON.put("text", pcm.getPhrase().getPhraseText());
phraseJSON.put("minimal-commitment-parse", pcm.getPhrase().getMincoManAsString());
JSONArray candidatesListJSON = new JSONArray();
phraseJSON.put("candidates", candidatesListJSON);
for (Ev ev: pcm.getCandidatesInstance().getEvList()) {
JSONObject candidateJSON = new JSONObject();
candidatesListJSON.add(candidateJSON);
candidateJSON.put("score", ev.getScore());
candidateJSON.put("id", ev.getConceptId());
candidateJSON.put("name", ev.getConceptName());
candidateJSON.put("preferred-name", ev.getPreferredName());
candidateJSON.put("matched-words", ev.getMatchedWords());
candidateJSON.put("sem-types", ev.getSemanticTypes());
candidateJSON.put("match-map", ev.getMatchMap());
// putting this on halt as I'm not sure we actually need to have
// it returned.
// candidateJSON.put("match-map-alt", ev.getMatchMapList());
candidateJSON.put("is-head", ev.isHead());
candidateJSON.put("is-overmatch", ev.isOvermatch());
candidateJSON.put("sources", ev.getSources());
candidateJSON.put("pos", ev.getPositionalInfo());
candidateJSON.put("pruning-status", ev.getPruningStatus());
candidateJSON.put("negation-status", ev.getNegationStatus());
}
JSONArray mappingListJSON = new JSONArray();
phraseJSON.put("candidates", mappingListJSON);
for (Mapping map: pcm.getMappingList()) {
JSONObject mappingJSON = new JSONObject();
mappingListJSON.add(mappingJSON);
mappingJSON.put("score", map.getScore());
JSONArray conceptListJSON = new JSONArray();
mappingJSON.put("concepts", conceptListJSON);
for (Ev mapEv: map.getEvList()) {
JSONObject conceptJSON = new JSONObject();
conceptListJSON.add(conceptJSON);
conceptJSON.put("score", mapEv.getScore());
conceptJSON.put("id", mapEv.getConceptId());
conceptJSON.put("name", mapEv.getConceptName());
conceptJSON.put("preferred-name", mapEv.getPreferredName());
conceptJSON.put("matched-words", mapEv.getMatchedWords());
conceptJSON.put("sem-types", mapEv.getSemanticTypes());
conceptJSON.put("match-map", mapEv.getMatchMap());
// putting this on halt as I'm not sure we actually need to have
// it returned.
// conceptJSON.put("match-map-alt", mapEv.getMatchMapList());
conceptJSON.put("is-head", mapEv.isHead());
conceptJSON.put("is-overmap", mapEv.isOvermatch());
conceptJSON.put("sources", mapEv.getSources());
String conceptPos = mapEv.getPositionalInfo()
.toString().replaceAll("\\((\\d+, \\d+)\\)", "\\[$1\\]");
conceptJSON.put("pos", parser.parse(conceptPos));
conceptJSON.put("pruning-status", mapEv.getPruningStatus());
conceptJSON.put("negation-status", mapEv.getNegationStatus());
}
}
}
}
}
}
out.println(outputJson.toJSONString());
this.api.resetOptions();
}
/** print information about server options */
public static void printHelp() {
System.out.println("usage: gov.nih.nlm.nls.metamap.MetaMapApiTest [options] terms|inputFilename");
System.out.println(" allowed metamap options: ");
System.out.println(" -C : use relaxed model ");
System.out.println(" -A : use strict model ");
System.out.println(" -d : no derivational variants");
System.out.println(" -D : all derivational variants");
System.out.println(" -a : allow Acronym/Abbreviation variants");
System.out.println(" -K : ignore stop phrases.");
System.out.println(" -I : allow Large N");
System.out.println(" -r : threshold ");
System.out.println(" -i : ignore word order");
System.out.println(" -Y : prefer multiple concepts");
System.out.println(" -b : compute/display all mappings");
System.out.println(" -X : truncate candidates mapping");
System.out.println(" -y : use WSD ");
System.out.println(" -z : use term processing ");
System.out.println(" -o : allow overmatches ");
System.out.println(" -g : allow concept gaps");
System.out.println(" -8 : dynamic variant generation");
System.out.println(" -@ --WSD <hostname> : Which WSD server to use.");
System.out.println(" -J --restrict_to_sts <semtypelist> : restrict to semantic types");
System.out.println(" -Q --composite_phrases <integer>");
System.out.println(" -R --restrict_to_sources <sourcelist> : restrict to sources");
System.out.println(" -S --tagger <sourcelist> : Which tagger to use.");
System.out.println(" -V --mm_data_version <name> : version of MetaMap data to use.");
System.out.println(" -Z --mm_data_year <name> : year of MetaMap data to use.");
System.out.println(" -k --exclude_sts <semtypelist> : exclude semantic types");
System.out.println(" -e --exclude_sources <sourcelist> : exclude semantic types");
System.out.println(" -r --threshold <integer> : Threshold for displaying candidates.");
System.out.println(" --blanklines <integer> : The number of empty or whitespace-only");
System.out.println(" lines required to end a citation.");
System.out.println("API options:");
System.out.println(" --metamap_server_host <hostname> : use MetaMap server on specified host");
System.out.println(" --metamap_server_port <port number> : use MetaMap server on specified host");
System.out.println(" --metamap_server_timeout <interval> : wait for MetaMap server for specified interval.");
System.out.println(" interval of 0 will wait indefinitely.");
System.out.println("Program options:");
System.out.println(" --input <filename> : get input from file.");
System.out.println(" --output <filename> : send output to file.");
}
/** @param inFile File class referencing input file. */
static String readInputFile(File inFile)
throws java.io.IOException
{
BufferedReader ib = new BufferedReader(new FileReader(inFile));
StringBuffer inputBuf = new StringBuffer();
String line = "";
while ((line = ib.readLine()) != null) {
inputBuf.append(line).append('\n');
}
ib.close();
return inputBuf.toString();
}
public static void main(String[] args)
throws Exception
{
String serverhost = MetaMapApi.DEFAULT_SERVER_HOST;
int serverport = MetaMapApi.DEFAULT_SERVER_PORT; // default port
int timeout = -1; // use default timeout
String inFilename = null;
InputStream input = System.in;
PrintStream output = System.out;
if (args.length < 1) {
printHelp();
System.exit(0);
}
StringBuffer termBuf = new StringBuffer();
List<String> options = new ArrayList<String>();
int i = 0;
while (i < args.length) {
if (args[i].charAt(0) == '-') {
if (args[i].equals("-h") || args[i].equals("--help") || args[i].equals("-?")) {
printHelp();
System.exit(0);
} else if ( args[i].equals("-%") || args[i].equals("--XML") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("-@") || args[i].equals("--WSD") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("-J") || args[i].equals("--restrict_to_sts") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("-Q") || args[i].equals("--composite_phrases") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("-R") || args[i].equals("--restrict_to_sources") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("-S") || args[i].equals("--tagger") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("-V") || args[i].equals("--mm_data_version") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("-Z") || args[i].equals("--mm_data_year") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("-e") || args[i].equals("--exclude_sources") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("-k") || args[i].equals("--exclude_sts") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("-r") || args[i].equals("--threshold") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("--prune") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("--blanklines") ) {
options.add(args[i]); i++;
options.add(args[i]);
} else if ( args[i].equals("--metamap_server_host") ) {
i++;
serverhost = args[i];
} else if ( args[i].equals("--metamap_server_port") ) {
i++;
serverport = Integer.parseInt(args[i]);
} else if (args[i].equals("--metamap_server_timeout") ) {
i++;
timeout = Integer.parseInt(args[i]);
} else if (args[i].equals("--input") ) {
i++;
inFilename = args[i];
System.out.println("output file: " + args[i]);
} else if (args[i].equals("--output") ) {
i++;
output = new PrintStream(args[i]);
System.out.println("output file: " + args[i]);
} else {
options.add(args[i]);
}
} else {
termBuf.append(args[i]).append(" ");
}
i++;
}
// System.out.println("serverport: " + serverport);
MetaMapApiTest frontEnd = new MetaMapApiTest(serverhost, serverport);
// System.out.println("options: " + options);
// System.out.println("terms: " + termBuf);
if (timeout > -1) {
frontEnd.setTimeout(timeout);
}
if (inFilename != null) {
File inFile = new File(inFilename.trim());
if (inFile.exists()) {
System.out.println("input file: " + inFilename);
frontEnd.process(readInputFile(inFile), output, options);
} else {
System.out.println("input file: " + inFilename + " does not exist!");
}
} else if (termBuf.length() > 0) {
File inFile = new File(termBuf.toString().trim());
if (inFile.exists()) {
System.out.println("input file: " + termBuf);
frontEnd.process(readInputFile(inFile), output, options);
} else {
frontEnd.process(termBuf.toString(), output, options);
}
frontEnd.api.disconnect();
} else {
printHelp();
System.exit(0);
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment