Created
June 3, 2015 20:38
-
-
Save soldni/198359f85280e87176e3 to your computer and use it in GitHub Desktop.
Slightly modified version of the tester for the MetaMap Java API that prints its result as a JSON-compatible object (requires [json-simple](https://code.google.com/p/json-simple/)).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| package gov.nih.nlm.nls.metamap; | |
| import org.json.simple.JSONArray; | |
| import org.json.simple.JSONObject; | |
| import java.io.InputStream; | |
| import java.io.PrintStream; | |
| import java.io.BufferedReader; | |
| import java.io.FileReader; | |
| import java.io.File; | |
| import java.util.List; | |
| import java.util.ArrayList; | |
| import org.json.simple.parser.JSONParser; | |
| import se.sics.prologbeans.PrologSession; | |
| /** | |
| * MetaMapApiTest: An implementation of MetaMapApi Test Class. | |
| * | |
| * <p> | |
| * Created: Wed May 20 15:54:08 2009 | |
| * | |
| * @author <a href="mailto:[email protected]">Willie Rogers</a> | |
| * @version 1.0 | |
| */ | |
| public class MetaMapApiTest { | |
| /** MetaMap api instance */ | |
| MetaMapApi api; | |
/**
 * Builds a tester backed by a newly created {@code MetaMapApiImpl}.
 * No host or port is configured by this constructor.
 */
public MetaMapApiTest() {
    api = new MetaMapApiImpl();
}
/**
 * Builds a tester backed by a newly created {@code MetaMapApiImpl} that is pointed at the
 * given server.
 *
 * @param serverHostname hostname of MetaMap server.
 * @param serverPort listening port used by MetaMap server.
 */
public MetaMapApiTest(String serverHostname, int serverPort) {
    api = new MetaMapApiImpl();
    api.setHost(serverHostname);
    api.setPort(serverPort);
}
| void setTimeout(int interval) { | |
| this.api.setTimeout(interval); | |
| } | |
| /** | |
| * Process terms using MetaMap API and display result to standard output. | |
| * | |
| * @param terms input terms | |
| * @param out output printer | |
| * @param serverOptions options to pass to metamap server before processing input text. | |
| */ | |
| void process(String terms, PrintStream out, List<String> serverOptions) | |
| throws Exception | |
| { | |
| if (serverOptions.size() > 0) { | |
| api.setOptions(serverOptions); | |
| } | |
| List<Result> resultList = api.processCitationsFromString(terms); | |
| JSONObject outputJson = new JSONObject(); | |
| JSONParser parser = new JSONParser(); | |
| for (Result result: resultList) { | |
| if (result != null) { | |
| outputJson.put("input-text", result.getInputText()); | |
| List<AcronymsAbbrevs> aaList = result.getAcronymsAbbrevsList(); | |
| JSONArray aaListJson = new JSONArray(); | |
| if (aaList.size() > 0) { | |
| out.println("Acronyms and Abbreviations:"); | |
| for (AcronymsAbbrevs e : aaList) { | |
| JSONObject abbrJson = new JSONObject(); | |
| abbrJson.put("acronym", e.getAcronym()); | |
| abbrJson.put("expansion", e.getExpansion()); | |
| abbrJson.put("count-list", e.getCountList()); | |
| abbrJson.put("cui-list", e.getCUIList()); | |
| aaListJson.add(abbrJson); | |
| } | |
| outputJson.put("acronym-abbreviation", aaListJson); | |
| } | |
| List<Negation> negList = result.getNegationList(); | |
| if (negList.size() > 0) { | |
| JSONArray negListJson = new JSONArray(); | |
| for (Negation e: negList) { | |
| JSONObject negJson = new JSONObject(); | |
| negJson.put("type", e.getType()); | |
| negJson.put("trigger", e.getTrigger()); | |
| String negationPos = e.getConceptPositionList() | |
| .toString().replaceAll("\\((\\d+, \\d+)\\)", "\\[$1\\]"); | |
| negJson.put("pos", parser.parse(negationPos)); | |
| String conceptPair = e.getConceptPairList() | |
| .toString().replaceAll("\\((\\S+),(\\S+)\\)", "\\[\"$1\",\"$2\"\\]"); | |
| negJson.put("concept-pairs", parser.parse(conceptPair)); | |
| String negationConceptPos = e.getConceptPositionList() | |
| .toString().replaceAll("\\((\\d+, \\d+)\\)", "\\[$1\\]"); | |
| negJson.put("concept-pairs-pos", parser.parse(negationConceptPos)); | |
| negListJson.add(negJson); | |
| } | |
| outputJson.put("negations", negListJson); | |
| } | |
| JSONArray utteranceListJSON = new JSONArray(); | |
| outputJson.put("utterances", utteranceListJSON); | |
| for (Utterance utterance: result.getUtteranceList()) { | |
| JSONObject utteranceJSON = new JSONObject(); | |
| utteranceListJSON.add(utteranceJSON); | |
| utteranceJSON.put("id", utterance.getId()); | |
| utteranceJSON.put("text", utterance.getString()); | |
| // String pos = utterance.getPosition().toString().replaceAll("\\[\\((\\d+, \\d+)\\)\\]", "\\[$1\\]"); | |
| String utterancePos = utterance.getPosition() | |
| .toString().replaceAll("\\((\\d+, \\d+)\\)", "\\[$1\\]"); | |
| utteranceJSON.put("pos", parser.parse(utterancePos)); | |
| JSONArray phrasesListJSON = new JSONArray(); | |
| utteranceJSON.put("phrases", phrasesListJSON); | |
| for (PCM pcm: utterance.getPCMList()) { | |
| JSONObject phraseJSON = new JSONObject(); | |
| phrasesListJSON.add(phraseJSON); | |
| phraseJSON.put("text", pcm.getPhrase().getPhraseText()); | |
| phraseJSON.put("minimal-commitment-parse", pcm.getPhrase().getMincoManAsString()); | |
| JSONArray candidatesListJSON = new JSONArray(); | |
| phraseJSON.put("candidates", candidatesListJSON); | |
| for (Ev ev: pcm.getCandidatesInstance().getEvList()) { | |
| JSONObject candidateJSON = new JSONObject(); | |
| candidatesListJSON.add(candidateJSON); | |
| candidateJSON.put("score", ev.getScore()); | |
| candidateJSON.put("id", ev.getConceptId()); | |
| candidateJSON.put("name", ev.getConceptName()); | |
| candidateJSON.put("preferred-name", ev.getPreferredName()); | |
| candidateJSON.put("matched-words", ev.getMatchedWords()); | |
| candidateJSON.put("sem-types", ev.getSemanticTypes()); | |
| candidateJSON.put("match-map", ev.getMatchMap()); | |
| // putting this on halt as I'm not sure we actually need to have | |
| // it returned. | |
| // candidateJSON.put("match-map-alt", ev.getMatchMapList()); | |
| candidateJSON.put("is-head", ev.isHead()); | |
| candidateJSON.put("is-overmatch", ev.isOvermatch()); | |
| candidateJSON.put("sources", ev.getSources()); | |
| candidateJSON.put("pos", ev.getPositionalInfo()); | |
| candidateJSON.put("pruning-status", ev.getPruningStatus()); | |
| candidateJSON.put("negation-status", ev.getNegationStatus()); | |
| } | |
| JSONArray mappingListJSON = new JSONArray(); | |
| phraseJSON.put("candidates", mappingListJSON); | |
| for (Mapping map: pcm.getMappingList()) { | |
| JSONObject mappingJSON = new JSONObject(); | |
| mappingListJSON.add(mappingJSON); | |
| mappingJSON.put("score", map.getScore()); | |
| JSONArray conceptListJSON = new JSONArray(); | |
| mappingJSON.put("concepts", conceptListJSON); | |
| for (Ev mapEv: map.getEvList()) { | |
| JSONObject conceptJSON = new JSONObject(); | |
| conceptListJSON.add(conceptJSON); | |
| conceptJSON.put("score", mapEv.getScore()); | |
| conceptJSON.put("id", mapEv.getConceptId()); | |
| conceptJSON.put("name", mapEv.getConceptName()); | |
| conceptJSON.put("preferred-name", mapEv.getPreferredName()); | |
| conceptJSON.put("matched-words", mapEv.getMatchedWords()); | |
| conceptJSON.put("sem-types", mapEv.getSemanticTypes()); | |
| conceptJSON.put("match-map", mapEv.getMatchMap()); | |
| // putting this on halt as I'm not sure we actually need to have | |
| // it returned. | |
| // conceptJSON.put("match-map-alt", mapEv.getMatchMapList()); | |
| conceptJSON.put("is-head", mapEv.isHead()); | |
| conceptJSON.put("is-overmap", mapEv.isOvermatch()); | |
| conceptJSON.put("sources", mapEv.getSources()); | |
| String conceptPos = mapEv.getPositionalInfo() | |
| .toString().replaceAll("\\((\\d+, \\d+)\\)", "\\[$1\\]"); | |
| conceptJSON.put("pos", parser.parse(conceptPos)); | |
| conceptJSON.put("pruning-status", mapEv.getPruningStatus()); | |
| conceptJSON.put("negation-status", mapEv.getNegationStatus()); | |
| } | |
| } | |
| } | |
| } | |
| } | |
| } | |
| out.println(outputJson.toJSONString()); | |
| this.api.resetOptions(); | |
| } | |
| /** print information about server options */ | |
| public static void printHelp() { | |
| System.out.println("usage: gov.nih.nlm.nls.metamap.MetaMapApiTest [options] terms|inputFilename"); | |
| System.out.println(" allowed metamap options: "); | |
| System.out.println(" -C : use relaxed model "); | |
| System.out.println(" -A : use strict model "); | |
| System.out.println(" -d : no derivational variants"); | |
| System.out.println(" -D : all derivational variants"); | |
| System.out.println(" -a : allow Acronym/Abbreviation variants"); | |
| System.out.println(" -K : ignore stop phrases."); | |
| System.out.println(" -I : allow Large N"); | |
| System.out.println(" -r : threshold "); | |
| System.out.println(" -i : ignore word order"); | |
| System.out.println(" -Y : prefer multiple concepts"); | |
| System.out.println(" -b : compute/display all mappings"); | |
| System.out.println(" -X : truncate candidates mapping"); | |
| System.out.println(" -y : use WSD "); | |
| System.out.println(" -z : use term processing "); | |
| System.out.println(" -o : allow overmatches "); | |
| System.out.println(" -g : allow concept gaps"); | |
| System.out.println(" -8 : dynamic variant generation"); | |
| System.out.println(" -@ --WSD <hostname> : Which WSD server to use."); | |
| System.out.println(" -J --restrict_to_sts <semtypelist> : restrict to semantic types"); | |
| System.out.println(" -Q --composite_phrases <integer>"); | |
| System.out.println(" -R --restrict_to_sources <sourcelist> : restrict to sources"); | |
| System.out.println(" -S --tagger <sourcelist> : Which tagger to use."); | |
| System.out.println(" -V --mm_data_version <name> : version of MetaMap data to use."); | |
| System.out.println(" -Z --mm_data_year <name> : year of MetaMap data to use."); | |
| System.out.println(" -k --exclude_sts <semtypelist> : exclude semantic types"); | |
| System.out.println(" -e --exclude_sources <sourcelist> : exclude semantic types"); | |
| System.out.println(" -r --threshold <integer> : Threshold for displaying candidates."); | |
| System.out.println(" --blanklines <integer> : The number of empty or whitespace-only"); | |
| System.out.println(" lines required to end a citation."); | |
| System.out.println("API options:"); | |
| System.out.println(" --metamap_server_host <hostname> : use MetaMap server on specified host"); | |
| System.out.println(" --metamap_server_port <port number> : use MetaMap server on specified host"); | |
| System.out.println(" --metamap_server_timeout <interval> : wait for MetaMap server for specified interval."); | |
| System.out.println(" interval of 0 will wait indefinitely."); | |
| System.out.println("Program options:"); | |
| System.out.println(" --input <filename> : get input from file."); | |
| System.out.println(" --output <filename> : send output to file."); | |
| } | |
| /** @param inFile File class referencing input file. */ | |
| static String readInputFile(File inFile) | |
| throws java.io.IOException | |
| { | |
| BufferedReader ib = new BufferedReader(new FileReader(inFile)); | |
| StringBuffer inputBuf = new StringBuffer(); | |
| String line = ""; | |
| while ((line = ib.readLine()) != null) { | |
| inputBuf.append(line).append('\n'); | |
| } | |
| ib.close(); | |
| return inputBuf.toString(); | |
| } | |
| public static void main(String[] args) | |
| throws Exception | |
| { | |
| String serverhost = MetaMapApi.DEFAULT_SERVER_HOST; | |
| int serverport = MetaMapApi.DEFAULT_SERVER_PORT; // default port | |
| int timeout = -1; // use default timeout | |
| String inFilename = null; | |
| InputStream input = System.in; | |
| PrintStream output = System.out; | |
| if (args.length < 1) { | |
| printHelp(); | |
| System.exit(0); | |
| } | |
| StringBuffer termBuf = new StringBuffer(); | |
| List<String> options = new ArrayList<String>(); | |
| int i = 0; | |
| while (i < args.length) { | |
| if (args[i].charAt(0) == '-') { | |
| if (args[i].equals("-h") || args[i].equals("--help") || args[i].equals("-?")) { | |
| printHelp(); | |
| System.exit(0); | |
| } else if ( args[i].equals("-%") || args[i].equals("--XML") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("-@") || args[i].equals("--WSD") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("-J") || args[i].equals("--restrict_to_sts") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("-Q") || args[i].equals("--composite_phrases") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("-R") || args[i].equals("--restrict_to_sources") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("-S") || args[i].equals("--tagger") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("-V") || args[i].equals("--mm_data_version") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("-Z") || args[i].equals("--mm_data_year") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("-e") || args[i].equals("--exclude_sources") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("-k") || args[i].equals("--exclude_sts") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("-r") || args[i].equals("--threshold") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("--prune") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("--blanklines") ) { | |
| options.add(args[i]); i++; | |
| options.add(args[i]); | |
| } else if ( args[i].equals("--metamap_server_host") ) { | |
| i++; | |
| serverhost = args[i]; | |
| } else if ( args[i].equals("--metamap_server_port") ) { | |
| i++; | |
| serverport = Integer.parseInt(args[i]); | |
| } else if (args[i].equals("--metamap_server_timeout") ) { | |
| i++; | |
| timeout = Integer.parseInt(args[i]); | |
| } else if (args[i].equals("--input") ) { | |
| i++; | |
| inFilename = args[i]; | |
| System.out.println("output file: " + args[i]); | |
| } else if (args[i].equals("--output") ) { | |
| i++; | |
| output = new PrintStream(args[i]); | |
| System.out.println("output file: " + args[i]); | |
| } else { | |
| options.add(args[i]); | |
| } | |
| } else { | |
| termBuf.append(args[i]).append(" "); | |
| } | |
| i++; | |
| } | |
| // System.out.println("serverport: " + serverport); | |
| MetaMapApiTest frontEnd = new MetaMapApiTest(serverhost, serverport); | |
| // System.out.println("options: " + options); | |
| // System.out.println("terms: " + termBuf); | |
| if (timeout > -1) { | |
| frontEnd.setTimeout(timeout); | |
| } | |
| if (inFilename != null) { | |
| File inFile = new File(inFilename.trim()); | |
| if (inFile.exists()) { | |
| System.out.println("input file: " + inFilename); | |
| frontEnd.process(readInputFile(inFile), output, options); | |
| } else { | |
| System.out.println("input file: " + inFilename + " does not exist!"); | |
| } | |
| } else if (termBuf.length() > 0) { | |
| File inFile = new File(termBuf.toString().trim()); | |
| if (inFile.exists()) { | |
| System.out.println("input file: " + termBuf); | |
| frontEnd.process(readInputFile(inFile), output, options); | |
| } else { | |
| frontEnd.process(termBuf.toString(), output, options); | |
| } | |
| frontEnd.api.disconnect(); | |
| } else { | |
| printHelp(); | |
| System.exit(0); | |
| } | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment