Created
July 14, 2013 22:45
-
-
Save charlespunk/5996401 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
A search engine is badly in need of a feature where it can understand common acronyms, or abbreviations. | |
To begin with, we’d like it to be able to figure out, the expansions of abbreviations which refer to popular organizations, which might include agencies, colleges, universities or companies. | |
Input format | |
The first line contains N. N lines follow. Each line will contain a small text snippet, picked up from either their Wikipedia entry, or the home page of that organization. Each snippet will be a one liner. From each snippet, you need to identify the acronyms/abbreviations, and their expansions. | |
This is followed by N tests, each in a new line. Each of these tests, contains an acronym/abbreviation (not necessarily in the same order) which you will need to expand. | |
Constraints | |
Each snippet will be on one line, and will contain no more than five sentences or five hundred words. Embedded acronyms are typically in upper case. Test acronyms are always in upper case. | |
N <= 10 | |
Output Format | |
T lines. The ith line is the expansion for the ith test input. | |
Sample Input | |
3 | |
The United Nations Children's Fund (UNICEF) is a United Nations Programme headquartered in New York City, that provides long-term humanitarian and developmental assistance to children and mothers in developing countries. | |
The National University of Singapore is a leading global university located in Singapore, Southeast Asia. NUS is Singapore's flagship university which offers a global approach to education and research. | |
Massachusetts Institute of Technology (MIT) is a private research university located in Cambridge, Massachusetts, United States. | |
NUS | |
MIT | |
UNICEF | |
Sample Output | |
National University of Singapore | |
Massachusetts Institute of Technology | |
United Nations Children's Fund | |
Explanation | |
The expansions of NUS, MIT and UNICEF were inferred from the chunks of provided text. UNICEF is an example of a somewhat difficult test case. There will be a limited number of challenging tests of this level to acknowledge the extra effort of those who handled a variety of cases. There is no perfect or deterministic solution to this answer, and users will be rated on the fraction of tests they expand correctly. | |
Scoring | |
Score for a test case = MaxScore * C/N where N = number of tests in the input file; C = Number of abbreviations expanded correctly by you. Case and punctuation variations will be ignored. Do not include extra leading words like “THE”. |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.ArrayList; | |
import java.util.Scanner; | |
public class Solution { | |
public static void main(String[] args) throws Exception{ | |
Scanner scanner = new Scanner(System.in); | |
// read text | |
int lineNumber = scanner.nextInt(); | |
scanner.nextLine(); | |
String[] text = readText(scanner, lineNumber); | |
//read acronyms | |
ArrayList<String> acronyms = readAcronyms(scanner); | |
//System.out.println(Arrays.toString(text)); | |
//System.out.println(acronyms); | |
scanner.close(); | |
// make abbre lines | |
ArrayList<ArrayList<Capital>> capitalMatix = new ArrayList<>(); | |
for(int i = 0; i < lineNumber; i++){ | |
ArrayList<Capital> capitalLine = makeAbbreLine(text[i]); | |
capitalMatix.add(capitalLine); | |
//System.out.println(capitalLine.toString()); | |
} | |
// extract acronyms | |
for(int i = 0; i < acronyms.size(); i++){ | |
String thisAcronyms = acronyms.get(i); | |
Hit thisHit = new Hit(0, 0, 0, 0); | |
for(int line = 0; line < lineNumber; line++){ | |
ArrayList<Capital> thisCapitals = capitalMatix.get(line); | |
for(int j = 0; j < thisCapitals.size(); j++){ | |
Hit hit = computeHit(line, thisCapitals, j, thisAcronyms); | |
if(hit.length > thisHit.length) thisHit = hit; | |
} | |
} | |
//System.out.println(thisHit); | |
String resultLine = text[thisHit.lineIndex]; | |
String[] resultSplits = resultLine.split(" "); | |
String result = ""; | |
for(int p = thisHit.beginIndex; p <= thisHit.endIndex; p++){ | |
result += resultSplits[p] + " "; | |
} | |
System.out.println(result.trim()); | |
} | |
} | |
static Hit computeHit(int line, ArrayList<Capital> capitals, int pos, | |
String acronyms){ | |
if(capitals.get(pos).ch != acronyms.charAt(0)) return | |
new Hit(line, pos, pos, 0); | |
int linePos = pos; | |
int endPos = pos; | |
int acronymsPos = 0; | |
int length = 0; | |
while(linePos < capitals.size() && acronymsPos < acronyms.length()){ | |
if(capitals.get(linePos).ch == acronyms.charAt(acronymsPos)){ | |
endPos = linePos; | |
length++; | |
linePos++; | |
acronymsPos++; | |
} | |
else{ | |
acronymsPos++; | |
} | |
} | |
return new Hit(line, capitals.get(pos).pos, capitals.get(endPos).pos, length); | |
} | |
static ArrayList<Capital> makeAbbreLine(String line){ | |
ArrayList<Capital> capitals = new ArrayList<>(); | |
String[] words = line.split(" "); | |
int pos = -1; | |
for(String word : words){ | |
pos++; | |
if(word.toLowerCase().equals("the")) continue; | |
if(validAbbre(word)){ | |
capitals.add(new Capital(word.charAt(0), pos)); | |
} | |
} | |
return capitals; | |
} | |
static boolean validAbbre(String word){ | |
if(word.charAt(0) > 'Z' || word.charAt(0) < 'A') return false; | |
for(int i = 1; i < word.length(); i++){ | |
if(word.charAt(i) <= 'z' && word.charAt(i) >= 'a') return true; | |
} | |
return false; | |
} | |
static String[] readText(Scanner scanner, int lineNumber){ | |
String[] text = new String[lineNumber]; | |
for(int i = 0; i < lineNumber; i++){ | |
text[i] = scanner.nextLine(); | |
} | |
return text; | |
} | |
static ArrayList<String> readAcronyms(Scanner scanner){ | |
ArrayList<String> acronyms = new ArrayList<>(); | |
while(scanner.hasNextLine()){ | |
acronyms.add(scanner.nextLine()); | |
} | |
return acronyms; | |
} | |
} | |
class Hit{ | |
int lineIndex, beginIndex, endIndex, length = 0; | |
Hit(int lineIndex, int beginIndex, int endIndex, int length){ | |
this.lineIndex = lineIndex; | |
this.beginIndex = beginIndex; | |
this.endIndex = endIndex; | |
this.length = length; | |
} | |
@Override | |
public String toString(){ | |
return lineIndex + " " + beginIndex + " " + endIndex + " " + length; | |
} | |
} | |
class Capital{ | |
char ch; | |
int pos; | |
Capital(char ch, int pos){ | |
this.ch = ch; | |
this.pos = pos; | |
} | |
@Override | |
public String toString(){ | |
return ch + " " + pos; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment