charlespunk · July 14, 2013 22:45
diff --git a/Expand the Acronyms b/Expand the Acronyms
 A search engine is badly in need of a feature where it can understand common acronyms, or abbreviations.

 To begin with, we’d like it to be able to figure out, the expansions of abbreviations which refer to popular organizations, which might include agencies, colleges, universities or companies.

 Input format
 The first line contains N. N lines follow. Each line will contain a small text snippet, picked up from either their Wikipedia entry, or the home page of that organization. Each snippet will be a one liner. From each snippet, you need to identify the acronyms/abbreviations, and their expansions.

 This is followed by N tests, each in a new line. Each of these tests, contains an acronym/abbreviation (not necessarily in the same order) which you will need to expand.

 Constraints
 Each snippet will be on one line, and will contain no more than five sentences or five hundred words. Embedded acronyms are typically in upper case. Test acronyms are always in upper case.

 N <= 10

 Output Format
 T lines. The ith line is the expansion for the ith test input.

 Sample Input

 3
 The United Nations Children's Fund (UNICEF) is a United Nations Programme headquartered in New York City, that provides long-term humanitarian and developmental assistance to children and mothers in developing countries.
 The National University of Singapore is a leading global university located in Singapore, Southeast Asia. NUS is Singapore's flagship university which offers a global approach to education and research.
 Massachusetts Institute of Technology (MIT) is a private research university located in Cambridge, Massachusetts, United States.
 NUS
 MIT
 UNICEF
 Sample Output

 National University of Singapore
 Massachusetts Institute of Technology
 United Nations Children's Fund
 Explanation
 The expansions of NUS, MIT and UNICEF were inferred from the chunks of provided text. UNICEF is an example of a somewhat difficult test case. There will be a limited number of challenging tests of this level to acknowledge the extra effort of those who handled a variety of cases. There is no perfect or deterministic solution to this answer, and users will be rated on the fraction of tests they expand correctly.

 Scoring
 Score for a test case = MaxScore * C/N where N = number of tests in the input file; C = Number of abbreviations expanded correctly by you. Case and punctuation variations will be ignored. Do not include extra leading words like “THE”.
diff --git a/Solution.java b/Solution.java
 import java.util.ArrayList;
 import java.util.Scanner;


 public class Solution {
  
 	public static void main(String[] args) throws Exception{
 		Scanner scanner = new Scanner(System.in);
 		
 		// read text
 		int lineNumber = scanner.nextInt();
 		scanner.nextLine();
 		String[] text = readText(scanner, lineNumber);
 		
 		//read acronyms
 		ArrayList<String> acronyms = readAcronyms(scanner);
 		
 		//System.out.println(Arrays.toString(text));
 		//System.out.println(acronyms);
 		scanner.close();
 		
 		// make abbre lines
 		ArrayList<ArrayList<Capital>> capitalMatix = new ArrayList<>();
 		for(int i = 0; i < lineNumber; i++){
 			ArrayList<Capital> capitalLine = makeAbbreLine(text[i]);
 			capitalMatix.add(capitalLine);
 			//System.out.println(capitalLine.toString());
 		}
 		
 		// extract acronyms
 		for(int i = 0; i < acronyms.size(); i++){
 			String thisAcronyms = acronyms.get(i);
 			Hit thisHit = new Hit(0, 0, 0, 0);
 			for(int line = 0; line < lineNumber; line++){
 				ArrayList<Capital> thisCapitals = capitalMatix.get(line);
 				for(int j = 0; j < thisCapitals.size(); j++){
 					Hit hit = computeHit(line, thisCapitals, j, thisAcronyms); 
 					if(hit.length > thisHit.length) thisHit = hit;
 				}
 			}
 			//System.out.println(thisHit);
 			String resultLine = text[thisHit.lineIndex];
 			String[] resultSplits = resultLine.split(" ");
 			String result = "";
 			for(int p = thisHit.beginIndex; p <= thisHit.endIndex; p++){
 				result += resultSplits[p] + " ";
 			}
 			System.out.println(result.trim());
 			
 		}
 		
 	}
 	
 	static Hit computeHit(int line, ArrayList<Capital> capitals, int pos,
 			String acronyms){
 		if(capitals.get(pos).ch != acronyms.charAt(0)) return
 				new Hit(line, pos, pos, 0);
 		int linePos = pos;
 		int endPos = pos;
 		int acronymsPos = 0;
 		int length = 0;
 		while(linePos < capitals.size() && acronymsPos < acronyms.length()){
 			if(capitals.get(linePos).ch == acronyms.charAt(acronymsPos)){
 				endPos = linePos;
 				length++;
 				linePos++;
 				acronymsPos++;
 			}
 			else{
 				acronymsPos++;
 			}
 		}
 		return new Hit(line, capitals.get(pos).pos, capitals.get(endPos).pos, length);
 	}
 	
 	static ArrayList<Capital> makeAbbreLine(String line){
 		ArrayList<Capital> capitals = new ArrayList<>();
 		String[] words = line.split(" ");
 		int pos = -1;
 		for(String word : words){
 			pos++;
 			if(word.toLowerCase().equals("the")) continue;
 			if(validAbbre(word)){
 				capitals.add(new Capital(word.charAt(0), pos));
 			}
 		}
 		return capitals;
 	}
 	
 	static boolean validAbbre(String word){
 		if(word.charAt(0) > 'Z' || word.charAt(0) < 'A') return false;
 		for(int i = 1; i < word.length(); i++){
 			if(word.charAt(i) <= 'z' && word.charAt(i) >= 'a') return true;
 		}
 		return false;
 	}
 	
 	static String[] readText(Scanner scanner, int lineNumber){
 		String[] text = new String[lineNumber];
 		for(int i = 0; i < lineNumber; i++){
 			text[i] = scanner.nextLine();
 		}
 		return text;
 	}
 	
 	static ArrayList<String> readAcronyms(Scanner scanner){
 		ArrayList<String> acronyms = new ArrayList<>();
 		while(scanner.hasNextLine()){
 			acronyms.add(scanner.nextLine());
 		}
 		return acronyms;
 	}
 }

 class Hit{
 	int lineIndex, beginIndex, endIndex, length = 0;
 	Hit(int lineIndex, int beginIndex, int endIndex, int length){
 		this.lineIndex = lineIndex;
 		this.beginIndex = beginIndex;
 		this.endIndex = endIndex;
 		this.length = length;
 	}
 	@Override
 	public String toString(){
 		return lineIndex + " " + beginIndex + " " + endIndex + " " + length;
 	}
 	
 }

 class Capital{
 	char ch;
 	int pos;
 	Capital(char ch, int pos){
 		this.ch = ch;
 		this.pos = pos;
 	}
 	@Override
 	public String toString(){
 		return ch + " " + pos;
 	}
 	
 }
	A search engine is badly in need of a feature where it can understand common acronyms, or abbreviations.

	To begin with, we’d like it to be able to figure out, the expansions of abbreviations which refer to popular organizations, which might include agencies, colleges, universities or companies.

	Input format
	The first line contains N. N lines follow. Each line will contain a small text snippet, picked up from either their Wikipedia entry, or the home page of that organization. Each snippet will be a one liner. From each snippet, you need to identify the acronyms/abbreviations, and their expansions.

	This is followed by N tests, each in a new line. Each of these tests, contains an acronym/abbreviation (not necessarily in the same order) which you will need to expand.

	Constraints
	Each snippet will be on one line, and will contain no more than five sentences or five hundred words. Embedded acronyms are typically in upper case. Test acronyms are always in upper case.

	N <= 10

	Output Format
	T lines. The ith line is the expansion for the ith test input.

	Sample Input

	3
	The United Nations Children's Fund (UNICEF) is a United Nations Programme headquartered in New York City, that provides long-term humanitarian and developmental assistance to children and mothers in developing countries.
	The National University of Singapore is a leading global university located in Singapore, Southeast Asia. NUS is Singapore's flagship university which offers a global approach to education and research.
	Massachusetts Institute of Technology (MIT) is a private research university located in Cambridge, Massachusetts, United States.
	NUS
	MIT
	UNICEF
	Sample Output

	National University of Singapore
	Massachusetts Institute of Technology
	United Nations Children's Fund
	Explanation
	The expansions of NUS, MIT and UNICEF were inferred from the chunks of provided text. UNICEF is an example of a somewhat difficult test case. There will be a limited number of challenging tests of this level to acknowledge the extra effort of those who handled a variety of cases. There is no perfect or deterministic solution to this answer, and users will be rated on the fraction of tests they expand correctly.

	Scoring
	Score for a test case = MaxScore * C/N where N = number of tests in the input file; C = Number of abbreviations expanded correctly by you. Case and punctuation variations will be ignored. Do not include extra leading words like “THE”.
	import java.util.ArrayList;
	import java.util.Scanner;


	public class Solution {

	public static void main(String[] args) throws Exception{
	Scanner scanner = new Scanner(System.in);

	// read text
	int lineNumber = scanner.nextInt();
	scanner.nextLine();
	String[] text = readText(scanner, lineNumber);

	//read acronyms
	ArrayList<String> acronyms = readAcronyms(scanner);

	//System.out.println(Arrays.toString(text));
	//System.out.println(acronyms);
	scanner.close();

	// make abbre lines
	ArrayList<ArrayList<Capital>> capitalMatix = new ArrayList<>();
	for(int i = 0; i < lineNumber; i++){
	ArrayList<Capital> capitalLine = makeAbbreLine(text[i]);
	capitalMatix.add(capitalLine);
	//System.out.println(capitalLine.toString());
	}

	// extract acronyms
	for(int i = 0; i < acronyms.size(); i++){
	String thisAcronyms = acronyms.get(i);
	Hit thisHit = new Hit(0, 0, 0, 0);
	for(int line = 0; line < lineNumber; line++){
	ArrayList<Capital> thisCapitals = capitalMatix.get(line);
	for(int j = 0; j < thisCapitals.size(); j++){
	Hit hit = computeHit(line, thisCapitals, j, thisAcronyms);
	if(hit.length > thisHit.length) thisHit = hit;
	}
	}
	//System.out.println(thisHit);
	String resultLine = text[thisHit.lineIndex];
	String[] resultSplits = resultLine.split(" ");
	String result = "";
	for(int p = thisHit.beginIndex; p <= thisHit.endIndex; p++){
	result += resultSplits[p] + " ";
	}
	System.out.println(result.trim());

	}

	}

	static Hit computeHit(int line, ArrayList<Capital> capitals, int pos,
	String acronyms){
	if(capitals.get(pos).ch != acronyms.charAt(0)) return
	new Hit(line, pos, pos, 0);
	int linePos = pos;
	int endPos = pos;
	int acronymsPos = 0;
	int length = 0;
	while(linePos < capitals.size() && acronymsPos < acronyms.length()){
	if(capitals.get(linePos).ch == acronyms.charAt(acronymsPos)){
	endPos = linePos;
	length++;
	linePos++;
	acronymsPos++;
	}
	else{
	acronymsPos++;
	}
	}
	return new Hit(line, capitals.get(pos).pos, capitals.get(endPos).pos, length);
	}

	static ArrayList<Capital> makeAbbreLine(String line){
	ArrayList<Capital> capitals = new ArrayList<>();
	String[] words = line.split(" ");
	int pos = -1;
	for(String word : words){
	pos++;
	if(word.toLowerCase().equals("the")) continue;
	if(validAbbre(word)){
	capitals.add(new Capital(word.charAt(0), pos));
	}
	}
	return capitals;
	}

	static boolean validAbbre(String word){
	if(word.charAt(0) > 'Z' \|\| word.charAt(0) < 'A') return false;
	for(int i = 1; i < word.length(); i++){
	if(word.charAt(i) <= 'z' && word.charAt(i) >= 'a') return true;
	}
	return false;
	}

	static String[] readText(Scanner scanner, int lineNumber){
	String[] text = new String[lineNumber];
	for(int i = 0; i < lineNumber; i++){
	text[i] = scanner.nextLine();
	}
	return text;
	}

	static ArrayList<String> readAcronyms(Scanner scanner){
	ArrayList<String> acronyms = new ArrayList<>();
	while(scanner.hasNextLine()){
	acronyms.add(scanner.nextLine());
	}
	return acronyms;
	}
	}

	class Hit{
	int lineIndex, beginIndex, endIndex, length = 0;
	Hit(int lineIndex, int beginIndex, int endIndex, int length){
	this.lineIndex = lineIndex;
	this.beginIndex = beginIndex;
	this.endIndex = endIndex;
	this.length = length;
	}
	@Override
	public String toString(){
	return lineIndex + " " + beginIndex + " " + endIndex + " " + length;
	}

	}

	class Capital{
	char ch;
	int pos;
	Capital(char ch, int pos){
	this.ch = ch;
	this.pos = pos;
	}
	@Override
	public String toString(){
	return ch + " " + pos;
	}

	}