Created
November 22, 2012 01:06
-
-
Save ferhatelmas/4128830 to your computer and use it in GitHub Desktop.
A class to find the words from a sentence that is a concatenation of the permutation of the words
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package com.ferhatelmas; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.HashMap; | |
import java.util.List; | |
public class Tokenizer { | |
private HashMap<String, String> dict; | |
public Tokenizer(String[] words) { | |
this.dict = new HashMap<String, String>(); | |
for(String word : words) | |
dict.put(sorted(word), word); | |
} | |
public static String sorted(String w) { | |
char[] chars = w.toCharArray(); | |
Arrays.sort(chars); | |
return new String(chars); | |
} | |
public List<String> getWords(String sentence) { | |
if(sentence.isEmpty()) return new ArrayList<String>(); | |
List<String> words = new ArrayList<String>(); | |
int len = sentence.length(); | |
for(int i=1; i<=len; i++) { | |
String token = sorted(sentence.substring(0, i)); | |
if(dict.containsKey(token)) { | |
List<String> found = getWords(sentence.substring(i)); | |
if(found != null) { | |
words.add(dict.get(token)); | |
words.addAll(found); | |
break; | |
} | |
} | |
} | |
return words.size() == 0 ? null : words; | |
} | |
public static void main(String[] args) { | |
Tokenizer t = new Tokenizer(new String[] {"ferhat", "elmas", "huseyn", "gasimov", "that", "is", "a", "sample"}); | |
List<String> words = t.getWords("usnyehttahsiaaspmle"); | |
if(words != null) // unmatched returns null since empty string returns empty list | |
for(String w : words) | |
System.out.println(w); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment