Last active
November 5, 2015 02:19
-
-
Save microaeris/6c3eb8133a28b30c8599 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.util.*; | |
/** | |
* CryptoEquivalence is defined by two words having a | |
* character mapping where the characters in one word | |
* can singularly map to the character of the same index | |
* of the second word without a conflict. | |
* | |
* book -> room GOOD | |
* book -> roam BAD (o maps to both o and a) | |
* | |
* @author Alice | |
* | |
*/ | |
public class CryptoEquivalence2 { | |
final static int TOTAL_DIGITS = 3; | |
/** | |
* Create a string hash for a word. | |
* A pattern is defined by the character count at each index | |
* For every character, write to the string how many | |
* times we have seen that character already. This method | |
* assumes that each word has to be under 999 characters, | |
* which should accommodate all but two words in the | |
* English language. | |
* | |
* Also needs to save the previous index we've seen a letter before | |
* A character's hash is: 3 digits for lastIndex seen and then 3 digits for a counter | |
* This forces the count to match up to what character it's counting. | |
* With this, "book" won't match to "crct". | |
* | |
* Examples: | |
* "book" would hash to "0010010000100001001". | |
* "room" would also has to "0010010000100001001". | |
* "alice" would hash to "001001001001001". | |
* "car" and "bat" would hash to "001001001". | |
* | |
* @param s | |
*/ | |
public static String cryptoHash(String s) { | |
HashMap<Character, String> charCount = new HashMap<Character, String>(); | |
HashMap<Character, Integer> prevIndex = new HashMap<Character, Integer>(); | |
StringBuilder sb = new StringBuilder(); | |
for (int i = 0; i < s.length(); ++i) { | |
Character c = s.charAt(i); | |
String count = charCount.get(c); | |
if (count == null || count.equals("")) { | |
// Found a new character | |
charCount.put(c, "001"); | |
sb.append("001"); | |
prevIndex.put(c, i); | |
} else { | |
// Update last index c was seen | |
int prev = prevIndex.get(c); | |
int prevLength = (int)(Math.log10(prev)+1); | |
int fillerBits = TOTAL_DIGITS - prevLength; | |
prevIndex.put(c, i); | |
// Write the last index seen | |
for (int j = 0; j < fillerBits; ++j) { | |
sb.append("0"); | |
} | |
sb.append(count.toString()); | |
// Found a character we've seen before | |
int countValue = Integer.parseInt(count) + 1; | |
int countLength = (int)(Math.log10(countValue)+1); | |
fillerBits = TOTAL_DIGITS - countLength; | |
// Write the count | |
for (int j = 0; j < fillerBits; ++j) { | |
sb.append("0"); | |
} | |
sb.append(count.toString()); | |
} | |
} | |
return sb.toString(); | |
} | |
/** | |
* Assigns each word to the set of its crypto equivalence. | |
* Equivalence is found by checking each word's hash value. | |
* ie. Words with the same hash have crypto equivalence. | |
* @param list | |
* @return | |
*/ | |
public static List<LinkedList<String>> findCESets(List<String> list) { | |
HashMap<String, LinkedList<String>> mapHashToList = | |
new HashMap<String, LinkedList<String>>(); | |
// Map all strings to a set of words with the same hash. | |
for (String s : list) { | |
String hash = cryptoHash(s); | |
LinkedList<String> hashList = mapHashToList.get(hash); | |
if (hashList == null) { | |
// Create a new list for this hash | |
LinkedList<String> newList = new LinkedList<String>(); | |
newList.add(s); | |
mapHashToList.put(hash, newList); | |
} else { | |
// Add current word to the list | |
hashList.add(s); | |
} | |
} | |
// Convert hashmap to a list of lists | |
LinkedList<LinkedList<String>> result = new LinkedList<LinkedList<String>>(); | |
for (String key : mapHashToList.keySet()) { | |
LinkedList<String> ll = mapHashToList.get(key); | |
result.add(ll); | |
} | |
return result; | |
} | |
public static void main(String[] args) { | |
List<String> list = Arrays.asList("car", "mom", "dad", "ant", "book", "crct", | |
"room", "abbc", "alice", "asdasdasdas", "aluce", "bat", "roam"); | |
System.out.println(findCESets(list)); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment