Created
September 13, 2017 21:54
-
-
Save attacco/d4d2586114d8b52e5851a10537491dd4 to your computer and use it in GitHub Desktop.
Homoglyphs directory.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import javax.annotation.Nullable; | |
import java.util.Arrays; | |
import java.util.HashSet; | |
import java.util.List; | |
import java.util.Set; | |
public class Homoglyphs { | |
private static final List<char[]> LIST = Arrays.asList( | |
prepare("! ǃ !"), | |
prepare("\" ״ ″ ""), | |
prepare("$ $"), | |
prepare("% %"), | |
prepare("& &"), | |
prepare("' '"), | |
prepare("( ﹝ ("), | |
prepare(") ﹞ )"), | |
prepare("{ {"), | |
prepare("| ǀ ا |"), | |
prepare("} }"), | |
prepare("~ ⁓ ~"), | |
prepare("* ⁎ *"), | |
prepare("+ +"), | |
prepare(", ‚ ,"), | |
prepare("- ‐ \uD800 \uD991 -"), | |
prepare(". ٠ ۔ ܁ ܂ ․ ‧ 。 . 。"), | |
prepare("/ ̸ ⁄ ∕ ╱ ⫻ ⫽ / ノ"), | |
prepare("1 I ا 1"), | |
prepare("2 2"), | |
prepare("3 3 з З э Э"), | |
prepare("4 4 ч Ч"), | |
prepare("5 5"), | |
prepare("6 6 б"), | |
prepare("7 \uD801 \uD887 7"), | |
prepare("8 Ց 8"), | |
prepare("9 9"), | |
prepare(": ։ ܃ ܄ ∶ ꞉ :"), | |
prepare("; ; ;"), | |
prepare("< ‹ <"), | |
prepare("= \uD800 \uD990 ="), | |
prepare("> › >"), | |
prepare("? ?"), | |
prepare("@ @"), | |
prepare("[ ["), | |
prepare("\\ \"), | |
prepare("^ ^"), | |
prepare("_ _"), | |
prepare("` `"), | |
prepare("A a À Á Â Ã Ä Å à á â ã ä å ɑ Α α а Ꭺ A a"), | |
prepare("B b ß ʙ Β β В Ь Ᏼ ᛒ B b ъ Ъ"), | |
prepare("C c ϲ Ϲ С с Ꮯ Ⅽ ⅽ \uD801 \uD8A8 C c"), | |
prepare("D d Ď ď Đ đ ԁ ժ Ꭰ Ⅾ ⅾ D d"), | |
prepare("E e È É Ê Ë é ê ë Ē ē Ĕ ĕ Ė ė Ę Ě ě Ε Е е Ꭼ E e ё Ё"), | |
prepare("F f Ϝ F f"), | |
prepare("G g ɡ ɢ Ԍ ն Ꮐ G g"), | |
prepare("H h ʜ Η Н һ Ꮋ H h"), | |
prepare("I i l ɩ Ι І і ا Ꭵ ᛁ Ⅰ ⅰ \uD801 \uD883 I i"), | |
prepare("J j ϳ Ј ј յ Ꭻ J j"), | |
prepare("K k Κ κ К Ꮶ ᛕ K K k"), | |
prepare("L l ʟ ι ا Ꮮ Ⅼ ⅼ L l"), | |
prepare("M m Μ Ϻ М Ꮇ ᛖ Ⅿ ⅿ M m"), | |
prepare("N n ɴ Ν N n"), | |
prepare("O o 0 O o Ο ο О о Օ \uD801 \uD886 O o ö Ö Ӧ"), | |
prepare("P p Ρ ρ Р р Ꮲ P p"), | |
prepare("Q q Ⴍ Ⴓ Q q"), | |
prepare("R r ʀ Ի Ꮢ ᚱ R r г"), | |
prepare("S s Ѕ ѕ Տ Ⴝ Ꮪ \uD801 \uD896 S s"), | |
prepare("T t Τ τ Т Ꭲ T t"), | |
prepare("U u μ υ Ա Ս ⋃ U u"), | |
prepare("V v ν Ѵ ѵ Ꮩ Ⅴ ⅴ V v"), | |
prepare("W w ѡ Ꮃ W w"), | |
prepare("X x Χ χ Х х Ⅹ ⅹ X x"), | |
prepare("Y y ʏ Υ γ у Ү Y y"), | |
prepare("Z z Ζ Ꮓ Z z") | |
); | |
private static char[] prepare(String homoglyphs) { | |
char[] chars = homoglyphs.toCharArray(); | |
final Set<Character> set = new HashSet<>(); | |
int newLen = 0; | |
for (int i = 0; i < chars.length; i++) { | |
final char ch = Character.toLowerCase(chars[i]); | |
if (ch != ' ' && set.add(ch)) { | |
chars[newLen++] = ch; | |
} | |
} | |
if (newLen < chars.length) { | |
final char[] newChars = new char[newLen]; | |
System.arraycopy(chars, 0, newChars, 0, newLen); | |
chars = newChars; | |
} | |
Arrays.sort(chars); | |
return chars; | |
} | |
@Nullable | |
public static char[] getHomoglyphs(char charInLowerCase) { | |
for (char[] chars : LIST) { | |
final int i = Arrays.binarySearch(chars, charInLowerCase); | |
if (i >= 0) { | |
return chars; | |
} | |
} | |
return null; | |
} | |
private static void printDebug() { | |
for (char[] chars : LIST) { | |
System.out.println("[" + new String(chars) + "]"); | |
} | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment