Skip to content

Instantly share code, notes, and snippets.

@fforbeck
Last active December 14, 2015 13:09
Show Gist options
  • Select an option

  • Save fforbeck/5091927 to your computer and use it in GitHub Desktop.

Select an option

Save fforbeck/5091927 to your computer and use it in GitHub Desktop.
import java.text.Normalizer;
/**
 * Helpers for reducing accented Latin text to plain ASCII letters.
 *
 * <p>Two independent strategies are offered:
 * <ul>
 *   <li>{@link #normalize(String)} folds a fixed table of precomposed accented
 *       letters to their base letter and drops a handful of punctuation
 *       characters; every other character is kept as is.</li>
 *   <li>{@link #removeAccents(String)} decomposes the text (Unicode NFD) and
 *       then discards every character outside the ASCII range.</li>
 * </ul>
 *
 * <p>All accented characters in this file are written as Unicode escapes so the
 * class compiles identically regardless of the source-file encoding.
 */
public class StringNormalizer {

    /** Characters that {@link #normalize(String)} removes outright: quote, angle brackets, pipe, slash. */
    private static final String STRIPPED_CHARS = "'<>|/";

    /**
     * Replaces the accented letters known to this class with their unaccented
     * base letter and removes the characters {@code ' < > | /}.
     *
     * <p>Only precomposed characters are recognised; a base letter followed by a
     * separate combining mark is left untouched.
     *
     * @param word the text to clean; may be {@code null}
     * @return {@code word} itself when it is {@code null}, empty or only
     *         whitespace; otherwise a new string with the substitutions applied
     */
    public String normalize(String word) {
        if (word == null || word.trim().isEmpty()) {
            return word;
        }
        final int length = word.length();
        final StringBuilder cleaned = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            final char current = word.charAt(i);
            if (STRIPPED_CHARS.indexOf(current) >= 0) {
                continue; // dropped, not replaced
            }
            cleaned.append(foldAccent(current));
        }
        return cleaned.toString();
    }

    /**
     * Maps one accented letter to its base letter.
     *
     * @param c the character to fold
     * @return the unaccented letter, or {@code c} unchanged when it is not in the table
     */
    private static char foldAccent(char c) {
        switch (c) {
            // A with circumflex, grave, acute, diaeresis, tilde
            case '\u00C2': case '\u00C0': case '\u00C1': case '\u00C4': case '\u00C3':
                return 'A';
            case '\u00E2': case '\u00E3': case '\u00E0': case '\u00E1': case '\u00E4':
                return 'a';
            // E with circumflex, grave, acute, diaeresis
            case '\u00CA': case '\u00C8': case '\u00C9': case '\u00CB':
                return 'E';
            case '\u00EA': case '\u00E8': case '\u00E9': case '\u00EB':
                return 'e';
            // I with circumflex, acute, grave, diaeresis (each letter individually,
            // not only when all four appear in sequence)
            case '\u00CE': case '\u00CD': case '\u00CC': case '\u00CF':
                return 'I';
            case '\u00EE': case '\u00ED': case '\u00EC': case '\u00EF':
                return 'i';
            // O with circumflex, tilde, grave, acute, diaeresis
            case '\u00D4': case '\u00D5': case '\u00D2': case '\u00D3': case '\u00D6':
                return 'O';
            case '\u00F4': case '\u00F5': case '\u00F2': case '\u00F3': case '\u00F6':
                return 'o';
            // U with circumflex, grave, acute, diaeresis
            case '\u00DB': case '\u00D9': case '\u00DA': case '\u00DC':
                return 'U';
            case '\u00FB': case '\u00FA': case '\u00F9': case '\u00FC':
                return 'u';
            // C with cedilla
            case '\u00C7':
                return 'C';
            case '\u00E7':
                return 'c';
            // y with acute, y with diaeresis
            case '\u00FD': case '\u00FF':
                return 'y';
            // Y with acute
            case '\u00DD':
                return 'Y';
            // n / N with tilde
            case '\u00F1':
                return 'n';
            case '\u00D1':
                return 'N';
            default:
                return c;
        }
    }

    /**
     * Strips accents by decomposing the text to Unicode NFD and keeping only
     * ASCII characters (code units below 0x80).
     *
     * <p>Note that this removes <em>every</em> non-ASCII character, not just
     * combining marks: characters with no ASCII decomposition are dropped.
     *
     * @param str the text to process; may be {@code null}
     * @return {@code null} when {@code str} is {@code null}; otherwise the
     *         ASCII-only remainder of the decomposed text
     */
    public static String removeAccents(String str) {
        if (str == null) {
            return null;
        }
        final String decomposed = Normalizer.normalize(str, Normalizer.Form.NFD);
        final StringBuilder ascii = new StringBuilder(decomposed.length());
        for (int i = 0; i < decomposed.length(); i++) {
            final char current = decomposed.charAt(i);
            if (current < 0x80) {
                ascii.append(current);
            }
        }
        return ascii.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment