Created
September 25, 2013 13:01
-
-
Save ebouchut/6699245 to your computer and use it in GitHub Desktop.
JavaScript regex to test if a language code is RFC5646 compliant [locale] [I18N]
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
private static RegExp buildRegexpLangRfc5646() | |
{ | |
String extLang = "([A-Za-z]{3}(-[A-Za-z]{3}){0,2})"; | |
String language = | |
"(([a-zA-Z]{2,3}(-" + extLang + ")?)|([a-zA-Z]{5,8}))"; | |
String script = "([A-Za-z]{4})"; | |
String region = "([A-Za-z]{2}|\\d{3})"; | |
String variant = "([A-Za-z0-9]{5,8}|(\\d[A-Z-a-z0-9]{3}))"; | |
String singleton = "(\\d|[A-W]|[Y-Z]|[a-w]|[y-z])"; | |
String extension = "(" + singleton + "(-[A-Za-z0-9]{2,8})+)"; | |
String privateUse = "(x(-[A-Za-z0-9]{1,8})+)"; | |
String langTag = | |
language + "(-" + script + ")?(-" + region + ")?(-" + variant | |
+ ")*(-" + extension + ")*(-" + privateUse + ")?"; | |
String irregular = | |
"((en-GB-oed)|(i-ami)|(i-bnn)|(i-default)|(i-enochian)|(i-hak)|(i-klingon)|(i-lux)|(i-mingo)|(i-navajo)|(i-pwn)|(i-tao)|(i-tay)|(i-tsu)|(sgn-BE-FR)|(sgn-BE-NL)|(sgn-CH-DE))"; | |
String regular = | |
"((art-lojban)|(cel-gaulish)|(no-bok)|(no-nyn)|(zh-guoyu)|(zh-hakka)|(zh-min)|(zh-min-nan)|(zh-xiang))"; | |
String grandFathered = "(" + irregular + "|" + regular + ")"; | |
StringBuffer languageTag = new StringBuffer(); | |
languageTag.append("(^").append(privateUse).append("$)"); | |
languageTag.append('|'); | |
languageTag.append("(^").append(grandFathered).append("$)"); | |
languageTag.append('|'); | |
languageTag.append("(^").append(langTag).append("$)"); | |
return RegExp.compile(languageTag.toString()); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment