Skip to content

Instantly share code, notes, and snippets.

@pior
Created April 2, 2015 19:18
Show Gist options
  • Save pior/785552bbfeb1f6470f05 to your computer and use it in GitHub Desktop.
Save pior/785552bbfeb1f6470f05 to your computer and use it in GitHub Desktop.
def make_bcp47_language_tag_re():
    """Build the compiled regular expression for BCP 47 language tags.

    The pattern is assembled piece by piece following the grammar of
    BCP 47 and accepts three kinds of tag: a regular ``langtag``, a
    stand-alone private-use tag (``x-...``) and the grandfathered
    registrations.

    Named groups exposed by the pattern: ``language``, ``extlang``,
    ``script``, ``region``, ``variant``, ``extension`` and
    ``grandfathered``.  ``variant`` and ``extension`` sit inside repeated
    groups, so only their last occurrence is captured.

    Matching is case-insensitive (ASCII letters only), because BCP 47
    defines tags as case-insensitive, and the pattern is anchored at the
    absolute end of the string so a trailing newline is rejected.

    Reference: http://www.rfc-editor.org/rfc/bcp/bcp47.txt#
    Validator: http://schneegans.de/lv/

    Returns:
        The compiled pattern object.
    """
    regular = (
        r"(art-lojban|cel-gaulish|no-bok|no-nyn|zh-guoyu|zh-hakka"
        r"|zh-min|zh-min-nan|zh-xiang)"
    )
    irregular = (
        r"(en-GB-oed|i-ami|i-bnn|i-default|i-enochian|i-hak|i-klingon"
        r"|i-lux|i-mingo|i-navajo|i-pwn|i-tao|i-tay|i-tsu|sgn-BE-FR"
        r"|sgn-BE-NL|sgn-CH-DE)"
    )
    grandfathered = r"(?P<grandfathered>" + irregular + "|" + regular + ")"

    private_use = r"(x(-[A-Za-z0-9]{1,8})+)"
    # Any alphanumeric except "x", which is reserved for private use.
    singleton = r"[0-9A-WY-Za-wy-z]"
    extension = r"(?P<extension>" + singleton + r"(-[A-Za-z0-9]{2,8})+)"
    variant = r"(?P<variant>[A-Za-z0-9]{5,8}|[0-9][A-Za-z0-9]{3})"
    region = r"(?P<region>[A-Za-z]{2}|[0-9]{3})"
    script = r"(?P<script>[A-Za-z]{4})"
    extlang = r"(?P<extlang>[A-Za-z]{3}(-[A-Za-z]{3}){0,2})"
    language = (
        r"(?P<language>([A-Za-z]{2,3}(-" + extlang + r")?)|[A-Za-z]{4}"
        r"|[A-Za-z]{5,8})"
    )

    langtag = (
        r"(" + language
        + r"(-" + script + r")?"
        + r"(-" + region + r")?"
        + r"(-" + variant + r")*"
        + r"(-" + extension + r")*"
        + r"(-" + private_use + r")?"
        + r")"
    )

    # \Z instead of $: "$" would also match just before a trailing newline
    # and let "en\n" through as a valid tag.
    language_tag = (
        r"^(" + langtag + "|" + private_use + "|" + grandfathered + r")\Z"
    )

    # Tags are case-insensitive.  Where re.ASCII exists it keeps IGNORECASE
    # from also admitting non-ASCII letters that case-fold to ASCII ones;
    # where it does not exist the getattr fallback contributes no flag.
    flags = re.IGNORECASE | getattr(re, "ASCII", 0)
    return re.compile(language_tag, flags)
# Built once here by make_bcp47_language_tag_re() so the functions below
# share a single compiled pattern instead of rebuilding it on every call.
RE_BCP47_LANGUAGE_TAG = make_bcp47_language_tag_re()
def parse_language_tag(tag):
    """Split a language tag into its named components.

    The tag is matched against ``RE_BCP47_LANGUAGE_TAG``.

    Returns:
        ``None`` when the tag does not match; otherwise a dict mapping
        each named group of the pattern to the text it captured (``None``
        for groups that did not take part in the match).
    """
    found = RE_BCP47_LANGUAGE_TAG.match(tag)
    if found is None:
        return None
    return found.groupdict()
def validate_language_tag(tag):
    """Run ``tag`` through a schema that requires a BCP 47 match.

    A ``Schema`` wrapping ``Match(RE_BCP47_LANGUAGE_TAG)`` is built and
    called with ``tag``; whatever that call returns is returned as is.

    NOTE(review): ``Schema`` and ``Match`` are imported outside this
    excerpt (presumably from voluptuous); a non-matching tag is then
    expected to raise that library's validation error carrying the
    message 'Invalid language tag' -- confirm against the imports.
    """
    return Schema(
        Match(RE_BCP47_LANGUAGE_TAG, msg='Invalid language tag')
    )(tag)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment