Skip to content

Instantly share code, notes, and snippets.

@batisteo
Created April 3, 2014 11:02
Show Gist options
  • Save batisteo/9952422 to your computer and use it in GitHub Desktop.
Save batisteo/9952422 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Regenerate the list of languages based on the sil.org website.
"""
import os
import codecs
import csv
import requests
try:
from StringIO import StringIO
except ImportError:
from io import StringIO
# Skeleton of the generated ``languages.py`` module.  Both placeholders are
# filled with lines of the form ``"code": _("Name"),`` -- key/value pairs --
# so each container must be a dict literal.  Wrapping such pairs in
# parentheses makes the generated module fail to import with a SyntaxError.
TEMPLATE = """# -*- coding: utf-8 -*-
from django.utils.translation import ugettext_lazy as _
LANGUAGES_SUBSET = {
%(languages_subset)s
}
LANGUAGES = {
%(languages)s
}
"""
def open_url(location, encoding):
    """Download *location* and return its body as an in-memory text stream.

    ``encoding`` is only a fallback: it is applied when the response's
    ``Content-Type`` header does not declare a charset, or when the header
    is missing altogether.

    Raises ``requests.HTTPError`` when the server answers with an error
    status, so an error page is never handed to the parser as data.
    """
    response = requests.get(location)
    # Fail loudly on 4xx/5xx instead of parsing an error page as the table.
    response.raise_for_status()
    # A missing Content-Type header is treated like one without a charset.
    content_type = response.headers.get('content-type', '')
    if 'charset=' not in content_type:
        response.encoding = encoding
    return StringIO(response.text)
def to_dict(raw_data):
    """Parse tab-separated text into a list of dicts, one per data row.

    ``raw_data`` is any iterable of text lines (for example a ``StringIO``).
    The first row supplies the keys; each following row is zipped against
    it.  Blank lines are skipped and empty input yields an empty list.
    """
    reader = csv.reader(raw_data, delimiter='\t')
    # Default of None: empty input must not leak StopIteration to the caller.
    header = next(reader, None)
    if header is None:
        return []
    # csv.reader yields [] for a blank line; such rows carry no record.
    return [dict(zip(header, row)) for row in reader if row]
def write_file(languages, languages_subset, filename=None):
    """Render TEMPLATE with the given entries and save the result as UTF-8.

    ``languages`` and ``languages_subset`` are iterables of already
    formatted source lines.  When ``filename`` is falsy, the output goes to
    ``languages.py`` in the directory that contains this script.
    """
    target = filename
    if not target:
        script_dir = os.path.dirname(os.path.realpath(__file__))
        target = os.path.join(script_dir, 'languages.py')
    separator = '\n '
    with codecs.open(target, 'w', 'utf-8') as handle:
        rendered = TEMPLATE % {
            'languages_subset': separator.join(languages_subset),
            'languages': separator.join(languages),
        }
        handle.write(rendered)
def _escape(text):
    """Escape backslashes and double quotes for a double-quoted literal."""
    return text.replace('\\', '\\\\').replace('"', '\\"')


def _entry(code, name):
    """Format one ``"code": _("name"),`` line of the generated module."""
    return '"' + _escape(code) + '": _("' + _escape(name) + '"),'


def regenerate(location='http://www-01.sil.org/iso639-3/iso-639-3.tab',
               encoding='utf-8',
               filename=None):
    """Download the language table and rewrite the generated module.

    ``location`` is the URL of the tab-separated table, ``encoding`` the
    fallback charset passed to ``open_url``, and ``filename`` the output
    path handed to ``write_file`` (``None`` selects its default location).

    Each row must provide the ``Id``, ``Part1`` and ``Ref_Name`` columns;
    a missing column raises ``KeyError``.
    """
    raw_data = open_url(location, encoding)
    # Languages list (about 8000)
    data = to_dict(raw_data)
    languages = [_entry(d['Id'], d['Ref_Name']) for d in data]
    # Subset of languages which have a 2 char code (about 200)
    languages_subset = [
        _entry(d['Part1'], d['Ref_Name']) for d in data if d['Part1']
    ]
    # Forward ``filename``: it used to be accepted but silently ignored, so
    # the output always landed in the default location.
    write_file(languages, languages_subset, filename)
# Script entry point: when executed directly (not imported), rebuild the
# generated module using regenerate()'s default arguments.
if __name__ == '__main__':
    regenerate()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment