Created
December 2, 2017 06:13
-
-
Save serser/bd95cba693d92723f72d42bca799a809 to your computer and use it in GitHub Desktop.
Get the list of Italian municipalities (comuni) with wikitables
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from wikitables import import_tables | |
import string | |
import json | |
import sys | |
# Page-title template; %s is replaced by the letter (or letter range) suffix.
title_fmt = "Comuni d'Italia (%s)"
alphabet = string.ascii_uppercase

# Letters that get no title of their own ("not in the list").
# 'I' is skipped here because it is covered by the combined 'H-I' title.
_skipped_letters = ('I', 'K', 'W', 'X', 'Y')

# Build one title per remaining letter, in alphabetical order.
titles = []
for ch in alphabet:
    if ch in _skipped_letters:
        continue
    # H and I are combined under a single "H-I" title.
    suffix = 'H-I' if ch == 'H' else ch
    titles.append(title_fmt % suffix)
def peep(**params):
    """Debug helper: print the name of each keyword argument, one per line.

    Only the keyword names are printed; the values are ignored.
    """
    for param in params:
        # Parenthesised form works as a statement on Python 2 and as a
        # function call on Python 3.
        print(param)
def _csv_line(row):
    """Return one 'Comune,Popolazione,Area,Regione' line for a table row.

    `row` must support key membership tests and item lookup by column name.

    * A row without a 'Popolazione' column gets an empty population field.
    * When a 'Provincia' column is present its value is used for the area
      field; otherwise the 'Area' column is used.
      NOTE(review): presumably some pages label the column 'Provincia'
      instead of 'Area' -- confirm against the actual tables.

    A missing 'Comune' or 'Regione' column (or neither 'Area' nor
    'Provincia') is not handled here; the row's own lookup error
    propagates to the caller.
    """
    # `in` replaces dict.has_key(), which no longer exists on Python 3.
    population = row['Popolazione'] if 'Popolazione' in row else ''
    area = row['Provincia'] if 'Provincia' in row else row['Area']
    return '{},{},{},{}'.format(row['Comune'], population, area, row['Regione'])


# Fetch every table of every title (lang='it') and print one
# comma-separated line per row.
for title in titles:
    for table in import_tables(title, lang='it'):
        for row in table.rows:
            print(_csv_line(row))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment