Skip to content

Instantly share code, notes, and snippets.

@jayvdb
Last active February 20, 2018 04:58
Show Gist options
  • Save jayvdb/d1d870cf0e5f09837fd78cf82cce276b to your computer and use it in GitHub Desktop.
Save jayvdb/d1d870cf0e5f09837fd78cf82cce276b to your computer and use it in GitHub Desktop.
Indonesian Regency code comparison
"""
Indonesia regency code comparison.
The provinces differ for two codes, in the 90s, covering Papua and Papua Barat.
More information on the provinces is at:
https://gitlab.com/ciptamedia/ciptamedia-dev.gitlab.io/issues/104#note_59625585
This script compares the regencies to determine how much the two lists overlap.
"""
import csv
import os
def load_csv(filename, fieldnames=None):
    """Read a CSV file into a list of row dictionaries.

    Args:
        filename: Path of the CSV file to read.
        fieldnames: Optional list of column names.  When omitted,
            ``csv.DictReader`` takes the names from the first row.

    Returns:
        A list with one mapping per data row, keyed by column name.
    """
    # newline='' leaves line-ending handling to the csv module, so newlines
    # embedded in quoted fields are kept verbatim instead of being rewritten
    # by universal-newline translation.  The encoding is pinned so decoding
    # does not depend on the platform's locale.
    with open(filename, newline='', encoding='utf-8') as f:
        return list(csv.DictReader(f, fieldnames=fieldnames))
def get_wdq_regencies():
    """Load the Wikidata regency rows.

    Reads ``wdq-regencies.csv`` from the parent of the current working
    directory; no field names are passed, so the column names come from
    the file's first row.

    Returns:
        A list with one row mapping per regency.
    """
    # The CSV is an export of http://tinyurl.com/y9nacntd
    wdq_path = os.path.join('..', 'wdq-regencies.csv')
    return list(load_csv(wdq_path))
def get_bps_regencies():
    """Load the BPS regency rows.

    Reads ``bps-regencies.csv`` from the parent of the current working
    directory.  The column names are supplied explicitly, so every line
    of the file is treated as data.

    Returns:
        A list of row mappings with the keys ``code``, ``province_code``
        and ``name``.
    """
    # The CSV comes from
    # https://github.com/edwardsamuel/Wilayah-Administratif-Indonesia/blob/master/csv/regencies.csv
    bps_path = os.path.join('..', 'bps-regencies.csv')
    columns = ['code', 'province_code', 'name']
    return list(load_csv(bps_path, fieldnames=columns))
def _drop_separators(text):
    """Return *text* with every hyphen and space removed."""
    return text.replace('-', '').replace(' ', '')


def check_regency_lists():
    """Compare BPS regency names against the Wikidata labels, code by code.

    For each BPS row, the Wikidata row with the same code is looked up via
    its ``rawcode`` column.  Codes with no Wikidata row are reported and
    skipped.  Otherwise both names are lower-cased and stripped of hyphens
    and spaces (the BPS name also loses the word "kabupaten") before being
    compared.  A line is printed per regency, followed by the totals of
    matching and non-matching names.
    """
    wdq_rows = get_wdq_regencies()
    bps_rows = get_bps_regencies()

    # Index the Wikidata rows by code; a repeated code keeps the last row.
    wdq_by_code = {entry['rawcode']: entry for entry in wdq_rows}

    matched = 0
    mismatched = 0
    for bps_row in bps_rows:
        code = bps_row['code']
        wdq_row = wdq_by_code.get(code)
        if wdq_row is None:
            print('Code %s missing from Wikidata' % code)
            continue

        bps_name = _drop_separators(
            bps_row['name'].lower().replace('kabupaten', '').strip())
        label = wdq_row['itemLabel']
        wdq_name = _drop_separators(label.lower())

        if wdq_name != bps_name:
            mismatched += 1
            print('%s != %s' % (label, bps_row['name']))
        else:
            matched += 1
            print('%s ok' % label)

    print('Matches: %d' % matched)
    print('Not ok: %d' % mismatched)
def main(argv=None):
    """Script entry point: run the regency list comparison.

    Args:
        argv: Accepted for the usual entry-point signature; not used.
    """
    check_regency_lists()


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment