Created
March 29, 2017 13:08
-
-
Save jbothma/545a74ce8fad123b9d2d0bbc09e22d7b to your computer and use it in GitHub Desktop.
Flatten/denormalise nested json objects in jsonlines input file to a flat CSV.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import fileinput | |
| import json | |
| import sys | |
| import csv | |
| from copy import copy | |
| fieldnames = [ | |
| 'regno', | |
| 'name', | |
| 'status', | |
| 'member_id', | |
| 'member_first_names', | |
| 'member_last_name', | |
| 'member_position', | |
| 'member_status', | |
| 'type', | |
| 'regdate', | |
| 'physaddress', | |
| 'postaddress', | |
| 'compliance', | |
| ] | |
| writer = csv.DictWriter(sys.stdout, fieldnames=fieldnames) | |
| #writer.writeheader() | |
| for line in fileinput.input(): | |
| company = json.loads(line) | |
| company['physaddress'] = company['physaddress'].replace("\n", ", ") | |
| company['postaddress'] = company['postaddress'].replace("\n", ", ") | |
| # fake an empty directory if none | |
| if not company['directors']: | |
| company['directors'] = [[None, None, None, None, None]] | |
| for director in company['directors']: | |
| row = copy(company) | |
| del row['directors'] | |
| row['member_id'] = director[0] | |
| row['member_first_names'] = director[1] | |
| row['member_last_name'] = director[2] | |
| row['member_position'] = director[3] | |
| row['member_status'] = director[4] | |
| writer.writerow(row) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment