Last active
January 4, 2016 20:29
-
-
Save mccutchen/8674878 to your computer and use it in GitHub Desktop.
Trying to help Marshal munge some data: two scripts that convert line-delimited JSON event records into CSV.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import csv | |
| import fileinput | |
| import json | |
| import sys | |
def main():
    """Convert line-delimited JSON events to CSV on standard output.

    Input is read with ``fileinput.input()``, i.e. from the files named on
    the command line, or from standard input when none are given.  Each
    non-blank line must be a JSON object with an ``"event"`` key and a
    ``"properties"`` key holding a nested object.

    One CSV row is written per input record.  The columns are the sorted
    union of every key seen in any record's ``properties`` plus ``event``;
    a record that lacks a column gets an empty cell.

    Returns:
        0, suitable for passing to ``sys.exit``.

    Raises:
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a record lacks ``"event"`` or ``"properties"``.
    """
    fields = set()
    rows = []
    for line in fileinput.input():
        # Skip blank/whitespace-only lines (e.g. spacing between records)
        # rather than letting json.loads blow up on them.
        if not line.strip():
            continue
        whole_row = json.loads(line)
        # This assumes that you want the top-level "event" field in the output
        # along with every field from the "properties" sub-field.
        row = whole_row['properties']
        row['event'] = whole_row['event']
        fields.update(row)
        rows.append(row)

    # The header can only be known after every record has been seen, which
    # is why the rows are buffered.  Sorting keeps column order deterministic.
    keys = sorted(fields)

    # change the restval param to control what gets inserted in place of a
    # missing value
    writer = csv.DictWriter(sys.stdout, keys, restval='')

    # remove this line if you don't want a header row
    writer.writeheader()
    writer.writerows(rows)
    return 0


if __name__ == '__main__':
    # main() returns the process exit status.
    sys.exit(main())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| import csv | |
| import json | |
| import StringIO | |
def json_to_csv(input_rows, outfile):
    """Write event records to ``outfile`` as CSV.

    Args:
        input_rows: iterable of dicts, each with an ``'event'`` key and a
            ``'properties'`` key holding a dict of per-event fields.
        outfile: writable file-like object handed to ``csv.DictWriter``.

    The output has a header row followed by one row per record.  Columns
    are the sorted union of all ``properties`` keys plus ``event``; a
    record that lacks a column gets an empty cell.

    The input records are not modified.

    Raises:
        KeyError: if a record lacks ``'event'`` or ``'properties'``.
    """
    fields = set()
    rows = []
    for whole_row in input_rows:
        # This assumes that you want the top-level "event" field in the output
        # along with every field from the "properties" sub-field.
        # Copy the properties dict so that adding "event" does not leak back
        # into the caller's data.
        row = dict(whole_row['properties'])
        row['event'] = whole_row['event']
        fields.update(row)
        rows.append(row)

    # Sorted so the column order is deterministic.
    keys = sorted(fields)

    # change the restval param to control what gets inserted in place of a
    # missing value
    writer = csv.DictWriter(outfile, keys, restval='')

    # remove this line if you don't want a header row
    writer.writeheader()
    writer.writerows(rows)
# dummy implementation of this function
def fetch_latest_json(path='data.json'):
    """Load line-delimited JSON records from a file.

    Args:
        path: file to read; defaults to ``'data.json'`` in the current
            working directory.

    Returns:
        A list with one parsed JSON value per non-blank line, in file order.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if a non-blank line is not valid JSON.
    """
    with open(path) as json_file:
        # Blank lines carry no record; skip them instead of failing to parse.
        return [json.loads(line) for line in json_file if line.strip()]
# Example driver showing two ways to call json_to_csv
def main():
    """Fetch the event records and render them as CSV twice.

    First into a file on disk, then into an in-memory buffer whose text is
    pulled out as a string.
    """
    # assuming some function like this exists
    events = fetch_latest_json()

    # Variant 1: CSV goes straight into a file.  The file is opened in
    # binary mode ('wb'), matching the Python 2 style of this script.
    with open('path/to/output.csv', 'wb') as out_file:
        json_to_csv(events, out_file)

    # Variant 2: CSV goes into a string buffer, in case the text needs some
    # post-processing before it is used.
    string_buf = StringIO.StringIO()
    json_to_csv(events, string_buf)
    # Deliberately left unused: this is where post-processing would start.
    csv_text = string_buf.getvalue()


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment