Skip to content

Instantly share code, notes, and snippets.

@mccutchen
Last active January 4, 2016 20:29
Show Gist options
  • Select an option

  • Save mccutchen/8674878 to your computer and use it in GitHub Desktop.

Select an option

Save mccutchen/8674878 to your computer and use it in GitHub Desktop.
Trying to help Marshal munge some data
#!/usr/bin/env python
import csv
import fileinput
import json
import sys
def main():
    """Convert JSON-lines event records to CSV on standard output.

    Reads one JSON object per line via ``fileinput.input()`` (the files named
    on the command line, or standard input when none are given). Each output
    row holds the record's top-level "event" value plus every key of its
    "properties" object. The header is the sorted union of all keys seen
    across the input; a key missing from a given row is written as ''.

    Returns:
        0, suitable for use as the process exit status.

    Raises:
        ValueError: if a non-blank line is not valid JSON.
        KeyError: if a record has no "properties" or no "event" key.
    """
    fields = set()
    # Rows are buffered because the header cannot be written until every
    # record has been seen and the full set of column names is known.
    rows = []
    for line in fileinput.input():
        # A blank line (e.g. a stray empty line at the end of the file)
        # carries no record; skip it rather than letting json.loads raise.
        if not line.strip():
            continue
        whole_row = json.loads(line)
        # This assumes that you want the top-level "event" field in the output
        # along with every field from the "properties" sub-field.
        row = whole_row['properties']
        row['event'] = whole_row['event']
        fields.update(row)
        rows.append(row)
    keys = sorted(fields)
    # change the restval param to control what gets inserted in place of a
    # missing value
    writer = csv.DictWriter(sys.stdout, keys, restval='')
    # remove this line if you don't want a header row
    writer.writeheader()
    writer.writerows(rows)
    return 0
# Run the conversion only when executed as a script, and hand main()'s
# return value to the interpreter as the process exit status.
if __name__ == '__main__':
    raise SystemExit(main())
#!/usr/bin/env python
import csv
import json
import StringIO
def json_to_csv(input_rows, outfile):
    """Write event records to ``outfile`` as CSV.

    Each output row holds the record's top-level "event" value plus every
    key of its "properties" mapping. The header is the sorted union of all
    keys seen across ``input_rows``; a key missing from a given row is
    written as ''.

    Args:
        input_rows: iterable of mappings, each with an "event" key and a
            "properties" key whose value is a mapping. The records are not
            modified.
        outfile: writable file-like object that receives the CSV text.

    Raises:
        KeyError: if a record has no "properties" or no "event" key.
    """
    fields = set()
    # Rows are buffered because the header cannot be written until every
    # record has been seen and the full set of column names is known.
    rows = []
    for whole_row in input_rows:
        # This assumes that you want the top-level "event" field in the output
        # along with every field from the "properties" sub-field.
        # Work on a copy so the caller's "properties" dict is left untouched.
        row = dict(whole_row['properties'])
        row['event'] = whole_row['event']
        fields.update(row)
        rows.append(row)
    keys = sorted(fields)
    # change the restval param to control what gets inserted in place of a
    # missing value
    writer = csv.DictWriter(outfile, keys, restval='')
    # remove this line if you don't want a header row
    writer.writeheader()
    writer.writerows(rows)
# dummy implementation of this function
def fetch_latest_json(path='data.json'):
    """Load records from a file containing one JSON object per line.

    Args:
        path: name of the file to read. Defaults to 'data.json' in the
            current working directory.

    Returns:
        A list of the decoded JSON values, in file order.

    Raises:
        IOError / OSError: if the file cannot be opened.
        ValueError: if a non-blank line is not valid JSON.
    """
    with open(path) as json_file:
        # Blank lines carry no record; skip them rather than letting
        # json.loads raise on an empty string.
        return [json.loads(line) for line in json_file if line.strip()]
# Usage example for json_to_csv
def main():
    """Demonstrate json_to_csv with a file target and an in-memory target."""
    # stand-in for however the records are really obtained
    records = fetch_latest_json()

    # Variant 1: stream the CSV straight into a file on disk.
    with open('path/to/output.csv', 'wb') as out_handle:
        json_to_csv(records, out_handle)

    # Variant 2: collect the CSV in an in-memory buffer and pull it out as a
    # string, for cases where the text needs further processing.
    buf = StringIO.StringIO()
    json_to_csv(records, buf)
    csv_text = buf.getvalue()
# Run the usage example only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment