Skip to content

Instantly share code, notes, and snippets.

@karimofthecrop
Last active August 29, 2015 14:07
Show Gist options
  • Save karimofthecrop/48c8c823d0ae57399467 to your computer and use it in GitHub Desktop.
Save karimofthecrop/48c8c823d0ae57399467 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
# Takes in some JSON objects (one per line) on stdin
# Outputs a TSV on stdout
# Pipe it to a file or pipe it to your favorite shell utility
import codecs
import json
import string
import sys
from optparse import OptionParser
def parse_line(line, columns):
    """Write one JSON object to stdout as a single tab-separated row.

    Args:
        line: One JSON document, either as a UTF-8 byte string or as
            already-decoded text. UTF-8 byte-order-mark characters at
            the ends of the line are removed before parsing.
        columns: Keys to emit, in output order. A key that is missing
            from the object produces an empty cell.

    Raises:
        ValueError: If ``line`` is not valid UTF-8 or not valid JSON.
    """
    # ``unicode`` on Python 2, ``str`` on Python 3.
    text_type = type(u"")

    # Decode once at the boundary and drop the BOM: json.loads does not
    # accept text that still starts with a byte order mark.
    if isinstance(line, bytes):
        line = line.strip(codecs.BOM_UTF8).decode("utf-8")
    else:
        line = line.strip(u"\ufeff")
    record = json.loads(line)

    cells = []
    for col in columns:
        try:
            cells.append(text_type(record[col]))
        except KeyError:
            # Missing keys still occupy a column so rows stay aligned.
            cells.append(u"")
    row = u"\t".join(cells) + u"\n"

    if str is bytes:
        # Python 2: stdout takes byte strings; encode explicitly so that
        # non-ASCII text does not go through an implicit default codec.
        row = row.encode("utf-8")
    sys.stdout.write(row)
def get_columns(line, want_columns):
    """Return the column names to emit for one input line.

    Args:
        line: One JSON object, as a UTF-8 byte string or as text. It is
            only parsed when ``want_columns`` is None.
        want_columns: Explicit list of keys to output, or None to use
            every key present in the object.

    Returns:
        ``want_columns`` unchanged when it is given; otherwise the
        object's keys as a sorted list.

    Raises:
        ValueError: If the keys have to be derived and ``line`` is not
            valid UTF-8 or not valid JSON.
    """
    # An explicit selection does not depend on the line's content, so
    # there is no reason to parse the JSON at all.
    if want_columns is not None:
        return want_columns

    # Remove UTF-8 byte-order-mark characters before parsing; json.loads
    # does not accept text that still starts with one.
    if isinstance(line, bytes):
        line = line.strip(codecs.BOM_UTF8).decode("utf-8")
    else:
        line = line.strip(u"\ufeff")

    # sorted() gives a stable column order and, unlike keys().sort(),
    # also works with Python 3 dict views.
    return sorted(json.loads(line).keys())
def main():
    """Convert JSON objects read from stdin (one per line) to TSV rows.

    Command-line options:
        --cols=a,b,c  Comma-separated list of keys to output, in that
                      order. Without it, each line is written with all
                      of its own keys in sorted order.

    Blank lines are skipped. A keyboard interrupt stops the conversion
    and exits through ``sys.exit()`` with no error status.
    """
    parser = OptionParser()
    # option for comma-separated list of column headers/key names
    # ex. --cols=a,b,c
    parser.add_option("--cols", action="store", type="string", dest="columns")
    options, _args = parser.parse_args()

    want_columns = None
    if options.columns is not None:
        want_columns = options.columns.split(",")

    try:
        for line in sys.stdin:
            # A blank line (e.g. a trailing empty line) holds no JSON
            # document; skip it instead of aborting the whole run.
            if not line.strip():
                continue
            columns = get_columns(line, want_columns)
            parse_line(line, columns)
    except KeyboardInterrupt:
        sys.exit()
# Script entry point: run the converter only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment