Created
September 9, 2011 07:08
-
-
Save ConradIrwin/1205655 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| from jsonpath import jsonpath | |
| import argparse | |
| import sys | |
| import os | |
def main(args):
    """Read JSON from ``args.input`` and write one delimited row per record.

    The input is parsed either as a single JSON document or, when
    ``args.multiline`` is set, as one JSON document per line collected into a
    list. Records are selected with the JSONPath expression ``args.lines``;
    each record is turned into cells by ``format_record`` and written to
    ``args.output`` as ``args.delimeter``-joined cells followed by
    ``args.separator``.

    Processing stops early (nothing further is written) when
    ``format_record`` signals an error by returning ``None``, or when a cell
    contains the delimiter/separator and ``args.quiet`` is not set.
    """
    if args.multiline:
        # Skip blank lines (e.g. a trailing newline) instead of letting
        # json.loads fail on them.
        document = [json.loads(line) for line in args.input if line.strip()]
    else:
        document = json.load(args.input)

    # `or []` turns a falsy jsonpath() result into "no records".
    for record in jsonpath(document, args.lines) or []:
        cells = format_record(record, args)
        if cells is None:
            # format_record has already reported the problem on stderr.
            return

        if not args.quiet and any(
                args.delimeter in cell or args.separator in cell
                for cell in cells):
            sys.stderr.write(repr(cells) + " contains a delimeter!\nTry using --escape\n")
            return

        line = args.delimeter.join(cells) + args.separator
        if not isinstance(line, str):
            # Python 2 unicode text: encode before writing. A native str is
            # written unchanged so it is never re-encoded (and so Python 3
            # text streams receive text rather than bytes).
            line = line.encode('utf-8')
        args.output.write(line)
def format_record(record, args):
    """Turn one JSON record into the list of text cells for one output row.

    The mode is chosen in this order:

    * ``args.keys``: return the keys of a dict record, the indices (as
      strings) of a list record, or a one-element placeholder such as
      ``"<string>"`` naming the type of a scalar record.
    * ``args.array``: each field in ``args.fields`` becomes the JSON dump of
      whatever ``jsonpath(record, field)`` returned.
    * otherwise: each field becomes a single cell. No match gives
      ``args.blank``; ``null``/``false``/``true`` give ``args.null``,
      ``args.false`` and ``args.true``; strings are emitted verbatim unless
      ``args.escape`` is set; every other value is dumped as JSON.

    Returns:
        The list of cells, or ``None`` after writing a message to stderr when
        a field matches more than once and no join option is set.

    Raises:
        KeyError: in keys mode, for a scalar record of an unrecognised type.
    """
    # Python 2 has distinct unicode/long types; fall back to str/int where
    # those names do not exist so the function also runs on Python 3.
    try:
        text_type, long_type = unicode, long  # noqa: F821
    except NameError:
        text_type, long_type = str, int

    if args.keys:
        if isinstance(record, dict):
            return list(record.keys())
        if isinstance(record, list):
            return [str(index) for index in range(len(record))]
        scalar_names = {
            text_type: "<string>",
            bool: "<boolean>",
            int: "<number>",
            long_type: "<number>",
            float: "<number>",
            type(None): "<null>",
        }
        return [scalar_names[record.__class__]]

    if args.array:
        return [json.dumps(jsonpath(record, field)) for field in args.fields]

    # NOTE(review): the argument parser visible in this file defines no
    # --join option, so read it defensively; a plain `args.join` raised
    # AttributeError on every multi-match. When a join value IS supplied,
    # only the first match is emitted below - confirm the intended behaviour.
    join = getattr(args, "join", None)

    cells = []
    for field in args.fields:
        matches = jsonpath(record, field)
        if not matches:
            cells.append(args.blank)
            continue
        if len(matches) > 1 and join is None:
            sys.stderr.write(json.dumps(record) + " matches " + repr(field) + " more than once!\n")
            return None

        value = matches[0]
        # Identity checks keep 0/1 from being mistaken for false/true.
        if value is None:
            cell = args.null
        elif value is False:
            cell = args.false
        elif value is True:
            cell = args.true
        elif isinstance(value, text_type) and not args.escape:
            cell = value
        else:
            cell = json.dumps(value)
        cells.append(cell)
    return cells
if __name__ == '__main__':
    # Command-line entry point: declare the options, validate them, then
    # hand the parsed namespace to main().
    cli = argparse.ArgumentParser(
        description="Convert a JSON document to a TSV document using JSONPath.")
    add = cli.add_argument

    # Input selection.
    add('--input', '-i', metavar='<file>', type=argparse.FileType('r'),
        default=sys.stdin, help='File to read from (default STDIN)')
    add('--multiline', '-m', action='store_true',
        help='Join a multi-line file into a JSON array (otherwise the entire file is one JSON object)')
    add('--lines', '-l', metavar='JSONPATH', default='$.*',
        help='JSONPath expression to find records (instead of just using values of the root object)')

    # Output destination and layout.
    add('--output', '-o', metavar='FILE', type=argparse.FileType('w'),
        default=sys.stdout, help='File to write to (default STDOUT)')
    add('--delimeter', '-d', metavar='"\\t"', default='\t',
        help='delimeter to use between fields')
    add('--separator', '-s', metavar='"\\n"', default='\n',
        help='separator to use between records')
    add('--quiet', '-q', action='store_true',
        help="Don't explode if one of the output fields contains a delimeter")

    # What to emit for each record.
    add('--keys', '-k', action='store_true',
        help='Output keys instead of values')
    add('--array', '-a', action='store_true',
        help="Output each field as a JSON array instead of as a single value.")
    add('--escape', '-e', action='store_true',
        help="Output strings escaped as JSON (with surrounding quotes)")

    # Spellings for JSON literals and for missing fields.
    add('--true', '-t', metavar='true', default='true',
        help="How to represent the JSON value true")
    add('--false', '-f', metavar='false', default='false',
        help="How to represent the JSON value false")
    add('--null', '-n', metavar='null', default='null',
        help="How to represent the JSON value null")
    add('--blank', '-b', metavar="''", default='',
        help="How to represent empty fields (different from a field that equals null)")

    add('fields', nargs='*', help='JSONPath expressions for fields to output')

    options = cli.parse_args()

    # Without --keys there is nothing to print unless fields were given.
    if not (options.keys or options.fields):
        cli.error("You must provide a list of fields to output (or use --keys)")

    # Give interactive users a hint instead of appearing to hang.
    if os.isatty(options.input.fileno()):
        sys.stderr.write("Waiting for you to type JSON on STDIN...\n")

    main(options)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment