Skip to content

Instantly share code, notes, and snippets.

@pezon
Last active September 13, 2015 14:52
Show Gist options
  • Save pezon/659c0a987df1e89c00bf to your computer and use it in GitHub Desktop.
Save pezon/659c0a987df1e89c00bf to your computer and use it in GitHub Desktop.
Proof of Concept: Convert JSON object to tabular data using ijson.
"""
Proof of concept: json-to-table
Convert JSON object to tabular data using ijson,
which provides a SAX-like iterable for JSON objects
"""
from urlparse import urlparse
from collections import Counter, OrderedDict
import ijson
import requests
format_key = lambda key: key.split('.')[-1].lower()
ignore_events = ['map_key', 'start_map', 'end_map', 'start_array', 'end_array']
def main():
url = 'http://annacw.annapolis.gov/ArcGIS/rest/services/maps/Crimes/MapServer/0/query?f=json&where=(category%20like%20%27%25ASSAULT%25%27%20or%20category%20like%20%27%25BURGLARY%25%27%20or%20category%20like%20%27%25HOMICIDE%25%27%20or%20category%20like%20%27%25MOTOR%20VEHICLE%25%27%20or%20category%20like%20%27%25ROBBERY%25%27%20or%20category%20like%20%27%25THEFT%25%27)%20and%20REPORTED_ON%20between%20date%20%2708%2F13%2F15%27%20and%20date%20%279%2F12%2F2015%27&returnGeometry=true&spatialRel=esriSpatialRelIntersects&outFields=Category%2CCALL_CODE_1%2CINCIDENT_NUM%2CSTREETADDR%2CREPORTED_ON'
stream = StringIO(requests.get(url).text)
path, keys = guess_iterable(stream)
return create_table(stream, path=path, keys=keys)
def guess_iterable(stream):
keys = set()
depth = Counter()
stream.seek(0)
for prefix, event, value in ijson.parse(stream):
if event in ignore_events:
continue
d = len(prefix.split('.'))
depth[d] += 1
keys.add(prefix)
iter_depth = depth.most_common()[1][0] if len(depth) > 1 else 0
path = '.'.join(keys[0].split('.', iter_depth)[0:iter_depth])
keys = [format_key(key) for key in keys if key.startswith(path)]
return path, keys
def create_table(stream, path, keys):
obj_list = []
stream.seek(0)
for prefix, event, value in ijson.parse(stream):
if prefix == root_key:
if event == 'start_map':
obj = OrderedDict.fromkeys(keys)
elif event == 'end_map':
obj_list.append(obj)
elif event in ignore_events or prefix.startswith(path):
key = format_key(prefix)
obj[key] = value
return obj_list
def get_object_keys(stream, formatter=None, exclude=None):
if not formatter:
formatter = lambda key: key
if not exclude:
exclude = lambda key: False
keys = set()
stream.seek(0)
for prefix, event, value in ijson.parse(stream):
if event in ignore_events:
continue
keys.add(prefix)
return [formatter(key) for key in keys if not exclude(key)]
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment