Skip to content

Instantly share code, notes, and snippets.

@nloadholtes
Created March 19, 2016 18:08
Show Gist options
  • Save nloadholtes/e2e8870dfd96087dad1d to your computer and use it in GitHub Desktop.
Save nloadholtes/e2e8870dfd96087dad1d to your computer and use it in GitHub Desktop.
An example of using ijson to process streaming JSON in a memory-constrained way
#
# This is the core of my handler function
#
# Stream a large JSON array from the file-like object `f`, building one record
# per top-level element and handing the records to write_to_mongo() in batches,
# so only about `batch_size` records are held in memory at any time.
batch_size = 100  # This needs to change based on the size of your objects
parser = ijson.parse(f)
counter = 0
to_process = []
for prefix, event, value in parser:
    if (prefix, event) == ("item", "start_map"):
        # A new top-level object begins: start a fresh record.
        music_item = dict(albums=[])
    elif (prefix, event) == ("item.album.item", "start_map"):
        # A new entry of the object's "album" array begins.
        album = dict()
    elif (prefix, event) == ("item.album.item.title", "string"):
        album["title"] = value
    elif (prefix, event) == ("item.album.item", "end_map"):
        # NOTE(review): `program` is not defined anywhere in this excerpt;
        # confirm it is provided by the enclosing handler (it may be a
        # leftover from an earlier name for `music_item`).
        album["id"] = program["album_id"]
        # The record is created with the key "albums", so append to that same
        # key (the previous "album" lookup raised KeyError).
        music_item["albums"].append(album)
    elif (prefix, event) == ("item", "end_map"):
        # The top-level object is complete: queue it for the next batch write.
        to_process.append(music_item)
        counter += 1
        # Flush as soon as exactly batch_size records are queued.
        if counter >= batch_size:
            write_to_mongo(to_process)
            counter = 0
            to_process = []
# Write whatever is left over from the final, partially filled batch.
if to_process:
    write_to_mongo(to_process)
#
# and we're done!
#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment