Skip to content

Instantly share code, notes, and snippets.

@salekseev
Created February 27, 2015 17:32
Show Gist options
  • Save salekseev/b51691c6c3b54b4e0cb4 to your computer and use it in GitHub Desktop.
Save salekseev/b51691c6c3b54b4e0cb4 to your computer and use it in GitHub Desktop.
Python script that flattens a JSON file or a dictionary with nested lists and/or dictionaries.
"""
This flattens a JSON file or a dictionary with nested lists and/or dictionaries.
The output is a flattened dictionary that uses dot-chained names for keys,
based on the dictionary structure. This allows for reconstructing the JSON
structure or converting it to other formats without losing any structural
information.
"""
__author__ = "Stas Alekseev"
__version__ = '0.1'
import sys
try:
import simplejson
except ImportError:
import json as simplejson
def flattenDict(d, result=None):
    """Flatten nested dictionaries and lists into a single-level dictionary.

    Every leaf value is stored under a key made of the path components that
    lead to it, joined with dots. Dictionary keys and list/tuple indices are
    both path components, so no element is dropped or overwritten:

        {"a": {"b": 1}, "c": [{"d": 2}, 3]}
        -> {"a.b": 1, "c.0.d": 2, "c.1": 3}

    Empty dictionaries and empty lists/tuples have no leaves of their own, so
    they are kept as values under their own path rather than disappearing.

    Args:
        d: The dictionary to flatten.
        result: Optional dictionary to add the flattened items to. A new
            dictionary is created when omitted.

    Returns:
        The ``result`` dictionary (the one passed in, or the new one).
    """
    if result is None:
        result = {}
    for key in d:
        # Top-level keys are passed through unchanged, so a scalar stored
        # under a top-level key keeps exactly that key in the output.
        _flattenValue(key, d[key], result)
    return result


def _flattenValue(key, value, result):
    """Store ``value`` under ``key`` in ``result``, expanding containers."""
    # "%s.%s" is used instead of str.format or str.join because it accepts
    # non-string path components and, on Python 2, yields unicode whenever
    # either side is unicode instead of raising an encoding error.
    if isinstance(value, dict) and value:
        for childKey in value:
            _flattenValue("%s.%s" % (key, childKey), value[childKey], result)
    elif isinstance(value, (list, tuple)) and value:
        for index, element in enumerate(value):
            _flattenValue("%s.%d" % (key, index), element, result)
    else:
        # Scalars, and empty containers that have nothing to expand.
        result[key] = value
def main():
    """Command-line entry point: read JSON, flatten it, write the result.

    Usage: ``<script> [infile [outfile]]``. The input is read from standard
    input and the output written to standard output unless the corresponding
    file argument is given. The output is JSON with sorted keys, an indent of
    two spaces and a trailing newline.

    Raises:
        SystemExit: If more than two arguments are given (the usage string is
            the exit message) or the input is not valid JSON (the parser's
            error is the exit message).
    """
    argCount = len(sys.argv)
    if argCount > 3:
        raise SystemExit("{0} [infile [outfile]]".format(sys.argv[0]))

    infile = open(sys.argv[1], 'rb') if argCount >= 2 else sys.stdin
    try:
        obj = simplejson.load(infile)
    except ValueError as e:
        raise SystemExit(e)
    finally:
        # Close only what this function opened; never close stdin.
        if infile is not sys.stdin:
            infile.close()

    flat = flattenDict(obj)

    # The output file is opened only after the input parsed successfully, so
    # invalid input (or using the same path for both arguments) does not
    # truncate it first. Text mode is required because the JSON encoder
    # writes text, which a binary-mode file rejects on Python 3.
    outfile = open(sys.argv[2], 'w') if argCount == 3 else sys.stdout
    try:
        simplejson.dump(flat, outfile, sort_keys=True, indent=2)
        outfile.write('\n')
    finally:
        # Close only what this function opened; never close stdout.
        if outfile is not sys.stdout:
            outfile.close()


if __name__ == '__main__':
    main()
@salekseev
Copy link
Author

turns

{
  "cluster_name" : "athena.logstash",
  "nodes" : {
    "EWhCGaa3Rk6QwlUfXj8zEg" : {
      "timestamp" : 1425058426323,
      "name" : "logstash105-data-01",
      "transport_address" : "inet[/172.28.21.188:9301]",
      "host" : "logstash105.athenahealth.com",
      "ip" : [ "inet[/172.28.21.188:9301]", "NONE" ],
      "attributes" : {

to:

{
  "cluster_name": "athena.logstash",
  "nodes.6-BGtxfGS9W08MrtixNmVA.attributes.data": "false",
  "nodes.6-BGtxfGS9W08MrtixNmVA.attributes.disk_type": "hdd",
  "nodes.6-BGtxfGS9W08MrtixNmVA.attributes.master": "true",
  "nodes.6-BGtxfGS9W08MrtixNmVA.breakers.fielddata.estimated_size": "0b",
  "nodes.6-BGtxfGS9W08MrtixNmVA.breakers.fielddata.estimated_size_in_bytes": 0,
  "nodes.6-BGtxfGS9W08MrtixNmVA.breakers.fielddata.limit_size": "2.3gb",
  "nodes.6-BGtxfGS9W08MrtixNmVA.breakers.fielddata.limit_size_in_bytes": 2491102003,
  "nodes.6-BGtxfGS9W08MrtixNmVA.breakers.fielddata.overhead": 1.03,

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment