Skip to content

Instantly share code, notes, and snippets.

@johnwilson
Last active August 29, 2015 13:56
Show Gist options
  • Save johnwilson/9009797 to your computer and use it in GitHub Desktop.
Save johnwilson/9009797 to your computer and use it in GitHub Desktop.
Quick indexing of JSON data for storage in an 'SQL' or 'NoSQL' database
# This Python file uses the following encoding: utf-8
import json
import numbers
from types import NoneType
# Exact Python type -> JSON type name.  ``type(u"")`` and ``type(1 << 64)``
# evaluate to ``unicode`` / ``long`` on Python 2 and collapse onto ``str`` /
# ``int`` on Python 3, so the table covers both interpreters without naming
# types that only exist on one of them.
_JSON_TYPE_NAMES = {
    dict: "object",
    list: "array",
    bool: "boolean",
    type(None): "null",
    float: "number",
    int: "number",
    type(1 << 64): "number",
    str: "string",
    type(u""): "string",
}


def type_to_string(typ):
    """Return the JSON type name for a python Type

    The match is on the exact type (subclasses are not recognised), so
    ``bool`` maps to "boolean" and never to "number".

    :param typ: The python Type
    :type typ: type.
    :returns: One of "object", "array", "boolean", "null", "number", "string"
    :rtype: str.
    :raises: TypeError if the type has no JSON equivalent
    """
    try:
        return _JSON_TYPE_NAMES[typ]
    except (KeyError, TypeError):
        # KeyError: unknown type; TypeError: unhashable argument.  Both are
        # reported the same way the original if/elif chain did.
        raise TypeError("Type not supported")
def create_json_string(value, indent=None):
    """Return JSON text representation of a python ``dict`` or ``list``

    Non-ASCII characters are written as-is (``ensure_ascii=False``) rather
    than as ``\\uXXXX`` escapes.

    :param value: The python object
    :type value: dict, list.
    :param indent: Forwarded to ``json.dumps`` (``None`` = compact output)
    :type indent: int, None.
    :returns: The JSON document as text (``unicode`` on Python 2,
        ``str`` on Python 3)
    :raises: TypeError if ``value`` is not exactly a dict or a list
    """
    if type(value) not in (dict, list):
        raise TypeError("Type not supported")
    text = json.dumps(value, ensure_ascii=False, indent=indent)
    # On Python 2, ``ensure_ascii=False`` may yield a byte ``str`` instead of
    # ``unicode``.  Calling ``.encode`` on such a value implicitly decodes it
    # as ASCII first and fails on non-ASCII bytes, so decode it explicitly and
    # only when needed.  On Python 3 the result is always text already.
    if isinstance(text, bytes):
        text = text.decode("utf-8")
    return text
def process_node(typ, val, action_func, depth, path, typ_name):
    """Helper function for traversing json object

    Reports ``val`` to ``action_func`` and, when it is a container, recurses
    into its children one level deeper.  Containers are reported as their
    JSON text (via ``create_json_string``); every other value is reported
    unchanged.

    :param typ: The python Type of ``val``
    :type typ: type.
    :param val: The node to process
    :type val: dict, list, str, unicode, float, int, bool, null.
    :param action_func: Callback invoked as
        ``action_func(depth, path, value, typ_name)``
    :type action_func: function.
    :param depth: The current depth of JSON object
    :type depth: int.
    :param path: The current JSON object path i.e. 'user.cars[0].model'
    :type path: str.
    :param typ_name: Custom string representation of python Type
    :type typ_name: str.
    """
    if typ is not dict and typ is not list:
        action_func(depth, path, val, typ_name)
        return

    action_func(depth, path, create_json_string(val), typ_name)
    # ``walk`` appends "[i]" directly for list items, so only objects need a
    # "." separator before their keys.  Adding it for lists too produced
    # paths such as 'cars.[0].model' instead of the documented
    # 'cars[0].model'.
    child_prefix = path + "." if typ is dict else path
    walk(val, child_prefix, action_func, depth + 1)
def walk(node, path, action_func, depth=0):
    """Traverse a JSON object (represented by 'dict' or 'list')

    Each direct child of ``node`` is handed to ``process_node`` together
    with its path (``path`` + key for objects, ``path`` + "[index]" for
    arrays) and its JSON type name.

    :param node: The object to traverse
    :type node: dict, list.
    :param path: Prefix prepended to every child key / index
    :type path: str.
    :param action_func: Callback function, forwarded to ``process_node``
    :type action_func: function.
    :param depth: The current depth of JSON object
    :type depth: int.
    :raises: ValueError if ``node`` is not exactly a dict or a list;
        TypeError (from ``type_to_string``) for an unsupported child type
    """
    node_type = type(node)
    if node_type is dict:
        children = node.items()
    elif node_type is list:
        children = (
            ("[{0}]".format(i), item) for i, item in enumerate(node)
        )
    else:
        raise ValueError("Invalid node type")

    for suffix, child in children:
        child_type = type(child)
        child_name = type_to_string(child_type)
        process_node(
            child_type, child, action_func, depth, path + suffix, child_name
        )
def index_json(jstring):
    """Index JSON object

    Parses ``jstring`` and flattens the resulting structure into a list of
    rows by walking it from the root with an empty path.

    :param jstring: The object as string
    :type jstring: str.
    :returns: ``(depth, path, data, type_name)`` tuples in traversal order
    :rtype: list.
    """
    rows = []

    def collect(*row):
        # walk() reports each node as (depth, path, data, type_name)
        rows.append(row)

    walk(json.loads(jstring), "", collect)
    return rows
def _parse_number(raw):
    """Convert an indexed "number" value to ``int`` or ``float``."""
    if isinstance(raw, (bytes, type(u""))):
        # Textual value (e.g. read back from storage): prefer an exact
        # integer and fall back to float for "1.5", "1e3", ...
        try:
            return int(raw)
        except ValueError:
            return float(raw)
    if isinstance(raw, numbers.Integral) and not isinstance(raw, bool):
        # Already an integer: keep it so 96 does not come back as 96.0.
        return raw
    return float(raw)


def rebuild_json(parts):
    """Rebuild JSON object from Index parts

    Only rows at depth 0 are used: containers are stored as complete JSON
    text at their own level, so the root-level rows are enough to restore
    the whole document.  The result is always a ``dict`` keyed by the row
    path.

    :param parts: The index parts; each row is indexable as
        ``(depth, path, data, type_name)``
    :type parts: list.
    :returns: The rebuilt object
    :rtype: dict.
    :raises: ValueError if a "number" value or a container's JSON text
        cannot be parsed
    """
    data = {}
    for row in parts:
        if row[0] != 0:  # depth = 0 means root level
            continue
        raw, typ_name = row[2], row[3]
        if typ_name in ("object", "array"):
            val = json.loads(raw)
        elif typ_name == "number":
            val = _parse_number(raw)
        else:
            val = raw
        data[row[1]] = val
    return data
if __name__ == '__main__':
    # Demo: index a small JSON document, list the index rows, then rebuild
    # the document from those rows.
    jstring = u"""
{
"fighter":"iron monkey",
"points":96,
"available":true,
"dob":null,
"styles":[
{
"name":"功夫",
"name_eng":"kung-fu"
}
]
}
"""
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid syntax on Python 3.
    print("\n----------- Original --------------\n")
    print(jstring)
    print("\n----------- Index parts --------------\n")
    parts = index_json(jstring)  # create index
    for row in parts:
        print(u"[{0}] {1} -> {2}".format(row[0], row[1], row[2]))
    print("\n----------- Rebuilt from Index --------------\n")
    data = rebuild_json(parts)  # rebuild dict
    print(create_json_string(data, indent=2))  # get json string
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment