Last active
August 29, 2015 13:56
-
-
Save johnwilson/9009797 to your computer and use it in GitHub Desktop.
Quick indexing of JSON data for storage in a 'SQL' or 'NoSQL' database.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This Python file uses the following encoding: utf-8 | |
import json | |
from types import NoneType | |
# JSON type name for each supported Python type. Lookup is by exact type, so
# subclasses of these types are not matched.
_JSON_TYPE_NAMES = {
    dict: "object",
    list: "array",
    bool: "boolean",
    type(None): "null",
    float: "number",
    int: "number",
    str: "string",
}
try:
    # Python 2 only: json.loads can produce ``long`` for large integers and
    # ``unicode`` for strings.
    _JSON_TYPE_NAMES[long] = "number"
    _JSON_TYPE_NAMES[unicode] = "string"
except NameError:
    # Python 3: ``int`` and ``str`` already cover both cases.
    pass


def type_to_string(typ):
    """Return a custom Type name for python Type

    The name is one of "object", "array", "boolean", "null", "number" or
    "string".

    :param typ: The python Type
    :type typ: type.
    :returns: The custom Type name
    :rtype: str.
    :raises: TypeError
    """
    try:
        return _JSON_TYPE_NAMES[typ]
    except (KeyError, TypeError):
        # KeyError: a type that is not in the table.
        # TypeError: an unhashable argument (e.g. a list instance).
        # Both are reported the same way.
        raise TypeError("Type not supported")
def create_json_string(value, indent=None):
    """Return JSON string representation of python object

    Non-ASCII characters are written as-is (``ensure_ascii=False``) and the
    result is always a text (unicode) string, never a byte string.

    :param value: The python object
    :type value: dict list.
    :param indent: Indentation passed through to ``json.dumps``
    :type indent: int, None.
    :returns: The JSON text
    :rtype: unicode.
    :raises: TypeError
    """
    typ = type(value)
    if typ is not dict and typ is not list:
        raise TypeError("Type not supported")
    out = json.dumps(value, ensure_ascii=False, indent=indent)
    if isinstance(out, bytes):
        # Python 2 returns a byte string when the input holds no unicode
        # objects; decode it so callers always receive text. On Python 3
        # (and for unicode results on Python 2) there is nothing to do.
        out = out.decode("utf-8")
    return out
def process_node(typ, val, action_func, depth, path, typ_name):
    """Helper function for traversing json object

    The callback is always invoked once for the node itself. A dict or list
    node is reported as its JSON text and then traversed one level deeper;
    any other node is reported with its raw value.

    :param typ: The python Type
    :type typ: type.
    :param val: The node to process
    :type val: dict, list, str, unicode, float, int, bool, null.
    :param action_func: Callback function, called as
        ``action_func(depth, path, value, typ_name)``
    :type action_func: function.
    :param depth: The current depth of JSON object
    :type depth: int.
    :param path: The current JSON object path i.e. 'user.cars[0].model'
    :type path: str.
    :param typ_name: Custom string representation of python Type
    :type typ_name: str.
    """
    if typ is dict or typ is list:
        js_val = create_json_string(val)
        action_func(depth, path, js_val, typ_name)
        # Object members are joined to their parent with "." while array
        # subscripts attach directly, giving 'cars[0]' rather than 'cars.[0]'.
        child_prefix = path + "." if typ is dict else path
        walk(val, child_prefix, action_func, depth + 1)
    else:
        action_func(depth, path, val, typ_name)
def walk(node, path, action_func, depth=0):
    """Traverse a JSON object (represented by 'dict' or 'list')

    Each direct child is handed to ``process_node`` together with its path:
    ``path + key`` for dict members and ``path + "[i]"`` for list items.
    ``depth`` is passed on unchanged for every direct child.

    :param node: The object to traverse
    :type node: dict, list.
    :param path: Prefix prepended to the path of every direct child
    :type path: str.
    :param action_func: Callback function
    :type action_func: function.
    :param depth: The current depth of JSON object
    :type depth: int.
    :raises: ValueError
    """
    t_node = type(node)
    if t_node is dict:
        for key, val in node.items():
            t_val = type(val)
            n_path = path + key
            t_name = type_to_string(t_val)
            process_node(t_val, val, action_func, depth, n_path, t_name)
    elif t_node is list:
        for index, val in enumerate(node):
            t_val = type(val)
            n_path = path + "[{0}]".format(index)
            t_name = type_to_string(t_val)
            process_node(t_val, val, action_func, depth, n_path, t_name)
    else:
        raise ValueError("Invalide node type")
def index_json(jstring):
    """Index JSON object

    Parses the JSON text and walks the result, collecting one
    ``(depth, path, data, type_name)`` tuple per visited node.

    :param jstring: The object as string
    :type jstring: str.
    :returns: The index parts, in the order the nodes were visited
    :rtype: list.
    """
    rows = []

    def collect(*fields):
        # The traversal passes (depth, path, data, type_name) positionally,
        # so the argument tuple is already the row to store.
        rows.append(fields)

    walk(json.loads(jstring), "", collect)
    return rows
def _parse_number(raw):
    """Convert the stored value of a "number" row back to int or float."""
    if isinstance(raw, float):
        return raw
    try:
        # Going through str() keeps integers (and integer-looking strings)
        # integral without truncating values such as "1.5".
        return int(str(raw))
    except ValueError:
        return float(raw)


def rebuild_json(parts):
    """Rebuild JSON object from Index parts

    Only rows at depth 0 are used. Nested rows are redundant because each
    root-level "object" / "array" row already stores its complete JSON text.
    The result is always a dict keyed by the row path.

    :param parts: The index parts; each row is indexable as
        ``(depth, path, data, type_name)``
    :type parts: list.
    :returns: The rebuilt root object
    :rtype: dict.
    """
    data = {}
    for row in parts:
        if row[0] != 0:  # depth = 0 means root level
            continue
        typ_name = row[3]
        if typ_name in ("object", "array"):
            val = json.loads(row[2])
        elif typ_name == "number":
            # Keep integers as integers so 96 is not rebuilt as 96.0.
            val = _parse_number(row[2])
        else:
            val = row[2]
        data[row[1]] = val
    return data
if __name__ == '__main__':
    # Demo: index a small document, list the index rows, then rebuild the
    # root object from them. Single-argument print(...) calls produce the
    # same output on Python 2 and Python 3.
    jstring = u"""
{
"fighter":"iron monkey",
"points":96,
"available":true,
"dob":null,
"styles":[
{
"name":"功夫",
"name_eng":"kung-fu"
}
]
}
"""
    print("\n----------- Original --------------\n")
    print(jstring)
    print("\n----------- Index parts --------------\n")
    parts = index_json(jstring)  # create index
    for row in parts:
        print(u"[{0}] {1} -> {2}".format(row[0], row[1], row[2]))
    print("\n----------- Rebuilt from Index --------------\n")
    data = rebuild_json(parts)  # rebuild dict
    print(create_json_string(data, indent=2))  # get json string
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment