Created
July 26, 2022 04:14
-
-
Save ryancollingwood/4410087d528f81c0fceef2e174e97b6a to your computer and use it in GitHub Desktop.
flattens a JSON parsed obj into multiple lines. Last Element [-1] in each line is the value. Elements [0:-1] is the path to that value. line_prefix if specified will prefix a value to the beginning of each outputted line. add_key_index will add 0-based index position when encountering lists or tuples. remove_keys if a key (any point in the path) m…
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def flatten_json_object_to_lines(obj, remove_keys=None, line_prefix=None, add_key_index=True):
    '''
    Flatten a JSON-parsed object into multiple "lines" (tuples).

    Each yielded line is a tuple where:
      * the last element [-1] is a leaf value
      * elements [0:-1] are the path (dict keys / list indexes) to that value

    Parameters:
      obj           -- dict, list, tuple or scalar to flatten
      remove_keys   -- optional container of dict keys; any dict entry whose
                       key is in it (at any depth) is skipped with its subtree
      line_prefix   -- if not None, placed as the first element of every line
      add_key_index -- if True, the 0-based position of each list/tuple
                       element is added to the path; if False, list/tuple
                       elements share the path of their parent

    Notes:
      * Empty dicts/lists/tuples produce no lines.
      * If obj itself is not a dict, list or tuple it is yielded as-is
        (not wrapped in a tuple, and line_prefix is not applied).

    adapted from: https://thispointer.com/python-how-to-iterate-over-nested-dictionary-dict-of-dicts/
    '''
    def _walk(node, path):
        # `path` is always a tuple, so extending it can never hit the
        # str + tuple / None + tuple errors, whatever add_key_index is.
        if isinstance(node, dict):
            for key, value in node.items():
                if remove_keys is not None and key in remove_keys:
                    continue
                yield from _walk(value, path + (key,))
        elif isinstance(node, (list, tuple)):
            for index, item in enumerate(node):
                item_path = path + (index,) if add_key_index else path
                yield from _walk(item, item_path)
        else:
            # Leaf: emit the accumulated path followed by the value.
            yield path + (node,)

    if not isinstance(obj, (dict, list, tuple)):
        # A bare scalar at the top level is passed through unchanged.
        yield obj
        return

    prefix = () if line_prefix is None else (line_prefix,)
    yield from _walk(obj, prefix)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
For example given the following input for
obj
We'll get, note that each line has been prefixed with
"data"
as specified and because we passed in a list with add_key_index = True
every line has the prefix along with the index of the element - given it was a list with only one dictionary it's returning zero. Some more examples of input and output
input
output