lmiller1990 · August 12, 2024 11:24
diff --git a/code.py b/code.py
 import pandas as pd
 import json
 from camel_converter import to_snake


 def read(jsonfile):
    """Load and return the parsed contents of a JSON file.

    Args:
        jsonfile: Path of the JSON file to open.

    Returns:
        The decoded JSON document, as produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is UTF-8; pin the encoding instead of depending on the
    # platform's locale default, which differs between systems.
    with open(jsonfile, "r", encoding="utf-8") as f:
        return json.load(f)


 def pp(dic):
    """Pretty-print ``dic`` to stdout as JSON with a four-space indent."""
    rendered = json.dumps(dic, indent=4)
    print(rendered)


 def flatten_json_1(nested_json, parent_key="", sep="_"):
    """Flatten a nested JSON object into a single-level dict.

    Path components are joined with ``sep`` and the joined key is passed
    through ``to_snake``. Lists add no index to the path, so dict entries of
    the same list write to the same keys and later entries overwrite earlier
    ones.

    Args:
        nested_json: Mapping to flatten.
        parent_key: Key prefix accumulated from the enclosing levels.
        sep: Separator placed between path components.

    Returns:
        A flat dict mapping path keys to leaf values. List entries that are
        not dicts are kept together, in order, as a list under the list's
        own key. An empty list contributes no key.
    """
    items = {}
    for key, value in nested_json.items():
        new_key = to_snake(f"{parent_key}{sep}{key}" if parent_key else key)

        if isinstance(value, dict):
            # Nested object: recurse with the extended path.
            items.update(flatten_json_1(value, new_key, sep=sep))
        elif isinstance(value, list):
            leaves = []
            for item in value:
                if isinstance(item, dict):
                    # Dict entries share the list's path (no index component).
                    items.update(flatten_json_1(item, new_key, sep=sep))
                else:
                    # Only dicts can be recursed into; calling .items() on a
                    # scalar entry would raise AttributeError.
                    leaves.append(item)
            if leaves:
                items[new_key] = leaves
        else:
            # Leaf value: store it at the calculated key.
            items[new_key] = value

    return items


 def flatten_json2(nested_json, parent_key="", sep="_"):
    """Flatten a nested JSON object, hoisting the entries of ``"items"`` lists.

    Each key is passed through ``to_snake`` on its own and then appended to
    ``parent_key`` with ``sep``. Lists add no index to the path, so dict
    entries of the same list write to the same keys and later entries
    overwrite earlier ones. Entries of a list stored under the raw key
    ``"items"`` are flattened with the parent's prefix, i.e. without an
    ``items`` component in their keys.

    Args:
        nested_json: Mapping to flatten.
        parent_key: Key prefix accumulated from the enclosing levels.
        sep: Separator placed between path components.

    Returns:
        A flat dict mapping path keys to leaf values. List entries that are
        not dicts are kept together, in order, as a list under the list's
        own key. An empty list contributes no key.
    """
    items = {}
    for key, value in nested_json.items():
        snake_key = to_snake(key)
        new_key = f"{parent_key}{sep}{snake_key}" if parent_key else snake_key

        if isinstance(value, dict):
            items.update(flatten_json2(value, new_key, sep=sep))
        elif isinstance(value, list):
            # "items" lists do not contribute their own name to the path.
            child_prefix = parent_key if key == "items" else new_key
            leaves = []
            for item in value:
                if isinstance(item, dict):
                    items.update(flatten_json2(item, child_prefix, sep=sep))
                else:
                    # Only dicts can be recursed into; calling .items() on a
                    # scalar entry would raise AttributeError.
                    leaves.append(item)
            if leaves:
                items[new_key] = leaves
        else:
            items[new_key] = value

    return items


 # Load both sample documents; only data_2.json is flattened below.
 data = read("data.json")
 data2 = read("data_2.json")

 # Disabled flatten_json_1 experiment, kept for reference:
 # flat1 = [flatten_json_1(group) for group in data["data"]["groups"]]
 # print(json.dumps(flat1, indent=4))

 # Flatten every group of the second document and dump the result as JSON.
 flat2 = list(map(flatten_json2, data2["data"]["groups"]))
 print(json.dumps(flat2, indent=4))
 # print(flat)
 #

 import pandas as pd


 # def extract_result_matrix(online_section_data):
 #     # Convert the dictionary to DataFrame
 #     df = pd.json_normalize(online_section_data)

 #     # Filter and process data
 #     result_matrix = (
 #         df[df["type"] == "RESULT_MATRIX"]  # Filter rows
 #         .explode("components")  # Similar to unnest() in R but for one level
 #         .reset_index(drop=True)["components"]
 #         .apply(
 #             pd.Series
 #         )  # Expanding the 'components' dictionaries into separate columns
 #         .query("componentCode == 'RESULT_MATRIX'")  # Additional filtering
 #         .explode("data")  # 'Explode' or 'unnest' the data list
 #         .reset_index(drop=True)["data"]
 #         .apply(pd.Series)  # Expanding the 'data' dictionaries
 #         .explode("groups")
 #         .reset_index(drop=True)["groups"]
 #         .apply(pd.Series)  # Expanding the 'groups' dictionaries
 #         .explode("items")
 #         .reset_index(drop=True)
 #         .pipe(
 #             lambda x: pd.json_normalize(x["items"])
 #         )  # Normalizing the items field into a flat table
 #         .pipe(
 #             lambda x: x.dropna(axis=1, how="all")
 #         )  # Optional: Drop columns that are entirely NA
 #         .pipe(
 #             lambda x: x.rename(columns=lambda c: c.replace(".", "_"))
 #         )  # Clean column names
 #     )

 #     return result_matrix


 def extract_result_matrix(online_section_data):
    """Return one row per ``data`` entry of the RESULT_MATRIX components.

    The input is normalised into a DataFrame, rows whose ``type`` is
    ``"RESULT_MATRIX"`` are kept, their ``components`` lists are unnested and
    spread into columns, components whose ``componentCode`` is
    ``"RESULT_MATRIX"`` are kept, and finally their ``data`` lists are
    unnested.

    Args:
        online_section_data: Section payload accepted by
            ``pd.json_normalize`` that carries ``type`` and ``components``.

    Returns:
        A DataFrame with the component columns; the ``data`` column holds one
        list entry per row. The index is not reset after the last explode, so
        rows that came from the same component share an index label.
    """
    sections = pd.json_normalize(online_section_data)
    matrix_sections = sections[sections["type"] == "RESULT_MATRIX"]

    # One row per entry of each section's "components" list.
    exploded = matrix_sections.explode("components").reset_index(drop=True)
    components = exploded["components"]

    # Spread every component dict into its own set of columns.
    component_table = components.apply(pd.Series)

    matrix_components = component_table.query("componentCode == 'RESULT_MATRIX'")
    result_matrix = matrix_components.explode("data")
    return result_matrix


 # Example usage
 # Minimal sample payload: one RESULT_MATRIX section holding a single
 # RESULT_MATRIX component whose data entry has one group with two items.
 online_section_data = dict(
    type="RESULT_MATRIX",
    components=[
        dict(
            componentCode="RESULT_MATRIX",
            data=[
                dict(
                    groups=[
                        dict(
                            items=[
                                dict(key="value1", value=123),
                                dict(key="value2", value=456),
                            ]
                        )
                    ]
                )
            ],
        )
    ],
 )

 # online_section_data = read("data_2.json")

 # df_result = extract_result_matrix(online_section_data)
 # print(df_result)
diff --git a/data.json b/data.json
 {
  "data": {
    "groups": [
      {
        "label": "Diversity",
        "seq": 1,
        "items": [
          {
            "healthAssociation": null,
            "markerId": "8ef98b48-4684-4ee8-af47-70331d9f08db",
            "marker": "Microbial Diversity",
            "markerReference": "Microbial Diversity",
            "markerDisplayName": "Microbial Diversity",
            "type": "DIVERSITY",
            "value": "3.87",
            "rawValue": 3.874,
            "unit": null,
            "prevalence": "100.00%",
            "prevalenceLevel": {
              "label": "Very common",
              "definition": "{x | x >= 90%}"
            },
            "detoxRetoxRisk": null,
            "markerRangeChart": {
              "type": "MARKER_RANGE_CHART",
              "markerChartType": "REFERENCE_RANGE_TYPE_2",
              "markerChartName": "One way descending (Bad -> Good -> Good)",
              "dataPoints": {
                "average": {
                  "value": 0.0,
                  "widthMultiple": 0.3
                }
              }
            }
          }
        ]
      }
    ]
  }
 }
	import pandas as pd
	import json
	from camel_converter import to_snake


	def read(jsonfile):
	    """Load and return the parsed contents of a JSON file.

	    Args:
	        jsonfile: Path of the JSON file to open.

	    Returns:
	        The decoded JSON document, as produced by ``json.load``.

	    Raises:
	        OSError: If the file cannot be opened.
	        json.JSONDecodeError: If the file does not contain valid JSON.
	    """
	    # JSON text is UTF-8; pin the encoding instead of depending on the
	    # platform's locale default, which differs between systems.
	    with open(jsonfile, "r", encoding="utf-8") as f:
	        return json.load(f)


	def pp(dic):
	    """Pretty-print ``dic`` to stdout as JSON with a four-space indent."""
	    print(json.dumps(dic, indent=4))


	def flatten_json_1(nested_json, parent_key="", sep="_"):
	    """Flatten a nested JSON object into a single-level dict.

	    Path components are joined with ``sep`` and the joined key is passed
	    through ``to_snake``. Lists add no index to the path, so dict entries
	    of the same list write to the same keys and later entries overwrite
	    earlier ones.

	    Args:
	        nested_json: Mapping to flatten.
	        parent_key: Key prefix accumulated from the enclosing levels.
	        sep: Separator placed between path components.

	    Returns:
	        A flat dict mapping path keys to leaf values. List entries that
	        are not dicts are kept together, in order, as a list under the
	        list's own key. An empty list contributes no key.
	    """
	    items = {}
	    for key, value in nested_json.items():
	        new_key = to_snake(f"{parent_key}{sep}{key}" if parent_key else key)

	        if isinstance(value, dict):
	            # Nested object: recurse with the extended path.
	            items.update(flatten_json_1(value, new_key, sep=sep))
	        elif isinstance(value, list):
	            leaves = []
	            for item in value:
	                if isinstance(item, dict):
	                    # Dict entries share the list's path (no index).
	                    items.update(flatten_json_1(item, new_key, sep=sep))
	                else:
	                    # Only dicts can be recursed into; calling .items()
	                    # on a scalar entry would raise AttributeError.
	                    leaves.append(item)
	            if leaves:
	                items[new_key] = leaves
	        else:
	            # Leaf value: store it at the calculated key.
	            items[new_key] = value

	    return items


	def flatten_json2(nested_json, parent_key="", sep="_"):
	    """Flatten a nested JSON object, hoisting entries of ``"items"`` lists.

	    Each key is passed through ``to_snake`` on its own and then appended
	    to ``parent_key`` with ``sep``. Lists add no index to the path, so
	    dict entries of the same list write to the same keys and later
	    entries overwrite earlier ones. Entries of a list stored under the
	    raw key ``"items"`` are flattened with the parent's prefix, i.e.
	    without an ``items`` component in their keys.

	    Args:
	        nested_json: Mapping to flatten.
	        parent_key: Key prefix accumulated from the enclosing levels.
	        sep: Separator placed between path components.

	    Returns:
	        A flat dict mapping path keys to leaf values. List entries that
	        are not dicts are kept together, in order, as a list under the
	        list's own key. An empty list contributes no key.
	    """
	    items = {}
	    for key, value in nested_json.items():
	        snake_key = to_snake(key)
	        new_key = f"{parent_key}{sep}{snake_key}" if parent_key else snake_key

	        if isinstance(value, dict):
	            items.update(flatten_json2(value, new_key, sep=sep))
	        elif isinstance(value, list):
	            # "items" lists do not contribute their own name to the path.
	            child_prefix = parent_key if key == "items" else new_key
	            leaves = []
	            for item in value:
	                if isinstance(item, dict):
	                    items.update(flatten_json2(item, child_prefix, sep=sep))
	                else:
	                    # Only dicts can be recursed into; calling .items()
	                    # on a scalar entry would raise AttributeError.
	                    leaves.append(item)
	            if leaves:
	                items[new_key] = leaves
	        else:
	            items[new_key] = value

	    return items


	# Load both sample documents; only data_2.json is flattened below.
	data = read("data.json")
	data2 = read("data_2.json")

	# Disabled flatten_json_1 experiment, kept for reference:
	# flat1 = [flatten_json_1(group) for group in data["data"]["groups"]]
	# print(json.dumps(flat1, indent=4))

	# Flatten every group of the second document and dump the result as JSON.
	flat2 = list(map(flatten_json2, data2["data"]["groups"]))
	print(json.dumps(flat2, indent=4))
	# print(flat)
	#

	import pandas as pd


	# def extract_result_matrix(online_section_data):
	# # Convert the dictionary to DataFrame
	# df = pd.json_normalize(online_section_data)

	# # Filter and process data
	# result_matrix = (
	# df[df["type"] == "RESULT_MATRIX"] # Filter rows
	# .explode("components") # Similar to unnest() in R but for one level
	# .reset_index(drop=True)["components"]
	# .apply(
	# pd.Series
	# ) # Expanding the 'components' dictionaries into separate columns
	# .query("componentCode == 'RESULT_MATRIX'") # Additional filtering
	# .explode("data") # 'Explode' or 'unnest' the data list
	# .reset_index(drop=True)["data"]
	# .apply(pd.Series) # Expanding the 'data' dictionaries
	# .explode("groups")
	# .reset_index(drop=True)["groups"]
	# .apply(pd.Series) # Expanding the 'groups' dictionaries
	# .explode("items")
	# .reset_index(drop=True)
	# .pipe(
	# lambda x: pd.json_normalize(x["items"])
	# ) # Normalizing the items field into a flat table
	# .pipe(
	# lambda x: x.dropna(axis=1, how="all")
	# ) # Optional: Drop columns that are entirely NA
	# .pipe(
	# lambda x: x.rename(columns=lambda c: c.replace(".", "_"))
	# ) # Clean column names
	# )

	# return result_matrix


	def extract_result_matrix(online_section_data):
	    """Return one row per ``data`` entry of the RESULT_MATRIX components.

	    The input is normalised into a DataFrame, rows whose ``type`` is
	    ``"RESULT_MATRIX"`` are kept, their ``components`` lists are unnested
	    and spread into columns, components whose ``componentCode`` is
	    ``"RESULT_MATRIX"`` are kept, and finally their ``data`` lists are
	    unnested.

	    Args:
	        online_section_data: Section payload accepted by
	            ``pd.json_normalize`` that carries ``type`` and ``components``.

	    Returns:
	        A DataFrame with the component columns; the ``data`` column holds
	        one list entry per row. The index is not reset after the last
	        explode, so rows from the same component share an index label.
	    """
	    # Convert the dictionary to a DataFrame.
	    df = pd.json_normalize(online_section_data)

	    result_matrix = (
	        df[df["type"] == "RESULT_MATRIX"]  # keep RESULT_MATRIX sections
	        .explode("components")  # one row per component
	        .reset_index(drop=True)["components"]
	        .apply(pd.Series)  # spread each component dict into columns
	        .query("componentCode == 'RESULT_MATRIX'")  # keep matrix components
	        .explode("data")  # one row per entry of the data list
	    )

	    return result_matrix


	# Example usage
	# Minimal sample payload: one RESULT_MATRIX section holding a single
	# RESULT_MATRIX component whose data entry has one group with two items.
	online_section_data = dict(
	    type="RESULT_MATRIX",
	    components=[
	        dict(
	            componentCode="RESULT_MATRIX",
	            data=[
	                dict(
	                    groups=[
	                        dict(
	                            items=[
	                                dict(key="value1", value=123),
	                                dict(key="value2", value=456),
	                            ]
	                        )
	                    ]
	                )
	            ],
	        )
	    ],
	)

	# online_section_data = read("data_2.json")

	# df_result = extract_result_matrix(online_section_data)
	# print(df_result)
	{
	"data": {
	"groups": [
	{
	"label": "Diversity",
	"seq": 1,
	"items": [
	{
	"healthAssociation": null,
	"markerId": "8ef98b48-4684-4ee8-af47-70331d9f08db",
	"marker": "Microbial Diversity",
	"markerReference": "Microbial Diversity",
	"markerDisplayName": "Microbial Diversity",
	"type": "DIVERSITY",
	"value": "3.87",
	"rawValue": 3.874,
	"unit": null,
	"prevalence": "100.00%",
	"prevalenceLevel": {
	"label": "Very common",
	"definition": "{x | x >= 90%}"
	},
	"detoxRetoxRisk": null,
	"markerRangeChart": {
	"type": "MARKER_RANGE_CHART",
	"markerChartType": "REFERENCE_RANGE_TYPE_2",
	"markerChartName": "One way descending (Bad -> Good -> Good)",
	"dataPoints": {
	"average": {
	"value": 0.0,
	"widthMultiple": 0.3
	}
	}
	}
	}
	]
	}
	]
	}
	}