Created
June 17, 2024 18:25
-
-
Save rvndbalaji/be1c7df1d81cb1fe0e035ca472ca6457 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Given an incomplete JSON string, extract a subset of it that is valid JSON.
# The maximum valid portion of the string is obtained by removing characters
# one by one from the end and checking validity after each removal.
# By default the incoming JSON is considered to be an object surrounded by {}.
# Set is_list=True if the incoming JSON is a list surrounded by [].
# This method may be inefficient for now, since it removes one character at a time and re-validates.
# TODO: Future optimization -
#   Match blocks of {} for objects and remove a whole block at a time
#   Match commas (,) for list items and remove one item at a time
def is_json(myjson, debug=False):
    """Return True if *myjson* parses as JSON, False otherwise.

    Args:
        myjson: The text to validate. Anything ``json.loads`` cannot accept
            (for example ``None``) is reported as not-JSON instead of raising.
        debug: When truthy, print the parser's error before returning False.

    Returns:
        True when ``json.loads(myjson)`` succeeds, False when it fails.
    """
    # json is not imported at module level in the visible file, so import it
    # here to keep this helper usable on its own.
    import json

    try:
        json.loads(myjson)
    except (ValueError, TypeError) as err:
        # ValueError covers malformed JSON (JSONDecodeError subclasses it);
        # TypeError covers inputs that are not str/bytes/bytearray.
        if debug:
            print(err)
        return False
    return True
def get_maximum_valid_json(json_string: str, is_list=False, debug=False):
    """Parse the longest leading portion of a truncated JSON string.

    The string is first parsed as-is. If that fails, its first character
    (expected to be the opening ``{`` or ``[``) is dropped, and the rest is
    repeatedly wrapped in a matching pair of brackets and parsed, trimming
    one character from the end after every failed attempt.

    Args:
        json_string: The possibly incomplete JSON text.
        is_list: Wrap candidates in ``[...]`` when truthy; otherwise wrap
            them in ``{...}`` (the default, for objects).
        debug: When truthy, print the parser error and the rejected
            candidate for every failed attempt.

    Returns:
        The decoded value of the first candidate that parses, or ``None``
        when no non-empty body yields valid JSON. An empty body is never
        tried, so a lone ``{`` gives ``None`` rather than ``{}``.
    """
    # json is not imported at module level in the visible file, so import it
    # here to keep this function usable on its own.
    import json

    # Fast path: the input is already valid JSON. Parsing once and returning
    # the result avoids validating and then decoding the same text twice.
    try:
        return json.loads(json_string)
    except ValueError:
        pass

    # The wrapper brackets do not change between attempts; pick them once.
    opener, closer = ('[', ']') if is_list else ('{', '}')

    # Drop the first character '[' or '{'; it is re-added via `opener` below.
    body = json_string.strip()[1:]

    # Keep removing characters from the end until a candidate parses.
    while body:
        candidate = opener + body + closer
        try:
            return json.loads(candidate)
        except ValueError as err:
            if debug:
                print(err)
                print(candidate)
        # TODO: trimming one character per attempt is quadratic; trim whole
        # blocks / list items instead.
        body = body[:-1]

    print("No valid json found for string - " + json_string)
    return None
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment