Created
July 7, 2023 01:35
-
-
Save carlosplanchon/22c40d72bb315f22fa8749ef532fde33 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import attrs | |
from attrs_strict import type_validator | |
import json5 | |
from typing import Any, Optional | |
UNEXPECTED_END_OF_INPUT: str = "at column" | |
from enum import StrEnum, verify, UNIQUE, auto | |
@verify(UNIQUE) | |
class TokenType(StrEnum): | |
VALID_JSON = auto() | |
MISSING_SYMBOL = auto() | |
INVALID_JSON = auto() | |
@attrs.define
class JsonParsingToken:
    """Result of one parsing attempt: an outcome tag plus its payload."""

    # Which of the TokenType outcomes the attempt produced.
    token_type: TokenType = attrs.field(
        validator=type_validator(),
    )
    # Payload whose meaning depends on token_type. As populated by
    # JSONMissingCharsFixer.try_parse: the parsed object for VALID_JSON,
    # the reported error column (int) for MISSING_SYMBOL, None for
    # INVALID_JSON.
    token_value: Any = attrs.field(
        validator=type_validator(),
    )
@attrs.define() | |
class JSONMissingCharsFixer: | |
json_resp: str = attrs.field( | |
validator=type_validator() | |
) | |
cutted_json: str = attrs.field( | |
validator=type_validator(), | |
init=False | |
) | |
old_cutted_json: str = attrs.field( | |
validator=type_validator(), | |
init=False | |
) | |
actual_token: JsonParsingToken = attrs.field( | |
validator=type_validator(), | |
init=False | |
) | |
def cut_json(self): | |
hook_start_i: int =\ | |
self.json_resp.find("{") | |
hook_end_i: int =\ | |
self.json_resp.rfind("}") | |
self.cutted_json: str = self.json_resp[ | |
hook_start_i: hook_end_i + 1] | |
def try_parse(self) -> None: | |
try: | |
parsed_json = json5.loads(self.cutted_json) | |
self.actual_token = JsonParsingToken( | |
token_type=TokenType.VALID_JSON, | |
token_value=parsed_json | |
) | |
except Exception as e: | |
exception_text: str = str(e) | |
print(f"Exception text: {exception_text}") | |
# Interpret exception. | |
if UNEXPECTED_END_OF_INPUT in exception_text: | |
column_num: int = int( | |
exception_text.split(UNEXPECTED_END_OF_INPUT)[-1] | |
) | |
self.actual_token = JsonParsingToken( | |
token_type=TokenType.MISSING_SYMBOL, | |
token_value=column_num | |
) | |
else: | |
self.actual_token = JsonParsingToken( | |
token_type=TokenType.INVALID_JSON, | |
token_value=None | |
) | |
return None | |
def try_adding_symbol(self, symbol: str, column_num: int) -> bool: | |
print(f"Try adding symbol: {symbol}") | |
self.cutted_json += symbol | |
self.try_parse() | |
match self.actual_token.token_type: | |
case TokenType.VALID_JSON: | |
raise StopIteration | |
case TokenType.INVALID_JSON: | |
self.cutted_json = self.old_cutted_json | |
return False | |
case TokenType.MISSING_SYMBOL: | |
new_column_num: int = self.actual_token.token_value | |
print(f"New column num: {new_column_num}") | |
if new_column_num > column_num: | |
return True | |
else: | |
self.cutted_json = self.old_cutted_json | |
return False | |
def missing_symbol(self): | |
column_num: int = self.actual_token.token_value | |
self.old_cutted_json: str = self.cutted_json | |
# Try adding "]": | |
valid: bool = self.try_adding_symbol( | |
symbol="]", | |
column_num=column_num | |
) | |
if valid is True: | |
return None | |
# Try adding "}": | |
valid: bool = self.try_adding_symbol( | |
symbol="}", | |
column_num=column_num | |
) | |
if valid is True: | |
return None | |
def __iter__(self): | |
self.cut_json() | |
return self | |
def __next__(self): | |
self.try_parse() | |
print(self.actual_token.token_type.value) | |
match self.actual_token.token_type: | |
case TokenType.VALID_JSON | TokenType.INVALID_JSON: | |
raise StopIteration | |
case TokenType.MISSING_SYMBOL: | |
print("MISSING SYMBOL") | |
print(f"COLUMN NUM: {self.actual_token.token_value}") | |
self.missing_symbol() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment