Created
July 16, 2015 12:42
-
-
Save skliarpawlo/a225a5fb952190271d51 to your computer and use it in GitHub Desktop.
Parse the first JSON value from a piece of HTML/script
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _get_json_from_string(s): | |
"""Parses first found json object from string (piece of html). | |
>>> _get_json_from_string('{"a": "b"}') | |
{u'a': u'b'} | |
>>> _get_json_from_string('dskljasd{"a": "b"}asdkljasd') | |
{u'a': u'b'} | |
>>> _get_json_from_string('require(asd=[{"a": "b"}, {"c":"d"}]spamspamspam)') | |
[{u'a': u'b'}, {u'c': u'd'}] | |
>>> _get_json_from_string('{"a": [1,2,{"b":"d[1,2,3]sd"}]}') | |
{u'a': [1, 2, {u'b': u'd[1,2,3]sd'}]} | |
>>> _get_json_from_string('xxxxxx') | |
>>> _get_json_from_string('{{}') | |
Traceback (most recent call last): | |
... | |
RuntimeError: Incorrect json format: not all brances closed | |
>>> _get_json_from_string('{]{}') | |
Traceback (most recent call last): | |
... | |
RuntimeError: Incorrect json format | |
""" | |
stack = [] | |
start_idx = 0 | |
while start_idx < len(s) and not s[start_idx] in {'{', '['}: | |
start_idx += 1 | |
if start_idx == len(s): | |
return None | |
end_idx = start_idx | |
open_close_map = { | |
'{': '}', | |
'[': ']', | |
} | |
in_quotes = False | |
slashed_next = False | |
for c in s[start_idx:]: | |
end_idx += 1 | |
if in_quotes: | |
if c == '"' and not slashed_next: | |
in_quotes = False | |
slashed_next = False | |
if c == '\\' and not slashed_next: | |
slashed_next = True | |
else: | |
slashed_next = False | |
elif c == '"': | |
in_quotes = True | |
elif c in {'[', '{'}: | |
stack.append(c) | |
elif c in {']', '}'}: | |
opener = stack.pop() | |
if open_close_map[opener] != c: | |
raise RuntimeError('Incorrect json format') | |
if not stack: | |
break | |
if stack: | |
raise RuntimeError('Incorrect json format: not all brances closed') | |
return json.loads(s[start_idx:end_idx]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment