Last active
January 29, 2016 19:59
-
-
Save liftoff/261bf0b75c9884259d86 to your computer and use it in GitHub Desktop.
Clean up JSON-like data before decoding as JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os, re, sys | |
# Tokenizer used to strip comments. Each alternative matches one whole token:
# a comment (which always starts with "/") or a quoted string. Strings are
# matched as well so that comment-like text inside them is consumed as part of
# the string instead of being seen as a comment.
comments_re = re.compile(
    r'//.*?$'                   # line comment, up to the end of its line
    r'|/\*.*?\*/'               # block comment, may span several lines
    r'|\'(?:\\.|[^\\\'])*\''    # single-quoted string, backslash escapes allowed
    r'|"(?:\\.|[^\\"])*"',      # double-quoted string, backslash escapes allowed
    # MULTILINE lets "$" end a line comment at each newline; DOTALL lets "."
    # cross newlines inside block comments and escaped sequences.
    re.MULTILINE | re.DOTALL,
)
# Matches a comma (group 1), optional whitespace and a closing "}". Only "}"
# is covered by this pattern; a comma before "]" does not match.
trailing_commas_re = re.compile(
    # The comma, any whitespace after it, and the closing brace.
    r'(,)\s*}'
    # Lookahead to the end of the input: what follows must consist of text
    # without double quotes, escaped characters, and complete double-quoted
    # strings. A candidate followed by an unpaired quote is rejected, which
    # keeps matches that sit inside a string literal from being replaced.
    r'(?=([^"\\]*(\\.|"([^"\\]*\\.)*[^"\\]*"))*[^"]*$)'
)
def remove_comments(json_like):
    r"""
    Remove C-style comments from *json_like* and return the result.

    Both ``// ...`` line comments and ``/* ... */`` block comments are
    deleted. Quoted strings are matched by the same pattern and copied through
    unchanged, so comment-like text inside a string (for example the ``//`` in
    a URL) is preserved. Whitespace surrounding a removed comment, including
    the newline that ends a line comment, is left in place.

    :param json_like: Text of a JSON-like document that may contain comments.
    :returns: The same text with every comment replaced by the empty string.

    Example::

        >>> remove_comments('{"a": 1, // one\n "b": 2 /* two */}')
        '{"a": 1, \n "b": 2 }'
        >>> remove_comments('{"url": "http://example.com"}')
        '{"url": "http://example.com"}'
    """
    def replacer(match):
        token = match.group(0)
        # comments_re matches either a comment or a quoted string. Comments
        # begin with "/" while strings begin with a quote character, so the
        # first character is enough to tell them apart.
        if token.startswith('/'):
            return ""
        return token

    return comments_re.sub(replacer, json_like)
# Matches either a complete double-quoted string (so that its contents are
# skipped untouched) or a comma followed by optional whitespace and a closing
# "}" or "]". Group 1 holds the closing character and is None for strings.
_string_or_trailing_comma_re = re.compile(
    r'"(?:\\.|[^\\"])*"|,\s*([}\]])',
    re.DOTALL,
)


def remove_trailing_commas(json_like):
    r"""
    Remove trailing commas from *json_like* and return the result.

    A trailing comma is a comma that is followed, after optional whitespace,
    by a closing ``}`` or ``]``. The comma and that whitespace are dropped and
    the closing character is kept. Double-quoted strings are copied through
    unchanged, so a ``,}`` or ``,]`` sequence inside a string is not touched.

    The input is scanned once from left to right, which also means comments
    are not understood here; strip them first if the input may contain any.

    :param json_like: Text of a JSON-like document.
    :returns: The text with trailing commas before ``}`` and ``]`` removed.

    Example::

        >>> remove_trailing_commas('{"foo":"bar","baz":"blah",}')
        '{"foo":"bar","baz":"blah"}'
        >>> remove_trailing_commas('{"items":[1,2,],}')
        '{"items":[1,2]}'
        >>> remove_trailing_commas('{"text":"a,]"}')
        '{"text":"a,]"}'
    """
    def replacer(match):
        closer = match.group(1)
        if closer is None:
            # A string literal was matched: emit it exactly as it was.
            return match.group(0)
        # A trailing comma was matched: keep only the closing character.
        return closer

    return _string_or_trailing_comma_re.sub(replacer, json_like)
# These two functions together can be used like so. The example is guarded so
# that merely importing this module does not try to open a file.
if __name__ == "__main__":
    with open('some_file.json') as f:
        json_like_text = f.read()
    almost_json = remove_comments(json_like_text)  # Remove comments
    proper_json = remove_trailing_commas(almost_json)  # Remove trailing commas
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment