Last active
November 3, 2015 20:45
-
-
Save kdeloach/510a29c0aeefc108a7fc to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
"""
Coalesce a single file containing multiple JS arrays (undelimited) into
one giant JS array.

Ex. "[{...}]\n[{...}]" becomes "[{...},{...}]"

This assumes each JS array contains a list of JS object literals.

Usage:
    > coalesce.py broken.json > fixed.json
"""
import os | |
import sys | |
import re | |
# Source: http://stackoverflow.com/questions/3862010/is-there-a-generator-version-of-string-split-in-python | |
def itersplit(s, sep=None):
    """Lazily split ``s`` on ``sep``, yielding one piece at a time.

    Generator counterpart of ``str.split`` (without ``maxsplit``):

    * ``sep=None`` splits on runs of whitespace and never yields empty
      strings, so leading and trailing whitespace is ignored.
    * Any other ``sep`` is treated as a literal delimiter (it is escaped
      before being compiled into a regex); empty fields between adjacent
      delimiters, and at either end of ``s``, are preserved.

    Raises:
        ValueError: if ``sep`` is an empty string. An empty pattern matches
            without consuming input, so the scan position would never
            advance and the generator would never terminate. Because this
            is a generator, the error surfaces on the first iteration.
    """
    if sep is not None and not sep:
        raise ValueError('empty separator')

    # With an explicit separator empty fields are significant; in
    # whitespace mode they are dropped.
    keep_empty = sep is not None
    exp = re.compile(re.escape(sep) if keep_empty else r'\s+')
    pos = 0
    while True:
        m = exp.search(s, pos)
        if m is None:
            # No further delimiter: emit whatever remains and stop.
            if keep_empty or pos < len(s):
                yield s[pos:]
            return
        if keep_empty or pos < m.start():
            yield s[pos:m.start()]
        # Resume scanning just past the delimiter that was found.
        pos = m.end()
# Fail with a usage hint rather than a bare IndexError when no input path
# is given.
if len(sys.argv) < 2:
    sys.exit('Usage: coalesce.py broken.json > fixed.json')

# Read the whole input up front; the context manager guarantees the file
# handle is closed even if reading fails.
with open(sys.argv[1], 'r') as infile:
    contents = infile.read()

# Number of objects written so far; used to decide whether a separator is
# needed before the next one.
i = 0

sys.stdout.write('[')

for line in itersplit(contents, '\n'):
    # Strip the array brackets and separating commas surrounding the row.
    line = line.strip(',[]')

    # Filter out empty rows.
    if not line:
        continue

    # Filter out rows that do not contain whole JSON objects.
    if line[0] != '{' or line[-1] != '}':
        continue

    # Every object after the first is preceded by a newline and a comma.
    if i > 0:
        sys.stdout.write('\n,')

    sys.stdout.write(line)
    i += 1

sys.stdout.write(']')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment