Created
December 10, 2014 19:57
-
-
Save harlo/a69df02fb4d39d567d9e to your computer and use it in GitHub Desktop.
Real quick word-count JSON-to-CSV converter for deeplab
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sys import argv, exit
def page_map_to_csv(file_in, file_out=None):
    """Write the top-level key/value pairs of a JSON object to a CSV file.

    Each key of the JSON object stored in ``file_in`` becomes one CSV row of
    the form ``[key, value]``. The key ``"uv_page_map"`` is skipped. Rows
    that the csv writer rejects are reported on stdout and skipped; they do
    not abort the conversion.

    Args:
        file_in: Path of the JSON file to read.
        file_out: Path of the CSV file to write. Defaults to ``file_in``
            with ``.csv`` appended.

    Returns:
        True once the CSV has been written, False if the input is missing,
        cannot be read or parsed, is not a JSON object, or if the output
        file cannot be opened. A message is printed in every failure case.
    """
    import csv
    import os
    import sys
    from json import loads

    if not os.path.exists(file_in):
        print("NOPE: %s is not there" % file_in)
        return False

    # open() lives inside the try: a directory or an unreadable file passes
    # the existence check above but still fails when opened.
    try:
        with open(file_in, 'rb') as J:
            page_map = loads(J.read())
    except (IOError, OSError, ValueError) as e:
        print("NOPE: %s" % e)
        return False

    # Valid JSON may be a list, number, string... only an object has keys.
    if not isinstance(page_map, dict):
        print("NOPE: %s does not contain a JSON object" % file_in)
        return False

    if file_out is None:
        file_out = "%s.csv" % file_in

    print("writing csv to %s" % file_out)

    # The csv module needs a binary file on Python 2 but a text file opened
    # with newline='' on Python 3; with the wrong mode every writerow() call
    # fails and the output is silently left empty.
    try:
        if sys.version_info[0] < 3:
            C = open(file_out, 'wb+')
        else:
            C = open(file_out, 'w+', newline='', encoding='utf-8')
    except (IOError, OSError) as e:
        print("NOPE: %s" % e)
        return False

    with C:
        csv_writer = csv.writer(C, quotechar='|', quoting=csv.QUOTE_MINIMAL)

        for k in page_map:
            if k == "uv_page_map":
                continue

            # Deliberately best-effort: one bad row must not stop the rest.
            try:
                csv_writer.writerow([k, page_map[k]])
            except Exception as e:
                print("JFYI, cannot burn %s to csv because %s" % (k, e))

    return True
if __name__ == "__main__":
    # argv[0] is the script name, so a valid call has 2 or 3 entries:
    # the input path and an optional output path.
    if len(argv) not in (2, 3):
        print("usage: page_map_to_csv.py file_in.json [file_out.csv]")
        exit(-1)

    # The optional output path is the third entry, i.e. index 2; reading
    # index 3 raised IndexError whenever an output path was supplied.
    file_out = argv[2] if len(argv) == 3 else None

    if page_map_to_csv(argv[1], file_out=file_out):
        print("OK!")
        exit(0)

    exit(-1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment