Skip to content

Instantly share code, notes, and snippets.

@sweemeng
Created September 20, 2015 02:52
Show Gist options
  • Save sweemeng/4f2328578500e7beadb8 to your computer and use it in GitHub Desktop.
Save sweemeng/4f2328578500e7beadb8 to your computer and use it in GitHub Desktop.
import json
class DataMerger(object):
    """Merge JSON-lines records from several files, keyed by ``meta.id``.

    Every record must be a JSON object containing ``record["meta"]["id"]``.
    Records that share an id are combined into one dict; when the same
    top-level key appears more than once, the most recently added value wins.
    """

    def __init__(self):
        # Maps page id -> merged record (a dict of top-level keys).
        self.data = {}

    def add_data(self, filename):
        """Read a JSON-lines file and merge each record into ``self.data``.

        Call this several times, once per input file, to combine them.

        Blank lines are skipped. Raises ``ValueError`` on malformed JSON and
        ``KeyError`` when a record has no ``meta``/``id`` entry.
        """
        # ``with`` guarantees the file is closed even if a line fails to parse.
        with open(filename) as source:
            for line in source:
                # JSON-lines format: one JSON document per line. Tolerate
                # empty lines (e.g. a trailing newline at end of file).
                if not line.strip():
                    continue
                record = json.loads(line)
                page_id = record["meta"]["id"]
                # Later records overwrite earlier values key by key.
                self.data.setdefault(page_id, {}).update(record)

    def output_file(self, filename):
        """Write every merged record to ``filename``, one JSON object per line."""
        with open(filename, "w") as sink:
            # Only the merged records are written; ``values()`` exists on both
            # Python 2 and 3, unlike ``iteritems()``.
            for record in self.data.values():
                sink.write(json.dumps(record) + "\n")
if __name__ == "__main__":
    merger = DataMerger()
    # Example usage (disabled): merge each input file, then write the result.
    # merger.add_data("data/project_data_20150914_1221.jsonl")
    # merger.add_data("data/results_20150910_1158.json")
    # merger.output_file("data/contractors_20150914_1858.jsonl")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment