Created
February 9, 2018 02:24
-
-
Save rendoaw/d212d8ecf0b226e7328434cdad6a8c5c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
import datetime
import json
import os
import sys
import time

from elasticsearch import Elasticsearch, helpers
def read_file(filename):
    """Return the lines of *filename* with newline characters removed.

    Args:
        filename: Path of the text file to read. An empty string means
            "no file" and produces an empty list.

    Returns:
        A list with one string per line of the file, in file order.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    # Compare by value: identity (`is not`) against a string literal only
    # works by accident of interning and is flagged by modern interpreters.
    if filename == '':
        return []
    # The context manager closes the handle even if reading fails midway.
    with open(filename) as finput:
        return [line.replace('\n', '') for line in finput]
def jsonpretty(text):
    """Return *text* rendered as human-readable JSON.

    Args:
        text: The object to serialize. Despite the name it is handed
            directly to ``json.dumps``, so it must be JSON-serializable.

    Returns:
        A JSON string indented by four spaces with object keys sorted.
    """
    return json.dumps(text, indent=4, sort_keys=True)
def write_json_file(d, outfilename):
    """Serialize *d* as JSON into *outfilename*, overwriting the file.

    Args:
        d: JSON-serializable object to write.
        outfilename: Path of the file to create or truncate.

    Returns:
        None.
    """
    # `with` guarantees the handle is closed even when json.dump raises
    # (e.g. on a non-serializable value).
    with open(outfilename, "w") as fo:
        json.dump(d, fo, indent=4)
def _build_action(doc):
    """Enrich one decoded message *doc* in place and wrap it as a bulk action.

    Adds the derived fields ``ts``, ``ts_hour``, ``ts_day``, ``text_len`` and
    fills in ``media``, ``from.print_name`` and ``from.phone`` when absent.

    Args:
        doc: Dict decoded from one JSONL line. Must contain ``date`` (passed
            to ``time.gmtime``) and a ``from`` dict with an ``id`` key.

    Returns:
        A dict with ``_index``, ``_type``, ``_id`` and ``_source`` keys, as
        consumed by ``helpers.bulk``.
    """
    # Convert the timestamp once and reuse it for all three derived fields.
    utc = time.gmtime(doc['date'])
    doc['ts'] = time.strftime('%Y-%m-%dT%H:%M:%S.000Z', utc)
    doc['ts_hour'] = time.strftime('%H', utc)
    doc['ts_day'] = time.strftime('%w', utc)  # weekday number, 0 = Sunday

    if 'text' in doc:
        # Messages that carry text but no media entry are tagged as 'text'.
        if 'media' not in doc:
            doc['media'] = {'type': 'text'}
        doc['text_len'] = len(doc['text'])

    sender = doc['from']
    if 'print_name' not in sender:
        # .get() keeps a sender lacking first/last name from aborting the
        # whole import with a KeyError.
        sender['print_name'] = (
            sender.get('first_name', '') + '_' + sender.get('last_name', '')
        )
    if 'phone' not in sender:
        sender['phone'] = "0000000000"

    return {
        "_index": "el99",
        "_type": "telegram",
        # str() on the sender id avoids a TypeError when it is numeric.
        "_id": str(doc['date']) + "_" + str(sender['id']),
        "_source": doc,
    }


if __name__ == "__main__":
    es = Elasticsearch([{'host': 'localhost', 'port': 9200}])
    lines = read_file("json/EL99_Sinergi_Bangun_Negeri.jsonl")

    # Blank lines (e.g. a trailing newline in the export) are not valid JSON,
    # so they are skipped instead of crashing json.loads.
    actions = [_build_action(json.loads(line)) for line in lines if line.strip()]

    # Keep a local copy of exactly what is about to be indexed.
    write_json_file(actions, "dump.json")

    # Recreate the index from scratch; 400/404 from the delete are ignored so
    # a missing index does not abort the run.
    es.indices.delete(index='el99', ignore=[400, 404])
    es.indices.create(index='el99')

    res = helpers.bulk(es, actions)
    # Parenthesised form prints the same thing on Python 2 and Python 3.
    print(res)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment