Skip to content

Instantly share code, notes, and snippets.

@sincejune
Created May 8, 2019 06:30
Show Gist options
  • Save sincejune/b0455ac38052c3a2ab6929fd46b47a93 to your computer and use it in GitHub Desktop.
Save sincejune/b0455ac38052c3a2ab6929fd46b47a93 to your computer and use it in GitHub Desktop.
clean json
import os
import sys
import json
# from kafka import KafkaProducer
#
# topic = os.getenv("KAFKA_TOPIC", None)
# if not topic:
# print("KAFKA_TOPIC not defined, exiting")
# sys.exit(-1)
#
# broker = os.getenv("KAFKA_BROKER", None)
# if not broker:
# print("KAFKA_BROKER not defined, exiting")
# sys.exit(-1)
#
# filename = os.getenv("FILENAME", None)
# if not filename:
# print("FILENAME not defined, exiting")
# sys.exit(-1)
#
# producer = KafkaProducer(value_serializer=lambda m: json.dumps(m).encode('utf-8'), bootstrap_servers=[broker])
def clean_record(data):
    """Normalise one parsed log record in place and return it.

    The ``@timestamp`` key is removed and ``host`` is renamed to ``ip``.
    A missing key is reported on stdout and otherwise ignored, so a single
    incomplete record does not abort the whole run.

    Args:
        data: dict decoded from one JSON line.

    Returns:
        The same dict object, after modification.
    """
    try:
        del data["@timestamp"]
    except KeyError:
        print("Key '@timestamp' not found")
    try:
        # pop() raises before the assignment happens, so a record without
        # 'host' is left untouched; a new 'ip' key lands at the end.
        data['ip'] = data.pop('host')
    except KeyError:
        print("Key 'host' not found")
    return data


def clean_file(src="nginx_public.json", dst="nginx_public_cleaned.json",
               progress_every=10000):
    """Clean a JSON-lines file record by record.

    Each non-blank line of *src* is decoded, passed through
    ``clean_record`` and written to *dst* as one JSON document per line.

    Args:
        src: path of the JSON-lines input file.
        dst: path of the output file; truncated if it already exists.
        progress_every: print the running record count each time it reaches
            a multiple of this value; a falsy value disables the output.

    Returns:
        Number of records written.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    count = 0
    # 'with' closes both files even when decoding or writing fails midway.
    with open(src, encoding="utf-8") as fin, \
            open(dst, "w", encoding="utf-8") as fout:
        for line in fin:
            # Blank lines (e.g. a trailing empty line) carry no record and
            # would otherwise make json.loads raise.
            if not line.strip():
                continue
            record = clean_record(json.loads(line))
            fout.write(json.dumps(record) + "\n")
            count += 1
            if progress_every and count % progress_every == 0:
                print(count)
    return count


if __name__ == '__main__':
    clean_file()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment