Last active
May 16, 2019 14:49
-
-
Save oneshot719/91e91e4c9998126990323a5524724ca0 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
import csv | |
import json | |
import re | |
import datetime | |
import sys, getopt | |
############
# GET OPTS #
############
# Get command-line arguments
def main(argv):
    """Summarise a message-log CSV on stdout and export it as JSON lines.

    The input file is read twice. The first pass prints a human-readable
    breakdown (UTC date, message id, sender, recipients, recipient count,
    topic, mode) of every row that matches the expected six-column layout.
    The second pass writes every CSV row to the output file as one JSON
    object per line, keyed by timestamp, msgid, sender, recipients, topic
    and mode.

    Args:
        argv: Command-line arguments without the program name. Recognised
            options are ``-h``, ``-i/--ifile <path>`` and
            ``-o/--ofile <path>``.

    Raises:
        SystemExit: With status 2 when the options are invalid or either
            path is missing; with no status after ``-h`` prints the usage.
        OSError: If the input or output file cannot be opened.
    """
    usage = 'Usage: file.py -i <inputfile> -o <outputfile>'
    inputfile = ''
    outputfile = ''
    try:
        opts, _unused_args = getopt.getopt(argv, "hi:o:", ["ifile=", "ofile="])
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print(usage)
            sys.exit()
        elif opt in ("-i", "--ifile"):
            inputfile = arg
        elif opt in ("-o", "--ofile"):
            outputfile = arg

    if not inputfile or not outputfile:
        # An empty path can never be opened; show the usage text rather than
        # letting open() fail with a traceback.
        print(usage)
        sys.exit(2)

    print('Input file is: ', inputfile)
    print('Output file is: ', outputfile)
    print("")

    # The pattern is applied to str(row), i.e. the repr of the parsed list:
    # "['<epoch ms>', '<msgid>', '<sender>', '<recipients>', '<topic>', '<mode>']".
    # Compiled once here instead of once per row.
    row_pattern = re.compile(
        r"^..(\d{10,99})\'\,.\'(.*\>)\'\,.\'(.*)\'\,.\'(.*)\'\,.\'(.*)\'\,.\'(.*).."
    )
    fieldnames = ("timestamp", "msgid", "sender", "recipients", "topic", "mode")

    # newline='' is what the csv module documents for files handed to a
    # reader; the with-statement guarantees both handles are closed.
    with open(inputfile, 'r', newline='') as csvfile, \
            open(outputfile, 'w') as jsonfile:
        rawreader = csv.reader(csvfile)
        for rawrow in rawreader:
            print("")
            regex_pull = row_pattern.match(str(rawrow))
            if regex_pull is None:
                # Blank lines, header rows and malformed records do not fit
                # the pattern; report and move on instead of crashing.
                print(
                    'Skipping row #' + str(rawreader.line_num)
                    + ': unrecognized format',
                    file=sys.stderr,
                )
                continue

            epoch_date = int(regex_pull.group(1))
            msg_id = regex_pull.group(2)
            sender = regex_pull.group(3)
            recipients = regex_pull.group(4)
            topic = regex_pull.group(5)
            mode = regex_pull.group(6)

            # The timestamp column is in milliseconds. Convert with an
            # explicit UTC tzinfo so the result never depends on the local
            # system time zone.
            epoch_date_seconds_int = epoch_date // 1000
            iso_date = datetime.datetime.fromtimestamp(
                epoch_date_seconds_int, tz=datetime.timezone.utc
            ).strftime('%Y-%m-%d %H:%M:%S')
            print('ISO Formatted date is: ' + iso_date)

            print('MSG ID: ' + msg_id)
            print('SENDER: ' + sender)

            print('RECIPIENTS: ' + recipients)
            # Recipients are '|'-separated, so N separators mean N + 1 entries.
            number_of_recipients = recipients.count('|') + 1
            print('number_of_recipients: ' + str(number_of_recipients))

            print('TOPIC: ' + topic)
            print('MODE: ' + mode)
            print("")

        # The loop above consumed the whole file. Rewind so DictReader sees
        # the rows again; without this the JSON output is always empty.
        csvfile.seek(0)
        xreader = csv.DictReader(csvfile, fieldnames)
        for row in xreader:
            json.dump(row, jsonfile)
            jsonfile.write('\n')
# Script entry point: hand everything after the program name to main().
if __name__ == "__main__":
    cli_arguments = sys.argv[1:]
    main(cli_arguments)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment