Created
August 23, 2017 21:13
-
-
Save tyrostone/7c6054a9dacda349b81c571a3e68fe24 to your computer and use it in GitHub Desktop.
Script to search through S3 for missing company, campaign, and/or email data.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import argparse
import csv
import fnmatch
import multiprocessing
import os
import re
import sys
def search_logfiles_for_email(email):
    """Print every syslog line that mentions ``email``.

    Scans each file returned by ``get_email_sending_logfiles`` for the
    directory held in the module-level ``args.logfile_path`` (assigned by
    ``main``). Matches are written to stdout; nothing is returned.

    Args:
        email: Address to look for. It is matched literally, so characters
            such as ``.`` and ``+`` are not interpreted as regex operators.
    """
    logfiles = get_email_sending_logfiles(args.logfile_path)
    print(email)
    # Escape and compile once: used raw, the address is a regex in which
    # "." matches any character and "+" is a quantifier.
    pattern = re.compile(re.escape(email))
    for file_ in logfiles:
        # The context manager closes the handle even if reading fails.
        with open(file_, "r") as f:
            for line in f:
                if pattern.search(line):
                    print("***************************")
                    print("match for: {}".format(email))
                    print("***************************")
                    print(line)
def get_emails_from_csv(csv_file):
    """Return the values of the second column of ``csv_file``.

    The first row is treated as a header (its second cell is just
    "Email") and is dropped, because searching for it would produce a lot
    of false positives. Rows with fewer than two columns, such as blank
    lines, are skipped instead of raising ``IndexError``.

    Args:
        csv_file: Path of the CSV file to read.

    Returns:
        A list of the second-column values in file order, followed by one
        hard-coded extra address.
    """
    # The csv module wants a binary handle on Python 2 and a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(csv_file, 'rb')
    else:
        handle = open(csv_file, 'r', newline='')

    with handle as f:
        reader = csv.reader(f)
        # Drop the header explicitly rather than slicing the final list,
        # so an empty file does not discard the extra address instead.
        next(reader, None)
        emails = [row[1] for row in reader if len(row) > 1]

    # NOTE(review): this literal looks like a redacted placeholder rather
    # than a real address - confirm the intended value.
    emails.append('[email protected]')
    return emails
def get_email_sending_logfiles(filepath, filetype='syslog'):
    """Collect the paths of log files found anywhere under ``filepath``.

    Args:
        filepath: Directory to walk recursively.
        filetype: ``fnmatch`` pattern applied to each bare file name. The
            default has no wildcards, so only files named exactly
            ``syslog`` match.

    Returns:
        A list of matching paths, each joined onto the directory it was
        found in, in the order ``os.walk`` visits them.
    """
    matches = []
    for root, _dirnames, filenames in os.walk(filepath):
        for filename in fnmatch.filter(filenames, filetype):
            matches.append(os.path.join(root, filename))
    return matches
def search_applogs_for_ids(campaign_id, company_id=None):
    """Print every ``app.log`` line that mentions the given IDs.

    Scans each ``app.log`` found under the module-level
    ``args.logfile_path`` (assigned by ``main``). A line that contains the
    campaign ID is reported once for the campaign and is not checked
    against the company ID. Matches go to stdout; nothing is returned.

    Args:
        campaign_id: Campaign identifier, matched literally.
        company_id: Optional company identifier, matched literally. When
            falsy, only the campaign ID is searched for.
    """
    logfiles = get_email_sending_logfiles(args.logfile_path, filetype='app.log')
    # Compile once, escaped, so the IDs are treated as plain text rather
    # than as regular expressions.
    campaign_pattern = re.compile(re.escape(campaign_id))
    company_pattern = re.compile(re.escape(company_id)) if company_id else None
    for file_ in logfiles:
        # The context manager closes the handle even if reading fails.
        with open(file_, "r") as f:
            for line in f:
                if campaign_pattern.search(line):
                    print("***************************")
                    print("match for: {}".format(campaign_id))
                    print("***************************")
                    print(line)
                    # A campaign hit takes precedence over a company hit.
                    continue
                if company_pattern is not None and company_pattern.search(line):
                    print("***************************")
                    print("match for: {}".format(company_id))
                    print(line)
def _init_worker(parsed_args):
    """Publish the parsed CLI options as the global ``args`` in a pool worker."""
    global args
    args = parsed_args


def main():
    """Parse the command line and run the requested log search.

    ``--csv`` and ``--logfiles`` are always required. When
    ``--campaign-id`` is given, the app logs are searched for that
    campaign (and for ``--company-id`` if supplied) and the process exits
    with status 0 without reading the CSV. Otherwise every address from
    the CSV is searched for in the syslog files, one worker process per
    CPU.
    """
    global args

    parser = argparse.ArgumentParser()
    parser.add_argument('--csv', dest="csv_file", required=True,
                        help='CSV containing email data')
    parser.add_argument('--logfiles', dest="logfile_path", required=True,
                        help='Log data location')
    parser.add_argument('--campaign-id', dest="campaign_id",
                        help='The ID of the missing campaign')
    parser.add_argument('--company-id', dest="company_id",
                        help='The ID of the company with the missing campaign')
    # The search helpers read this module-level name.
    args = parser.parse_args()

    if args.campaign_id:
        # company_id is None when the flag is absent, which is also the
        # callee's default, so a single call covers both cases.
        search_applogs_for_ids(args.campaign_id, args.company_id)
        sys.exit(0)

    emails = get_emails_from_csv(args.csv_file)
    print("Checking logfiles for email matches")
    cpu_count = multiprocessing.cpu_count()
    # Hand the options to each worker explicitly: workers only inherit the
    # global when processes are forked, not when they are spawned.
    pool = multiprocessing.Pool(cpu_count, initializer=_init_worker,
                                initargs=(args,))
    try:
        pool.map(search_logfiles_for_email, emails)
    finally:
        # Always shut the workers down, even if a search raised.
        pool.close()
        pool.join()
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment