Created
October 7, 2019 14:15
-
-
Save NicholasBallard/25cec3de5e4192e02e3d1adaf252ac70 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from abc import ABC, abstractmethod | |
import os | |
import sys | |
import time | |
import pandas as pd | |
from helper_functions import ( | |
connectdbmethod, | |
toUtcDay, | |
) | |
testing = False | |
offices = [ | |
'irvine', | |
'venice' | |
] | |
coms = { | |
'bronto': { | |
'date_field': 'sent_date', | |
'acct_field': 'subaccount', | |
'view': 'brontoDeliveryData', | |
'domain': 'subaccount', | |
}, | |
'email_analyst': { | |
'date_field': 'date_received', | |
'acct_field': 'sender_domain', | |
'view': 'eaCampaignData', | |
'domain': 'sender_domain', | |
}, | |
'iterable': { | |
'date_field': 'start_at', | |
'acct_field': 'domain', | |
'view': 'itCampaignData', | |
'domain': 'domain', | |
}, | |
} | |
def writer(fp): | |
return pd.ExcelWriter( | |
fp, | |
engine='xlsxwriter', | |
) | |
class Query(object): | |
def __init__(self, start: '%m%d%Y', end: '%m%d%Y', testing=False, **kwargs): | |
self.testing = testing | |
self.schema = 'delivery' | |
self.startdate = toUtcDay(start, '%m%d%Y') | |
self.enddate = toUtcDay(end, '%m%d%Y') | |
self.fp = os.path.join( | |
'./example_output', | |
f'{self.startdate} to {self.enddate} data - {time.perf_counter()}.xlsx' | |
) | |
@connectdbmethod(testing) | |
def query(self, vendor: dict, office: str, *args, cnx=None, **kwargs,): | |
sql = f""" | |
select v.* from | |
( select * from | |
`{self.schema}`.`{vendor['view']}` | |
where `{vendor['date_field']}` >= '{self.startdate}' | |
and `{vendor['date_field']}` <= '{self.enddate}' | |
) v | |
left join {self.schema}.accounts a on v.{vendor['domain']} = a.name | |
left join {self.schema}.offices o on a.office_id = o.id | |
where | |
o.name = '{office}' | |
""" | |
df = pd.read_sql(sql, cnx) | |
# drop duplicates just in case | |
df = df.drop_duplicates() | |
return df | |
@staticmethod | |
def write(dataframe: 'pd.DataFrame', sheetname: str, writer): | |
dataframe.to_excel( | |
excel_writer=writer, | |
sheet_name=sheetname, | |
index=False, | |
) | |
if __name__ == '__main__': | |
sys.argv = [i.lower() for i in sys.argv] | |
print(sys.argv) | |
testing = True if set(sys.argv) & {'t', 'test', 'testing'} else False | |
dates = sorted([x for x in sys.argv if (x.isnumeric() and len(x) == 8)]) | |
if len(dates) != 2: | |
print('Need two dates, a start and end formatted as YYYYMMDD.') | |
sys.exit() | |
vendors = [] | |
for i in sys.argv: | |
if len(set(sys.argv) & {'e', 'ea', 'email', 'email_analyst'}) > 0: | |
vendors.append('email_analyst') | |
if len(set(sys.argv) & {'b', 'br', 'bronto'}) > 0: | |
vendors.append('bronto') | |
if len(set(sys.argv) & {'i', 'it', 'iterable'}) > 0: | |
vendors.append('iterable') | |
vendors = set(vendors) | |
query = Query(*dates) | |
writer = writer(query.fp) | |
for k, v in coms.items(): | |
if k in vendors: | |
for o in offices: | |
df = query.query(v, o) | |
sn = f'{o}_{k}' | |
query.write(df, sn, writer) | |
writer.save() | |
print(f'Wrote data to sheet in file {os.path.abspath(query.fp)}') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The SQL in the
query
method is the meat of the script -- it queries for a date range of a view in thedelivery
schema for any one of the ESPs. You can take this and use whatever scripting language you like to pass values to the SQL query coming from the GET request.