Skip to content

Instantly share code, notes, and snippets.

@NicholasBallard
Created October 7, 2019 14:15
Show Gist options
  • Save NicholasBallard/25cec3de5e4192e02e3d1adaf252ac70 to your computer and use it in GitHub Desktop.
Save NicholasBallard/25cec3de5e4192e02e3d1adaf252ac70 to your computer and use it in GitHub Desktop.
from abc import ABC, abstractmethod
import os
import sys
import time
import pandas as pd
from helper_functions import (
connectdbmethod,
toUtcDay,
)
testing = False
offices = [
'irvine',
'venice'
]
coms = {
'bronto': {
'date_field': 'sent_date',
'acct_field': 'subaccount',
'view': 'brontoDeliveryData',
'domain': 'subaccount',
},
'email_analyst': {
'date_field': 'date_received',
'acct_field': 'sender_domain',
'view': 'eaCampaignData',
'domain': 'sender_domain',
},
'iterable': {
'date_field': 'start_at',
'acct_field': 'domain',
'view': 'itCampaignData',
'domain': 'domain',
},
}
def writer(fp):
return pd.ExcelWriter(
fp,
engine='xlsxwriter',
)
class Query(object):
def __init__(self, start: '%m%d%Y', end: '%m%d%Y', testing=False, **kwargs):
self.testing = testing
self.schema = 'delivery'
self.startdate = toUtcDay(start, '%m%d%Y')
self.enddate = toUtcDay(end, '%m%d%Y')
self.fp = os.path.join(
'./example_output',
f'{self.startdate} to {self.enddate} data - {time.perf_counter()}.xlsx'
)
@connectdbmethod(testing)
def query(self, vendor: dict, office: str, *args, cnx=None, **kwargs,):
sql = f"""
select v.* from
( select * from
`{self.schema}`.`{vendor['view']}`
where `{vendor['date_field']}` >= '{self.startdate}'
and `{vendor['date_field']}` <= '{self.enddate}'
) v
left join {self.schema}.accounts a on v.{vendor['domain']} = a.name
left join {self.schema}.offices o on a.office_id = o.id
where
o.name = '{office}'
"""
df = pd.read_sql(sql, cnx)
# drop duplicates just in case
df = df.drop_duplicates()
return df
@staticmethod
def write(dataframe: 'pd.DataFrame', sheetname: str, writer):
dataframe.to_excel(
excel_writer=writer,
sheet_name=sheetname,
index=False,
)
if __name__ == '__main__':
sys.argv = [i.lower() for i in sys.argv]
print(sys.argv)
testing = True if set(sys.argv) & {'t', 'test', 'testing'} else False
dates = sorted([x for x in sys.argv if (x.isnumeric() and len(x) == 8)])
if len(dates) != 2:
print('Need two dates, a start and end formatted as YYYYMMDD.')
sys.exit()
vendors = []
for i in sys.argv:
if len(set(sys.argv) & {'e', 'ea', 'email', 'email_analyst'}) > 0:
vendors.append('email_analyst')
if len(set(sys.argv) & {'b', 'br', 'bronto'}) > 0:
vendors.append('bronto')
if len(set(sys.argv) & {'i', 'it', 'iterable'}) > 0:
vendors.append('iterable')
vendors = set(vendors)
query = Query(*dates)
writer = writer(query.fp)
for k, v in coms.items():
if k in vendors:
for o in offices:
df = query.query(v, o)
sn = f'{o}_{k}'
query.write(df, sn, writer)
writer.save()
print(f'Wrote data to sheet in file {os.path.abspath(query.fp)}')
@NicholasBallard
Copy link
Author

The SQL in the query method is the meat of the script -- it queries for a date range of a view in the delivery schema for any one of the ESPs. You can take this and use whatever scripting language you like to pass values to the SQL query coming from the GET request.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment