Last active
November 24, 2020 15:34
-
-
Save Azlirn/fb55e42f976599939130310df1aa8ee1 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# coding: utf-8 | |
""" | |
This script will migrate data stored in the `D:\Data Analytics Projects\JTF-COVID\Testing Site Trends\Daily Reports` | |
directory to a new `.xlsx` file which can be used for simple data analytics regarding JTF-COVID controlled COVID-19 | |
testing sites. | |
**Note:** This file has hard coded paths in the `main()` and `writeTrendsReport()` functions. | |
""" | |
import os | |
import pandas as pd | |
import numpy as np | |
from datetime import datetime, timedelta | |
import time | |
import openpyxl | |
from openpyxl import load_workbook | |
# Global Variables | |
cwd = os.getcwd() | |
def getFileDate(reference=None):
    """Return yesterday's date formatted for the daily report filename.

    The J3 naming convention embeds a fixed "2300" timestamp between the day
    and month, e.g. ``232300NOV2020`` for 23 November 2020.

    :param reference: optional datetime to compute "yesterday" from; defaults
        to the current date (kept optional for backward compatibility and to
        make the function testable).
    :return: upper-cased ``DD2300MMMYYYY`` string for the day before `reference`.
    """
    base = reference if reference is not None else datetime.today()
    yesterday = base - timedelta(days=1)
    return yesterday.strftime("%d2300%b%Y").upper()
def getDictDate(reference=None):
    """Return a datetime for 'yesterday' at midnight, for the Trends Report.

    :param reference: optional datetime to compute "yesterday" from; defaults
        to the current time (kept optional for backward compatibility and to
        make the function testable).
    :return: datetime object for the day before `reference`, truncated to
        00:00:00 (the original implementation achieved this via a
        strftime/strptime round-trip; ``replace`` is equivalent and direct).
    """
    base = reference if reference is not None else datetime.now()
    yesterday = base - timedelta(days=1)
    return yesterday.replace(hour=0, minute=0, second=0, microsecond=0)
def getMonth(today=None):
    """Return the current month as a three-letter upper-case abbreviation.

    Bug fixes vs. the original: ``str(todayDate.month)`` produces ``'9'``
    (never ``'09'``), so the September branch could never match; that branch
    also mapped September to ``"AUG"``; and months January through September
    fell through and returned ``''``, yielding a broken report path.
    ``strftime("%b")`` handles every month correctly.

    :param today: optional datetime to read the month from; defaults to the
        current date (kept optional for backward compatibility and testing).
    :return: e.g. ``"NOV"`` — matches the Daily Reports folder naming.
    """
    date = today if today is not None else datetime.today()
    return date.strftime("%b").upper()
def cleanDict():
    """Build and return the blank per-site throughput dictionary.

    Each key is a test-site display name; each value holds an empty
    'Throughput' slot plus the site's county, regional council, command post,
    and yesterday's date (from getDictDate()). The dictionary is later filled
    in by createDailyDF().
    """
    # The date below is used to establish needed values for the Trends Report.
    dataDate = getDictDate()
    # (site name, county, regional council, command post) for every site.
    siteInfo = [
        ('Binghamton/Broome Test Site', 'Binghamton', 'Southern Tier', 'CP Syracuse'),
        ('Oneida Test Site', 'Oneida', 'Mohawk Valley', 'CP Syracuse'),
        ('Erie Test Site', 'Erie', 'Western New York', 'CP Syracuse'),
        ('Monroe Test Site', 'Monroe', 'Finger Lakes', 'CP Syracuse'),
        ('Niagra Test Site', 'Niagra', 'Western New York', 'CP Syracuse'),
        ('Anthony Wayne Test Site', 'Rockland', 'Mid-Hudson', 'CP South'),
        ('Glen Island Test Site', 'Westchester', 'Mid-Hudson', 'CP South'),
        ('Queens - Aqueduct Park', 'Queens', 'New York City', 'CP South'),
        ('Bronx - Bay Plaza', 'Bronx', 'New York City', 'CP South'),
        ('Bronx - Lehman', 'Bronx', 'New York City', 'CP South'),
        ('Staten Island Test Site', 'Richmond', 'New York City', 'CP South'),
        ('Brooklyn - Fountain Ave', 'Kings', 'New York City', 'CP South'),
        ('Nassau County Test Site', 'Nassau', 'Long Island', 'CP South'),
        ('Suffolk County - Stony Brook', 'Suffolk', 'Long Island', 'CP South'),
        ('Albany County Test Site', 'Albany', 'Capital Region', 'CP Capital District'),
    ]
    throughputDict = {
        site: {'Throughput': '', "County": county, "Regional Council": council,
               "Command Post": post, "Date": dataDate}
        for site, county, council, post in siteInfo
    }
    print("[2] Blank dictionary established... Ready for new data...\n")
    print(throughputDict)
    print("\n")
    return throughputDict
def importDailyReport():
    """Open and return yesterday's daily Mission Update report as a DataFrame.

    The analyst is required to save the prior day's Mission Update report, which is completed by the J3 Night Shift.
    This file can be found on Microsoft Teams under `Staff > J3 > Swing Shift Products > {MMM} > {DDMMMYYYY}`
    The file **must** be saved in an `.xlsx` file format (this will require the analyst to open the file and
    conduct a `Save As`) in the appropriate `Month` folder located at:
    `D:\Data Analytics Projects\JTF-COVID\Testing Site Trends\Daily Reports\{MMM}`

    Exits the process (SystemExit, code 1) if the file is missing or cannot be
    read, to avoid corrupting the trends report downstream.
    # TODO: Add a function that will convert the file format from .xls to .xlsx automagically
    """
    today = getFileDate()
    month = getMonth()
    # The J3 is known to use inconsistent file naming conventions.
    # This variable supports the naming convention used as of 23NOV2020.
    fileName = ('Throughput # as of ' + today + '.xlsx')
    path = os.path.join(cwd, 'Daily Reports', month, fileName)
    try:
        print("[3] Opening the daily mission report...\n")
        todayFile = pd.read_excel(path, sheet_name='Mission_Throughput_Tracker', index_col=None)
    except FileNotFoundError:
        print("[!] Today's file could not be found - please ensure you have downloaded and saved the Daily Mission "
              "Update as an \'.xlsx\' file in the proper folder.")
        print("{*} The current file naming convention as of 23NOV2020 is \"Throughput # as of DD2300MMMYYYY.xlsx\"")
        print("[*] Exiting now to prevent this script from screwing up the trends report...")
        # raise SystemExit instead of site-module exit(): works without the
        # `site` module and returns a nonzero status on this error path.
        raise SystemExit(1)
    except Exception as err:
        print(f"[!] An unhandled error occurred attempting to open today's daily mission report: {err}")
        print("[*] Exiting now to prevent this script from screwing up the trends report...")
        raise SystemExit(1)
    print(todayFile)
    return todayFile
def createDailyDF():
    """
    Pull data from the daily Mission Report (imported by importDailyReport()),
    and fill the test-site throughput numbers into the dictionary established
    by cleanDict(). Once complete, convert the dictionary to a transposed
    `pandas` DataFrame (one row per site) and return it. This object is later
    used to write the daily "trends report."

    Exits the process (SystemExit, code 1) if any site's throughput cannot be
    pulled from the daily report.
    """
    # Establish an empty dictionary
    throughputDict = cleanDict()
    # Import the daily report
    dailyReport = importDailyReport()
    # Attempt to fill all NA or inf values with 0 to prevent Exceptions
    dailyReport = dailyReport.fillna(0)
    print("[3] Daily report imported...")
    # Datetime at yesterday's midnight: the daily report uses it as the
    # column name holding yesterday's data.
    e = datetime.now() - timedelta(days=1)
    dfColName = e.replace(hour=0, minute=0, second=0, microsecond=0)
    # Maps each dictionary site name to the label used in the daily report's
    # 'Sites' column; replaces 15 copy-pasted lookup statements with one loop.
    siteLabels = {
        'Binghamton/Broome Test Site': "BROOME",
        'Oneida Test Site': "ONEIDA",
        'Erie Test Site': "ERIE",
        'Monroe Test Site': "MONROE",
        'Niagra Test Site': "NIAGARA",
        'Anthony Wayne Test Site': "ANTHONY WAYNE",
        'Glen Island Test Site': "GLEN ISLAND",
        'Queens - Aqueduct Park': "QUEENS",
        'Bronx - Bay Plaza': "BRONX BAY PLAZA",
        'Bronx - Lehman': "BRONX LEHMAN",
        'Staten Island Test Site': "STATEN ISLAND",
        'Brooklyn - Fountain Ave': "BROOKLYN FOUNTAIN AVE",
        'Nassau County Test Site': "JONES BEACH",
        'Suffolk County - Stony Brook': "STONY BROOK",
        'Albany County Test Site': "ALBANY",
    }
    try:
        print("[3-a] Pulling throughput data into dictionary...")
        for site, label in siteLabels.items():
            # First matching row's value in yesterday's column, as int64.
            value = dailyReport.loc[dailyReport['Sites'] == label, dfColName].values[0]
            throughputDict[site]['Throughput'] = value.astype(np.int64)
    except Exception as err:
        print(f"[!] An error occurred when creating the dictionary: {err}\n")
        print("Exiting now...\n")
        raise SystemExit(1)
    # Convert dict to a data frame
    print("[3-b] Converting dictionary to data frame...\n")
    df = pd.DataFrame(data=throughputDict, index=None, columns=None, dtype=None, copy=False)
    print(df)
    time.sleep(10)
    # Transpose DataFrame: swap rows and columns
    throughputDF = df.transpose()
    # Return the data frame
    return throughputDF
def setUpWorkbook():
    """
    Ensure an appropriate "new" trends-report file is ready to be written to.
    If a file is not ready, a new one will be created and main() re-run. This
    function is likely not necessary, but is a "nice to have" if you are using
    this script on a new system.

    Guarantees the workbook at the hard-coded path exists and contains exactly
    one sheet named "Daily Mission Data".
    """
    path = r"D:\Data Analytics Projects\JTF-COVID\Testing Site Trends"
    file = "Trends_Report.xlsx"
    dailyReportFile = os.path.join(path, file)
    try:
        sheet_name = "Daily Mission Data"
        workbook = load_workbook(dailyReportFile)
        if sheet_name not in workbook.sheetnames:
            workbook.create_sheet(sheet_name)
        # Drop every other sheet so only the mission-data sheet remains.
        for sheet in workbook.sheetnames:
            if sheet != sheet_name:
                workbook.remove(workbook[sheet])
        print(f" [1-a-a] Sheet names in the trend report: {workbook.sheetnames}\n")
        workbook.save(dailyReportFile)
    except FileNotFoundError:
        print("[!] Trends report file not found - creating one for you...\n")
        # Save at the full dailyReportFile path; the original saved a bare
        # filename into whatever the CWD happened to be (and shadowed `file`).
        newWorkbook = openpyxl.Workbook()
        newWorkbook.save(filename=dailyReportFile)
        print("Restarting the script...")
        main()
    except Exception as err:
        print(f"An unhandled error occurred in the setUpWorkbook() function when attempting to write file: {err}")
        print("Exiting now...\n")
        raise SystemExit(1)
def writeTrendsReport():
    """
    This function essentially kicks off the entire process of writing the "trends report"
    (i.e. an excel file with some throughput numbers in it). The most effective method I found to write the daily report
    is to pull any existing data from prior "trends reports" (if any have been established) into a data frame and
    concatenate this with the new data frame assembled in the `createDailyDF()` function. There may be a better way,
    but this is the only way I was able to "append" data to an existing `.xlsx` sheet using `Python`.

    Exits the process (SystemExit, code 1) on any failure while writing.
    """
    df = createDailyDF()
    print(df)
    try:
        trendReportpath = r"D:\Data Analytics Projects\JTF-COVID\Testing Site Trends"
        trendReportFileName = "Trends_Report.xlsx"
        trendReportFile = os.path.join(trendReportpath, trendReportFileName)
        sheet_name = "Daily Mission Data"
        print("\n[4] Getting ready to write the Trends Report...")
        # pull in data from existing report and combine it with new data frame - write to file.
        print("\n[4-a] Grabbing existing data out of the trends report...")
        existingData = pd.read_excel(trendReportFile, sheet_name=sheet_name, index_col=0)
        print(existingData)
        print("\n[4-b] Combining the new data frame and the historic data...")
        combinedData = pd.concat([existingData, df])
        print(combinedData)
        print("\n [4-c] Removing old data from the file...")
        """
        These few lines serve as a work around to eliminate duplicate sheets within the workbook.
        Without these few lines, the script would append a new sheet each time the script is run.
        As the data from the original sheet is already stored in memory, it is safe to delete the original sheet at
        this point.
        """
        # TODO: Explore improving this function further - can we eliminate the need to delete a sheet?
        # TODO: Check on the date format between the old data and the new data - they do not match in the daily report.
        workbook = load_workbook(trendReportFile)
        # Print the actual sheet-name list; the original printed the bound
        # method object of the deprecated get_sheet_names API without calling it.
        print(workbook.sheetnames)
        dmd = workbook['Daily Mission Data']
        workbook.remove(dmd)
        workbook.save(trendReportFile)
        print("\n[4-d] Writing data frame to new sheet in Trends Report...\n")
        """
        The use of `mode='a'` forces ExcelWriter to append the data to be written as a new sheet.
        I have not been able to find a way to use the append function to append data to an existing sheet.
        It is necessary to read in the entire workbook if you wish to retain old sheets.
        """
        # Context manager saves and closes the writer; ExcelWriter.save() is
        # deprecated in modern pandas.
        with pd.ExcelWriter(trendReportFile, mode='a') as writer:
            # Each time the file is written, as it is essentially rewriting the sheet each time,
            # you must include your index_label and header statement in your to_excel writer
            combinedData.to_excel(writer, sheet_name=sheet_name, index_label="Test Site", header=True)
    except Exception as err:
        print(f"An unhandled error occurred in the writeTrendsReport() function when attempting to write file: {err}")
        print("Exiting now...\n")
        raise SystemExit(1)
def main():
    """Entry point: cd into the project directory and write the trends report.

    Reports success only when writeTrendsReport() completed without raising
    (the original printed the success message even after the except branch).
    """
    print("[1] Warming up...\n")
    print("[***] Verifying cwd and prepping to start script...\n")
    # TODO: Update the below to use os.path.join to ensure cross platform compatibility
    # TODO: Add a check to see if data for yesterday has already been added to the Trends Report.
    workingDir = "D:\\Data Analytics Projects\\JTF-COVID\\Testing Site Trends"
    if os.getcwd() != workingDir:
        os.chdir(workingDir)
    try:
        # Optionally run setUpWorkbook() first to verify/create the trends
        # report file when running on a new system.
        print("[1-a] Starting report generation now...\n")
        writeTrendsReport()
    except Exception as err:
        print(f"An unhandled error occurred when attempting to create the trends report: {err}")
    else:
        # Only reached when the report was written successfully.
        print("[***] No issues encountered... process complete!")
# Standard entry-point guard: run only when executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment