Last active
February 2, 2023 10:38
-
-
Save YuriyGuts/9a92bad5a8573a64381380c47e6dab97 to your computer and use it in GitHub Desktop.
Given a dataset of blackout events, generate daily downtime stats and calendar visualizations
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
""" | |
Given a dataset of blackout events, generate daily downtime stats and calendar visualizations. | |
------------------- | |
Prerequisites | |
------------------- | |
python3 -m venv ~/.virtualenvs/blackout-stats | |
source ~/.virtualenvs/blackout-stats/bin/activate | |
pip install pandas==1.5.2 matplotlib==3.6.2 july==0.1.3 | |
------------------- | |
Usage | |
------------------- | |
1. Prepare a CSV file named "blackout-events.csv" in the following format: | |
``` | |
start_date,end_date | |
2022-10-20 00:11:00,2022-10-20 05:19:00 | |
2022-10-30 07:40:00,2022-10-30 12:55:00 | |
2022-11-15 16:07:00,2022-11-16 04:24:00 | |
``` | |
start_date: power OFF event time | |
end_date: power ON event time | |
2. Run "python3 blackout-daily-stats.py" | |
3. The data will be saved to "blackout-daily-stats.csv" | |
4. The plot will be saved to "blackout-daily-stats.png" | |
""" | |
from datetime import datetime | |
from datetime import timedelta | |
import july | |
import matplotlib.pyplot as plt | |
import pandas as pd | |
def generate_daily_data(df_blackout_events, output_filename=None, now=None):
    """Build a per-day downtime table from a dataframe of blackout events.

    Args:
        df_blackout_events: Dataframe with "start_date" (power OFF time) and
            "end_date" (power ON time) columns formatted as
            "%Y-%m-%d %H:%M:%S". A missing "end_date" marks a blackout that
            is still ongoing. The dataframe is left unmodified.
        output_filename: Optional path; when given, the resulting table is
            also written there as CSV.
        now: Optional naive datetime whose calendar day is the last day of
            the report. Defaults to the current local time.

    Returns:
        Dataframe with one row per calendar day, from the day of the earliest
        blackout through the day of ``now``. Columns: "date" (midnight of
        the day) and "daily_downtime" (hours, rounded to two decimals).
        The dataframe is empty when no event starts on or before the last
        report day.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    one_day = pd.Timedelta(days=1)
    no_downtime = pd.Timedelta(0)

    # Work on a copy so the caller's dataframe keeps its original columns.
    df = df_blackout_events.copy()
    df["start_date"] = pd.to_datetime(df["start_date"], format=timestamp_format)
    df["end_date"] = pd.to_datetime(df["end_date"], format=timestamp_format)

    # The report ends at the midnight that follows `now` (exclusive bound).
    report_now = pd.Timestamp(datetime.now() if now is None else now)
    report_end = report_now.normalize() + one_day
    first_start = df["start_date"].min()

    daily_downtime_records = []
    # `first_start` is NaT when there are no usable events at all.
    if not pd.isnull(first_start):
        starts = df["start_date"]
        # An ongoing blackout (no power ON time yet) is treated as lasting
        # through the end of the report, i.e. until the end of the last day.
        ends = df["end_date"].fillna(report_end)

        current_date = first_start.normalize()
        while current_date < report_end:
            next_date = current_date + one_day

            # Events that intersect [current_date, next_date). Rows with a
            # missing start time compare as False and are skipped.
            overlaps_day = (starts < next_date) & (ends > current_date)

            # Downtime within the day is the part of each event that falls
            # inside the day's boundaries.
            daily_downtime = sum(
                (
                    min(end, next_date) - max(start, current_date)
                    for start, end in zip(starts[overlaps_day], ends[overlaps_day])
                ),
                no_downtime,
            )
            # Overlapping events must not push a day beyond 24 hours.
            daily_downtime = min(daily_downtime, one_day)

            daily_downtime_records.append({
                "date": current_date,
                "daily_downtime": round(daily_downtime.total_seconds() / 3600.0, 2),
            })
            current_date = next_date

    # Explicit columns keep the table well-formed even when it has no rows.
    df_daily_downtime = pd.DataFrame(
        daily_downtime_records,
        columns=["date", "daily_downtime"],
    )

    if output_filename:
        df_daily_downtime.to_csv(output_filename, header=True, index=False)
        print(f"Daily CSV data saved to {output_filename}")

    return df_daily_downtime
def generate_daily_plot(df_daily_downtime, output_filename):
    """Render a dataframe of daily blackout durations as a calendar plot.

    Args:
        df_daily_downtime: Dataframe with a "date" column and a
            "daily_downtime" column; one row per day is expected so the
            values line up with the generated date range.
        output_filename: Path of the image file to write. When falsy, the
            plot is shown in an interactive window instead of being saved.
    """
    date_range = july.utils.date_range(
        df_daily_downtime["date"].min(),
        df_daily_downtime["date"].max(),
    )
    july.calendar_plot(
        dates=date_range,
        data=df_daily_downtime["daily_downtime"],
        title=None,
        value_label=True,
        fontfamily="Helvetica",
        cmap="Oranges",
        ncols=3,
        figsize=(15, 9),
        dpi=300,
    )
    # The title text (Ukrainian for "Blackout duration, hours") has to be the
    # first argument: a positional argument after a keyword argument is a
    # syntax error in Python.
    plt.suptitle(
        "Тривалість відключень, год",
        x=0.49,
        fontfamily="Helvetica",
        fontsize=20,
        horizontalalignment="center",
    )

    if output_filename:
        plt.savefig(output_filename)
        print(f"Calendar plot saved to {output_filename}")
    else:
        plt.show()
def main():
    """Read the blackout events CSV, then write the daily stats CSV and the calendar plot."""
    events = pd.read_csv("blackout-events.csv")
    daily_stats = generate_daily_data(
        events,
        output_filename="blackout-daily-stats.csv",
    )
    generate_daily_plot(
        daily_stats,
        output_filename="blackout-daily-stats.png",
    )


# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment