Skip to content

Instantly share code, notes, and snippets.

@YuriyGuts
Last active February 2, 2023 10:38
Show Gist options
  • Save YuriyGuts/9a92bad5a8573a64381380c47e6dab97 to your computer and use it in GitHub Desktop.
Save YuriyGuts/9a92bad5a8573a64381380c47e6dab97 to your computer and use it in GitHub Desktop.
Given a dataset of blackout events, generate daily downtime stats and calendar visualizations
#!/usr/bin/env python3
"""
Given a dataset of blackout events, generate daily downtime stats and calendar visualizations.
-------------------
Prerequisites
-------------------
python3 -m venv ~/.virtualenvs/blackout-stats
source ~/.virtualenvs/blackout-stats/bin/activate
pip install pandas==1.5.2 matplotlib==3.6.2 july==0.1.3
-------------------
Usage
-------------------
1. Prepare a CSV file named "blackout-events.csv" in the following format:
```
start_date,end_date
2022-10-20 00:11:00,2022-10-20 05:19:00
2022-10-30 07:40:00,2022-10-30 12:55:00
2022-11-15 16:07:00,2022-11-16 04:24:00
```
start_date: power OFF event time
end_date: power ON event time
2. Run "python3 blackout-daily-stats.py"
3. The data will be saved to "blackout-daily-stats.csv"
4. The plot will be saved to "blackout-daily-stats.png"
"""
from datetime import datetime
from datetime import timedelta
import july
import matplotlib.pyplot as plt
import pandas as pd
def generate_daily_data(df_blackout_events, output_filename=None):
    """Given a dataframe of blackout events, generate a daily downtime dataframe.

    Args:
        df_blackout_events: DataFrame with "start_date" (power OFF time) and
            "end_date" (power ON time) columns, formatted as
            "%Y-%m-%d %H:%M:%S". A missing end_date is treated as a blackout
            that is still ongoing; rows with a missing start_date are ignored.
            The dataframe itself is not modified.
        output_filename: Optional path. When given, the result is also written
            there as CSV.

    Returns:
        DataFrame with the columns "date" and "daily_downtime" (hours, rounded
        to 2 decimals), one row per calendar day from the day of the earliest
        blackout start through the current day. The frame is empty when there
        are no events with a start date.
    """
    timestamp_format = "%Y-%m-%d %H:%M:%S"
    one_day = timedelta(days=1)

    # The report always runs through the end of the current day.
    now = datetime.now()
    max_date = datetime(now.year, now.month, now.day) + one_day

    # Work on a copy so the caller's dataframe is left untouched.
    df = df_blackout_events.copy()
    df["start_date"] = pd.to_datetime(df["start_date"], format=timestamp_format)
    # An event without an end time is still ongoing: let it run to the end of
    # the report range so it covers every remaining day in full.
    df["end_date"] = pd.to_datetime(
        df["end_date"], format=timestamp_format
    ).fillna(pd.Timestamp(max_date))
    # Rows without a start time cannot be placed on the timeline. Sorting by
    # start time is required by the interval sweep below.
    df = df.dropna(subset=["start_date"]).sort_values(by="start_date")

    daily_downtime_records = []
    if not df.empty:
        first_start = df["start_date"].min()
        current_date = datetime(first_start.year, first_start.month, first_start.day)

        # Calculate the downtime for each day in the date range.
        while current_date < max_date:
            next_date = current_date + one_day
            touches_day = (df["start_date"] < next_date) & (df["end_date"] > current_date)
            day_events = df.loc[touches_day, ["start_date", "end_date"]]

            # Sweep the events in start order, counting only the part of each
            # one that lies inside this day and is not already covered by an
            # earlier event. Overlapping events are therefore never counted
            # twice, and an event whose end precedes its start adds nothing.
            daily_downtime = timedelta(0)
            covered_until = current_date
            for blackout_start, blackout_end in zip(
                day_events["start_date"], day_events["end_date"]
            ):
                segment_start = max(blackout_start, covered_until)
                segment_end = min(blackout_end, next_date)
                if segment_end > segment_start:
                    daily_downtime += segment_end - segment_start
                    covered_until = segment_end

            daily_downtime_records.append({
                "date": current_date,
                "daily_downtime": round(daily_downtime.total_seconds() / 3600.0, 2),
            })
            current_date = next_date

    # Explicit columns keep the result well-formed even with no records.
    df_daily_downtime = pd.DataFrame(
        daily_downtime_records, columns=["date", "daily_downtime"]
    ).sort_values(by="date")

    if output_filename:
        df_daily_downtime.to_csv(output_filename, header=True, index=False)
        print(f"Daily CSV data saved to {output_filename}")

    return df_daily_downtime
def generate_daily_plot(df_daily_downtime, output_filename):
    """Given a dataframe of daily blackout durations, generate a calendar plot.

    Args:
        df_daily_downtime: DataFrame with a "date" column and a
            "daily_downtime" column, as produced by generate_daily_data().
        output_filename: Path to save the figure to. When empty or None, the
            figure is shown interactively instead.
    """
    # Build the calendar's date axis from the first to the last reported day.
    date_range = july.utils.date_range(
        df_daily_downtime["date"].min(),
        df_daily_downtime["date"].max(),
    )
    july.calendar_plot(
        dates=date_range,
        data=df_daily_downtime["daily_downtime"],
        title=None,
        value_label=True,
        fontfamily="Helvetica",
        cmap="Oranges",
        ncols=3,
        figsize=(15, 9),
        dpi=300,
    )
    # The title text is passed first: a positional argument placed after a
    # keyword argument is a SyntaxError. The text is Ukrainian for
    # "Blackout duration, hours".
    plt.suptitle(
        "Тривалість відключень, год",
        x=0.49,
        fontfamily="Helvetica",
        fontsize=20,
        horizontalalignment="center",
    )

    if output_filename:
        plt.savefig(output_filename)
        print(f"Calendar plot saved to {output_filename}")
    else:
        plt.show()
def main():
    """Read the blackout events CSV, then write the daily stats CSV and the calendar plot."""
    events = pd.read_csv("blackout-events.csv")
    daily_stats = generate_daily_data(
        events,
        output_filename="blackout-daily-stats.csv",
    )
    generate_daily_plot(
        daily_stats,
        output_filename="blackout-daily-stats.png",
    )


# Run the report only when the file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment