Created
January 8, 2023 15:56
-
-
Save snewcomer/c89599076e204787bebf1ca3c1c27171 to your computer and use it in GitHub Desktop.
Busy in 30 min intervals
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import numpy as np | |
from datetime import datetime | |
import matplotlib.pyplot as plt | |
cal = pd.read_csv('cal.csv') | |
# clean summary | |
cal = cal.query("summary not in ('Mid-week Meditation', 'Coffee Chat: Meet our New Hires!', 'PED Lunch & Learn Hold', 'AskPeople')") | |
cal = cal[cal['summary'].str.contains("Company Holiday") == False] | |
cal = cal[cal['summary'].str.contains("L&L") == False] | |
#cal.drop(['summary'], axis=1, inplace=True) | |
cal = cal.drop_duplicates() | |
# clean dtstart | |
parsed = pd.to_datetime(cal["dtstart"], errors="coerce").fillna(pd.to_datetime(cal["dtstart"], format="%Y-%d-%m", errors="coerce")) | |
ordinal = pd.to_numeric(cal["dtstart"], errors="coerce").apply(lambda x: pd.Timestamp("1899-12-30")+pd.Timedelta(x, unit="D")) | |
parsed_e = pd.to_datetime(cal["dtend"], errors="coerce").fillna(pd.to_datetime(cal["dtend"], format="%Y-%d-%m", errors="coerce")) | |
ordinal_e = pd.to_numeric(cal["dtend"], errors="coerce").apply(lambda x: pd.Timestamp("1899-12-30")+pd.Timedelta(x, unit="D")) | |
cal['dtstart'] = parsed.fillna(ordinal) | |
cal['dtend'] = parsed_e.fillna(ordinal_e) | |
# create unique column we can group and resample over | |
cal['DtCombined'] = cal['dtstart'].astype(str) + '_' + cal['dtend'].astype(str) | |
# still might have problems, drop Zulu | |
cal = cal.dropna() | |
cal['dtstart'] = cal['dtstart'].apply(lambda x: datetime.replace(x, tzinfo=None)) | |
cal['dtend'] = cal['dtend'].apply(lambda x: datetime.replace(x, tzinfo=None)) | |
cal['MeetingLength'] = cal['dtend'] - cal['dtstart'] | |
cal['Date'] = cal['dtstart'].dt.date | |
cal['Date'] = pd.to_datetime(cal['Date'], errors="coerce") | |
cal['StartTime'] = cal['dtstart'].dt.time | |
cal['EndTime'] = cal['dtend'].dt.time | |
cal = cal.melt(id_vars=['summary', 'Date', 'dtstart', 'dtend', 'DtCombined', 'MeetingLength'], var_name='Start/End', value_name='TimeOfDay') | |
cal['Busy'] = 1 | |
# conditional create column that we will ultimately resample | |
cal['DateTime'] = np.where(cal['Start/End'] == 'StartTime', cal['dtstart'], cal['dtend']) | |
cal.sort_values(by=['dtstart', 'TimeOfDay'], inplace=True) | |
cal.drop(['dtstart', 'dtend', 'Start/End'], axis=1, inplace=True) | |
cal = cal.groupby('DtCombined').apply(lambda x: x.drop_duplicates('DateTime').set_index('DateTime').resample('30Min').ffill()).reset_index('DtCombined', drop=True).drop('DtCombined', 1).reset_index() | |
cal['TimeOfDay'] = cal['DateTime'].dt.time | |
# resample drops non numeric columns | |
cal = cal.set_index('DateTime').resample('30Min').mean().reset_index() | |
cal['Busy'] = cal['Busy'].fillna(0) | |
cal['Weekday'] = cal['DateTime'].dt.weekday |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment