snewcomer · January 8, 2023 15:56
diff --git a/cal_time.py b/cal_time.py
 import pandas as pd
 import numpy as np
 from datetime import datetime
 import matplotlib.pyplot as plt

 cal = pd.read_csv('cal.csv')

 # clean summary
 cal = cal.query("summary not in ('Mid-week Meditation', 'Coffee Chat: Meet our New Hires!', 'PED Lunch & Learn Hold', 'AskPeople')")
 cal = cal[cal['summary'].str.contains("Company Holiday") == False]
 cal = cal[cal['summary'].str.contains("L&L") == False]
 #cal.drop(['summary'], axis=1, inplace=True)
 cal = cal.drop_duplicates()

 # clean dtstart
 parsed = pd.to_datetime(cal["dtstart"], errors="coerce").fillna(pd.to_datetime(cal["dtstart"], format="%Y-%d-%m", errors="coerce"))
 ordinal = pd.to_numeric(cal["dtstart"], errors="coerce").apply(lambda x: pd.Timestamp("1899-12-30")+pd.Timedelta(x, unit="D"))
 parsed_e = pd.to_datetime(cal["dtend"], errors="coerce").fillna(pd.to_datetime(cal["dtend"], format="%Y-%d-%m", errors="coerce"))
 ordinal_e = pd.to_numeric(cal["dtend"], errors="coerce").apply(lambda x: pd.Timestamp("1899-12-30")+pd.Timedelta(x, unit="D"))
 
 cal['dtstart'] = parsed.fillna(ordinal)
 cal['dtend'] = parsed_e.fillna(ordinal_e)
 # create unique column we can group and resample over
 cal['DtCombined'] = cal['dtstart'].astype(str) + '_' + cal['dtend'].astype(str)
 # still might have problems, drop Zulu
 cal = cal.dropna()
 cal['dtstart'] = cal['dtstart'].apply(lambda x: datetime.replace(x, tzinfo=None))
 cal['dtend'] = cal['dtend'].apply(lambda x: datetime.replace(x, tzinfo=None))
 cal['MeetingLength'] = cal['dtend'] - cal['dtstart']

 cal['Date'] = cal['dtstart'].dt.date
 cal['Date'] = pd.to_datetime(cal['Date'], errors="coerce")
 cal['StartTime'] = cal['dtstart'].dt.time
 cal['EndTime'] = cal['dtend'].dt.time

 cal = cal.melt(id_vars=['summary', 'Date', 'dtstart', 'dtend', 'DtCombined', 'MeetingLength'], var_name='Start/End', value_name='TimeOfDay')
 cal['Busy'] = 1
 # conditional create column that we will ultimately resample
 cal['DateTime'] = np.where(cal['Start/End'] == 'StartTime', cal['dtstart'], cal['dtend'])

 cal.sort_values(by=['dtstart', 'TimeOfDay'], inplace=True)
 cal.drop(['dtstart', 'dtend', 'Start/End'], axis=1, inplace=True)

 cal = cal.groupby('DtCombined').apply(lambda x: x.drop_duplicates('DateTime').set_index('DateTime').resample('30Min').ffill()).reset_index('DtCombined', drop=True).drop('DtCombined', 1).reset_index()
 cal['TimeOfDay'] = cal['DateTime'].dt.time
 # resample drops non numeric columns
 cal = cal.set_index('DateTime').resample('30Min').mean().reset_index()
 cal['Busy'] = cal['Busy'].fillna(0)
 cal['Weekday'] = cal['DateTime'].dt.weekday
	import pandas as pd
	import numpy as np
	from datetime import datetime
	import matplotlib.pyplot as plt

	cal = pd.read_csv('cal.csv')

	# clean summary
	cal = cal.query("summary not in ('Mid-week Meditation', 'Coffee Chat: Meet our New Hires!', 'PED Lunch & Learn Hold', 'AskPeople')")
	cal = cal[cal['summary'].str.contains("Company Holiday") == False]
	cal = cal[cal['summary'].str.contains("L&L") == False]
	#cal.drop(['summary'], axis=1, inplace=True)
	cal = cal.drop_duplicates()

	# clean dtstart
	parsed = pd.to_datetime(cal["dtstart"], errors="coerce").fillna(pd.to_datetime(cal["dtstart"], format="%Y-%d-%m", errors="coerce"))
	ordinal = pd.to_numeric(cal["dtstart"], errors="coerce").apply(lambda x: pd.Timestamp("1899-12-30")+pd.Timedelta(x, unit="D"))
	parsed_e = pd.to_datetime(cal["dtend"], errors="coerce").fillna(pd.to_datetime(cal["dtend"], format="%Y-%d-%m", errors="coerce"))
	ordinal_e = pd.to_numeric(cal["dtend"], errors="coerce").apply(lambda x: pd.Timestamp("1899-12-30")+pd.Timedelta(x, unit="D"))

	cal['dtstart'] = parsed.fillna(ordinal)
	cal['dtend'] = parsed_e.fillna(ordinal_e)
	# create unique column we can group and resample over
	cal['DtCombined'] = cal['dtstart'].astype(str) + '_' + cal['dtend'].astype(str)
	# still might have problems, drop Zulu
	cal = cal.dropna()
	cal['dtstart'] = cal['dtstart'].apply(lambda x: datetime.replace(x, tzinfo=None))
	cal['dtend'] = cal['dtend'].apply(lambda x: datetime.replace(x, tzinfo=None))
	cal['MeetingLength'] = cal['dtend'] - cal['dtstart']

	cal['Date'] = cal['dtstart'].dt.date
	cal['Date'] = pd.to_datetime(cal['Date'], errors="coerce")
	cal['StartTime'] = cal['dtstart'].dt.time
	cal['EndTime'] = cal['dtend'].dt.time

	cal = cal.melt(id_vars=['summary', 'Date', 'dtstart', 'dtend', 'DtCombined', 'MeetingLength'], var_name='Start/End', value_name='TimeOfDay')
	cal['Busy'] = 1
	# conditional create column that we will ultimately resample
	cal['DateTime'] = np.where(cal['Start/End'] == 'StartTime', cal['dtstart'], cal['dtend'])

	cal.sort_values(by=['dtstart', 'TimeOfDay'], inplace=True)
	cal.drop(['dtstart', 'dtend', 'Start/End'], axis=1, inplace=True)

	cal = cal.groupby('DtCombined').apply(lambda x: x.drop_duplicates('DateTime').set_index('DateTime').resample('30Min').ffill()).reset_index('DtCombined', drop=True).drop('DtCombined', 1).reset_index()
	cal['TimeOfDay'] = cal['DateTime'].dt.time
	# resample drops non numeric columns
	cal = cal.set_index('DateTime').resample('30Min').mean().reset_index()
	cal['Busy'] = cal['Busy'].fillna(0)
	cal['Weekday'] = cal['DateTime'].dt.weekday
No results found