Skip to content

Instantly share code, notes, and snippets.

@kperry2215
Created July 30, 2019 22:23
Show Gist options
  • Save kperry2215/b0dd16a3b71b870c45194e745c368185 to your computer and use it in GitHub Desktop.
Save kperry2215/b0dd16a3b71b870c45194e745c368185 to your computer and use it in GitHub Desktop.
def generate_histogram_of_aggregated_counts(df,
peak_demand_hour_column,
group_by_column):
"""
Generate a histogram of peak demand hour counts, grouped by a column
Arguments:
df: Pandas dataframe
peak_demand_hour_column: String. Name of the column for peak demand hour
group_by_column: String. Name of column to group by
"""
#Create a histogram of counts by hour, grouped by month
fig = plt.figure(figsize = (20,15))
ax = fig.gca()
axarr = df[peak_demand_hour_column].hist(by=df[group_by_column], bins=24, ax=ax)
for ax in axarr.flatten():
ax.set_xlabel("Peak Demand Hour (0-23)")
ax.set_ylabel("Number of Occurrences")
#Count number of peak hour occurrences, grouped by month
peak_hour_counts=pd.DataFrame(df.groupby([peak_demand_hour_column,
group_by_column]).size()).reset_index().rename(columns={0:'Counts'})
#Pull peak hour for each month and write back as a column
peak_hour_counts['Number_Occurrences']=peak_hour_counts.groupby([group_by_column])['Counts'].transform('max')
#Subset the dataframe to only include max counts for peak demand hours for each month
peak_hour_counts=peak_hour_counts[peak_hour_counts['Counts']==peak_hour_counts['Number_Occurrences']]
#Order the dataframe by group_by_column
peak_hour_counts=peak_hour_counts.sort_values(by=[group_by_column])
#Print the subsetted dataframe
print(peak_hour_counts[[group_by_column, peak_demand_hour_column, 'Number_Occurrences']])
##Execute in main block
#Create a histogram of counts by peak demand hour, grouped by day of the week
generate_histogram_of_aggregated_counts(peak_demand_hour_df,
peak_demand_hour_column='Peak_Demand_Hour',
group_by_column='Day_Of_Week_Numeric')
#Create a histogram of counts by peak demand hour, grouped by month
generate_histogram_of_aggregated_counts(peak_demand_hour_df,
peak_demand_hour_column='Peak_Demand_Hour',
group_by_column='Month_Numeric')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment