Created
July 30, 2019 22:23
-
-
Save kperry2215/b0dd16a3b71b870c45194e745c368185 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generate_histogram_of_aggregated_counts(df, | |
peak_demand_hour_column, | |
group_by_column): | |
""" | |
Generate a histogram of peak demand hour counts, grouped by a column | |
Arguments: | |
df: Pandas dataframe | |
peak_demand_hour_column: String. Name of the column for peak demand hour | |
group_by_column: String. Name of column to group by | |
""" | |
#Create a histogram of counts by hour, grouped by month | |
fig = plt.figure(figsize = (20,15)) | |
ax = fig.gca() | |
axarr = df[peak_demand_hour_column].hist(by=df[group_by_column], bins=24, ax=ax) | |
for ax in axarr.flatten(): | |
ax.set_xlabel("Peak Demand Hour (0-23)") | |
ax.set_ylabel("Number of Occurrences") | |
#Count number of peak hour occurrences, grouped by month | |
peak_hour_counts=pd.DataFrame(df.groupby([peak_demand_hour_column, | |
group_by_column]).size()).reset_index().rename(columns={0:'Counts'}) | |
#Pull peak hour for each month and write back as a column | |
peak_hour_counts['Number_Occurrences']=peak_hour_counts.groupby([group_by_column])['Counts'].transform('max') | |
#Subset the dataframe to only include max counts for peak demand hours for each month | |
peak_hour_counts=peak_hour_counts[peak_hour_counts['Counts']==peak_hour_counts['Number_Occurrences']] | |
#Order the dataframe by group_by_column | |
peak_hour_counts=peak_hour_counts.sort_values(by=[group_by_column]) | |
#Print the subsetted dataframe | |
print(peak_hour_counts[[group_by_column, peak_demand_hour_column, 'Number_Occurrences']]) | |
##Execute in main block | |
#Create a histogram of counts by peak demand hour, grouped by day of the week | |
generate_histogram_of_aggregated_counts(peak_demand_hour_df, | |
peak_demand_hour_column='Peak_Demand_Hour', | |
group_by_column='Day_Of_Week_Numeric') | |
#Create a histogram of counts by peak demand hour, grouped by month | |
generate_histogram_of_aggregated_counts(peak_demand_hour_df, | |
peak_demand_hour_column='Peak_Demand_Hour', | |
group_by_column='Month_Numeric') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment