kperry2215 · July 30, 2019 22:23
diff --git a/aggregated_histogram.py b/aggregated_histogram.py
 def generate_histogram_of_aggregated_counts(df, 
                                            peak_demand_hour_column, 
                                            group_by_column):
    """
    Generate a histogram of peak demand hour counts, grouped by a column
    Arguments:
        df: Pandas dataframe
        peak_demand_hour_column: String. Name of the column for peak demand hour
        group_by_column: String. Name of column to group by 
    """
    #Create a histogram of counts by hour, grouped by month
    fig = plt.figure(figsize = (20,15))
    ax = fig.gca()
    axarr = df[peak_demand_hour_column].hist(by=df[group_by_column], bins=24, ax=ax)
    for ax in axarr.flatten():
        ax.set_xlabel("Peak Demand Hour (0-23)")
        ax.set_ylabel("Number of Occurrences")
    #Count number of peak hour occurrences, grouped by month
    peak_hour_counts=pd.DataFrame(df.groupby([peak_demand_hour_column, 
                                              group_by_column]).size()).reset_index().rename(columns={0:'Counts'})
    #Pull peak hour for each month and write back as a column
    peak_hour_counts['Number_Occurrences']=peak_hour_counts.groupby([group_by_column])['Counts'].transform('max')
    #Subset the dataframe to only include max counts for peak demand hours for each month
    peak_hour_counts=peak_hour_counts[peak_hour_counts['Counts']==peak_hour_counts['Number_Occurrences']]
    #Order the dataframe by group_by_column
    peak_hour_counts=peak_hour_counts.sort_values(by=[group_by_column])
    #Print the subsetted dataframe
    print(peak_hour_counts[[group_by_column, peak_demand_hour_column, 'Number_Occurrences']])


 ##Execute in main block

 #Create a histogram of counts by peak demand hour, grouped by day of the week
 generate_histogram_of_aggregated_counts(peak_demand_hour_df, 
                                            peak_demand_hour_column='Peak_Demand_Hour', 
                                            group_by_column='Day_Of_Week_Numeric')
 #Create a histogram of counts by peak demand hour, grouped by month
 generate_histogram_of_aggregated_counts(peak_demand_hour_df, 
                                            peak_demand_hour_column='Peak_Demand_Hour', 
                                            group_by_column='Month_Numeric')
	def generate_histogram_of_aggregated_counts(df,
	peak_demand_hour_column,
	group_by_column):
	"""
	Generate a histogram of peak demand hour counts, grouped by a column
	Arguments:
	df: Pandas dataframe
	peak_demand_hour_column: String. Name of the column for peak demand hour
	group_by_column: String. Name of column to group by
	"""
	#Create a histogram of counts by hour, grouped by month
	fig = plt.figure(figsize = (20,15))
	ax = fig.gca()
	axarr = df[peak_demand_hour_column].hist(by=df[group_by_column], bins=24, ax=ax)
	for ax in axarr.flatten():
	ax.set_xlabel("Peak Demand Hour (0-23)")
	ax.set_ylabel("Number of Occurrences")
	#Count number of peak hour occurrences, grouped by month
	peak_hour_counts=pd.DataFrame(df.groupby([peak_demand_hour_column,
	group_by_column]).size()).reset_index().rename(columns={0:'Counts'})
	#Pull peak hour for each month and write back as a column
	peak_hour_counts['Number_Occurrences']=peak_hour_counts.groupby([group_by_column])['Counts'].transform('max')
	#Subset the dataframe to only include max counts for peak demand hours for each month
	peak_hour_counts=peak_hour_counts[peak_hour_counts['Counts']==peak_hour_counts['Number_Occurrences']]
	#Order the dataframe by group_by_column
	peak_hour_counts=peak_hour_counts.sort_values(by=[group_by_column])
	#Print the subsetted dataframe
	print(peak_hour_counts[[group_by_column, peak_demand_hour_column, 'Number_Occurrences']])


	##Execute in main block

	#Create a histogram of counts by peak demand hour, grouped by day of the week
	generate_histogram_of_aggregated_counts(peak_demand_hour_df,
	peak_demand_hour_column='Peak_Demand_Hour',
	group_by_column='Day_Of_Week_Numeric')
	#Create a histogram of counts by peak demand hour, grouped by month
	generate_histogram_of_aggregated_counts(peak_demand_hour_df,
	peak_demand_hour_column='Peak_Demand_Hour',
	group_by_column='Month_Numeric')