Last active
February 15, 2019 23:52
-
-
Save WillKoehrsen/121b2ffc9bbdca0617b630af6d49dcde to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def make_dataset(carrier_list, range_start=-60, range_end=120, bin_width=5):
    """Build the per-carrier arrival-delay histogram data for plotting.

    For every carrier in ``carrier_list`` the rows of the module-level
    ``flights`` frame whose ``name`` column equals the carrier are selected,
    their ``arr_delay`` values are binned with ``np.histogram``, and the bin
    counts are normalised to proportions of the counted values.

    Args:
        carrier_list: Iterable of carrier names to include.
        range_start: Lower edge of the histogram range.
        range_end: Upper edge of the histogram range; must be greater than
            ``range_start``.
        bin_width: Width of each bin; the number of bins is
            ``int((range_end - range_start) / bin_width)``.

    Returns:
        ``ColumnDataSource`` wrapping a frame sorted by ``name`` then
        ``left`` with the columns ``proportion``, ``left``, ``right``,
        ``f_proportion``, ``f_interval``, ``name`` and ``color``.

    Raises:
        AssertionError: If ``range_start`` is not less than ``range_end``.
    """
    # Kept as an assert so existing callers observe the same exception type.
    assert range_start < range_end, "Start must be less than end!"

    columns = ['proportion', 'left', 'right',
               'f_proportion', 'f_interval',
               'name', 'color']

    # Loop-invariant: every carrier uses the same number of bins.
    n_bins = int((range_end - range_start) / bin_width)

    # Collect the per-carrier frames and concatenate once at the end.
    # DataFrame.append was removed in pandas 2.0 and copied the accumulated
    # frame on every iteration.
    frames = []
    for i, carrier_name in enumerate(carrier_list):
        # Subset to the carrier
        subset = flights[flights['name'] == carrier_name]

        # Create a histogram with specified bins and range
        arr_hist, edges = np.histogram(subset['arr_delay'],
                                       bins=n_bins,
                                       range=[range_start, range_end])

        # Divide the counts by the total to get a proportion. When nothing
        # falls inside the range the total is 0; report zero proportions
        # rather than dividing by zero and producing NaN.
        total = arr_hist.sum()
        if total:
            proportion = arr_hist / total
        else:
            proportion = np.zeros(len(arr_hist))

        arr_df = pd.DataFrame({'proportion': proportion,
                               'left': edges[:-1],
                               'right': edges[1:]})

        # Pre-formatted strings for display
        arr_df['f_proportion'] = ['%0.5f' % p for p in arr_df['proportion']]
        arr_df['f_interval'] = ['%d to %d minutes' % (left, right)
                                for left, right
                                in zip(arr_df['left'], arr_df['right'])]

        # Assign the carrier for labels
        arr_df['name'] = carrier_name

        # Color each carrier differently; wrap around so more carriers than
        # palette entries reuse colors instead of raising IndexError.
        arr_df['color'] = Category20_16[i % len(Category20_16)]

        frames.append(arr_df)

    if frames:
        # No ignore_index: each carrier keeps its own 0..n-1 index, exactly
        # as repeated append calls produced.
        by_carrier = pd.concat(frames)
    else:
        # pd.concat rejects an empty list; keep the empty, correctly-columned
        # frame the original returned for an empty carrier list.
        by_carrier = pd.DataFrame(columns=columns)

    by_carrier = by_carrier.sort_values(['name', 'left'])

    # Convert dataframe to column data source
    return ColumnDataSource(by_carrier)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment