Created
July 19, 2017 09:17
-
-
Save c-rhodes/4d6ea119c2ec3a8ac32e5702d38d06f7 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import matplotlib.pyplot as plt | |
dataset = '/home/cullenrhodes/Downloads/as-and-a-level-mathematics-data-set.xls' | |
# Build DataFrame containing Heathrow 1987 data. | |
# Header starts at row 6 (substract 1 for 0-based indexing). | |
df_heathrow_1987 = pd.read_excel( | |
dataset, sheetname='Heathrow May-Oct 1987', header=5, index_col='Date' | |
) | |
# Build DataFrame containing Heathrow 2015 data. | |
# Header starts at row 6 (substract 1 for 0-based indexing). | |
df_heathrow_2015 = pd.read_excel( | |
dataset, sheetname='Heathrow May-Oct 2015', header=5, index_col='Date' | |
) | |
total_rainfall_col = 'Daily Total Rainfall (0900-0900) (mm)' | |
# Build Pandas Series object indexed by Date with total rainfall values from | |
# Heathrow 1987 (time series) | |
heathrow_1987_total_rainfall = df_heathrow_1987.loc[ | |
# Drop rows where total rainfall == 'tr' (trace value) | |
df_heathrow_1987[total_rainfall_col] != 'tr' | |
][total_rainfall_col].dropna() # Select only rainfall col and drop null values | |
# Build Pandas Series object indexed by Date with total rainfall values from | |
# Heathrow 2015 (time series) | |
heathrow_2015_total_rainfall = df_heathrow_2015.loc[ | |
# Drop rows where total rainfall == 'tr' (trace value) | |
df_heathrow_2015[total_rainfall_col] != 'tr' | |
][total_rainfall_col].dropna() # Select only rainfall col and drop null values | |
# Create DataFrame by concatenating time series objects, ie: | |
# | |
# Index 1987 2015 | |
# ------------------------ | |
# ... | |
# July-16 5.1 6.8 | |
# ... ... ... | |
# October-14 10.4 0.6 | |
# October-17 0.1 0.0 | |
# ... | |
df_heathrow_total_rainfall = pd.concat( | |
[heathrow_1987_total_rainfall, heathrow_2015_total_rainfall] | |
).to_frame() | |
# Pivot DataFrame, ie: | |
# Index 1987 2015 | |
# ------------------------- | |
# September-22 0.0 0.2 | |
# September-23 6.6 2.0 | |
# September-27 0.0 0.0 | |
# September-28 0.0 0.0 | |
# September-30 0.0 0.0 | |
table_heathrow_total_rainfall = df_heathrow_total_rainfall.pivot_table( | |
# '%B' - Month as locale’s full name. e.g. September. | |
# '%d' - Day of month. e.g. 07. | |
index=df_heathrow_total_rainfall.index.strftime('%B-%d'), | |
columns=df_heathrow_total_rainfall.index.year, | |
values=total_rainfall_col, | |
aggfunc='sum' | |
).dropna() | |
table_heathrow_total_rainfall.plot() | |
plt.show() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment