Skip to content

Instantly share code, notes, and snippets.

@c-rhodes
Created July 19, 2017 09:17
Show Gist options
  • Save c-rhodes/4d6ea119c2ec3a8ac32e5702d38d06f7 to your computer and use it in GitHub Desktop.
Save c-rhodes/4d6ea119c2ec3a8ac32e5702d38d06f7 to your computer and use it in GitHub Desktop.
import pandas as pd
import matplotlib.pyplot as plt
dataset = '/home/cullenrhodes/Downloads/as-and-a-level-mathematics-data-set.xls'
# Build DataFrame containing Heathrow 1987 data.
# Header starts at row 6 (substract 1 for 0-based indexing).
df_heathrow_1987 = pd.read_excel(
dataset, sheetname='Heathrow May-Oct 1987', header=5, index_col='Date'
)
# Build DataFrame containing Heathrow 2015 data.
# Header starts at row 6 (substract 1 for 0-based indexing).
df_heathrow_2015 = pd.read_excel(
dataset, sheetname='Heathrow May-Oct 2015', header=5, index_col='Date'
)
total_rainfall_col = 'Daily Total Rainfall (0900-0900) (mm)'
# Build Pandas Series object indexed by Date with total rainfall values from
# Heathrow 1987 (time series)
heathrow_1987_total_rainfall = df_heathrow_1987.loc[
# Drop rows where total rainfall == 'tr' (trace value)
df_heathrow_1987[total_rainfall_col] != 'tr'
][total_rainfall_col].dropna() # Select only rainfall col and drop null values
# Build Pandas Series object indexed by Date with total rainfall values from
# Heathrow 2015 (time series)
heathrow_2015_total_rainfall = df_heathrow_2015.loc[
# Drop rows where total rainfall == 'tr' (trace value)
df_heathrow_2015[total_rainfall_col] != 'tr'
][total_rainfall_col].dropna() # Select only rainfall col and drop null values
# Create DataFrame by concatenating time series objects, ie:
#
# Index 1987 2015
# ------------------------
# ...
# July-16 5.1 6.8
# ... ... ...
# October-14 10.4 0.6
# October-17 0.1 0.0
# ...
df_heathrow_total_rainfall = pd.concat(
[heathrow_1987_total_rainfall, heathrow_2015_total_rainfall]
).to_frame()
# Pivot DataFrame, ie:
# Index 1987 2015
# -------------------------
# September-22 0.0 0.2
# September-23 6.6 2.0
# September-27 0.0 0.0
# September-28 0.0 0.0
# September-30 0.0 0.0
table_heathrow_total_rainfall = df_heathrow_total_rainfall.pivot_table(
# '%B' - Month as locale’s full name. e.g. September.
# '%d' - Day of month. e.g. 07.
index=df_heathrow_total_rainfall.index.strftime('%B-%d'),
columns=df_heathrow_total_rainfall.index.year,
values=total_rainfall_col,
aggfunc='sum'
).dropna()
table_heathrow_total_rainfall.plot()
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment