In [136]:
import altair as alt
import pandas as pd
import numpy as np


In [102]:
def rolling_zscore(col, years=50, min_periods=5):
    """Standardise each value against a centred rolling window around it.

    For every position, the mean of the surrounding window is subtracted
    from the value and the difference is divided by that window's standard
    deviation.

    Parameters
    ----------
    col : pandas.Series
        Values to standardise (any object exposing ``.rolling`` works).
    years : int, default 50
        Size of the rolling window, in rows. The name suggests one row per
        year -- confirm against the caller's data.
    min_periods : int, default 5
        Minimum number of non-missing observations a window must contain;
        positions whose window has fewer yield NaN.

    Returns
    -------
    Same type as ``col``
        The rolling z-scores, aligned on the index of ``col``.
    """
    # Build the window once so the mean and the standard deviation are
    # guaranteed to be computed with identical settings.
    window = col.rolling(years, center=True, min_periods=min_periods)
    return (col - window.mean()) / window.std()

In [135]:
# Read the per-year metrics, order the rows by year, reshape to long form
# (one row per year / metric pair, metric name in `type`, value in `number`)
# and keep only the rows of the 'events' metric.
years = (
    pd.read_csv('year_metrics.csv')
    .set_index('year')
    .sort_index()
    .reset_index()
    .pipe(pd.melt, id_vars='year', var_name='type', value_name='number')
    .loc[lambda long_form: long_form['type'].eq('events')]
)


In [182]:
# Linear-scale timeline: 3-row rolling mean of the yearly numbers, drawn as
# a grey line.
chart = (
    alt.Chart(
        # Select the numeric columns explicitly: `years` also carries the
        # string column `type`, and recent pandas raises instead of silently
        # dropping non-numeric columns in rolling aggregations.
        # Note that the rolling mean is applied to `year` as well, so each
        # point is plotted at the mean year of its 3-row window.
        years[['year', 'number']].rolling(3).mean().round(decimals=2),
        width=600, height=500
    )
    .mark_line()
    .encode(
        alt.X('year:Q'),
        alt.Y('number:Q'),
        color=alt.value('grey'),
    )
)

# `Chart.save` replaces the deprecated `savechart`; the output format is
# inferred from the file extension.
chart.save('timeline-linear-scale.json')
chart.save('timeline-linear-scale.html')

chart

<VegaLite 2 object>

In [183]:
# Log-scale timeline: 3-row rolling mean of log10(number), drawn as a grey
# line.
chart = (
    alt.Chart(
        years
        # Zero has no logarithm: treat zero counts as missing and back-fill
        # them with the next available value before taking log10. The
        # replacement is confined to `number` so that a `year` of 0 is not
        # blanked as well.
        .assign(log_number=lambda frame: np.log10(
            frame['number'].replace(0, np.nan).bfill()))
        # Keep only numeric columns before rolling: the string column `type`
        # makes recent pandas raise in rolling aggregations.
        # The rolling mean is applied to `year` too, so each point sits at
        # the mean year of its 3-row window.
        [['year', 'log_number']]
        .rolling(3).mean()
        .round(decimals=2),
        width=600, height=500
    )
    .mark_line()
    .encode(
        alt.X('year:Q'),
        # Let the axis start near the data instead of forcing it through 0.
        alt.Y('log_number:Q', scale=alt.Scale(zero=False)),
        color=alt.value('grey'),
    )
)

# `Chart.save` replaces the deprecated `savechart`; the output format is
# inferred from the file extension.
chart.save('timeline-log-scale.json')
chart.save('timeline-log-scale.html')

chart

<VegaLite 2 object>

In [154]:
# Replace the yearly numbers by their rolling z-score (30-row window).
# `years` is already restricted to 'events' where it is loaded; the filter
# is repeated here so this cell does not rely on that.
df = (
    years
    .loc[lambda frame: frame['type'] == 'events']
    .assign(number=lambda frame: rolling_zscore(frame['number'], years=30))
)

chart = (
    # Layer 1: every year as a filled point, coloured by its z-score.
    alt.Chart(df, width=800, height=600)
    .mark_point(filled=True)
    .encode(
        x=alt.X('year:Q'),
        y=alt.Y('number:Q', axis=alt.Axis(title='z score ( no. events )')),
        color=alt.Color('number:Q', legend=None),
    )
) + (
    # Layer 2: print the year next to the points whose z-score exceeds 3.
    alt.Chart(df)
    .transform_filter('datum.number > 3')
    # Shift the label position up by 0.1 z-score units so the text does not
    # sit on top of the point it annotates.
    .transform_calculate('number', 'datum.number + 0.1')
    .mark_text(dy=0.2)
    .encode(
        x=alt.X('year:Q'),
        y=alt.Y('number:Q', axis=alt.Axis(title='z score ( no. events )')),
        text=alt.Text('year'),
    )
)

# `Chart.save` replaces the deprecated `savechart`; the output format is
# inferred from the file extension.
chart.save('z-score-scatter.json')
chart.save('z-score-scatter.html')

chart

<VegaLite 2 object>