Skip to content

Instantly share code, notes, and snippets.

@jeethu
Created August 25, 2020 17:01
Show Gist options
  • Save jeethu/9039f5bdaa69692cff5cab839c048d67 to your computer and use it in GitHub Desktop.
Save jeethu/9039f5bdaa69692cff5cab839c048d67 to your computer and use it in GitHub Desktop.
Numerai Model Evaluation
import functools
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.table as table
import numpy as np
import pandas as pd
from scipy.stats import spearmanr
# Tournament identifier; the target and prediction column names are derived
# from it.
TOURNAMENT_NAME = "kazutsugi"
TARGET_NAME = f"target_{TOURNAMENT_NAME}"
PREDICTION_NAME = f"prediction_{TOURNAMENT_NAME}"

# Era numbers of the two validation groups (range ends are exclusive, so
# these cover 121..132 and 197..206) and their concatenation.
VAL1_ERAS = tuple(range(121, 133))
VAL2_ERAS = tuple(range(197, 207))
VAL_ERAS = (*VAL1_ERAS, *VAL2_ERAS)
def score(df, prediction_col):
    """Return the correlation between the target and the ranked predictions.

    The predictions in ``prediction_col`` are converted to percentile ranks
    (``method="first"``, so ties are ranked in order of appearance) and the
    Pearson correlation with the ``TARGET_NAME`` column is returned.

    Args:
        df: DataFrame containing ``TARGET_NAME`` and ``prediction_col``.
        prediction_col: Name of the column holding the predictions.

    Returns:
        The off-diagonal entry of the 2x2 correlation matrix.
    """
    targets = df[TARGET_NAME]
    ranked_predictions = df[prediction_col].rank(pct=True, method="first")
    correlation_matrix = np.corrcoef(targets, ranked_predictions)
    return correlation_matrix[0, 1]
def feature_exposures(df, prediction_col):
    """Return the Spearman correlation of the predictions with each feature.

    A column counts as a feature when its name starts with ``"feature"``.

    Args:
        df: DataFrame holding the feature columns and ``prediction_col``.
        prediction_col: Name of the column holding the predictions.

    Returns:
        A NumPy array with one correlation per feature column, in column
        order (empty when ``df`` has no feature columns).
    """
    feature_columns = (
        name for name in df.columns if name.startswith("feature")
    )
    predictions_of = df.__getitem__
    return np.array([
        # Index 0 of the spearmanr result is the correlation coefficient.
        spearmanr(predictions_of(prediction_col), df[column])[0]
        for column in feature_columns
    ])
def max_feature_exposure(df, prediction_col=PREDICTION_NAME):
    """Return the largest absolute feature exposure of the predictions.

    Args:
        df: DataFrame holding the feature columns and ``prediction_col``.
        prediction_col: Name of the column holding the predictions.

    Returns:
        The maximum of the absolute per-feature exposures computed by
        ``feature_exposures``.
    """
    exposures = feature_exposures(df, prediction_col=prediction_col)
    magnitudes = np.abs(exposures)
    return np.max(magnitudes)
def feature_exposure(df, prediction_col=PREDICTION_NAME):
    """Return the root-mean-square feature exposure of the predictions.

    Args:
        df: DataFrame holding the feature columns and ``prediction_col``.
        prediction_col: Name of the column holding the predictions.
            Defaults to ``PREDICTION_NAME``, mirroring
            ``max_feature_exposure``.

    Returns:
        The square root of the mean squared per-feature exposure computed
        by ``feature_exposures``.
    """
    # feature_exposures requires the prediction column; the previous call
    # omitted it and therefore always raised TypeError.
    exposures = feature_exposures(df, prediction_col=prediction_col)
    return np.sqrt(np.mean(np.square(exposures)))
def evaluate_df(df, prediction_col=PREDICTION_NAME, plot_title="Validation"):
    """Plot per-era scores and feature exposures, then print summary stats.

    The left panel is a bar chart of the per-era ``score`` values with a
    horizontal line at their mean; the right panel is a bar chart of the
    absolute feature exposures with a line at their maximum and a small
    table showing the RMS and maximum exposure. After the figure is shown,
    the mean era score and its ratio to the era-score standard deviation
    are printed.

    Args:
        df: DataFrame with an ``"era"`` column, the target column, the
            feature columns and ``prediction_col``. Era labels are parsed
            as the text ``"era"`` followed by an integer.
        prediction_col: Name of the column holding the predictions.
        plot_title: Title of the per-era score panel.
    """
    per_era_scores = df.groupby("era").apply(
        functools.partial(score, prediction_col=prediction_col)
    )
    _fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    def bar_color(era_label, era_score):
        # The label is parsed first so a malformed label fails regardless
        # of the score's sign.
        era_number = int(era_label[len("era"):])
        if era_score < 0:
            return "tab:red"
        if era_number in VAL1_ERAS:
            return "tab:blue"
        if era_number in VAL2_ERAS:
            return "tab:orange"
        return "tab:gray"

    bar_colors = [
        bar_color(label, value) for label, value in per_era_scores.items()
    ]

    # Left panel: per-era scores with their mean.
    mean_corr = per_era_scores.mean()
    score_ax = per_era_scores.plot(
        x="era",
        kind="bar",
        legend=False,
        use_index=True,
        color=bar_colors,
        title=plot_title,
        label="val1",
        ax=axes[0],
    )
    score_ax.axhline(
        y=mean_corr, linewidth=1, color="tab:blue", label="Mean CORR"
    )
    # The bar series only contributes one legend entry ("val1"), so the
    # orange val2 entry is added by hand.
    val2_patch = mpatches.Patch(color="tab:orange", label="val2")
    legend_handles, _labels = score_ax.get_legend_handles_labels()
    score_ax.legend(handles=legend_handles + [val2_patch])

    # Right panel: absolute feature exposures with their maximum.
    exposures = feature_exposures(df, prediction_col=prediction_col)
    max_fe = np.max(np.abs(exposures))
    rms_fe = np.sqrt(np.mean(np.square(exposures)))
    abs_exposures = pd.Series(np.abs(exposures))
    exposure_ax = abs_exposures.plot(
        kind="bar",
        title="Feature exposures",
        use_index=False,
        ax=axes[1],
        label="feature exp",
    )
    exposure_ax.axhline(
        y=abs_exposures.max(),
        linewidth=1,
        color="tab:red",
        label="max feature exp",
    )
    exposure_ax.legend()
    exposure_ax.axes.get_xaxis().set_visible(False)
    summary_cells = [[f"{rms_fe:.4f}"], [f"{max_fe:.4f}"]]
    table.table(
        exposure_ax, cellText=summary_cells, rowLabels=["fe", "max fe"]
    )
    plt.show()

    corr = mean_corr
    sharpe = corr / per_era_scores.std()
    print(f"Val CORR: {corr:.4f}")
    print(f"Val Sharpe: {sharpe:.4f}")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment