Created
August 25, 2020 17:01
-
-
Save jeethu/9039f5bdaa69692cff5cab839c048d67 to your computer and use it in GitHub Desktop.
Numerai Model Evaluation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import functools | |
import matplotlib.pyplot as plt | |
import matplotlib.patches as mpatches | |
import matplotlib.table as table | |
import numpy as np | |
import pandas as pd | |
from scipy.stats import spearmanr | |
# Tournament identifier; the target and prediction column names derive from it.
TOURNAMENT_NAME = "kazutsugi"
TARGET_NAME = f"target_{TOURNAMENT_NAME}"
PREDICTION_NAME = f"prediction_{TOURNAMENT_NAME}"

# Era numbers of the two validation subsets: 121..132 and 197..206 inclusive.
VAL1_ERAS = tuple(range(121, 132 + 1))
VAL2_ERAS = tuple(range(197, 206 + 1))
# All validation eras, val1 first and then val2.
VAL_ERAS = (*VAL1_ERAS, *VAL2_ERAS)
def score(df, prediction_col):
    """Correlate the target column with the percentile-ranked predictions.

    Args:
        df: DataFrame holding the ``TARGET_NAME`` column and ``prediction_col``.
        prediction_col: Name of the column containing the model predictions.

    Returns:
        The off-diagonal entry of the 2x2 ``np.corrcoef`` matrix, i.e. the
        Pearson correlation between the targets and the ranked predictions.
    """
    targets = df[TARGET_NAME]
    # Predictions are converted to percentile ranks before correlating;
    # method="first" resolves ties by order of appearance.
    ranked_predictions = df[prediction_col].rank(pct=True, method="first")
    correlation_matrix = np.corrcoef(targets, ranked_predictions)
    return correlation_matrix[0, 1]
def feature_exposures(df, prediction_col):
    """Compute the Spearman correlation of the predictions with every feature.

    A feature is any column whose label is a string starting with
    ``"feature"``. Non-string column labels are skipped instead of raising
    ``AttributeError`` on ``startswith``.

    Args:
        df: DataFrame containing the feature columns and ``prediction_col``.
        prediction_col: Name of the column containing the model predictions.

    Returns:
        A 1-D ``np.ndarray`` with one Spearman coefficient per feature column,
        in column order. It is empty when ``df`` has no feature columns.
    """
    feature_names = [
        name for name in df.columns
        if isinstance(name, str) and name.startswith("feature")
    ]
    # Look the prediction column up once rather than once per feature.
    predictions = df[prediction_col]
    return np.array(
        [spearmanr(predictions, df[name])[0] for name in feature_names]
    )
def max_feature_exposure(df, prediction_col=PREDICTION_NAME):
    """Return the largest absolute per-feature exposure of the predictions.

    Args:
        df: DataFrame containing the feature columns and ``prediction_col``.
        prediction_col: Name of the prediction column; defaults to
            ``PREDICTION_NAME``.

    Returns:
        The maximum of the absolute values returned by ``feature_exposures``.
    """
    exposures = feature_exposures(df, prediction_col=prediction_col)
    absolute_exposures = np.abs(exposures)
    return absolute_exposures.max()
def feature_exposure(df, prediction_col=PREDICTION_NAME):
    """Return the root-mean-square of the per-feature exposures.

    ``feature_exposures`` requires a prediction column, so one is accepted
    here and forwarded; calling the helper without it raises ``TypeError``.

    Args:
        df: DataFrame containing the feature columns and ``prediction_col``.
        prediction_col: Name of the prediction column; defaults to
            ``PREDICTION_NAME``, matching ``max_feature_exposure``.

    Returns:
        ``sqrt(mean(exposure ** 2))`` over the values returned by
        ``feature_exposures``.
    """
    exposures = feature_exposures(df, prediction_col=prediction_col)
    return np.sqrt(np.mean(np.square(exposures)))
def _era_number(era_label):
    """Return the integer era for a label such as ``"era121"`` or a bare number."""
    if isinstance(era_label, str) and era_label.startswith("era"):
        return int(era_label[len("era"):])
    return int(era_label)


def _era_bar_color(era_label, era_score):
    """Pick one era's bar colour: red if negative, otherwise by validation set."""
    if era_score < 0:
        return "tab:red"
    era = _era_number(era_label)
    if era in VAL1_ERAS:
        return "tab:blue"
    if era in VAL2_ERAS:
        return "tab:orange"
    return "tab:gray"


def _era_legend_handles(colors):
    """Build one legend patch per bar colour that actually appears in *colors*."""
    legend_entries = (
        ("tab:blue", "val1"),
        ("tab:orange", "val2"),
        ("tab:red", "negative"),
        ("tab:gray", "other"),
    )
    used = set(colors)
    return [
        mpatches.Patch(color=color, label=label)
        for color, label in legend_entries
        if color in used
    ]


def evaluate_df(df, prediction_col=PREDICTION_NAME, plot_title="Validation"):
    """Plot per-era scores and feature exposures, then print CORR and Sharpe.

    The left panel is a bar chart of the per-era ``score`` with a horizontal
    line at the mean. Bars are red for negative scores, blue for eras in
    ``VAL1_ERAS``, orange for eras in ``VAL2_ERAS`` and gray otherwise. The
    right panel shows the absolute feature exposures with a line at their
    maximum and a small table holding the RMS and max exposure.

    Args:
        df: DataFrame with an ``"era"`` column, the target column used by
            ``score``, the feature columns and ``prediction_col``. Era labels
            are expected to look like ``"era121"``; bare numbers are accepted
            too.
        prediction_col: Name of the prediction column; defaults to
            ``PREDICTION_NAME``.
        plot_title: Title of the per-era bar chart.

    Returns:
        None. The figure is shown with ``plt.show()`` and the mean per-era
        score and its ratio to the per-era standard deviation are printed.
    """
    score_fn = functools.partial(score, prediction_col=prediction_col)
    era_scores = df.groupby("era").apply(score_fn)
    corr = era_scores.mean()
    sharpe = corr / era_scores.std()

    colors = [
        _era_bar_color(era_label, era_score)
        for era_label, era_score in era_scores.items()
    ]

    _, axes = plt.subplots(1, 2, figsize=(15, 5))

    # Left panel: per-era scores.
    era_ax = era_scores.plot(
        kind="bar",
        legend=False,
        use_index=True,
        color=colors,
        title=plot_title,
        ax=axes[0],
    )
    mean_line = era_ax.axhline(
        y=corr, linewidth=1, color="tab:blue", label="Mean CORR"
    )
    # Legend handles are built explicitly: the automatic handle of a bar
    # container takes the colour of its first bar, which is wrong whenever
    # the first era is negative or not in val1.
    era_ax.legend(handles=_era_legend_handles(colors) + [mean_line])

    # Right panel: absolute feature exposures.
    fe = feature_exposures(df, prediction_col=prediction_col)
    max_fe = np.max(np.abs(fe))
    rms_fe = np.sqrt(np.mean(np.square(fe)))
    abs_exposures = pd.Series(np.abs(fe))
    fe_ax = abs_exposures.plot(
        kind="bar",
        title="Feature exposures",
        use_index=False,
        ax=axes[1],
        label="feature exp",
    )
    fe_ax.axhline(
        y=abs_exposures.max(),
        linewidth=1,
        color="tab:red",
        label="max feature exp",
    )
    fe_ax.legend()
    # One tick per feature would be unreadable, so the x axis is hidden.
    fe_ax.axes.get_xaxis().set_visible(False)
    celltext = [[f"{rms_fe:.4f}"], [f"{max_fe:.4f}"]]
    table.table(fe_ax, cellText=celltext, rowLabels=["fe", "max fe"])

    plt.show()

    print(f"Val CORR: {corr:.4f}")
    print(f"Val Sharpe: {sharpe:.4f}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment