jeethu · August 25, 2020 17:01
diff --git a/evaluation.py b/evaluation.py
 import functools

 import matplotlib.pyplot as plt
 import matplotlib.patches as mpatches
 import matplotlib.table as table
 import numpy as np
 import pandas as pd
 from scipy.stats import spearmanr

 # Tournament identifier; the target and prediction column names are
 # derived from it by prefixing.
 TOURNAMENT_NAME = "kazutsugi"
 TARGET_NAME = "target_" + TOURNAMENT_NAME
 PREDICTION_NAME = "prediction_" + TOURNAMENT_NAME

 # Era numbers of the two validation ranges: 121..132 and 197..206
 # (upper bounds of range() are exclusive). VAL_ERAS is both, in order.
 VAL1_ERAS = tuple(range(121, 133))
 VAL2_ERAS = tuple(range(197, 207))
 VAL_ERAS = (*VAL1_ERAS, *VAL2_ERAS)


 def score(df, prediction_col):
     """Return the correlation between the target and the ranked predictions.

     The values of ``df[prediction_col]`` are converted to percentile ranks
     (``method="first"``: ties are ranked in order of appearance) and
     correlated with ``df[TARGET_NAME]`` using ``np.corrcoef``.

     Args:
         df: DataFrame holding the ``TARGET_NAME`` column and the
             prediction column.
         prediction_col: Name of the column containing the predictions.

     Returns:
         The off-diagonal entry of the 2x2 correlation matrix.
     """
     targets = df[TARGET_NAME]
     ranked_predictions = df[prediction_col].rank(pct=True, method="first")
     correlation_matrix = np.corrcoef(targets, ranked_predictions)
     # Entry [0, 1] is the correlation between the two inputs.
     return correlation_matrix[0, 1]


 def feature_exposures(df, prediction_col):
     """Return the Spearman correlation of the predictions with each feature.

     Every column of ``df`` whose name starts with ``"feature"`` counts as
     a feature; all other columns are ignored.

     Args:
         df: DataFrame holding the feature columns and the prediction column.
         prediction_col: Name of the column containing the predictions.

     Returns:
         A NumPy array with one correlation per feature column, in the
         order the columns appear in ``df``.
     """
     feature_columns = [column for column in df.columns
                        if column.startswith("feature")]
     # spearmanr returns (correlation, p-value); only the correlation is kept.
     return np.array([
         spearmanr(df[prediction_col], df[column])[0]
         for column in feature_columns
     ])


 def max_feature_exposure(df, prediction_col=PREDICTION_NAME):
     """Return the largest absolute feature exposure of the predictions.

     Args:
         df: DataFrame holding the feature columns and the prediction column.
         prediction_col: Name of the prediction column; defaults to
             ``PREDICTION_NAME``.

     Returns:
         The maximum of the absolute values returned by
         ``feature_exposures``.
     """
     exposures = feature_exposures(df, prediction_col=prediction_col)
     magnitudes = np.abs(exposures)
     return np.max(magnitudes)


 def feature_exposure(df, prediction_col=PREDICTION_NAME):
     """Return the root-mean-square feature exposure of the predictions.

     Args:
         df: DataFrame holding the feature columns and the prediction column.
         prediction_col: Name of the prediction column; defaults to
             ``PREDICTION_NAME`` (same default as ``max_feature_exposure``).

     Returns:
         The square root of the mean of the squared values returned by
         ``feature_exposures``.
     """
     # feature_exposures requires the prediction column; the previous call
     # omitted it and therefore always raised TypeError.
     exposures = feature_exposures(df, prediction_col=prediction_col)
     return np.sqrt(np.mean(np.square(exposures)))


 def evaluate_df(df, prediction_col=PREDICTION_NAME, plot_title="Validation"):
     """Plot per-era scores and feature exposures, then print CORR and Sharpe.

     The left panel is a bar chart of ``score`` computed per ``"era"`` group,
     with a horizontal line at the mean. The right panel is a bar chart of
     the absolute feature exposures with a line at their maximum and a small
     table showing the RMS ("fe") and maximum ("max fe") exposure.

     Args:
         df: DataFrame with an ``"era"`` column, the target column, the
             feature columns and the prediction column. Era labels are
             expected to look like ``"era121"`` (prefix + integer).
         prediction_col: Name of the prediction column; defaults to
             ``PREDICTION_NAME``.
         plot_title: Title of the per-era score panel.

     Returns:
         None. The figure is shown and the mean era score and its ratio to
         the standard deviation of era scores are printed.
     """
     per_era_scores = df.groupby("era").apply(
         functools.partial(score, prediction_col=prediction_col))
     _fig, axes = plt.subplots(1, 2, figsize=(15, 5))

     def bar_color(era_label, era_score):
         # Drop the "era" prefix to recover the era number.
         era_number = int(era_label[len("era"):])
         # A negative score takes precedence over the validation-set colour.
         if era_score < 0:
             return "tab:red"
         if era_number in VAL1_ERAS:
             return "tab:blue"
         if era_number in VAL2_ERAS:
             return "tab:orange"
         return "tab:gray"

     bar_colors = [bar_color(label, value)
                   for label, value in per_era_scores.items()]

     # Left panel: one bar per era plus the mean score line.
     score_ax = per_era_scores.plot(
         x="era",
         kind="bar",
         legend=False,
         use_index=True,
         color=bar_colors,
         title=plot_title,
         label="val1",
         ax=axes[0],
     )
     mean_corr = per_era_scores.mean()
     score_ax.axhline(y=mean_corr,
                      linewidth=1, color='tab:blue', label="Mean CORR")
     # The bars contribute a single legend entry ("val1"), so the orange
     # "val2" entry is added through a manual patch.
     val2_patch = mpatches.Patch(color='tab:orange', label='val2')
     legend_handles, _labels = score_ax.get_legend_handles_labels()
     score_ax.legend(handles=legend_handles + [val2_patch])

     # Right panel: absolute exposure per feature plus the maximum line.
     exposures = feature_exposures(df, prediction_col=prediction_col)
     max_fe = np.max(np.abs(exposures))
     rms_fe = np.sqrt(np.mean(np.square(exposures)))
     abs_exposures = pd.Series(np.abs(exposures))
     exposure_ax = abs_exposures.plot(
         kind="bar",
         title="Feature exposures",
         use_index=False,
         ax=axes[1],
         label="feature exp",
     )
     exposure_ax.axhline(y=abs_exposures.max(), linewidth=1,
                         color="tab:red", label="max feature exp")
     exposure_ax.legend()
     exposure_ax.axes.get_xaxis().set_visible(False)

     summary_cells = [[f"{rms_fe:.4f}"], [f"{max_fe:.4f}"]]
     table.table(exposure_ax, cellText=summary_cells,
                 rowLabels=["fe", "max fe"])

     plt.show()

     sharpe = mean_corr / per_era_scores.std()
     print(f"Val CORR: {mean_corr:.4f}")
     print(f"Val Sharpe: {sharpe:.4f}")
	import functools

	import matplotlib.pyplot as plt
	import matplotlib.patches as mpatches
	import matplotlib.table as table
	import numpy as np
	import pandas as pd
	from scipy.stats import spearmanr

	# Tournament identifier; the target and prediction column names are
	# derived from it by prefixing.
	TOURNAMENT_NAME = "kazutsugi"
	TARGET_NAME = "target_" + TOURNAMENT_NAME
	PREDICTION_NAME = "prediction_" + TOURNAMENT_NAME

	# Era numbers of the two validation ranges: 121..132 and 197..206
	# (upper bounds of range() are exclusive). VAL_ERAS is both, in order.
	VAL1_ERAS = tuple(range(121, 133))
	VAL2_ERAS = tuple(range(197, 207))
	VAL_ERAS = (*VAL1_ERAS, *VAL2_ERAS)


	def score(df, prediction_col):
	    """Return the correlation between the target and the ranked predictions.

	    The values of ``df[prediction_col]`` are converted to percentile ranks
	    (``method="first"``: ties are ranked in order of appearance) and
	    correlated with ``df[TARGET_NAME]`` using ``np.corrcoef``.

	    Args:
	        df: DataFrame holding the ``TARGET_NAME`` column and the
	            prediction column.
	        prediction_col: Name of the column containing the predictions.

	    Returns:
	        The off-diagonal entry of the 2x2 correlation matrix.
	    """
	    targets = df[TARGET_NAME]
	    ranked_predictions = df[prediction_col].rank(pct=True, method="first")
	    # Entry [0, 1] is the correlation between the two inputs.
	    return np.corrcoef(targets, ranked_predictions)[0, 1]


	def feature_exposures(df, prediction_col):
	    """Return the Spearman correlation of the predictions with each feature.

	    Every column of ``df`` whose name starts with ``"feature"`` counts as
	    a feature; all other columns are ignored.

	    Args:
	        df: DataFrame holding the feature columns and the prediction column.
	        prediction_col: Name of the column containing the predictions.

	    Returns:
	        A NumPy array with one correlation per feature column, in the
	        order the columns appear in ``df``.
	    """
	    feature_names = [f for f in df.columns
	                     if f.startswith("feature")]
	    # spearmanr returns (correlation, p-value); only the correlation is kept.
	    return np.array([
	        spearmanr(df[prediction_col], df[f])[0]
	        for f in feature_names
	    ])


	def max_feature_exposure(df, prediction_col=PREDICTION_NAME):
	    """Return the largest absolute feature exposure of the predictions.

	    Args:
	        df: DataFrame holding the feature columns and the prediction column.
	        prediction_col: Name of the prediction column; defaults to
	            ``PREDICTION_NAME``.

	    Returns:
	        The maximum of the absolute values returned by
	        ``feature_exposures``.
	    """
	    fe = feature_exposures(df, prediction_col=prediction_col)
	    return np.max(np.abs(fe))


	def feature_exposure(df, prediction_col=PREDICTION_NAME):
	    """Return the root-mean-square feature exposure of the predictions.

	    Args:
	        df: DataFrame holding the feature columns and the prediction column.
	        prediction_col: Name of the prediction column; defaults to
	            ``PREDICTION_NAME`` (same default as ``max_feature_exposure``).

	    Returns:
	        The square root of the mean of the squared values returned by
	        ``feature_exposures``.
	    """
	    # feature_exposures requires the prediction column; the previous call
	    # omitted it and therefore always raised TypeError.
	    fe = feature_exposures(df, prediction_col=prediction_col)
	    return np.sqrt(np.mean(np.square(fe)))


	def evaluate_df(df, prediction_col=PREDICTION_NAME, plot_title="Validation"):
	    """Plot per-era scores and feature exposures, then print CORR and Sharpe.

	    The left panel is a bar chart of ``score`` computed per ``"era"`` group,
	    with a horizontal line at the mean. The right panel is a bar chart of
	    the absolute feature exposures with a line at their maximum and a small
	    table showing the RMS ("fe") and maximum ("max fe") exposure.

	    Args:
	        df: DataFrame with an ``"era"`` column, the target column, the
	            feature columns and the prediction column. Era labels are
	            expected to look like ``"era121"`` (prefix + integer).
	        prediction_col: Name of the prediction column; defaults to
	            ``PREDICTION_NAME``.
	        plot_title: Title of the per-era score panel.

	    Returns:
	        None. The figure is shown and the mean era score and its ratio to
	        the standard deviation of era scores are printed.
	    """
	    score_fn = functools.partial(score, prediction_col=prediction_col)
	    era_scores = df.groupby("era").apply(score_fn)
	    colors = []
	    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
	    for i in era_scores.index:
	        # Drop the "era" prefix to recover the era number.
	        era = int(i[len("era"):])
	        # A negative score takes precedence over the validation-set colour.
	        if era_scores[i] < 0:
	            colors.append("tab:red")
	        elif era in VAL1_ERAS:
	            colors.append("tab:blue")
	        elif era in VAL2_ERAS:
	            colors.append("tab:orange")
	        else:
	            colors.append("tab:gray")

	    # Left panel: one bar per era plus the mean score line.
	    plt_ax = era_scores.plot(x="era",
	                             kind="bar", legend=False, use_index=True,
	                             color=colors,
	                             title=plot_title,
	                             label="val1",
	                             ax=axes[0])
	    plt_ax.axhline(y=era_scores.mean(),
	                   linewidth=1, color='tab:blue', label="Mean CORR")
	    # The bars contribute a single legend entry ("val1"), so the orange
	    # "val2" entry is added through a manual patch.
	    red_patch = mpatches.Patch(color='tab:orange', label='val2')
	    handles = plt_ax.get_legend_handles_labels()
	    plt_ax.legend(handles=handles[0] + [red_patch])

	    # Right panel: absolute exposure per feature plus the maximum line.
	    fe = feature_exposures(df, prediction_col=prediction_col)
	    max_fe = np.max(np.abs(fe))
	    rms_fe = np.sqrt(np.mean(np.square(fe)))
	    s = pd.Series(np.abs(fe))
	    plt_ax = s.plot(kind="bar", title="Feature exposures",
	                    use_index=False, ax=axes[1], label="feature exp")
	    plt_ax.axhline(y=s.max(), linewidth=1, color="tab:red", label="max feature exp")
	    plt_ax.legend()
	    plt_ax.axes.get_xaxis().set_visible(False)

	    celltext = [[f"{rms_fe:.4f}"], [f"{max_fe:.4f}"]]
	    table.table(plt_ax, cellText=celltext, rowLabels=["fe", "max fe"])

	    plt.show()

	    corr = era_scores.mean()
	    sharpe = corr / era_scores.std()
	    print(f"Val CORR: {corr:.4f}")
	    print(f"Val Sharpe: {sharpe:.4f}")