import numpy as np
import pandas as pd
from collections import Counter


def qwk(a1, a2):
    """
    Quadratic Weighted Kappa (QWK) computed from rating histograms.

    Source: https://www.kaggle.com/c/data-science-bowl-2019/discussion/114133#latest-660168

    :param a1: first array of integer ratings on the 0..3 scale
    :param a2: second array of integer ratings on the 0..3 scale
    :return: the QWK score (1.0 means perfect agreement)
    """
    max_rat = 3  # ratings range from 0 to max_rat
    a1 = np.asarray(a1, dtype=int)
    a2 = np.asarray(a2, dtype=int)

    hist1 = np.zeros((max_rat + 1, ))
    hist2 = np.zeros((max_rat + 1, ))

    # Observed disagreement: accumulate the rating histograms and the
    # squared difference between each pair of ratings.
    o = 0
    for k in range(a1.shape[0]):
        i, j = a1[k], a2[k]
        hist1[i] += 1
        hist2[j] += 1
        o += (i - j) * (i - j)

    # Expected disagreement under independence of the two rating
    # distributions (outer product of the histograms).
    e = 0
    for i in range(max_rat + 1):
        for j in range(max_rat + 1):
            e += hist1[i] * hist2[j] * (i - j) * (i - j)
    e = e / a1.shape[0]

    return 1 - o / e
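
# Hypothetical sanity check (not from the original gist): identical rating
# vectors give a QWK of 1.0, and a single off-by-one disagreement lowers it:
#   qwk([0, 1, 2, 3], [0, 1, 2, 3])  # -> 1.0
#   qwk([0, 1, 2, 3], [0, 1, 2, 2])  # -> 0.875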


class OptimizedRounderWithHistgram(object):
    """
    Histogram-based rounder: chooses class boundaries so that the predicted
    class distribution matches the empirical distribution of the labels.
    """

    def __init__(self):
        self.coef_ = None
        self.dist = None

    def fit(self, X, y):
        # Empirical class frequencies of the training labels.
        self.dist = Counter(y)
        for k in self.dist:
            self.dist[k] /= y.shape[0]

    def classify(self, bound, x):
        # Map a raw prediction to a class by comparing it against the bounds.
        if x <= bound[0]:
            return 0
        elif x <= bound[1]:
            return 1
        elif x <= bound[2]:
            return 2
        else:
            return 3

    def predict(self, preds):
        # Place the boundaries at the percentiles of the raw predictions
        # that reproduce the label distribution seen in fit().
        acum = 0
        bound = np.zeros(shape=(3,))
        for i in range(3):
            acum += self.dist[i]
            bound[i] = np.percentile(preds, acum * 100)
        self.coef_ = bound
        return np.array(list(map(lambda x: self.classify(bound, x), preds)))

    def coefficients(self):
        return self.coef_
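
# Illustration (not from the gist): if the training labels are 10% class 0,
# 30% class 1, 40% class 2 and 20% class 3, predict() places the boundaries
# at the 10th, 40th and 80th percentiles of the raw predictions, so the
# rounded output reproduces that class balance.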


def to_bins(x, borders):
    # Return the index of the first border that x does not exceed.
    for i in range(len(borders)):
        if x <= borders[i]:
            return i
    return len(borders)


class OptimizedRounder(object):
    """
    An optimizer for rounding thresholds
    to maximize the Quadratic Weighted Kappa (QWK) score.

    Reference: https://www.kaggle.com/naveenasaithambi/optimizedrounder-improved
    """

    def __init__(self):
        self.coef_ = 0

    def _kappa_loss(self, coef, X, y, idx):
        # Negative QWK of the binned predictions; lower is better.
        X_p = np.array([to_bins(pred, coef) for pred in X])
        ll = -qwk(y, X_p)
        return ll

    def fit(self, X, y):
        coef = [0.5, 1.5, 2.5]
        golden1 = 0.618
        golden2 = 1 - golden1
        ab_start = [(0.5, 1.5), (1, 2), (1.5, 2.5)]
        for it1 in range(10):
            for idx in range(len(coef)):
                # Golden-section search over one threshold at a time,
                # keeping the other thresholds fixed.
                a, b = ab_start[idx]
                # Losses at the interval endpoints.
                coef[idx] = a
                la = self._kappa_loss(coef, X, y, idx)
                coef[idx] = b
                lb = self._kappa_loss(coef, X, y, idx)
                for it in range(20):
                    # Shrink the interval toward the endpoint with the lower loss.
                    if la > lb:
                        a = b - (b - a) * golden1
                        coef[idx] = a
                        la = self._kappa_loss(coef, X, y, idx)
                    else:
                        b = b - (b - a) * golden2
                        coef[idx] = b
                        lb = self._kappa_loss(coef, X, y, idx)
        self.coef_ = {'x': coef}

    def predict(self, X, coef=None):
        """
        Make predictions with the specified thresholds.

        :param X: the raw predictions
        :param coef: a list of coefficients that will be used for rounding
        """
        if coef is None:
            coef = self.coefficients()
        return pd.cut(X, [-np.inf] + list(np.sort(coef)) + [np.inf], labels=[0, 1, 2, 3])

    def coefficients(self):
        """
        Return the optimized coefficients.
        """
        return self.coef_['x']
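

Below is a minimal usage sketch, not part of the gist itself. It assumes noisy regression-style outputs (oof_preds) for integer labels (y_train) on the 0-3 scale; both names and the synthetic data are illustrative only.

if __name__ == '__main__':
    rng = np.random.default_rng(0)
    y_train = rng.integers(0, 4, size=1000)                 # hypothetical labels in {0, 1, 2, 3}
    oof_preds = y_train + rng.normal(0.0, 0.7, size=1000)   # hypothetical raw model outputs

    # Threshold search that directly maximizes QWK.
    opt = OptimizedRounder()
    opt.fit(oof_preds, y_train)
    rounded = np.asarray(opt.predict(oof_preds), dtype=int)
    print('thresholds:', opt.coefficients())
    print('QWK (optimized):', qwk(y_train, rounded))

    # Histogram-matching alternative: boundaries taken from the label distribution.
    hist_rounder = OptimizedRounderWithHistgram()
    hist_rounder.fit(None, y_train)
    print('QWK (histogram):', qwk(y_train, hist_rounder.predict(oof_preds)))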