-
-
Save benoitdescamps/af5a8e42d5cfc7981e960e4d559dad19 to your computer and use it in GitHub Desktop.
class XGBQuantile(XGBRegressor):
    """XGBRegressor that estimates a conditional quantile of the target.

    The quantile is fitted by plugging a smoothed ("Huberized") quantile
    loss into xgboost as a custom objective. The raw quantile (pinball)
    loss has a zero second derivative almost everywhere, which stalls
    xgboost's Newton-style updates; ``quantile_loss`` works around this by
    smoothing the kink over a width ``quant_delta`` and by injecting
    random gradients for residuals beyond ``quant_thres``.

    Parameters specific to this subclass:
        quant_alpha: quantile level in (0, 1) to estimate (e.g. 0.95).
        quant_delta: width of the smoothed region around the loss kink.
        quant_thres: absolute-residual threshold beyond which the gradient
            is replaced by random +/- ``quant_var`` noise.
        quant_var: magnitude of that random gradient.

    All remaining parameters are forwarded unchanged to ``XGBRegressor``.
    """

    def __init__(self, quant_alpha=0.95, quant_delta=1.0, quant_thres=1.0,
                 quant_var=1.0, base_score=0.5, booster='gbtree',
                 colsample_bylevel=1, colsample_bytree=1, gamma=0,
                 learning_rate=0.1, max_delta_step=0, max_depth=3,
                 min_child_weight=1, missing=None, n_estimators=100,
                 n_jobs=1, nthread=None, objective='reg:linear',
                 random_state=0, reg_alpha=0, reg_lambda=1,
                 scale_pos_weight=1, seed=None, silent=True, subsample=1):
        self.quant_alpha = quant_alpha
        self.quant_delta = quant_delta
        self.quant_thres = quant_thres
        self.quant_var = quant_var
        # NOTE(review): 'nthread', 'seed' and 'silent' are accepted here for
        # backward compatibility, but recent xgboost releases removed them
        # from XGBRegressor — with xgboost >= 1.x this super().__init__ call
        # may need adjusting. TODO confirm against the installed version.
        super().__init__(base_score=base_score, booster=booster,
                         colsample_bylevel=colsample_bylevel,
                         colsample_bytree=colsample_bytree, gamma=gamma,
                         learning_rate=learning_rate,
                         max_delta_step=max_delta_step, max_depth=max_depth,
                         min_child_weight=min_child_weight, missing=missing,
                         n_estimators=n_estimators, n_jobs=n_jobs,
                         nthread=nthread, objective=objective,
                         random_state=random_state, reg_alpha=reg_alpha,
                         reg_lambda=reg_lambda,
                         scale_pos_weight=scale_pos_weight, seed=seed,
                         silent=silent, subsample=subsample)
        self.test = None

    def fit(self, X, y):
        """Fit with the smoothed quantile loss as the boosting objective.

        Returns self, so calls can be chained.
        """
        # Bind the quantile hyper-parameters into the custom objective
        # before delegating to the standard XGBRegressor fit.
        super().set_params(
            objective=partial(XGBQuantile.quantile_loss,
                              alpha=self.quant_alpha,
                              delta=self.quant_delta,
                              threshold=self.quant_thres,
                              var=self.quant_var))
        super().fit(X, y)
        return self

    def predict(self, X):
        """Predict the ``quant_alpha`` conditional quantile for X."""
        return super().predict(X)

    def score(self, X, y):
        """Return the inverse of the summed pinball loss on (X, y).

        Inverting turns the loss into a higher-is-better value, matching
        the convention of scikit-learn model-selection utilities.
        """
        y_pred = super().predict(X)
        score = XGBQuantile.quantile_score(y, y_pred, self.quant_alpha)
        # Guard against a perfect fit (zero pinball loss).
        return float('inf') if score == 0 else 1.0 / score

    @staticmethod
    def quantile_loss(y_true, y_pred, alpha, delta, threshold, var):
        """Gradient and Hessian of the smoothed quantile loss.

        For residuals within ``threshold`` of zero, the smoothed pinball
        gradient/Hessian is used; beyond it the gradient is replaced by a
        random +/- ``var`` step (with unit Hessian) so that splits keep
        being produced where the loss surface is otherwise flat.
        """
        x = y_true - y_pred
        grad = ((x < (alpha - 1.0) * delta) * (1.0 - alpha)
                - ((x >= (alpha - 1.0) * delta) & (x < alpha * delta)) * x / delta
                - alpha * (x > alpha * delta))
        hess = ((x >= (alpha - 1.0) * delta) & (x < alpha * delta)) / delta
        # Randomize the gradient for large residuals (|x| >= threshold).
        rand_sign = 2 * np.random.randint(2, size=len(y_true)) - 1.0
        grad = (np.abs(x) < threshold) * grad - (np.abs(x) >= threshold) * rand_sign * var
        hess = (np.abs(x) < threshold) * hess + (np.abs(x) >= threshold)
        return grad, hess

    @staticmethod
    def original_quantile_loss(y_true, y_pred, alpha, delta):
        """Gradient and Hessian of the smoothed quantile loss without the
        random-gradient trick (kept for reference/comparison)."""
        x = y_true - y_pred
        grad = ((x < (alpha - 1.0) * delta) * (1.0 - alpha)
                - ((x >= (alpha - 1.0) * delta) & (x < alpha * delta)) * x / delta
                - alpha * (x > alpha * delta))
        hess = ((x >= (alpha - 1.0) * delta) & (x < alpha * delta)) / delta
        return grad, hess

    @staticmethod
    def quantile_score(y_true, y_pred, alpha):
        """Summed pinball loss of y_pred against y_true at level alpha."""
        return np.sum(XGBQuantile.quantile_cost(x=y_true - y_pred, alpha=alpha))

    @staticmethod
    def quantile_cost(x, alpha):
        """Element-wise pinball loss of residual x at quantile level alpha."""
        return (alpha - 1.0) * x * (x < 0) + alpha * x * (x >= 0)

    @staticmethod
    def get_split_gain(gradient, hessian, l=1):
        """Gain of splitting the (pre-sorted) sample at each index.

        ``gradient``/``hessian`` are per-sample arrays; ``l`` is the L2
        regularization term added to each Hessian sum.
        """
        split_gain = []
        for i in range(gradient.shape[0]):
            left = np.sum(gradient[:i]) / (np.sum(hessian[:i]) + l)
            right = np.sum(gradient[i:]) / (np.sum(hessian[i:]) + l)
            whole = np.sum(gradient) / (np.sum(hessian) + l)
            split_gain.append(left + right - whole)
        return np.array(split_gain)
Could you explain this line: `score = (self.quant_alpha-1.0)*(y-y_pred)*(y<=y_pred)`? (The multiplication operators appear to have been stripped by the site's formatting.)
As written, this line is not working in the code.
Sorry for the late response. This code came from an old post i migrated from an old website to medium.
Code has been refactored.
I have also created a collab notebook
I used your Colab notebook and changed the simulation size to 1000, 10000, and 100000, and found that your 0.95 bound still outperforms the GBR, but the 0.05 bound is not as good. The charts also show some clear issues. Are more tweaks coming? Thanks.
Hi, I have downloaded the code from the collab notebook and cannot get it to run. I keep getting the error 'XGBQuantile' object has no attribute 'nthread' when I try to use set_params() or fit() on the model. Should this still run as is?
I am also having the same issue as stweddle, any potential solution? Thanks!
The nthread issue arises due to the newer xgboost package version 1.7.5. To fix it I had to manually set some of the regressor parameters:
from sklearn.utils._openmp_helpers import _openmp_effective_n_threads
reg = XGBQuantile(...)
reg.nthread = _openmp_effective_n_threads()
reg.seed = None
reg.silent = True
Can you clarify your
quantile_loss
function? It seems like you have assignments on top of assignments, and that you just overwrite whatever you put into `grad`
and `hess`
in the first two assignments with the two later assignments.