-
-
Save bshishov/5dc237f59f019b26145648e2124ca1c9 to your computer and use it in GitHub Desktop.
import numpy as np | |
EPSILON = 1e-10 | |
def _error(actual: np.ndarray, predicted: np.ndarray): | |
""" Simple error """ | |
return actual - predicted | |
def _percentage_error(actual: np.ndarray, predicted: np.ndarray): | |
""" | |
Percentage error | |
Note: result is NOT multiplied by 100 | |
""" | |
return _error(actual, predicted) / (actual + EPSILON) | |
def _naive_forecasting(actual: np.ndarray, seasonality: int = 1): | |
""" Naive forecasting method which just repeats previous samples """ | |
return actual[:-seasonality] | |
def _relative_error(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None): | |
""" Relative Error """ | |
if benchmark is None or isinstance(benchmark, int): | |
# If no benchmark prediction provided - use naive forecasting | |
if not isinstance(benchmark, int): | |
seasonality = 1 | |
else: | |
seasonality = benchmark | |
return _error(actual[seasonality:], predicted[seasonality:]) /\ | |
(_error(actual[seasonality:], _naive_forecasting(actual, seasonality)) + EPSILON) | |
return _error(actual, predicted) / (_error(actual, benchmark) + EPSILON) | |
def _bounded_relative_error(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None): | |
""" Bounded Relative Error """ | |
if benchmark is None or isinstance(benchmark, int): | |
# If no benchmark prediction provided - use naive forecasting | |
if not isinstance(benchmark, int): | |
seasonality = 1 | |
else: | |
seasonality = benchmark | |
abs_err = np.abs(_error(actual[seasonality:], predicted[seasonality:])) | |
abs_err_bench = np.abs(_error(actual[seasonality:], _naive_forecasting(actual, seasonality))) | |
else: | |
abs_err = np.abs(_error(actual, predicted)) | |
abs_err_bench = np.abs(_error(actual, benchmark)) | |
return abs_err / (abs_err + abs_err_bench + EPSILON) | |
def _geometric_mean(a, axis=0, dtype=None): | |
""" Geometric mean """ | |
if not isinstance(a, np.ndarray): # if not an ndarray object attempt to convert it | |
log_a = np.log(np.array(a, dtype=dtype)) | |
elif dtype: # Must change the default dtype allowing array type | |
if isinstance(a, np.ma.MaskedArray): | |
log_a = np.log(np.ma.asarray(a, dtype=dtype)) | |
else: | |
log_a = np.log(np.asarray(a, dtype=dtype)) | |
else: | |
log_a = np.log(a) | |
return np.exp(log_a.mean(axis=axis)) | |
def mse(actual: np.ndarray, predicted: np.ndarray): | |
""" Mean Squared Error """ | |
return np.mean(np.square(_error(actual, predicted))) | |
def rmse(actual: np.ndarray, predicted: np.ndarray): | |
""" Root Mean Squared Error """ | |
return np.sqrt(mse(actual, predicted)) | |
def nrmse(actual: np.ndarray, predicted: np.ndarray): | |
""" Normalized Root Mean Squared Error """ | |
return rmse(actual, predicted) / (actual.max() - actual.min()) | |
def me(actual: np.ndarray, predicted: np.ndarray): | |
""" Mean Error """ | |
return np.mean(_error(actual, predicted)) | |
def mae(actual: np.ndarray, predicted: np.ndarray): | |
""" Mean Absolute Error """ | |
return np.mean(np.abs(_error(actual, predicted))) | |
def mad(actual: np.ndarray, predicted: np.ndarray): | |
""" Mean Absolute Deviation """ | |
error = _error(actual, predicted) | |
return np.mean(np.abs(error - np.mean(error))) | |
def gmae(actual: np.ndarray, predicted: np.ndarray): | |
""" Geometric Mean Absolute Error """ | |
return _geometric_mean(np.abs(_error(actual, predicted))) | |
def mdae(actual: np.ndarray, predicted: np.ndarray): | |
""" Median Absolute Error """ | |
return np.median(np.abs(_error(actual, predicted))) | |
def mpe(actual: np.ndarray, predicted: np.ndarray): | |
""" Mean Percentage Error """ | |
return np.mean(_percentage_error(actual, predicted)) | |
def mape(actual: np.ndarray, predicted: np.ndarray): | |
""" | |
Mean Absolute Percentage Error | |
Properties: | |
+ Easy to interpret | |
+ Scale independent | |
- Biased, not symmetric | |
- Undefined when actual[t] == 0 | |
Note: result is NOT multiplied by 100 | |
""" | |
return np.mean(np.abs(_percentage_error(actual, predicted))) | |
def mdape(actual: np.ndarray, predicted: np.ndarray): | |
""" | |
Median Absolute Percentage Error | |
Note: result is NOT multiplied by 100 | |
""" | |
return np.median(np.abs(_percentage_error(actual, predicted))) | |
def smape(actual: np.ndarray, predicted: np.ndarray): | |
""" | |
Symmetric Mean Absolute Percentage Error | |
Note: result is NOT multiplied by 100 | |
""" | |
return np.mean(2.0 * np.abs(actual - predicted) / ((np.abs(actual) + np.abs(predicted)) + EPSILON)) | |
def smdape(actual: np.ndarray, predicted: np.ndarray): | |
""" | |
Symmetric Median Absolute Percentage Error | |
Note: result is NOT multiplied by 100 | |
""" | |
return np.median(2.0 * np.abs(actual - predicted) / ((np.abs(actual) + np.abs(predicted)) + EPSILON)) | |
def maape(actual: np.ndarray, predicted: np.ndarray): | |
""" | |
Mean Arctangent Absolute Percentage Error | |
Note: result is NOT multiplied by 100 | |
""" | |
return np.mean(np.arctan(np.abs((actual - predicted) / (actual + EPSILON)))) | |
def mase(actual: np.ndarray, predicted: np.ndarray, seasonality: int = 1): | |
""" | |
Mean Absolute Scaled Error | |
Baseline (benchmark) is computed with naive forecasting (shifted by @seasonality) | |
""" | |
return mae(actual, predicted) / mae(actual[seasonality:], _naive_forecasting(actual, seasonality)) | |
def std_ae(actual: np.ndarray, predicted: np.ndarray): | |
""" Normalized Absolute Error """ | |
__mae = mae(actual, predicted) | |
return np.sqrt(np.sum(np.square(_error(actual, predicted) - __mae))/(len(actual) - 1)) | |
def std_ape(actual: np.ndarray, predicted: np.ndarray): | |
""" Normalized Absolute Percentage Error """ | |
__mape = mape(actual, predicted) | |
return np.sqrt(np.sum(np.square(_percentage_error(actual, predicted) - __mape))/(len(actual) - 1)) | |
def rmspe(actual: np.ndarray, predicted: np.ndarray): | |
""" | |
Root Mean Squared Percentage Error | |
Note: result is NOT multiplied by 100 | |
""" | |
return np.sqrt(np.mean(np.square(_percentage_error(actual, predicted)))) | |
def rmdspe(actual: np.ndarray, predicted: np.ndarray): | |
""" | |
Root Median Squared Percentage Error | |
Note: result is NOT multiplied by 100 | |
""" | |
return np.sqrt(np.median(np.square(_percentage_error(actual, predicted)))) | |
def rmsse(actual: np.ndarray, predicted: np.ndarray, seasonality: int = 1): | |
""" Root Mean Squared Scaled Error """ | |
q = np.abs(_error(actual, predicted)) / mae(actual[seasonality:], _naive_forecasting(actual, seasonality)) | |
return np.sqrt(np.mean(np.square(q))) | |
def inrse(actual: np.ndarray, predicted: np.ndarray): | |
""" Integral Normalized Root Squared Error """ | |
return np.sqrt(np.sum(np.square(_error(actual, predicted))) / np.sum(np.square(actual - np.mean(actual)))) | |
def rrse(actual: np.ndarray, predicted: np.ndarray): | |
""" Root Relative Squared Error """ | |
return np.sqrt(np.sum(np.square(actual - predicted)) / np.sum(np.square(actual - np.mean(actual)))) | |
def mre(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None): | |
""" Mean Relative Error """ | |
return np.mean(_relative_error(actual, predicted, benchmark)) | |
def rae(actual: np.ndarray, predicted: np.ndarray): | |
""" Relative Absolute Error (aka Approximation Error) """ | |
return np.sum(np.abs(actual - predicted)) / (np.sum(np.abs(actual - np.mean(actual))) + EPSILON) | |
def mrae(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None): | |
""" Mean Relative Absolute Error """ | |
return np.mean(np.abs(_relative_error(actual, predicted, benchmark))) | |
def mdrae(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None): | |
""" Median Relative Absolute Error """ | |
return np.median(np.abs(_relative_error(actual, predicted, benchmark))) | |
def gmrae(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None): | |
""" Geometric Mean Relative Absolute Error """ | |
return _geometric_mean(np.abs(_relative_error(actual, predicted, benchmark))) | |
def mbrae(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None): | |
""" Mean Bounded Relative Absolute Error """ | |
return np.mean(_bounded_relative_error(actual, predicted, benchmark)) | |
def umbrae(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None): | |
""" Unscaled Mean Bounded Relative Absolute Error """ | |
__mbrae = mbrae(actual, predicted, benchmark) | |
return __mbrae / (1 - __mbrae) | |
def mda(actual: np.ndarray, predicted: np.ndarray): | |
""" Mean Directional Accuracy """ | |
return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - actual[:-1])).astype(int)) | |
METRICS = { | |
'mse': mse, | |
'rmse': rmse, | |
'nrmse': nrmse, | |
'me': me, | |
'mae': mae, | |
'mad': mad, | |
'gmae': gmae, | |
'mdae': mdae, | |
'mpe': mpe, | |
'mape': mape, | |
'mdape': mdape, | |
'smape': smape, | |
'smdape': smdape, | |
'maape': maape, | |
'mase': mase, | |
'std_ae': std_ae, | |
'std_ape': std_ape, | |
'rmspe': rmspe, | |
'rmdspe': rmdspe, | |
'rmsse': rmsse, | |
'inrse': inrse, | |
'rrse': rrse, | |
'mre': mre, | |
'rae': rae, | |
'mrae': mrae, | |
'mdrae': mdrae, | |
'gmrae': gmrae, | |
'mbrae': mbrae, | |
'umbrae': umbrae, | |
'mda': mda, | |
} | |
def evaluate(actual: np.ndarray, predicted: np.ndarray, metrics=('mae', 'mse', 'smape', 'umbrae')): | |
results = {} | |
for name in metrics: | |
try: | |
results[name] = METRICS[name](actual, predicted) | |
except Exception as err: | |
results[name] = np.nan | |
print('Unable to compute metric {0}: {1}'.format(name, err)) | |
return results | |
def evaluate_all(actual: np.ndarray, predicted: np.ndarray): | |
return evaluate(actual, predicted, metrics=set(METRICS.keys())) |
Hey!
I'm guessing the nrmse(actual, predicted)
function is normalised between 0
and 1
?
I believe the Root Mean Squared Scaled Error rmsse
function is wrong. According to the definition on Page 6 in the M5 Competitors Guide.
It is currently:
def rmsse(actual: np.ndarray, predicted: np.ndarray, seasonality: int = 1):
""" Root Mean Squared Scaled Error """
q = np.abs(_error(actual, predicted)) / mae(actual[seasonality:], _naive_forecasting(actual, seasonality))
return np.sqrt(np.mean(np.square(q)))
I believe it should be:
def rmsse(actual: np.ndarray, predicted: np.ndarray, seasonality: int = 1):
""" Root Mean Squared Scaled Error """
q = mse(actual, predicted) / mse(actual[seasonality:], _naive_forecasting(actual, seasonality))
return np.sqrt(q)
It might be that its defined differently somewhere else. Do you have a reference?
Hi, nice finding
I've implemented these metrics couple a years ago, long before M5 competition. I've used these definitions:
scaled error
Mean absolute scaled error (MASE)
Median absolute scaled error (MdASE)
Root mean squared scaled error (RMSSE)
I don't remember an actual reference I used. It seems like M5 introduced an ambiguity.
It seems that these are references I used:
Theodosiou, M. (2011). Forecasting monthly and quarterly time series using STL decomposition. International Journal of Forecasting, 27(4), 1178–1195. doi:10.1016/j.ijforecast.2010.11.002
Shcherbakov, Maxim Vladimirovich, et al. "A survey of forecast error measures." World Applied Sciences Journal 24.24 (2013): 171-176.
Interesting. I had a look at the references and they do indeed define it as you describe. So it looks like there isn't a standard definition. Personally I prefer the M5 definition as your applying the same square-sum-squareroot treatment to both the error and scale factor separately and then dividing one by the other to get a scaled error. But like I said that is personal preference and I'm unsure if it theoretically changes anything.
Wouldn't MASE (mean absolute scaled error) always throw a zero division error?
actual[seasonality:] - actual[:-seasonality]
would always return an array of NaNs and zeroes for any array actual
. Am I missing something here?
OMG, thanks a lot
Hi, thank you for this! It has helped quite a lot!
One question - I am struggling with MDA.
According to wiki the formula should be:
np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - actual[:-1])).astype(int))
instead of:
np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - predicted[:-1])).astype(int))
Could it be that I am missing something important?
Cheers
Hi, thank you for this! It has helped quite a lot!
One question - I am struggling with MDA.
According to wiki the formula should be:
np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - actual[:-1])).astype(int))
instead of:
np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - predicted[:-1])).astype(int))
Could it be that I am missing something important?
Cheers
Hi, thanks for the finding. Looks like you're right and there is actually an error in the gist.
Have checked the sources (Blaskowitz and Herwartz (2009, 2011) On economic evaluation of directional forecasts
, section 2) - they define the predicted part as the indicator of a difference between predicted and actual. Same goes for Bergmeir C., Costantini M., Benítez J. M. On the usefulness of cross-validation for directional forecast evaluation //Computational Statistics & Data Analysis. – 2014
The thing is... wikipedia page was incorrect at the point where I've implemented the formula and you can actually see the diff
Hello.
I thing MASE is implemented incorrectly.
According to
https://en.wikipedia.org/wiki/Mean_absolute_scaled_error#Non_seasonal_time_series
or
https://otexts.com/fpp3/accuracy.html#scaled-errors
code part:
/ mae(actual[seasonality:], _naive_forecasting(actual, seasonality))
should be calculated on training data, not on testing.
Yes, it is confusing at the beginning, but it makes sense because you want to compare your forecasts with naive forecast.
Hello. I thing MASE is implemented incorrectly. According to https://en.wikipedia.org/wiki/Mean_absolute_scaled_error#Non_seasonal_time_series or https://otexts.com/fpp3/accuracy.html#scaled-errors
code part: / mae(actual[seasonality:], _naive_forecasting(actual, seasonality))
should be calculated on training data, not on testing.
Yes, it is confusing at the beginning, but it makes sense because you want to compare your forecasts with naive forecast.
Hi, i've double checked the sources.
Original paper is Hyndman, R. J., & Koehler, A. B. (2006). Another look at measures of forecast accuracy. International Journal of Forecasting, 22(4), 679–688. And also the links you've provided.
I also might be missing something, however the only place your forecasts is used is in numerator (mae(actual, predicted)
). And the denominator act as a baseline forecast. I.e. MASE answers question about how good your forecasting is (using MAE) relative to basic naive forecasting which is basically subtracting samples with time lag.
Regarding terminology, in paper, Y_t
stands for actual
, known data, while ^y_t
, F_t
or predicted
stands for something your algorithm produces, i.e. forecast. There is no division on training and testing done in these functions - it is up to you how you divide your original series into training and testing parts. But known data (training or testing samples) should got into the actual
parameters, since it is known data. While your algorithm (or neural network) output (aka forecast) should go into predicted
argument.
Wouldn't MASE (mean absolute scaled error) always throw a zero division error?
actual[seasonality:] - actual[:-seasonality]
would always return an array of NaNs and zeroes for any arrayactual
. Am I missing something here?
No it would not (at least for small seasonality), it is just a fancy way to compute a series of differences using python slices and numpy arrays:
seasonality = 1
array = [1, 2, 4]
assert array[seasonality:] == [2, 4] # take items from array starting from index = seasonality
assert array[:-seasonality] == [1, 2] # take items from array until index = len(array) - seasonality
# pairwise subtraction
result = np.array(actual[seasonality:]) - np.array(actual[:-seasonality])
assert list(result) == [2 - 1, 4 - 2]
print(result)
thanks, this helps a lot!
Hi! I was wondering about the equality of MAE and MAD.
The MAE is defined as
The MAD, however, is defined as the mean absolute difference around a central/reference point
This reference point is the mean (or the median) of the error. Only if we explicitly set the reference point to zero, MAE = MAD (as also explained in this paper). Hence, I think the general implementation of the MAD should look like:
def mad(actual: np.ndarray, predicted: np.ndarray):
""" Mean Absolute Deviation """
error = _error(actual, predicted)
return np.mean(np.abs(error - np.mean(error)))
pandas
has a mad()
function, which also follows this definition (see here).
Hello, thanks for this. This saved a great amount of time.
However, I believe the mda (Mean Directional Accuracy) Function is wrong.
Instead of comparing the signs of the predicted values with the signs of the actual values, you are comparing the signs of the predicted values with the signs of the differences between the actual values.
Currently -
def mda(actual: np.ndarray, predicted: np.ndarray):
""" Mean Directional Accuracy """
return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - actual[:-1])).astype(int))
It should be -
def mda(actual: np.ndarray, predicted: np.ndarray):
""" Mean Directional Accuracy """
return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - predicted[:-1])).astype(int))
Hi! I was wondering about the equality of MAE and MAD. The MAE is defined as
The MAD, however, is defined as the mean absolute difference around a central/reference point m(E)
This reference point is the mean (or the median) of the error. Only if we explicitly set the reference point to zero, MAE = MAD (as also explained in this paper). Hence, I think the general implementation of the MAD should look like:
def mad(actual: np.ndarray, predicted: np.ndarray): """ Mean Absolute Deviation """ error = _error(actual, predicted) return np.mean(np.abs(error - np.mean(error)))
pandas
has amad()
function, which also follows this definition (see here).
@marvinfriede, Thank you for the contribution. I've updated the gist.
Hello, thanks for this. This saved a great amount of time. However, I believe the mda (Mean Directional Accuracy) Function is wrong.
Instead of comparing the signs of the predicted values with the signs of the actual values, you are comparing the signs of the predicted values with the signs of the differences between the actual values.
Currently -
def mda(actual: np.ndarray, predicted: np.ndarray): """ Mean Directional Accuracy """ return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - actual[:-1])).astype(int))It should be -
def mda(actual: np.ndarray, predicted: np.ndarray): """ Mean Directional Accuracy """ return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - predicted[:-1])).astype(int))
According to the references:
- Blaskowitz and Herwartz (2009, 2011) On economic evaluation of directional forecasts see section 2
- Bergmeir, C., Costantini, M., & Benítez, J. M. (2014). On the usefulness of cross-validation for directional forecast evaluation. Computational Statistics & Data Analysis, 76, 132-143. see p.7 here
- MDA on wiki
implementation in the gist is correct. Am I missing something? Can you please provide a reference where to prove your point?
Note that there was a confusion previously due to an error in the wiki :) see my comment above
Thanks <3
Thanks for the file, I'm finding it quite useful! I made a fork where I added a minimal pyproject.toml so that I could add this to my project's dependencies and have it be installed automatically with pip. Feel free to copy it to your original version if you find it useful. (I would make a pull request but looks like those aren't supported for gists.) In the meantime, here's my version for anyone else who might wish to install this with pip: https://gist.github.com/fkivela/53434b22458b350fc71b8fc32379d20f
The hero I needed