Skip to content

Instantly share code, notes, and snippets.

@bshishov
Last active October 15, 2024 23:41
Show Gist options
  • Save bshishov/5dc237f59f019b26145648e2124ca1c9 to your computer and use it in GitHub Desktop.
Save bshishov/5dc237f59f019b26145648e2124ca1c9 to your computer and use it in GitHub Desktop.
Python Numpy functions for most common forecasting metrics
import numpy as np
EPSILON = 1e-10
def _error(actual: np.ndarray, predicted: np.ndarray):
""" Simple error """
return actual - predicted
def _percentage_error(actual: np.ndarray, predicted: np.ndarray):
"""
Percentage error
Note: result is NOT multiplied by 100
"""
return _error(actual, predicted) / (actual + EPSILON)
def _naive_forecasting(actual: np.ndarray, seasonality: int = 1):
""" Naive forecasting method which just repeats previous samples """
return actual[:-seasonality]
def _relative_error(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None):
""" Relative Error """
if benchmark is None or isinstance(benchmark, int):
# If no benchmark prediction provided - use naive forecasting
if not isinstance(benchmark, int):
seasonality = 1
else:
seasonality = benchmark
return _error(actual[seasonality:], predicted[seasonality:]) /\
(_error(actual[seasonality:], _naive_forecasting(actual, seasonality)) + EPSILON)
return _error(actual, predicted) / (_error(actual, benchmark) + EPSILON)
def _bounded_relative_error(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None):
""" Bounded Relative Error """
if benchmark is None or isinstance(benchmark, int):
# If no benchmark prediction provided - use naive forecasting
if not isinstance(benchmark, int):
seasonality = 1
else:
seasonality = benchmark
abs_err = np.abs(_error(actual[seasonality:], predicted[seasonality:]))
abs_err_bench = np.abs(_error(actual[seasonality:], _naive_forecasting(actual, seasonality)))
else:
abs_err = np.abs(_error(actual, predicted))
abs_err_bench = np.abs(_error(actual, benchmark))
return abs_err / (abs_err + abs_err_bench + EPSILON)
def _geometric_mean(a, axis=0, dtype=None):
""" Geometric mean """
if not isinstance(a, np.ndarray): # if not an ndarray object attempt to convert it
log_a = np.log(np.array(a, dtype=dtype))
elif dtype: # Must change the default dtype allowing array type
if isinstance(a, np.ma.MaskedArray):
log_a = np.log(np.ma.asarray(a, dtype=dtype))
else:
log_a = np.log(np.asarray(a, dtype=dtype))
else:
log_a = np.log(a)
return np.exp(log_a.mean(axis=axis))
def mse(actual: np.ndarray, predicted: np.ndarray):
""" Mean Squared Error """
return np.mean(np.square(_error(actual, predicted)))
def rmse(actual: np.ndarray, predicted: np.ndarray):
""" Root Mean Squared Error """
return np.sqrt(mse(actual, predicted))
def nrmse(actual: np.ndarray, predicted: np.ndarray):
""" Normalized Root Mean Squared Error """
return rmse(actual, predicted) / (actual.max() - actual.min())
def me(actual: np.ndarray, predicted: np.ndarray):
""" Mean Error """
return np.mean(_error(actual, predicted))
def mae(actual: np.ndarray, predicted: np.ndarray):
""" Mean Absolute Error """
return np.mean(np.abs(_error(actual, predicted)))
def mad(actual: np.ndarray, predicted: np.ndarray):
""" Mean Absolute Deviation """
error = _error(actual, predicted)
return np.mean(np.abs(error - np.mean(error)))
def gmae(actual: np.ndarray, predicted: np.ndarray):
""" Geometric Mean Absolute Error """
return _geometric_mean(np.abs(_error(actual, predicted)))
def mdae(actual: np.ndarray, predicted: np.ndarray):
""" Median Absolute Error """
return np.median(np.abs(_error(actual, predicted)))
def mpe(actual: np.ndarray, predicted: np.ndarray):
""" Mean Percentage Error """
return np.mean(_percentage_error(actual, predicted))
def mape(actual: np.ndarray, predicted: np.ndarray):
"""
Mean Absolute Percentage Error
Properties:
+ Easy to interpret
+ Scale independent
- Biased, not symmetric
- Undefined when actual[t] == 0
Note: result is NOT multiplied by 100
"""
return np.mean(np.abs(_percentage_error(actual, predicted)))
def mdape(actual: np.ndarray, predicted: np.ndarray):
"""
Median Absolute Percentage Error
Note: result is NOT multiplied by 100
"""
return np.median(np.abs(_percentage_error(actual, predicted)))
def smape(actual: np.ndarray, predicted: np.ndarray):
"""
Symmetric Mean Absolute Percentage Error
Note: result is NOT multiplied by 100
"""
return np.mean(2.0 * np.abs(actual - predicted) / ((np.abs(actual) + np.abs(predicted)) + EPSILON))
def smdape(actual: np.ndarray, predicted: np.ndarray):
"""
Symmetric Median Absolute Percentage Error
Note: result is NOT multiplied by 100
"""
return np.median(2.0 * np.abs(actual - predicted) / ((np.abs(actual) + np.abs(predicted)) + EPSILON))
def maape(actual: np.ndarray, predicted: np.ndarray):
"""
Mean Arctangent Absolute Percentage Error
Note: result is NOT multiplied by 100
"""
return np.mean(np.arctan(np.abs((actual - predicted) / (actual + EPSILON))))
def mase(actual: np.ndarray, predicted: np.ndarray, seasonality: int = 1):
"""
Mean Absolute Scaled Error
Baseline (benchmark) is computed with naive forecasting (shifted by @seasonality)
"""
return mae(actual, predicted) / mae(actual[seasonality:], _naive_forecasting(actual, seasonality))
def std_ae(actual: np.ndarray, predicted: np.ndarray):
""" Normalized Absolute Error """
__mae = mae(actual, predicted)
return np.sqrt(np.sum(np.square(_error(actual, predicted) - __mae))/(len(actual) - 1))
def std_ape(actual: np.ndarray, predicted: np.ndarray):
""" Normalized Absolute Percentage Error """
__mape = mape(actual, predicted)
return np.sqrt(np.sum(np.square(_percentage_error(actual, predicted) - __mape))/(len(actual) - 1))
def rmspe(actual: np.ndarray, predicted: np.ndarray):
"""
Root Mean Squared Percentage Error
Note: result is NOT multiplied by 100
"""
return np.sqrt(np.mean(np.square(_percentage_error(actual, predicted))))
def rmdspe(actual: np.ndarray, predicted: np.ndarray):
"""
Root Median Squared Percentage Error
Note: result is NOT multiplied by 100
"""
return np.sqrt(np.median(np.square(_percentage_error(actual, predicted))))
def rmsse(actual: np.ndarray, predicted: np.ndarray, seasonality: int = 1):
""" Root Mean Squared Scaled Error """
q = np.abs(_error(actual, predicted)) / mae(actual[seasonality:], _naive_forecasting(actual, seasonality))
return np.sqrt(np.mean(np.square(q)))
def inrse(actual: np.ndarray, predicted: np.ndarray):
""" Integral Normalized Root Squared Error """
return np.sqrt(np.sum(np.square(_error(actual, predicted))) / np.sum(np.square(actual - np.mean(actual))))
def rrse(actual: np.ndarray, predicted: np.ndarray):
""" Root Relative Squared Error """
return np.sqrt(np.sum(np.square(actual - predicted)) / np.sum(np.square(actual - np.mean(actual))))
def mre(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None):
""" Mean Relative Error """
return np.mean(_relative_error(actual, predicted, benchmark))
def rae(actual: np.ndarray, predicted: np.ndarray):
""" Relative Absolute Error (aka Approximation Error) """
return np.sum(np.abs(actual - predicted)) / (np.sum(np.abs(actual - np.mean(actual))) + EPSILON)
def mrae(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None):
""" Mean Relative Absolute Error """
return np.mean(np.abs(_relative_error(actual, predicted, benchmark)))
def mdrae(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None):
""" Median Relative Absolute Error """
return np.median(np.abs(_relative_error(actual, predicted, benchmark)))
def gmrae(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None):
""" Geometric Mean Relative Absolute Error """
return _geometric_mean(np.abs(_relative_error(actual, predicted, benchmark)))
def mbrae(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None):
""" Mean Bounded Relative Absolute Error """
return np.mean(_bounded_relative_error(actual, predicted, benchmark))
def umbrae(actual: np.ndarray, predicted: np.ndarray, benchmark: np.ndarray = None):
""" Unscaled Mean Bounded Relative Absolute Error """
__mbrae = mbrae(actual, predicted, benchmark)
return __mbrae / (1 - __mbrae)
def mda(actual: np.ndarray, predicted: np.ndarray):
""" Mean Directional Accuracy """
return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - actual[:-1])).astype(int))
METRICS = {
'mse': mse,
'rmse': rmse,
'nrmse': nrmse,
'me': me,
'mae': mae,
'mad': mad,
'gmae': gmae,
'mdae': mdae,
'mpe': mpe,
'mape': mape,
'mdape': mdape,
'smape': smape,
'smdape': smdape,
'maape': maape,
'mase': mase,
'std_ae': std_ae,
'std_ape': std_ape,
'rmspe': rmspe,
'rmdspe': rmdspe,
'rmsse': rmsse,
'inrse': inrse,
'rrse': rrse,
'mre': mre,
'rae': rae,
'mrae': mrae,
'mdrae': mdrae,
'gmrae': gmrae,
'mbrae': mbrae,
'umbrae': umbrae,
'mda': mda,
}
def evaluate(actual: np.ndarray, predicted: np.ndarray, metrics=('mae', 'mse', 'smape', 'umbrae')):
results = {}
for name in metrics:
try:
results[name] = METRICS[name](actual, predicted)
except Exception as err:
results[name] = np.nan
print('Unable to compute metric {0}: {1}'.format(name, err))
return results
def evaluate_all(actual: np.ndarray, predicted: np.ndarray):
return evaluate(actual, predicted, metrics=set(METRICS.keys()))
@Varad2305
Copy link

This just made my life a bit simpler. Thank you.

@antonioalmeida
Copy link

The hero I needed

@rish-16
Copy link

rish-16 commented Jul 2, 2020

Hey!

I'm guessing the nrmse(actual, predicted) function is normalised between 0 and 1?

@joshlk
Copy link

joshlk commented Jan 14, 2021

I believe the Root Mean Squared Scaled Error rmsse function is wrong. According to the definition on Page 6 in the M5 Competitors Guide.

It is currently:

def rmsse(actual: np.ndarray, predicted: np.ndarray, seasonality: int = 1):
    """ Root Mean Squared Scaled Error """
    q = np.abs(_error(actual, predicted)) / mae(actual[seasonality:], _naive_forecasting(actual, seasonality))
    return np.sqrt(np.mean(np.square(q)))

I believe it should be:

def rmsse(actual: np.ndarray, predicted: np.ndarray, seasonality: int = 1):
    """ Root Mean Squared Scaled Error """
    q = mse(actual, predicted) / mse(actual[seasonality:], _naive_forecasting(actual, seasonality))
    return np.sqrt(q)

It might be that its defined differently somewhere else. Do you have a reference?

@bshishov
Copy link
Author

Hi, nice finding

I've implemented these metrics couple a years ago, long before M5 competition. I've used these definitions:

scaled error

q

Mean absolute scaled error (MASE)

mase

Median absolute scaled error (MdASE)

mase

Root mean squared scaled error (RMSSE)

mase

I don't remember an actual reference I used. It seems like M5 introduced an ambiguity.
It seems that these are references I used:

Theodosiou, M. (2011). Forecasting monthly and quarterly time series using STL decomposition. International Journal of Forecasting, 27(4), 1178–1195. doi:10.1016/j.ijforecast.2010.11.002

Shcherbakov, Maxim Vladimirovich, et al. "A survey of forecast error measures." World Applied Sciences Journal 24.24 (2013): 171-176.

@joshlk
Copy link

joshlk commented Jan 17, 2021

Interesting. I had a look at the references and they do indeed define it as you describe. So it looks like there isn't a standard definition. Personally I prefer the M5 definition as your applying the same square-sum-squareroot treatment to both the error and scale factor separately and then dividing one by the other to get a scaled error. But like I said that is personal preference and I'm unsure if it theoretically changes anything.

@vyaduvanshi
Copy link

Wouldn't MASE (mean absolute scaled error) always throw a zero division error?

actual[seasonality:] - actual[:-seasonality] would always return an array of NaNs and zeroes for any array actual. Am I missing something here?

@stefanieKobsar
Copy link

OMG, thanks a lot

@nocciolate
Copy link

Hi, thank you for this! It has helped quite a lot!

One question - I am struggling with MDA.

According to wiki the formula should be:

np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - actual[:-1])).astype(int))

instead of:

np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - predicted[:-1])).astype(int))

Could it be that I am missing something important?

Cheers

@bshishov
Copy link
Author

Hi, thank you for this! It has helped quite a lot!

One question - I am struggling with MDA.

According to wiki the formula should be:

np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - actual[:-1])).astype(int))

instead of:

np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - predicted[:-1])).astype(int))

Could it be that I am missing something important?

Cheers

Hi, thanks for the finding. Looks like you're right and there is actually an error in the gist.

Have checked the sources (Blaskowitz and Herwartz (2009, 2011) On economic evaluation of directional forecasts, section 2) - they define the predicted part as the indicator of a difference between predicted and actual. Same goes for Bergmeir C., Costantini M., Benítez J. M. On the usefulness of cross-validation for directional forecast evaluation //Computational Statistics & Data Analysis. – 2014

The thing is... wikipedia page was incorrect at the point where I've implemented the formula and you can actually see the diff

@Kr1zA
Copy link

Kr1zA commented Dec 18, 2022

Hello.
I thing MASE is implemented incorrectly.
According to
https://en.wikipedia.org/wiki/Mean_absolute_scaled_error#Non_seasonal_time_series
or
https://otexts.com/fpp3/accuracy.html#scaled-errors

code part:
/ mae(actual[seasonality:], _naive_forecasting(actual, seasonality))

should be calculated on training data, not on testing.

Yes, it is confusing at the beginning, but it makes sense because you want to compare your forecasts with naive forecast.

@bshishov
Copy link
Author

bshishov commented Dec 21, 2022

Hello. I thing MASE is implemented incorrectly. According to https://en.wikipedia.org/wiki/Mean_absolute_scaled_error#Non_seasonal_time_series or https://otexts.com/fpp3/accuracy.html#scaled-errors

code part: / mae(actual[seasonality:], _naive_forecasting(actual, seasonality))

should be calculated on training data, not on testing.

Yes, it is confusing at the beginning, but it makes sense because you want to compare your forecasts with naive forecast.

Hi, i've double checked the sources.
Original paper is Hyndman, R. J., & Koehler, A. B. (2006). Another look at measures of forecast accuracy. International Journal of Forecasting, 22(4), 679–688. And also the links you've provided.

I also might be missing something, however the only place your forecasts is used is in numerator (mae(actual, predicted)). And the denominator act as a baseline forecast. I.e. MASE answers question about how good your forecasting is (using MAE) relative to basic naive forecasting which is basically subtracting samples with time lag.

Regarding terminology, in paper, Y_t stands for actual, known data, while ^y_t, F_t or predicted stands for something your algorithm produces, i.e. forecast. There is no division on training and testing done in these functions - it is up to you how you divide your original series into training and testing parts. But known data (training or testing samples) should got into the actual parameters, since it is known data. While your algorithm (or neural network) output (aka forecast) should go into predicted argument.

@bshishov
Copy link
Author

bshishov commented Dec 21, 2022

Wouldn't MASE (mean absolute scaled error) always throw a zero division error?

actual[seasonality:] - actual[:-seasonality] would always return an array of NaNs and zeroes for any array actual. Am I missing something here?

No it would not (at least for small seasonality), it is just a fancy way to compute a series of differences using python slices and numpy arrays:

seasonality = 1
array                        = [1, 2, 4]
assert array[seasonality:] ==     [2, 4]  # take items from array starting from index = seasonality
assert array[:-seasonality] == [1, 2]     # take items from array until index = len(array) - seasonality 

# pairwise subtraction
result = np.array(actual[seasonality:]) - np.array(actual[:-seasonality])
assert list(result) == [2 - 1, 4 - 2]

print(result)

@SJ-CAI
Copy link

SJ-CAI commented Mar 1, 2023

thanks, this helps a lot!

@marvinfriede
Copy link

Hi! I was wondering about the equality of MAE and MAD.
The MAE is defined as

$$\text{MAE}=\dfrac{1}{n} \sum_{i=1}^{n} |\text{actual}_i - \text{predict}_i| = \dfrac{1}{n} \sum_{i=1}^{n} |\text{error}_i|$$

The MAD, however, is defined as the mean absolute difference around a central/reference point $m(\text{E})$

$$\text{MAD}=\dfrac{1}{n} \sum_{i=1}^{n} |\text{error}_i - m(\text{E})|$$

This reference point is the mean (or the median) of the error. Only if we explicitly set the reference point to zero, MAE = MAD (as also explained in this paper). Hence, I think the general implementation of the MAD should look like:

def mad(actual: np.ndarray, predicted: np.ndarray):
    """ Mean Absolute Deviation """
    error = _error(actual, predicted)
    return np.mean(np.abs(error - np.mean(error)))

pandas has a mad() function, which also follows this definition (see here).

@tejaskhare99
Copy link

tejaskhare99 commented Aug 29, 2023

Hello, thanks for this. This saved a great amount of time.
However, I believe the mda (Mean Directional Accuracy) Function is wrong.

Instead of comparing the signs of the predicted values with the signs of the actual values, you are comparing the signs of the predicted values with the signs of the differences between the actual values.

Currently -

def mda(actual: np.ndarray, predicted: np.ndarray):
    """ Mean Directional Accuracy """
    return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - actual[:-1])).astype(int))

It should be -

def mda(actual: np.ndarray, predicted: np.ndarray):
    """ Mean Directional Accuracy """
    return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - predicted[:-1])).astype(int))

@bshishov
Copy link
Author

bshishov commented Sep 2, 2023

Hi! I was wondering about the equality of MAE and MAD. The MAE is defined as

The MAD, however, is defined as the mean absolute difference around a central/reference point m(E)

This reference point is the mean (or the median) of the error. Only if we explicitly set the reference point to zero, MAE = MAD (as also explained in this paper). Hence, I think the general implementation of the MAD should look like:

def mad(actual: np.ndarray, predicted: np.ndarray):
    """ Mean Absolute Deviation """
    error = _error(actual, predicted)
    return np.mean(np.abs(error - np.mean(error)))

pandas has a mad() function, which also follows this definition (see here).

@marvinfriede, Thank you for the contribution. I've updated the gist.

@bshishov
Copy link
Author

bshishov commented Sep 2, 2023

Hello, thanks for this. This saved a great amount of time. However, I believe the mda (Mean Directional Accuracy) Function is wrong.

Instead of comparing the signs of the predicted values with the signs of the actual values, you are comparing the signs of the predicted values with the signs of the differences between the actual values.

Currently -

def mda(actual: np.ndarray, predicted: np.ndarray):
    """ Mean Directional Accuracy """
    return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - actual[:-1])).astype(int))

It should be -

def mda(actual: np.ndarray, predicted: np.ndarray):
    """ Mean Directional Accuracy """
    return np.mean((np.sign(actual[1:] - actual[:-1]) == np.sign(predicted[1:] - predicted[:-1])).astype(int))

According to the references:

  • Blaskowitz and Herwartz (2009, 2011) On economic evaluation of directional forecasts see section 2
  • Bergmeir, C., Costantini, M., & Benítez, J. M. (2014). On the usefulness of cross-validation for directional forecast evaluation. Computational Statistics & Data Analysis, 76, 132-143. see p.7 here
  • MDA on wiki

implementation in the gist is correct. Am I missing something? Can you please provide a reference where to prove your point?
Note that there was a confusion previously due to an error in the wiki :) see my comment above

@MildredVL
Copy link

Thanks <3

@fkivela
Copy link

fkivela commented Apr 2, 2024

Thanks for the file, I'm finding it quite useful! I made a fork where I added a minimal pyproject.toml so that I could add this to my project's dependencies and have it be installed automatically with pip. Feel free to copy it to your original version if you find it useful. (I would make a pull request but looks like those aren't supported for gists.) In the meantime, here's my version for anyone else who might wish to install this with pip: https://gist.github.com/fkivela/53434b22458b350fc71b8fc32379d20f

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment