Skip to content

Instantly share code, notes, and snippets.

@rohan-paul
Created October 30, 2021 14:03
Show Gist options
  • Save rohan-paul/8256d0b58d6b2365b881227602510138 to your computer and use it in GitHub Desktop.
Save rohan-paul/8256d0b58d6b2365b881227602510138 to your computer and use it in GitHub Desktop.
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm
from sklearn.metrics import mean_squared_error, mean_absolute_error
import math
# Test for stationarity
def test_stationarity(timeseries, window=12):
    """Plot rolling statistics and print an Augmented Dickey-Fuller report.

    The function draws the original series together with its rolling mean
    and rolling standard deviation, then runs ``adfuller`` on the values and
    prints both the raw result tuple and a labelled ``pandas.Series``
    summarising it.

    Relies on ``plt``, ``pd`` and ``adfuller`` being available in the
    enclosing module scope (``plt`` and ``pd`` are not imported in the
    visible part of this file, so they must be imported elsewhere).

    Args:
        timeseries: A pandas ``DataFrame`` (its first column is tested) or a
            pandas ``Series``.
        window: Number of observations in each rolling window. Defaults to
            12, the value that was previously hard-coded.

    Returns:
        None. All results are plotted or printed.
    """
    # Determine rolling statistics.
    rolling_mean = timeseries.rolling(window).mean()
    rolling_std = timeseries.rolling(window).std()

    # Plot rolling statistics alongside the original data.
    plt.plot(timeseries, color='blue', label='Original')
    plt.plot(rolling_mean, color='red', label='Rolling Mean')
    plt.plot(rolling_std, color='black', label='Rolling Std')
    plt.legend(loc='best')
    plt.title('Rolling Mean and Standard Deviation')
    plt.show(block=False)

    print("Results of dickey fuller test")

    # ``.iloc[:, 0]`` only works on 2-D input; a Series is already 1-D, so
    # take its values directly instead of failing with an indexing error.
    if isinstance(timeseries, pd.DataFrame):
        values = timeseries.iloc[:, 0].values
    else:
        values = timeseries.values

    # With autolag='AIC', adfuller chooses the number of lags that yields the
    # lowest AIC (Akaike information criterion), an estimator of prediction
    # error and thereby of the relative quality of statistical models for a
    # given data set. This is usually a good option to follow.
    adfuller_result = adfuller(values, autolag='AIC')
    print(adfuller_result)

    # The raw tuple carries no labels, so name the first four entries and
    # then append each critical value from the mapping at index 4.
    output = pd.Series(
        adfuller_result[0:4],
        index=[
            'Test Statistics',
            'p-value',
            'No. of lags used',
            'Number of observations used',
        ],
    )
    for level, critical_value in adfuller_result[4].items():
        output[f'critical value ({level})'] = critical_value
    print(output)
# Run the stationarity report on `tsla_closing_prices_df`, which is defined
# outside this snippet (presumably a DataFrame of TSLA closing prices — confirm).
test_stationarity(tsla_closing_prices_df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment