Last active
January 17, 2016 22:55
-
-
Save mkolod/8afe0b22ec58f00db590 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # import forecast library | |
| library(tseries) | |
| library(forecast) | |
| ########## Data loading and exploratory analysis | |
| # NYC birth data | |
| births <- scan("http://robjhyndman.com/tsdldata/data/nybirths.dat") | |
| # convert to time series object - monthly data, starts in January 1946 | |
| birthsTs <- ts(births, frequency=12, start=c(1946, 1)) | |
| # print birth values and plot them | |
| print("NYC births") | |
| print(birthsTs) | |
| plot(x = birthsTs, main = "NYC births") | |
| # decomposition into seasonal, trend and random components | |
| birthComps <- decompose(birthsTs) | |
| print("seasonal component") | |
| print(birthComps$seasonal) | |
| print("trend component") | |
| print(birthComps$trend) | |
| print("random component") | |
| print(birthComps$random) | |
| plot(birthComps) | |
| # plot seasonally adjusted births | |
| seasAdjBirths <- birthsTs - birthComps$seasonal | |
| ts.plot(birthsTs, seasAdjBirths, main = "Births and seasonally adjusted births", col = c("blue", "red")) | |
| legend("bottomright", c("births","seas. adj. births"), lty=c(1, 1), lwd=c(2.5, 2.5),col=c("blue", "red")) | |
| # Stationarity test (Augmented Dickey-Fuller test) | |
| adf.test(x = birthsTs, alternative = "stationary") | |
| # Looks like the data is non-stationary. Let's take the first difference and examine it | |
| dBirthsTs <- diff(birthsTs, differences = 1) | |
| plot(x = dBirthsTs, main = "First difference of births") | |
| # auto-correlation function for level and first difference | |
| acf(birthsTs, lag.max=20) | |
| acf(dBirthsTs, lag.max=20) | |
| # partial auto-correlation function for level and first difference | |
| pacf(birthsTs, lag.max=20) | |
| pacf(dBirthsTs, lag.max=20) | |
| ########## ARIMA models | |
| # choose model manually | |
| birthModel1 <- arima(x = birthsTs, order = c(2, 1, 2), seasonal = c(1, 1, 1)) | |
| # choose model automatically | |
| numPer <- round(length(birthsTs)^(1/3)) | |
| birthModel2 <- auto.arima(x = birthsTs, max.p = numPer, max.q = numPer, max.d = 1, max.P = 2, max.Q = 2) | |
| print(birthModel2) | |
| # show actual vs. fitted | |
| inSampleFit <- fitted(birthModel2) | |
| plot(birthsTs, col = "blue", main = "actual vs. predicted births") | |
| lines(inSampleFit, type = "l", lty = 2, col="red") | |
| legend("topright", c("actual", "predicted"), lty=c(1, 2), lwd=c(2.5, 2.5),col=c("blue", "red")) | |
| # forecast the automatically-chosen model | |
| predictions <- forecast(birthModel2, h=24) | |
| plot(x = predictions, main = "NYC birth predictions for 24 months") | |
| # normality plot function | |
| normalityPlot <- function(data) { | |
| plot(density(data, kernel="gaussian"), col="blue", xlab = "residuals", ylab = "density", main = "Residual density against normal distribution") | |
| x <- seq(-2, 2, length = 100) | |
| hx <- dnorm(x, mean = mean(data), sd = sd(data)) | |
| lines(x, hx, type="l", lty = 2, col = "red") | |
| legend("topright", c("residual density", "normal density"), lty=c(1, 2), lwd=c(2.5, 2.5),col=c("blue", "red")) | |
| } | |
| # plot ACF of the in-sample forecast | |
| acf(predictions$residuals, lag.max=20) | |
| # normality plot of residuals | |
| normalityPlot(predictions$residuals) | |
| # Ljung-Box test for autocorrelation | |
| Box.test(predictions$residuals, lag=20, type="Ljung-Box") | |
| # plot predictions for 240 months to show the exponentially growing confidence interval | |
| predictions <- forecast(birthModel2, h=240) | |
| plot(x = predictions, main = "NYC birth predictions for 240 months") | |
| ########## Exponential smoothing | |
| birthHw <- HoltWinters(birthsTs) | |
| birthHwPred <- forecast.HoltWinters(birthHw, h=24) | |
| plot(birthHwPred) | |
| # plot predictions for 240 months to show the exponentially growing confidence interval | |
| birthHwPred2 <- forecast.HoltWinters(birthHw, h=240) | |
| plot(birthHwPred2) | |
| # residual normality plot | |
| normalityPlot(birthHwPred$residuals) | |
| # plot ACF of the residuals | |
| acf(birthHwPred$residuals, lag.max=20) | |
| # Ljung-Box test for autocorrelation | |
| Box.test(birthHwPred$residuals, lag=20, type="Ljung-Box") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment