Skip to content

Instantly share code, notes, and snippets.

@mkolod
Last active January 17, 2016 22:55
Show Gist options
  • Select an option

  • Save mkolod/8afe0b22ec58f00db590 to your computer and use it in GitHub Desktop.

Select an option

Save mkolod/8afe0b22ec58f00db590 to your computer and use it in GitHub Desktop.
# import forecast library
library(tseries)
library(forecast)
########## Data loading and exploratory analysis
# NYC birth data
births <- scan("http://robjhyndman.com/tsdldata/data/nybirths.dat")
# convert to time series object - monthly data, starts in January 1946
birthsTs <- ts(births, frequency=12, start=c(1946, 1))
# print birth values and plot them
print("NYC births")
print(birthsTs)
plot(x = birthsTs, main = "NYC births")
# decomposition into seasonal, trend and random components
birthComps <- decompose(birthsTs)
print("seasonal component")
print(birthComps$seasonal)
print("trend component")
print(birthComps$trend)
print("random component")
print(birthComps$random)
plot(birthComps)
# plot seasonally adjusted births
seasAdjBirths <- birthsTs - birthComps$seasonal
ts.plot(birthsTs, seasAdjBirths, main = "Births and seasonally adjusted births", col = c("blue", "red"))
legend("bottomright", c("births","seas. adj. births"), lty=c(1, 1), lwd=c(2.5, 2.5),col=c("blue", "red"))
# Stationarity test (Augmented Dickey-Fuller test)
adf.test(x = birthsTs, alternative = "stationary")
# Looks like the data is non-stationary. Let's take the first difference and examine it
dBirthsTs <- diff(birthsTs, differences = 1)
plot(x = dBirthsTs, main = "First difference of births")
# auto-correlation function for level and first difference
acf(birthsTs, lag.max=20)
acf(dBirthsTs, lag.max=20)
# partial auto-correlation function for level and first difference
pacf(birthsTs, lag.max=20)
pacf(dBirthsTs, lag.max=20)
########## ARIMA models
# choose model manually
birthModel1 <- arima(x = birthsTs, order = c(2, 1, 2), seasonal = c(1, 1, 1))
# choose model automatically
numPer <- round(length(birthsTs)^(1/3))
birthModel2 <- auto.arima(x = birthsTs, max.p = numPer, max.q = numPer, max.d = 1, max.P = 2, max.Q = 2)
print(birthModel2)
# show actual vs. fitted
inSampleFit <- fitted(birthModel2)
plot(birthsTs, col = "blue", main = "actual vs. predicted births")
lines(inSampleFit, type = "l", lty = 2, col="red")
legend("topright", c("actual", "predicted"), lty=c(1, 2), lwd=c(2.5, 2.5),col=c("blue", "red"))
# forecast the automatically-chosen model
predictions <- forecast(birthModel2, h=24)
plot(x = predictions, main = "NYC birth predictions for 24 months")
# normality plot function
normalityPlot <- function(data) {
plot(density(data, kernel="gaussian"), col="blue", xlab = "residuals", ylab = "density", main = "Residual density against normal distribution")
x <- seq(-2, 2, length = 100)
hx <- dnorm(x, mean = mean(data), sd = sd(data))
lines(x, hx, type="l", lty = 2, col = "red")
legend("topright", c("residual density", "normal density"), lty=c(1, 2), lwd=c(2.5, 2.5),col=c("blue", "red"))
}
# plot ACF of the in-sample forecast
acf(predictions$residuals, lag.max=20)
# normality plot of residuals
normalityPlot(predictions$residuals)
# Ljung-Box test for autocorrelation
Box.test(predictions$residuals, lag=20, type="Ljung-Box")
# plot predictions for 240 months to show the exponentially growing confidence interval
predictions <- forecast(birthModel2, h=240)
plot(x = predictions, main = "NYC birth predictions for 240 months")
########## Exponential smoothing
birthHw <- HoltWinters(birthsTs)
birthHwPred <- forecast.HoltWinters(birthHw, h=24)
plot(birthHwPred)
# plot predictions for 240 months to show the exponentially growing confidence interval
birthHwPred2 <- forecast.HoltWinters(birthHw, h=240)
plot(birthHwPred2)
# residual normality plot
normalityPlot(birthHwPred$residuals)
# plot ACF of the residuals
acf(birthHwPred$residuals, lag.max=20)
# Ljung-Box test for autocorrelation
Box.test(birthHwPred$residuals, lag=20, type="Ljung-Box")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment