mkolod · January 17, 2016 22:55
diff --git a/time_series_examples.R b/time_series_examples.R
 # import forecast library
 library(tseries)
 library(forecast)

 ########## Data loading and exploratory analysis

 # NYC birth data
 births <- scan("http://robjhyndman.com/tsdldata/data/nybirths.dat")

 # convert to time series object - monthly data, starts in January 1946
 birthsTs <- ts(births, frequency=12, start=c(1946, 1))

 # print birth values and plot them
 print("NYC births")
 print(birthsTs)
 plot(x = birthsTs, main = "NYC births")

 # decomposition into seasonal, trend and random components
 birthComps <- decompose(birthsTs)
 print("seasonal component")
 print(birthComps$seasonal)
 print("trend component")
 print(birthComps$trend)
 print("random component")
 print(birthComps$random)
 plot(birthComps)

 # plot seasonally adjusted births
 seasAdjBirths <- birthsTs - birthComps$seasonal
 ts.plot(birthsTs, seasAdjBirths, main = "Births and seasonally adjusted births", col = c("blue", "red"))
 legend("bottomright", c("births","seas. adj. births"), lty=c(1, 1), lwd=c(2.5, 2.5),col=c("blue", "red"))

 # Stationarity test (Augmented Dickey-Fuller test)
 adf.test(x = birthsTs, alternative = "stationary")

 # Looks like the data is non-stationary. Let's take the first difference and examine it
 dBirthsTs <- diff(birthsTs, differences = 1)
 plot(x = dBirthsTs, main = "First difference of births")

 # auto-correlation function for level and first difference
 acf(birthsTs, lag.max=20)
 acf(dBirthsTs, lag.max=20)

 # partial auto-correlation function for level and first difference
 pacf(birthsTs, lag.max=20)
 pacf(dBirthsTs, lag.max=20)


 ##########  ARIMA models

 # choose model manually
 birthModel1 <- arima(x = birthsTs, order = c(2, 1, 2), seasonal = c(1, 1, 1))

 # choose model automatically
 numPer <- round(length(birthsTs)^(1/3))
 birthModel2 <- auto.arima(x = birthsTs, max.p = numPer, max.q = numPer, max.d = 1, max.P = 2, max.Q = 2)
 print(birthModel2)

 # show actual vs. fitted
 inSampleFit <- fitted(birthModel2)
 plot(birthsTs, col = "blue", main = "actual vs. predicted births")
 lines(inSampleFit, type = "l", lty = 2, col="red")
 legend("topright", c("actual", "predicted"), lty=c(1, 2), lwd=c(2.5, 2.5),col=c("blue", "red"))

 # forecast the automatically-chosen model
 predictions <- forecast(birthModel2, h=24)
 plot(x = predictions, main = "NYC birth predictions for 24 months")


 # normality plot function
 normalityPlot <- function(data) {
  plot(density(data, kernel="gaussian"), col="blue", xlab = "residuals", ylab = "density", main = "Residual density against normal distribution")
  x <- seq(-2, 2, length = 100)
  hx <- dnorm(x, mean = mean(data), sd = sd(data))
  lines(x, hx, type="l", lty = 2, col = "red")
  legend("topright", c("residual density", "normal density"), lty=c(1, 2), lwd=c(2.5, 2.5),col=c("blue", "red"))
 }

 # plot ACF of the in-sample forecast
 acf(predictions$residuals, lag.max=20)

 # normality plot of residuals
 normalityPlot(predictions$residuals)

 # Ljung-Box test for autocorrelation
 Box.test(predictions$residuals, lag=20, type="Ljung-Box")

 # plot predictions for 240 months to show the exponentially growing confidence interval
 predictions <- forecast(birthModel2, h=240)
 plot(x = predictions, main = "NYC birth predictions for 240 months")


 ########## Exponential smoothing
 birthHw <- HoltWinters(birthsTs)
 birthHwPred <- forecast.HoltWinters(birthHw, h=24)
 plot(birthHwPred)

 # plot predictions for 240 months to show the exponentially growing confidence interval
 birthHwPred2 <- forecast.HoltWinters(birthHw, h=240)
 plot(birthHwPred2)

 # residual normality plot
 normalityPlot(birthHwPred$residuals)

 # plot ACF of the residuals
 acf(birthHwPred$residuals, lag.max=20)

 # Ljung-Box test for autocorrelation
 Box.test(birthHwPred$residuals, lag=20, type="Ljung-Box")
	# import forecast library
	library(tseries)
	library(forecast)

	########## Data loading and exploratory analysis

	# NYC birth data
	births <- scan("http://robjhyndman.com/tsdldata/data/nybirths.dat")

	# convert to time series object - monthly data, starts in January 1946
	birthsTs <- ts(births, frequency=12, start=c(1946, 1))

	# print birth values and plot them
	print("NYC births")
	print(birthsTs)
	plot(x = birthsTs, main = "NYC births")

	# decomposition into seasonal, trend and random components
	birthComps <- decompose(birthsTs)
	print("seasonal component")
	print(birthComps$seasonal)
	print("trend component")
	print(birthComps$trend)
	print("random component")
	print(birthComps$random)
	plot(birthComps)

	# plot seasonally adjusted births
	seasAdjBirths <- birthsTs - birthComps$seasonal
	ts.plot(birthsTs, seasAdjBirths, main = "Births and seasonally adjusted births", col = c("blue", "red"))
	legend("bottomright", c("births","seas. adj. births"), lty=c(1, 1), lwd=c(2.5, 2.5),col=c("blue", "red"))

	# Stationarity test (Augmented Dickey-Fuller test)
	adf.test(x = birthsTs, alternative = "stationary")

	# Looks like the data is non-stationary. Let's take the first difference and examine it
	dBirthsTs <- diff(birthsTs, differences = 1)
	plot(x = dBirthsTs, main = "First difference of births")

	# auto-correlation function for level and first difference
	acf(birthsTs, lag.max=20)
	acf(dBirthsTs, lag.max=20)

	# partial auto-correlation function for level and first difference
	pacf(birthsTs, lag.max=20)
	pacf(dBirthsTs, lag.max=20)


	########## ARIMA models

	# choose model manually
	birthModel1 <- arima(x = birthsTs, order = c(2, 1, 2), seasonal = c(1, 1, 1))

	# choose model automatically
	numPer <- round(length(birthsTs)^(1/3))
	birthModel2 <- auto.arima(x = birthsTs, max.p = numPer, max.q = numPer, max.d = 1, max.P = 2, max.Q = 2)
	print(birthModel2)

	# show actual vs. fitted
	inSampleFit <- fitted(birthModel2)
	plot(birthsTs, col = "blue", main = "actual vs. predicted births")
	lines(inSampleFit, type = "l", lty = 2, col="red")
	legend("topright", c("actual", "predicted"), lty=c(1, 2), lwd=c(2.5, 2.5),col=c("blue", "red"))

	# forecast the automatically-chosen model
	predictions <- forecast(birthModel2, h=24)
	plot(x = predictions, main = "NYC birth predictions for 24 months")


	# normality plot function
	normalityPlot <- function(data) {
	plot(density(data, kernel="gaussian"), col="blue", xlab = "residuals", ylab = "density", main = "Residual density against normal distribution")
	x <- seq(-2, 2, length = 100)
	hx <- dnorm(x, mean = mean(data), sd = sd(data))
	lines(x, hx, type="l", lty = 2, col = "red")
	legend("topright", c("residual density", "normal density"), lty=c(1, 2), lwd=c(2.5, 2.5),col=c("blue", "red"))
	}

	# plot ACF of the in-sample forecast
	acf(predictions$residuals, lag.max=20)

	# normality plot of residuals
	normalityPlot(predictions$residuals)

	# Ljung-Box test for autocorrelation
	Box.test(predictions$residuals, lag=20, type="Ljung-Box")

	# plot predictions for 240 months to show the exponentially growing confidence interval
	predictions <- forecast(birthModel2, h=240)
	plot(x = predictions, main = "NYC birth predictions for 240 months")


	########## Exponential smoothing
	birthHw <- HoltWinters(birthsTs)
	birthHwPred <- forecast.HoltWinters(birthHw, h=24)
	plot(birthHwPred)

	# plot predictions for 240 months to show the exponentially growing confidence interval
	birthHwPred2 <- forecast.HoltWinters(birthHw, h=240)
	plot(birthHwPred2)

	# residual normality plot
	normalityPlot(birthHwPred$residuals)

	# plot ACF of the residuals
	acf(birthHwPred$residuals, lag.max=20)

	# Ljung-Box test for autocorrelation
	Box.test(birthHwPred$residuals, lag=20, type="Ljung-Box")
No results found