lwiklendt · July 2, 2018 07:43
diff --git a/nonlin_data_lin_model.R b/nonlin_data_lin_model.R
 # Simulation: how correlated predictors interact with unmodelled non-linearity
 # when a purely linear model is fitted.

 rho <- 0.9  # correlation used to build the 2 x 2 correlation matrix below
 N <- 20     # number of simulated observations
 set.seed(1) # fixed seed so the random draws below are reproducible

 # Generate correlated x1 and x2 predictors. z holds independent standard
 # normal draws arranged as a 2 x N matrix (one column per observation);
 # pre-multiplying by the lower-triangular Cholesky factor of the correlation
 # matrix yields x, a 2 x N matrix whose row 1 is x1 and row 2 is x2.
 z <- matrix(rnorm(2 * N, mean = 0, sd = 1), nrow = 2)
 L <- t(chol(matrix(c(1, rho, rho, 1), nrow = 2)))
 x <- L %*% z

 # y depends only on x1 (quadratically, plus small noise), not on x2
 y <- (1 + x[1, ])^2 + rnorm(N, mean = 0, sd = 0.1)

 # Fit the (misspecified) linear model and inspect whether x2 is reported as
 # "significant" alongside x1. The predictors are the ROWS of x: x[2, ] has
 # length N, whereas x[, 2] is a length-2 column that data.frame() would
 # silently recycle into a meaningless x2.
 fit <- lm(
   y ~ x1 + x2,
   data = data.frame(y = y, x1 = x[1, ], x2 = x[2, ])
 )
 print(summary(fit))
	# Simulation: how correlated predictors interact with unmodelled non-linearity
	# when a purely linear model is fitted.

	rho <- 0.9  # correlation used to build the 2 x 2 correlation matrix below
	N <- 20     # number of simulated observations
	set.seed(1) # fixed seed so the random draws below are reproducible

	# Generate correlated x1 and x2 predictors. z holds independent standard
	# normal draws arranged as a 2 x N matrix (one column per observation);
	# pre-multiplying by the lower-triangular Cholesky factor of the correlation
	# matrix yields x, a 2 x N matrix whose row 1 is x1 and row 2 is x2.
	z <- matrix(rnorm(2 * N, mean = 0, sd = 1), nrow = 2)
	L <- t(chol(matrix(c(1, rho, rho, 1), nrow = 2)))
	x <- L %*% z

	# y depends only on x1 (quadratically, plus small noise), not on x2
	y <- (1 + x[1, ])^2 + rnorm(N, mean = 0, sd = 0.1)

	# Fit the (misspecified) linear model and inspect whether x2 is reported as
	# "significant" alongside x1. The predictors are the ROWS of x: x[2, ] has
	# length N, whereas x[, 2] is a length-2 column that data.frame() would
	# silently recycle into a meaningless x2.
	fit <- lm(
	  y ~ x1 + x2,
	  data = data.frame(y = y, x1 = x[1, ], x2 = x[2, ])
	)
	print(summary(fit))