Skip to content

Instantly share code, notes, and snippets.

@azlkiniue
Created December 16, 2024 07:54
Show Gist options
  • Save azlkiniue/ad3c8a242a25d06dec50de81287833b4 to your computer and use it in GitHub Desktop.
Save azlkiniue/ad3c8a242a25d06dec50de81287833b4 to your computer and use it in GitHub Desktop.
Statistical Data Analysis on "airquality" R Dataset
# Load the data ----
data("airquality")

# Keep an untouched copy of the raw data before anything is dropped.
old_airquality <- airquality

# Clean the data ----
# Keep only fully observed rows and discard the calendar columns, so that
# just the four measurements (Ozone, Solar.R, Wind, Temp) remain.
airquality <- airquality[
  complete.cases(airquality),
  setdiff(names(airquality), c("Month", "Day"))
]

# Overview ----
pairs(airquality)
summary(airquality)

# attach() puts the cleaned columns on the search path so that they can be
# referred to by bare name (Ozone, Solar.R, Wind, Temp).
attach(airquality)

# Spread of each variable ----
# Variances
var(airquality$Ozone)
var(airquality$Solar.R)
var(airquality$Wind)
var(airquality$Temp)

# Standard deviations
sd(airquality$Ozone)
sd(airquality$Solar.R)
sd(airquality$Wind)
sd(airquality$Temp)

# Linear models ----
# Each variable in turn regressed on all of the remaining ones (`~ .`).
summary(lm(Ozone ~ ., data = airquality))
summary(lm(Solar.R ~ ., data = airquality))
summary(lm(Wind ~ ., data = airquality))
summary(lm(Temp ~ ., data = airquality))

# Ozone against each single predictor.
summary(lm(Ozone ~ Solar.R, data = airquality))
summary(lm(Ozone ~ Wind, data = airquality))
summary(lm(Ozone ~ Temp, data = airquality))

# Ozone against Wind and Temp together.
summary(lm(Ozone ~ Wind + Temp, data = airquality))
#' Scatter plot with a fitted simple linear regression line
#'
#' Plots `response` against `predictor` and overlays the least-squares line
#' of `response ~ predictor` in red.
#'
#' @param response Numeric vector drawn on the y axis (the model response).
#' @param predictor Numeric vector drawn on the x axis; must have the same
#'   length as `response`.
#' @param dataset Accepted for backward compatibility only. It is not used
#'   in the fit: handing it to `lm()` would let a column that happens to be
#'   called `response` or `predictor` silently replace the supplied vectors,
#'   so the line would no longer match the plotted points.
#' @param ... Further arguments forwarded to `plot()` (e.g. `xlab`, `ylab`).
#' @return The fitted `lm` object, invisibly.
linear_plot <- function(response, predictor, dataset, ...) {
  # data.frame() would silently recycle a shorter vector, so check first.
  stopifnot(length(response) == length(predictor))

  # Fit on exactly the two vectors that are plotted.
  fit_data <- data.frame(response = response, predictor = predictor)
  lm_fit <- lm(response ~ predictor, data = fit_data)

  plot(predictor, response, ...)
  abline(lm_fit, col = "red", lwd = 2)

  invisible(lm_fit)
}
# Ozone against each predictor, as three panels in a single row with
# enlarged axis labels.
par(mfrow = c(1, 3), mar = c(5, 5, 1, 1), cex.lab = 2)
linear_plot(
  airquality$Ozone, airquality$Solar.R, airquality,
  xlab = "Solar.R", ylab = "Ozone"
)
linear_plot(
  airquality$Ozone, airquality$Wind, airquality,
  xlab = "Wind", ylab = "Ozone"
)
linear_plot(
  airquality$Ozone, airquality$Temp, airquality,
  xlab = "Temp", ylab = "Ozone"
)
library("plot3D")
#' 3-D scatter plot with the fitted regression plane of `z ~ x + y`
#'
#' Fits a two-predictor linear model, evaluates it on a 40 x 40 grid that
#' spans the observed ranges of `x` and `y`, and draws the observations
#' together with that plane using `scatter3D()`.
#'
#' @param z Numeric vector, the response (vertical axis).
#' @param x,y Numeric vectors, the two predictors.
#' @param ... Further arguments forwarded to `scatter3D()` (e.g. `theta`,
#'   `phi`, `xlab`, `ylab`, `zlab`).
#' @return Whatever `scatter3D()` returns.
multiple_linear_plot_3d <- function(z, x, y, ...) {
  # The formula refers to the arguments by name, so the prediction grid
  # below must also use the column names `x` and `y`.
  fit <- lm(z ~ x + y)

  # Regular grid from the minimum to the maximum of each predictor.
  n_grid <- 40
  axis_grid <- function(values) {
    seq(min(values), max(values), length.out = n_grid)
  }
  x_axis <- axis_grid(x)
  y_axis <- axis_grid(y)

  # Predicted height at every grid node, reshaped so that rows follow
  # `x_axis` and columns follow `y_axis`; this is the regression plane.
  plane_nodes <- expand.grid(x = x_axis, y = y_axis)
  plane_z <- matrix(
    predict(fit, newdata = plane_nodes),
    nrow = n_grid, ncol = n_grid
  )

  # Surface description; `fit` holds the fitted value of every observation
  # and is used for the drop lines from the points to the surface.
  plane <- list(
    x = x_axis, y = y_axis, z = plane_z,
    facets = TRUE, fit = predict(fit),
    col = ramp.col(
      col = c("dodgerblue3", "seagreen2"), n = 300, alpha = 0.9
    ),
    border = "black"
  )

  # Observations as red dots, drawn together with the plane.
  scatter3D(x, y, z,
    pch = 19, cex = 1, colvar = NULL, col = "red", bty = "b",
    surf = plane, ...
  )
}
# Single panel with almost no margins for the 3-D view of
# Ozone ~ Wind + Temp.
par(mfrow = c(1, 1), mar = c(0, 2, 0, 0))
multiple_linear_plot_3d(
  airquality$Ozone, airquality$Wind, airquality$Temp,
  xlab = "Wind", ylab = "Temp", zlab = "Ozone",
  theta = -60, phi = 10
)

# Quadratic models ----
# Ozone against a degree-2 polynomial of each predictor in turn.
summary(lm(Ozone ~ poly(Solar.R, 2), data = airquality))
summary(lm(Ozone ~ poly(Wind, 2), data = airquality))
summary(lm(Ozone ~ poly(Temp, 2), data = airquality))
#' Scatter plot with a fitted quadratic regression curve
#'
#' Plots `response` against `predictor` and overlays, in red, the fitted
#' values of `response ~ poly(predictor, 2)` joined in increasing order of
#' the predictor.
#'
#' @param response Numeric vector drawn on the y axis (the model response).
#' @param predictor Numeric vector drawn on the x axis; must have the same
#'   length as `response`.
#' @param dataset Accepted for backward compatibility only. It is not used
#'   in the fit: handing it to `lm()` would let a column that happens to be
#'   called `response` or `predictor` silently replace the supplied vectors.
#' @param ... Further arguments forwarded to `plot()` (e.g. `xlab`, `ylab`).
#' @return The fitted `lm` object, invisibly.
quadratic_plot <- function(response, predictor, dataset, ...) {
  # data.frame() would silently recycle a shorter vector, so check first.
  stopifnot(length(response) == length(predictor))

  # Fit only on pairs where both values are present. This keeps the fitted
  # values the same length as the x values they are drawn against (lm()
  # would otherwise drop rows with a missing response) and avoids poly()
  # failing on a missing predictor.
  observed <- complete.cases(response, predictor)
  fit_data <- data.frame(
    response = response[observed],
    predictor = predictor[observed]
  )
  lm_fit <- lm(response ~ poly(predictor, 2), data = fit_data)

  plot(predictor, response, ...)

  # Join the fitted values left to right; otherwise the segments would
  # follow the row order of the data and zig-zag across the panel.
  prediction <- predict(lm_fit)
  ix <- order(fit_data$predictor)
  lines(fit_data$predictor[ix], prediction[ix], col = "red", lwd = 2)

  invisible(lm_fit)
}
# Quadratic fit of Ozone against each predictor, as three panels in a
# single row with enlarged axis labels.
par(mfrow = c(1, 3), mar = c(5, 5, 1, 1), cex.lab = 2)
quadratic_plot(
  airquality$Ozone, airquality$Solar.R, airquality,
  xlab = "Solar.R", ylab = "Ozone"
)
quadratic_plot(
  airquality$Ozone, airquality$Wind, airquality,
  xlab = "Wind", ylab = "Ozone"
)
quadratic_plot(
  airquality$Ozone, airquality$Temp, airquality,
  xlab = "Temp", ylab = "Ozone"
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment