Created
November 30, 2018 11:24
-
-
Save korkridake/3fd8bad35d25746bf86949b0a1c33b63 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| if (!require(car)){install.packages(car, dep=T)} | |
| library(car) | |
| ############### | |
| # # | |
| # Exercise 1 # | |
| # # | |
| ############### | |
| # load data file | |
| mussel<-read.csv(file.choose()) | |
| str(mussel) | |
| ## 'data.frame': 25 obs. of 3 variables: | |
| ## $ AREA : num 516 469 462 939 1357 ... | |
| ## $ SPECIES: int 3 7 6 8 10 9 10 11 16 9 ... | |
| ## $ INDIV : int 18 60 57 100 48 118 148 214 225 283 ... | |
| head(mussel) | |
| ## AREA SPECIES INDIV | |
| ## 1 516.00 3 18 | |
| ## 2 469.06 7 60 | |
| ## 3 462.25 6 57 | |
| ## 4 938.60 8 100 | |
| ## 5 1357.15 10 48 | |
| ## 6 1773.66 9 118 | |
| scatterplot(SPECIES ~ AREA, data = mussel) | |
| # data are not normally distributed (especially AREA) | |
| # The species richness peaked in the middle (not a good data) | |
| ############### | |
| # # | |
| # Exercise 2 # | |
| # # | |
| ############### | |
| #fit a linear model | |
| mussel.lm <- lm(SPECIES ~ AREA, data = mussel) | |
| summary(mussel.lm) | |
| ## | |
| ## Call: | |
| ## lm(formula = SPECIES ~ AREA, data = mussel) | |
| ## | |
| ## Residuals: | |
| ## Min 1Q Median 3Q Max | |
| ## -7.1964 -2.7521 -0.7509 1.2094 7.2148 | |
| ## | |
| ## Coefficients: | |
| ## Estimate Std. Error t value Pr(>|t|) | |
| ## (Intercept) 9.856e+00 1.044e+00 9.441 2.24e-09 *** | |
| ## AREA 6.593e-04 9.283e-05 7.102 3.10e-07 *** | |
| ## --- | |
| ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 | |
| ## | |
| ## Residual standard error: 3.759 on 23 degrees of freedom | |
| ## Multiple R-squared: 0.6868, Adjusted R-squared: 0.6732 | |
| ## F-statistic: 50.44 on 1 and 23 DF, p-value: 3.102e-07 | |
| #plot the diagnostics of R inbuilt model validation plot defaults | |
| op <- par(mfrow = c(2, 2)) | |
| plot(mussel.lm) | |
| par(op) # turns graphics device back to default of one plot per page! | |
| # FINAL VALIDATION TASK - residuals against explanatory variable | |
| plot(mussel.lm$resid ~ mussel$AREA, | |
| xlab = "Mussel bed Area", | |
| ylab = "Residuals") | |
| # indicates a few large values and a slight wedge due to numerous small patches | |
| ############### | |
| # # | |
| # Exercise 3 # | |
| # # | |
| ############### | |
| #fit a second order polynomial to the area variable | |
| mussel.lm1 <- lm(SPECIES ~ AREA + I(AREA^2), data = mussel) | |
| # I(AREA^2) fits the 2nd order polynomial | |
| # results | |
| summary(mussel.lm1) | |
| ## | |
| ## Call: | |
| ## lm(formula = SPECIES ~ AREA + I(AREA^2), data = mussel) | |
| ## | |
| ## Residuals: | |
| ## Min 1Q Median 3Q Max | |
| ## -7.7894 -1.0485 0.4301 1.3790 5.0666 | |
| ## | |
| ## Coefficients: | |
| ## Estimate Std. Error t value Pr(>|t|) | |
| ## (Intercept) 6.593e+00 1.156e+00 5.701 9.82e-06 *** | |
| ## AREA 1.745e-03 2.826e-04 6.174 3.26e-06 *** | |
| ## I(AREA^2) -4.118e-08 1.036e-08 -3.974 0.000643 *** | |
| ## --- | |
| ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 | |
| ## | |
| ## Residual standard error: 2.933 on 22 degrees of freedom | |
| ## Multiple R-squared: 0.8177, Adjusted R-squared: 0.8011 | |
| ## F-statistic: 49.34 on 2 and 22 DF, p-value: 7.393e-09 | |
| # validate the model | |
| op <- par(mfrow = c(2, 2)) | |
| plot(mussel.lm1) | |
| par(op) | |
| # FINAL VALIDATION TASK - residuals against explanatory variable | |
| plot(mussel.lm1$resid ~ mussel$AREA, | |
| xlab = "Mussel bed Area", | |
| ylab = "Residuals") | |
| # Ntry to use a third order polynomial | |
| mussel.lm2 <- lm(SPECIES ~ AREA + I(AREA^2) + I(AREA^3), data = mussel) | |
| # I(AREA^2) fits the 2nd order polynomial | |
| # I(AREA^3) fits the 3nd order polynomial | |
| # check the results.... | |
| summary(mussel.lm2) | |
| ## | |
| ## Call: | |
| ## lm(formula = SPECIES ~ AREA + I(AREA^2) + I(AREA^3), data = mussel) | |
| ## | |
| ## Residuals: | |
| ## Min 1Q Median 3Q Max | |
| ## -6.0646 -0.7522 0.5169 1.5261 4.0841 | |
| ## | |
| ## Coefficients: | |
| ## Estimate Std. Error t value Pr(>|t|) | |
| ## (Intercept) 5.025e+00 1.338e+00 3.755 0.00117 ** | |
| ## AREA 2.766e-03 5.750e-04 4.811 9.38e-05 *** | |
| ## I(AREA^2) -1.600e-07 6.016e-08 -2.660 0.01466 * | |
| ## I(AREA^3) 3.177e-12 1.587e-12 2.002 0.05843 . | |
| ## --- | |
| ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 | |
| ## | |
| ## Residual standard error: 2.751 on 21 degrees of freedom | |
| ## Multiple R-squared: 0.8469, Adjusted R-squared: 0.825 | |
| ## F-statistic: 38.72 on 3 and 21 DF, p-value: 9.719e-09 | |
| # validate the model | |
| op <- par(mfrow = c(2, 2)) | |
| plot(mussel.lm2) | |
| par(op) | |
| # FINAL VALIDATION TASK - residuals against explanatory variable | |
| plot(mussel.lm2$resid ~ mussel$AREA, | |
| xlab = "Mussel bed Area", | |
| ylab = "Residuals") | |
| # same result using the following code | |
| mussel.lm3 <- lm(SPECIES ~ poly(AREA, 3), data = mussel) | |
| # check the results.... | |
| summary(mussel.lm3) | |
| ## | |
| ## Call: | |
| ## lm(formula = SPECIES ~ poly(AREA, 3), data = mussel) | |
| ## | |
| ## Residuals: | |
| ## Min 1Q Median 3Q Max | |
| ## -6.0646 -0.7522 0.5169 1.5261 4.0841 | |
| ## | |
| ## Coefficients: | |
| ## Estimate Std. Error t value Pr(>|t|) | |
| ## (Intercept) 15.0000 0.5502 27.264 < 2e-16 *** | |
| ## poly(AREA, 3)1 26.7009 2.7509 9.706 3.26e-09 *** | |
| ## poly(AREA, 3)2 -11.6546 2.7509 -4.237 0.000369 *** | |
| ## poly(AREA, 3)3 5.5060 2.7509 2.002 0.058426 . | |
| ## --- | |
| ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 | |
| ## | |
| ## Residual standard error: 2.751 on 21 degrees of freedom | |
| ## Multiple R-squared: 0.8469, Adjusted R-squared: 0.825 | |
| ## F-statistic: 38.72 on 3 and 21 DF, p-value: 9.719e-09 | |
| ############### | |
| # # | |
| # Exercise 4 # | |
| # # | |
| ############### | |
| # We now have three models which one is the best? | |
| # M0 is the poorest | |
| # Distinct M1 and M2 using ANOVA and AIC to assess which model is most parimonious | |
| anova(mussel.lm, mussel.lm1, mussel.lm2) | |
| ## Analysis of Variance Table | |
| ## | |
| ## Model 1: SPECIES ~ AREA | |
| ## Model 2: SPECIES ~ AREA + I(AREA^2) | |
| ## Model 3: SPECIES ~ AREA + I(AREA^2) + I(AREA^3) | |
| ## Res.Df RSS Df Sum of Sq F Pr(>F) | |
| ## 1 23 325.06 | |
| ## 2 22 189.23 1 135.830 17.949 0.000369 *** | |
| ## 3 21 158.92 1 30.316 4.006 0.058426 . | |
| ## --- | |
| ## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 | |
| AIC(mussel.lm, mussel.lm1, mussel.lm2) | |
| ## df AIC | |
| ## mussel.lm 3 141.0756 | |
| ## mussel.lm1 4 129.5497 | |
| ## mussel.lm2 5 127.1848 | |
| # ANOVA and AIC indicate mussel.lm2 is the 'best' model | |
| # Finalise the model and plot it.... | |
| # plot base graph (with CIs) | |
| plot(SPECIES ~ AREA, data = mussel, | |
| pch = 16, | |
| xlab = (expression(paste("Mussel bed area ", "(", m^2, ")"))), | |
| ylab = "Species Richness") | |
| # predict across the data | |
| x <- seq(min(mussel$AREA), max(mussel$AREA), l=1000) # 1000 steps.... | |
| y <- predict(mussel.lm2, data.frame(AREA = x), interval = "c") | |
| matlines(x, y, lty = 1, col = 1) | |
| # plot base graph (without CIs) | |
| plot(SPECIES ~ AREA, data = mussel, | |
| pch = 16, | |
| xlab = (expression(paste("Mussel bed area ", "(", m^2, ")"))), | |
| ylab = "Species Richness") | |
| # predict across the data | |
| x <- seq(min(mussel$AREA), max(mussel$AREA), l=1000) # 1000 steps.... | |
| points(x, predict(mussel.lm2, data.frame(AREA = x)), type = "l") |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment