Last active
October 19, 2018 12:27
-
-
Save diamonaj/cee915abc59a5b8cc9a64fb2ed50d0f4 to your computer and use it in GitHub Desktop.
Correlated vs. Uncorrelated
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# EXERCISE TO BUILD INTUITION FOR CORRELATED VS. UNCORRELATED DATA
# PLEASE FOCUS ON UNDERSTANDING THE BELOW
### DO NOT JUST EXECUTE ALL THE CODE IN ONE BATCH--RUN IT LINE BY LINE...

### Simulation of analysis on correlated data

# Number of simulated "original" data sets (one per column of the storage).
nsims <- 10000
# Number of bootstrap resamples drawn from each original data set (one per row).
n_boot_samples <- 100
# Number of observations in each original data set and in each resample.
n_obs <- 100

# The storage is sized from the constants above, so changing `nsims` (or the
# other two constants) can never leave the matrix and the loops out of step.
storage_correlated_df <- matrix(NA_real_, nrow = n_boot_samples, ncol = nsims)

# GO THROUGH THE BELOW, LINE BY LINE, & TRY TO UNDERSTAND EVERYTHING
# FOCUS ON WHATEVER YOU FIND CONFUSING

# Seed once, immediately before the first random draw. Seeding earlier as well
# would be redundant: a later set.seed() call resets the generator anyway.
set.seed(2321)

for (j in seq_len(nsims)) {
  # One "original" data set per simulation: n_obs standard-normal draws.
  simulated_original_data <- rnorm(n_obs)

  for (i in seq_len(n_boot_samples)) {
    # Resample the SAME original data with replacement. Every resample stored
    # in column j comes from this one data set, which is what makes the
    # values within a column correlated with each other.
    a_bootstrapped_sample <- sample(
      simulated_original_data, n_obs, replace = TRUE
    )
    summary_statistic_of_boot_sample <- mean(a_bootstrapped_sample)
    storage_correlated_df[i, j] <- summary_statistic_of_boot_sample
  }
}
## ANALYSIS WITH THE RESULTS...

# Average the stored bootstrap means within each simulation, giving one value
# per column. colMeans() is the vectorized form of apply(x, 2, mean).
correlated_means <- colMeans(storage_correlated_df)
print(mean(correlated_means))

# Variance of those per-simulation averages across all simulations.
var_of_correlated_means <- var(correlated_means)
print(var_of_correlated_means)
### NOW, what if our data is not correlated?

# Number of independent samples averaged within each simulation (one per row),
# and the number of observations drawn for each of those samples.
n_indep_samples <- 100
n_obs_per_sample <- 100

# One column per simulation. The column count is taken from `nsims` so the
# matrix always matches the outer loop below.
storage.NOTcorr.df <- matrix(NA_real_, nrow = n_indep_samples, ncol = nsims)

# AGAIN, GO THROUGH THE BELOW, LINE BY LINE, & TRY TO UNDERSTAND EVERYTHING
# FOCUS ON WHATEVER YOU FIND CONFUSING
set.seed(2353)

for (j in seq_len(nsims)) {
  for (i in seq_len(n_indep_samples)) {
    # Each cell gets its own fresh rnorm() draw; nothing is shared between the
    # rows of a column, unlike the bootstrap resamples in the correlated case.
    an_independent_sample_of_the_data <- rnorm(n_obs_per_sample)
    storage.NOTcorr.df[i, j] <- mean(an_independent_sample_of_the_data)
  }
}
## ANALYSIS WITH THE RESULTS...

# Average the stored sample means within each simulation, giving one value per
# column. colMeans() is the vectorized form of apply(x, 2, mean).
NOTcorr_means <- colMeans(storage.NOTcorr.df)
print(mean(NOTcorr_means))

# Variance of those per-simulation averages across all simulations.
var_of_NOTcorr_means <- var(NOTcorr_means)
print(var_of_NOTcorr_means)
## SYNTHESIZING THE RESULTS (is the var of uncorrelated smaller, and if so, by how much?)

# Difference between the two variances, scaled to a percentage of the
# correlated-case variance. A positive value means the uncorrelated variance
# is the smaller of the two.
percent_var_redux <- (
  100 * (var_of_correlated_means - var_of_NOTcorr_means)
) / var_of_correlated_means
print(percent_var_redux)
# LET'S VISUALIZE!!!

# Stack four panels in a single column. par() returns the settings it
# replaced; they are kept so the layout can be put back after plotting.
old_par <- par(mfrow = c(4, 1))

hist(correlated_means, main = "Distribution of Correlated Means")

## WHEN YOU RUN THE HIST BELOW, COMPARE THE SCALE OF THE X-AXIS TO THE HIST ABOVE
hist(NOTcorr_means, main = "Distribution of NOT Correlated Means")

# HERE IS THE SAME INFO IN ONE FIGURE, WITH LINES SHOWING DENSITY...
# The axes are set by the first density plotted (NOT correlated, in blue);
# the correlated density (red) is then overlaid on those axes.
plot(
  density(NOTcorr_means), col = "blue", lwd = 2,
  main = "Density: NOT Correlated (blue) vs. Correlated (red)"
)
lines(density(correlated_means), col = "red", lwd = 2)

# IF YOU HAVE TROUBLE INTERPRETING THIS, HERE IT IS ON A DIFFERENT SCALE...
# Same two curves, but now the axes are set by the correlated density (red).
plot(
  density(correlated_means), col = "red", lwd = 2,
  main = "Density: Correlated (red) vs. NOT Correlated (blue)"
)
lines(density(NOTcorr_means), col = "blue", lwd = 2)

# Put the graphics layout back the way it was before this section.
par(old_par)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment