Skip to content

Instantly share code, notes, and snippets.

@diamonaj
Last active October 19, 2018 12:27
Show Gist options
  • Select an option

  • Save diamonaj/cee915abc59a5b8cc9a64fb2ed50d0f4 to your computer and use it in GitHub Desktop.

Select an option

Save diamonaj/cee915abc59a5b8cc9a64fb2ed50d0f4 to your computer and use it in GitHub Desktop.
Correlated vs. Uncorrelated
# EXERCISE TO BUILD INTUITION FOR CORRELATED VS. UNCORRELATED DATA
# PLEASE FOCUS ON UNDERSTANDING THE BELOW
### DO NOT JUST EXECUTE ALL THE CODE IN ONE BATCH--RUN IT LINE BY LINE...
### Simulation of analysis on correlated data
# For each of `nsims` simulated data sets we draw ONE original sample and then
# bootstrap it `n_boot` times. Every bootstrap mean stored in a given column is
# computed from resamples of the same original draw, which is what makes the
# means within a column correlated with one another.
nsims <- 10000 # number of simulated data sets (one column each)
n_obs <- 100   # observations per original sample and per bootstrap resample
n_boot <- 100  # bootstrap resamples per simulated data set (one row each)
# Size the storage from the constants above so that changing `nsims` can never
# leave the matrix out of step with the loop bounds. (Despite the "_df" in its
# name, this object is a numeric matrix, not a data frame.)
storage_correlated_df <- matrix(NA_real_, nrow = n_boot, ncol = nsims)
# GO THROUGH THE BELOW, LINE BY LINE, & TRY TO UNDERSTAND EVERYTHING
# FOCUS ON WHATEVER YOU FIND CONFUSING
# Seed once, immediately before the first random draw, for reproducibility.
set.seed(2321)
for (j in seq_len(nsims)) {
  # One fresh "original" data set per simulation...
  simulated_original_data <- rnorm(n_obs)
  for (i in seq_len(n_boot)) {
    # ...which is resampled WITH replacement; all n_boot resamples share it.
    a_bootstrapped_sample <- sample(
      simulated_original_data, n_obs,
      replace = TRUE
    )
    summary_statistic_of_boot_sample <- mean(a_bootstrapped_sample)
    storage_correlated_df[i, j] <- summary_statistic_of_boot_sample
  }
}
## ANALYSIS WITH THE RESULTS...
# Average the n_boot bootstrap means within each simulation (column-wise),
# giving one "mean of means" per simulated data set.
correlated_means <- apply(storage_correlated_df, 2, mean)
print(mean(correlated_means))
# Spread of those per-simulation averages across the nsims simulations.
var_of_correlated_means <- var(correlated_means)
print(var_of_correlated_means)
### NOW, what if our data is not correlated?
# Here every mean stored in a column comes from its OWN fresh draw from the
# normal distribution, so the means within a column share no underlying data.
n_indep_samples <- 100  # independent samples per simulation (one row each)
n_obs_per_sample <- 100 # observations in each independent sample
# Use `nsims` for the column count so the matrix always matches the loop below.
# (Despite the ".df" in its name, this object is a numeric matrix.)
storage.NOTcorr.df <- matrix(NA_real_, nrow = n_indep_samples, ncol = nsims)
# AGAIN, GO THROUGH THE BELOW, LINE BY LINE, & TRY TO UNDERSTAND EVERYTHING
# FOCUS ON WHATEVER YOU FIND CONFUSING
set.seed(2353)
for (j in seq_len(nsims)) {
  for (i in seq_len(n_indep_samples)) {
    # A brand-new sample every time: nothing is reused across rows.
    an_independent_sample_of_the_data <- rnorm(n_obs_per_sample)
    storage.NOTcorr.df[i, j] <- mean(an_independent_sample_of_the_data)
  }
}
## ANALYSIS WITH THE RESULTS...
# Average the independent sample means within each simulation (column-wise).
NOTcorr_means <- apply(storage.NOTcorr.df, 2, mean)
print(mean(NOTcorr_means))
# Spread of those per-simulation averages across the nsims simulations.
var_of_NOTcorr_means <- var(NOTcorr_means)
print(var_of_NOTcorr_means)
## SYNTHESIZING THE RESULTS
# Is the variance of the uncorrelated means smaller, and if so, by how much?
# Express the gap between the two variances as a percentage of the
# correlated-case variance.
variance_gap <- var_of_correlated_means - var_of_NOTcorr_means
percent_var_redux <- 100 * variance_gap / var_of_correlated_means
print(percent_var_redux)
# LET'S VISUALIZE!!!
# Stack four panels in a single column. par() returns the previous settings,
# which are restored at the bottom so later plots are not stuck in this layout.
old_par <- par(mfrow = c(4, 1))
hist(correlated_means, main = "Distribution of Correlated Means")
## WHEN YOU RUN THE HIST BELOW, COMPARE THE SCALE OF THE X-AXIS TO THE HIST ABOVE
hist(NOTcorr_means, main = "Distribution of NOT Correlated Means")
# HERE IS THE SAME INFO IN ONE FIGURE, WITH LINES SHOWING DENSITY...
# The axes are fixed by whichever curve is drawn first with plot(); here that
# is the NOT correlated density, and the correlated one is overlaid on top.
plot(
  density(NOTcorr_means),
  col = "blue", lwd = 2,
  main = "Densities (axes set by NOT correlated means)"
)
lines(density(correlated_means), col = "red", lwd = 2)
legend(
  "topright",
  legend = c("NOT correlated", "Correlated"),
  col = c("blue", "red"), lwd = 2, bty = "n"
)
# IF YOU HAVE TROUBLE INTERPRETING THIS, HERE IT IS ON A DIFFERENT SCALE...
# Same two curves, but now the correlated density is drawn first and so
# determines the axis ranges.
plot(
  density(correlated_means),
  col = "red", lwd = 2,
  main = "Densities (axes set by correlated means)"
)
lines(density(NOTcorr_means), col = "blue", lwd = 2)
legend(
  "topright",
  legend = c("Correlated", "NOT correlated"),
  col = c("red", "blue"), lwd = 2, bty = "n"
)
# Put the graphics device back the way we found it.
par(old_par)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment