diamonaj · October 19, 2018 12:27
diff --git a/gistfile1.txt b/gistfile1.txt
 # EXERCISE TO BUILD INTUITION FOR CORRELATED VS. UNCORRELATED DATA
 # PLEASE FOCUS ON UNDERSTANDING THE BELOW
 ### DO NOT JUST EXECUTE ALL THE CODE IN ONE BATCH--RUN IT LINE BY LINE...

 ### Simulation of analysis on correlated data

 # Number of simulated "original" datasets. The storage matrix and the outer
 # loop are both sized from this value, so it can be changed in one place.
 nsims <- 10000

 # One column per simulated dataset, one row per bootstrap replicate.
 # NA_real_ keeps the matrix numeric from the start (plain NA is logical and
 # would be coerced on the first assignment).
 storage_correlated_df <- matrix(NA_real_, nrow = 100, ncol = nsims)

 # GO THROUGH THE BELOW, LINE BY LINE, & TRY TO UNDERSTAND EVERYTHING
 # FOCUS ON WHATEVER YOU FIND CONFUSING

 # Seed once, immediately before the first random draw, for reproducibility.
 set.seed(2321)
 for (j in seq_len(nsims)) {

   # A fresh dataset of 100 standard-normal draws for simulation j.
   simulated_original_data <- rnorm(100)

   for (i in seq_len(nrow(storage_correlated_df))) {

     # Resample the SAME dataset with replacement: every bootstrap mean in
     # column j is built from the same 100 underlying values.
     a_bootstrapped_sample <- sample(
       simulated_original_data, 100, replace = TRUE
     )
     summary_statistic_of_boot_sample <- mean(a_bootstrapped_sample)
     storage_correlated_df[i, j] <- summary_statistic_of_boot_sample
   }

 }

 ## ANALYSIS WITH THE RESULTS...
 # Collapse each simulation (one column) to the average of its bootstrap means.
 correlated_means <- vapply(
   seq_len(ncol(storage_correlated_df)),
   function(sim) mean(storage_correlated_df[, sim]),
   numeric(1)
 )
 # Grand mean over all simulations.
 print(mean(correlated_means))

 # Variance of the per-simulation averages across simulations.
 var_of_correlated_means <- var(correlated_means)
 print(var_of_correlated_means)

 ### NOW, what if our data is not correlated?

 # One column per simulation, one row per independently drawn dataset.
 # The column count follows nsims so the matrix always matches the outer loop;
 # NA_real_ keeps the matrix numeric from the start.
 storage.NOTcorr.df <- matrix(NA_real_, nrow = 100, ncol = nsims)

 # AGAIN, GO THROUGH THE BELOW, LINE BY LINE, & TRY TO UNDERSTAND EVERYTHING
 # FOCUS ON WHATEVER YOU FIND CONFUSING

 set.seed(2353)
 for (j in seq_len(nsims)) {
   for (i in seq_len(nrow(storage.NOTcorr.df))) {
     # Every cell gets its OWN fresh draw of 100 standard-normal values, so
     # the means within a column do not share any underlying data.
     an_independent_sample_of_the_data <- rnorm(100)
     storage.NOTcorr.df[i, j] <- mean(an_independent_sample_of_the_data)
   }

 }

 ## ANALYSIS WITH THE RESULTS...
 # Collapse each simulation (one column) to the average of its 100 means.
 NOTcorr_means <- vapply(
   seq_len(ncol(storage.NOTcorr.df)),
   function(sim) mean(storage.NOTcorr.df[, sim]),
   numeric(1)
 )
 print(mean(NOTcorr_means))
 var_of_NOTcorr_means <- var(NOTcorr_means)
 print(var_of_NOTcorr_means)

 ## SYNTHESIZING THE RESULTS (is the var of uncorrelated smaller, and if so, by how much?)
 # Reduction in variance, expressed as a percentage of the correlated variance.
 percent_var_redux <-
   100 * (var_of_correlated_means - var_of_NOTcorr_means) /
   var_of_correlated_means
 print(percent_var_redux)

 # LET'S VISUALIZE!!!
 # Stack the four figures in a single column. par() returns the previous
 # settings, which are kept so the layout can be put back at the end.
 old_par <- par(mfrow = c(4, 1))
 hist(correlated_means, main = "Distribution of Correlated Means")
 ## WHEN YOU RUN THE HIST BELOW, COMPARE THE SCALE OF THE X-AXIS TO THE HIST ABOVE
 hist(NOTcorr_means, main = "Distribution of NOT Correlated Means")

 # HERE IS THE SAME INFO IN ONE FIGURE, WITH LINES SHOWING DENSITY...
 # The axes come from the first curve drawn (the NOT correlated means).
 plot(density(NOTcorr_means), col = "blue", lwd = 2,
      main = "Densities of Means (axes set by NOT Correlated Means)")
 lines(density(correlated_means), col = "red", lwd = 2)
 legend("topright", legend = c("NOT correlated", "Correlated"),
        col = c("blue", "red"), lwd = 2, bty = "n")

 # IF YOU HAVE TROUBLE INTERPRETING THIS, HERE IT IS ON A DIFFERENT SCALE...
 # Same two curves, but the axes now come from the correlated means.
 plot(density(correlated_means), col = "red", lwd = 2,
      main = "Densities of Means (axes set by Correlated Means)")
 lines(density(NOTcorr_means), col = "blue", lwd = 2)
 legend("topright", legend = c("Correlated", "NOT correlated"),
        col = c("red", "blue"), lwd = 2, bty = "n")

 # Put the graphics layout back the way it was before this section.
 par(old_par)
	# EXERCISE TO BUILD INTUITION FOR CORRELATED VS. UNCORRELATED DATA
	# PLEASE FOCUS ON UNDERSTANDING THE BELOW
	### DO NOT JUST EXECUTE ALL THE CODE IN ONE BATCH--RUN IT LINE BY LINE...

	### Simulation of analysis on correlated data

	# Number of simulated "original" datasets. The storage matrix and the outer
	# loop are both sized from this value, so it can be changed in one place.
	nsims <- 10000

	# One column per simulated dataset, one row per bootstrap replicate.
	# NA_real_ keeps the matrix numeric from the start (plain NA is logical and
	# would be coerced on the first assignment).
	storage_correlated_df <- matrix(NA_real_, nrow = 100, ncol = nsims)

	# GO THROUGH THE BELOW, LINE BY LINE, & TRY TO UNDERSTAND EVERYTHING
	# FOCUS ON WHATEVER YOU FIND CONFUSING

	# Seed once, immediately before the first random draw, for reproducibility.
	set.seed(2321)
	for (j in seq_len(nsims)) {

	  # A fresh dataset of 100 standard-normal draws for simulation j.
	  simulated_original_data <- rnorm(100)

	  for (i in seq_len(nrow(storage_correlated_df))) {

	    # Resample the SAME dataset with replacement: every bootstrap mean in
	    # column j is built from the same 100 underlying values.
	    a_bootstrapped_sample <- sample(
	      simulated_original_data, 100, replace = TRUE
	    )
	    summary_statistic_of_boot_sample <- mean(a_bootstrapped_sample)
	    storage_correlated_df[i, j] <- summary_statistic_of_boot_sample
	  }

	}

	## ANALYSIS WITH THE RESULTS...
	# Collapse each simulation (one column) to the average of its bootstrap means.
	correlated_means <- vapply(
	  seq_len(ncol(storage_correlated_df)),
	  function(sim) mean(storage_correlated_df[, sim]),
	  numeric(1)
	)
	# Grand mean over all simulations.
	print(mean(correlated_means))

	# Variance of the per-simulation averages across simulations.
	var_of_correlated_means <- var(correlated_means)
	print(var_of_correlated_means)

	### NOW, what if our data is not correlated?

	# One column per simulation, one row per independently drawn dataset.
	# The column count follows nsims so the matrix always matches the outer loop;
	# NA_real_ keeps the matrix numeric from the start.
	storage.NOTcorr.df <- matrix(NA_real_, nrow = 100, ncol = nsims)

	# AGAIN, GO THROUGH THE BELOW, LINE BY LINE, & TRY TO UNDERSTAND EVERYTHING
	# FOCUS ON WHATEVER YOU FIND CONFUSING

	set.seed(2353)
	for (j in seq_len(nsims)) {
	  for (i in seq_len(nrow(storage.NOTcorr.df))) {
	    # Every cell gets its OWN fresh draw of 100 standard-normal values, so
	    # the means within a column do not share any underlying data.
	    an_independent_sample_of_the_data <- rnorm(100)
	    storage.NOTcorr.df[i, j] <- mean(an_independent_sample_of_the_data)
	  }

	}

	## ANALYSIS WITH THE RESULTS...
	# Collapse each simulation (one column) to the average of its 100 means.
	NOTcorr_means <- vapply(
	  seq_len(ncol(storage.NOTcorr.df)),
	  function(sim) mean(storage.NOTcorr.df[, sim]),
	  numeric(1)
	)
	print(mean(NOTcorr_means))
	var_of_NOTcorr_means <- var(NOTcorr_means)
	print(var_of_NOTcorr_means)

	## SYNTHESIZING THE RESULTS (is the var of uncorrelated smaller, and if so, by how much?)
	# Reduction in variance, expressed as a percentage of the correlated variance.
	percent_var_redux <-
	  100 * (var_of_correlated_means - var_of_NOTcorr_means) /
	  var_of_correlated_means
	print(percent_var_redux)

	# LET'S VISUALIZE!!!
	# Stack the four figures in a single column. par() returns the previous
	# settings, which are kept so the layout can be put back at the end.
	old_par <- par(mfrow = c(4, 1))
	hist(correlated_means, main = "Distribution of Correlated Means")
	## WHEN YOU RUN THE HIST BELOW, COMPARE THE SCALE OF THE X-AXIS TO THE HIST ABOVE
	hist(NOTcorr_means, main = "Distribution of NOT Correlated Means")

	# HERE IS THE SAME INFO IN ONE FIGURE, WITH LINES SHOWING DENSITY...
	# The axes come from the first curve drawn (the NOT correlated means).
	plot(density(NOTcorr_means), col = "blue", lwd = 2,
	     main = "Densities of Means (axes set by NOT Correlated Means)")
	lines(density(correlated_means), col = "red", lwd = 2)
	legend("topright", legend = c("NOT correlated", "Correlated"),
	       col = c("blue", "red"), lwd = 2, bty = "n")

	# IF YOU HAVE TROUBLE INTERPRETING THIS, HERE IT IS ON A DIFFERENT SCALE...
	# Same two curves, but the axes now come from the correlated means.
	plot(density(correlated_means), col = "red", lwd = 2,
	     main = "Densities of Means (axes set by Correlated Means)")
	lines(density(NOTcorr_means), col = "blue", lwd = 2)
	legend("topright", legend = c("Correlated", "NOT correlated"),
	       col = c("red", "blue"), lwd = 2, bty = "n")

	# Put the graphics layout back the way it was before this section.
	par(old_par)
No results found