Last active
September 7, 2019 22:06
-
-
Save grosscol/263053585fc2a5cac50b82511fa5bf47 to your computer and use it in GitHub Desktop.
Exploring rnorm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# regarding: https://www.reddit.com/r/Rlanguage/comments/d0ytw7/issues_of_bias_with_rnorm/
#
# Draws two families of normal samples around known target means, one with
# varying sample size and one with varying standard deviation, then compares
# each sample mean with its target: first graphically, then with one-sample
# t-tests.

# Number of simulated sets, plus the values held fixed in each family
n_sets <- 100
fixed_sdev <- 10
fixed_sample_size <- 10000

# Set the RNG seed so the results are reproducible
set.seed(12345)

# Generate target means between 0 and 100 from the uniform distribution
generated_means <- runif(n_sets, 0, 100)
# Generate standard deviation values, also from the uniform distribution
generated_sdev <- runif(n_sets, 1, 20)
# Generate the number of observations to draw for each set. rnorm() truncates
# a fractional n, so floor() here keeps the stored (and plotted) sizes equal
# to the sizes actually drawn without changing the random stream.
generated_sample_size <- floor(runif(n_sets, 1000, 30000))

# Draw the samples around each target mean:
#   samples_vary_size: varying n, fixed standard deviation
#   samples_vary_sdev: fixed n, varying standard deviation
samples_vary_size <- mapply(
  FUN = rnorm,
  n = generated_sample_size,
  mean = generated_means,
  sd = fixed_sdev,
  SIMPLIFY = FALSE
)
samples_vary_sdev <- mapply(
  FUN = rnorm,
  n = fixed_sample_size,
  mean = generated_means,
  sd = generated_sdev,
  SIMPLIFY = FALSE
)

# Calculate the mean of each sample
vary_size_means <- vapply(samples_vary_size, mean, numeric(1))
vary_sdev_means <- vapply(samples_vary_sdev, mean, numeric(1))

# Calculate absolute differences between target means and sample means
size_differences <- abs(generated_means - vary_size_means)
sdev_differences <- abs(generated_means - vary_sdev_means)

# Plot differences between means as a function of what was varied
plot(
  generated_sdev,
  sdev_differences,
  ylab = "differences between generated and sampled mean"
)
plot(
  generated_sample_size,
  size_differences,
  ylab = "differences between generated and sampled mean"
)

# Return to usual (unseeded) RNG operation
set.seed(NULL)

# One-sample t-test of each sample against the target mean it was drawn around
vary_size_results <- mapply(
  FUN = t.test,
  x = samples_vary_size,
  mu = generated_means,
  SIMPLIFY = FALSE
)
vary_size_p_values <- vapply(
  vary_size_results,
  FUN = getElement,
  FUN.VALUE = numeric(1),
  name = "p.value"
)
vary_sdev_results <- mapply(
  FUN = t.test,
  x = samples_vary_sdev,
  mu = generated_means,
  SIMPLIFY = FALSE
)
vary_sdev_p_values <- vapply(
  vary_sdev_results,
  FUN = getElement,
  FUN.VALUE = numeric(1),
  name = "p.value"
)

# Approximately 5 of 100 positives are expected just by chance at alpha = 0.05
sum(vary_sdev_p_values < 0.05)
sum(vary_size_p_values < 0.05)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment