Last active
December 6, 2024 16:04
-
-
Save gjkerns/6a7e866a9da72238779912c5c1a4f6ac to your computer and use it in GitHub Desktop.
Graphing the Bivariate Normal Distribution
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# This is a Shiny web application. You can run the application by clicking | |
# the 'Run App' button above (on RStudio). | |
# | |
# Find out more about building applications with Shiny here: | |
# | |
# https://shiny.posit.co/ | |
# | |
# Install dependencies if needed: | |
# install.packages(c("shiny", "bslib", "ellipse", "scales")) | |
library(shiny) | |
library(bslib) | |
# Define UI for application that draws the plot | |
ui <- fluidPage( | |
# Application title | |
titlePanel("Graphing the Bivariate Normal Distribution"), | |
# Sidebar with a slider input for normal parameters | |
# set defaults to approx match Galton summary statistics | |
sidebarLayout( | |
sidebarPanel( | |
sliderInput("xmean", | |
"Mean of X:", | |
min = 10, | |
max = 100, | |
value = 68.25), | |
sliderInput("ymean", | |
"Mean of Y:", | |
min = 10, | |
max = 100, | |
value = 68.25), | |
sliderInput("xstdev", | |
"Standard Deviation of X:", | |
min = 0.5, | |
max = 5, | |
value = 1.8), | |
sliderInput("ystdev", | |
"Standard Deviation of Y:", | |
min = 0.5, | |
max = 5, | |
value = 2.5), | |
sliderInput("rho", | |
"Correlation of X and Y:", | |
min = -1, | |
max = 1, | |
step = 0.05, | |
value = 0.45), | |
# checkboxes for data and regression line | |
checkboxInput("showyex", | |
"Show y = x?", | |
value = TRUE), | |
checkboxInput("showdata", | |
"Show data?", | |
value = FALSE), | |
checkboxInput("showreg", | |
"Regression line?", | |
value = FALSE) | |
), | |
# Show a plot of the generated distribution | |
mainPanel( | |
plotOutput("distPlot"), | |
card( | |
card_body( | |
markdown("Use the sliders to explore the [Bivariate Normal Distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Bivariate_case). | |
The vertical and horizontal *grey lines* mark the mean | |
of *X* and *Y*, respectively. The dashed *black line* at | |
45 degrees represents the line *y* = *x*. The three | |
ellipses correspond to 68, 95, and 99.7% level contours | |
of the bivariate PDF with *blue segments* denoting the | |
major and minor axes. Select *Show data?* to plot data | |
randomly generated according to the distribution and | |
select *Regression line?* to display the best linear | |
unbiased prediction of *Y* given a value of *X*."), | |
markdown("The default settings of the parameters were chosen to coincide | |
with [Sir Francis Galton](https://en.wikipedia.org/wiki/Francis_Galton)'s | |
analysis of height data in | |
[*Regression towards Mediocrity in Hereditary Stature*](http://www.stat.ucla.edu/~nchristo/statistics100C/history_regression.pdf) (1886). | |
Like Galton, the sample size here is *n* = 928, though | |
note that the data are randomly generated and do not exactly | |
match [Galton's historical data set](https://friendly.github.io/HistData/reference/Galton.html). Check out [regression toward the mean](https://en.wikipedia.org/wiki/Regression_toward_the_mean) for more details."), | |
markdown("Source code is available [here](https://gist.github.com/gjkerns/6a7e866a9da72238779912c5c1a4f6ac).") | |
) | |
) | |
), | |
) | |
) | |
# Define server logic required to draw the plot | |
server <- function(input, output) { | |
output$distPlot <- renderPlot({ | |
# generate bivariate normal data | |
set.seed(42) | |
library(MASS) | |
S <- matrix(c((input$xstdev)^2, input$rho*input$xstdev*input$ystdev, | |
input$rho*input$xstdev*input$ystdev,(input$ystdev)^2), nrow = 2) | |
x <- mvrnorm(n = 928, mu = c(input$xmean, input$ymean), Sigma = S) | |
# set up blank plot | |
plot(x, type="n", xlab = "X", ylab = "Y", asp = 1, | |
main = "Contour Plot of the Bivariate Normal PDF") | |
# plot the means for X and Y | |
abline(h = input$ymean, col = "grey") | |
abline(v = input$xmean, col = "grey") | |
# plot the line y = x | |
if (input$showyex){ | |
abline(a = 0, b = 1, lty = 5, lwd = 2) | |
} | |
# draw 3 ellipses with the specified parameters | |
library(ellipse) | |
lines(ellipse(input$rho, scale = c(input$xstdev,input$ystdev), | |
centre = c(input$xmean, input$ymean), level = 0.68), col = "darkgrey") | |
lines(ellipse(input$rho, scale = c(input$xstdev,input$ystdev), | |
centre = c(input$xmean, input$ymean), level = 0.95), col = "darkgrey") | |
lines(ellipse(input$rho, scale = c(input$xstdev,input$ystdev), | |
centre = c(input$xmean, input$ymean), level = 0.997), col = "darkgrey") | |
# major and minor axes | |
tmp <- eigen(S) | |
m <- c(0, tmp$vectors[1,1]*sqrt(tmp$values[1] * qchisq(0.997,2)), | |
0, tmp$vectors[2,1]*sqrt(tmp$values[1] * qchisq(0.997,2))) + | |
c(input$xmean, input$xmean, input$ymean, input$ymean) | |
lines(matrix(m, nrow = 2), col = "blue") | |
m <- c(0, tmp$vectors[1,2]*sqrt(tmp$values[2] * qchisq(0.997,2)), | |
0, tmp$vectors[2,2]*sqrt(tmp$values[2] * qchisq(0.997,2))) + | |
c(input$xmean, input$xmean, input$ymean, input$ymean) | |
lines(matrix(m, nrow = 2), col = "blue") | |
# plot the data | |
if (input$showdata){ | |
library(scales) | |
points(x, col=alpha("blue", 0.5)) | |
} | |
# plot the regression line | |
if (input$showreg){ | |
b <- input$rho * input$ystdev / input$xstdev | |
a <- input$ymean - b * input$xmean | |
library(scales) | |
abline(a = a, b = b, lty=3, lwd=3, col = alpha("red",0.8)) | |
} | |
}) | |
} | |
# Run the application | |
shinyApp(ui = ui, server = server) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment