Skip to content

Instantly share code, notes, and snippets.

@gjkerns
Last active December 6, 2024 16:04
Show Gist options
  • Save gjkerns/6a7e866a9da72238779912c5c1a4f6ac to your computer and use it in GitHub Desktop.
Save gjkerns/6a7e866a9da72238779912c5c1a4f6ac to your computer and use it in GitHub Desktop.
Graphing the Bivariate Normal Distribution
#
# This is a Shiny web application. You can run the application by clicking
# the 'Run App' button above (on RStudio).
#
# Find out more about building applications with Shiny here:
#
# https://shiny.posit.co/
#
# Install dependencies if needed:
# install.packages(c("shiny", "bslib", "ellipse", "scales"))
library(shiny)
library(bslib)
# Define UI for application that draws the plot
ui <- fluidPage(
# Application title
titlePanel("Graphing the Bivariate Normal Distribution"),
# Sidebar with a slider input for normal parameters
# set defaults to approx match Galton summary statistics
sidebarLayout(
sidebarPanel(
sliderInput("xmean",
"Mean of X:",
min = 10,
max = 100,
value = 68.25),
sliderInput("ymean",
"Mean of Y:",
min = 10,
max = 100,
value = 68.25),
sliderInput("xstdev",
"Standard Deviation of X:",
min = 0.5,
max = 5,
value = 1.8),
sliderInput("ystdev",
"Standard Deviation of Y:",
min = 0.5,
max = 5,
value = 2.5),
sliderInput("rho",
"Correlation of X and Y:",
min = -1,
max = 1,
step = 0.05,
value = 0.45),
# checkboxes for data and regression line
checkboxInput("showyex",
"Show y = x?",
value = TRUE),
checkboxInput("showdata",
"Show data?",
value = FALSE),
checkboxInput("showreg",
"Regression line?",
value = FALSE)
),
# Show a plot of the generated distribution
mainPanel(
plotOutput("distPlot"),
card(
card_body(
markdown("Use the sliders to explore the [Bivariate Normal Distribution](https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Bivariate_case).
The vertical and horizontal *grey lines* mark the mean
of *X* and *Y*, respectively. The dashed *black line* at
45 degrees represents the line *y* = *x*. The three
ellipses correspond to 68, 95, and 99.7% level contours
of the bivariate PDF with *blue segments* denoting the
major and minor axes. Select *Show data?* to plot data
randomly generated according to the distribution and
select *Regression line?* to display the best linear
unbiased prediction of *Y* given a value of *X*."),
markdown("The default settings of the parameters were chosen to coincide
with [Sir Francis Galton](https://en.wikipedia.org/wiki/Francis_Galton)'s
analysis of height data in
[*Regression towards Mediocrity in Hereditary Stature*](http://www.stat.ucla.edu/~nchristo/statistics100C/history_regression.pdf) (1886).
Like Galton, the sample size here is *n* = 928, though
note that the data are randomly generated and do not exactly
match [Galton's historical data set](https://friendly.github.io/HistData/reference/Galton.html). Check out [regression toward the mean](https://en.wikipedia.org/wiki/Regression_toward_the_mean) for more details."),
markdown("Source code is available [here](https://gist.github.com/gjkerns/6a7e866a9da72238779912c5c1a4f6ac).")
)
)
),
)
)
# Define server logic required to draw the plot
server <- function(input, output) {
output$distPlot <- renderPlot({
# generate bivariate normal data
set.seed(42)
library(MASS)
S <- matrix(c((input$xstdev)^2, input$rho*input$xstdev*input$ystdev,
input$rho*input$xstdev*input$ystdev,(input$ystdev)^2), nrow = 2)
x <- mvrnorm(n = 928, mu = c(input$xmean, input$ymean), Sigma = S)
# set up blank plot
plot(x, type="n", xlab = "X", ylab = "Y", asp = 1,
main = "Contour Plot of the Bivariate Normal PDF")
# plot the means for X and Y
abline(h = input$ymean, col = "grey")
abline(v = input$xmean, col = "grey")
# plot the line y = x
if (input$showyex){
abline(a = 0, b = 1, lty = 5, lwd = 2)
}
# draw 3 ellipses with the specified parameters
library(ellipse)
lines(ellipse(input$rho, scale = c(input$xstdev,input$ystdev),
centre = c(input$xmean, input$ymean), level = 0.68), col = "darkgrey")
lines(ellipse(input$rho, scale = c(input$xstdev,input$ystdev),
centre = c(input$xmean, input$ymean), level = 0.95), col = "darkgrey")
lines(ellipse(input$rho, scale = c(input$xstdev,input$ystdev),
centre = c(input$xmean, input$ymean), level = 0.997), col = "darkgrey")
# major and minor axes
tmp <- eigen(S)
m <- c(0, tmp$vectors[1,1]*sqrt(tmp$values[1] * qchisq(0.997,2)),
0, tmp$vectors[2,1]*sqrt(tmp$values[1] * qchisq(0.997,2))) +
c(input$xmean, input$xmean, input$ymean, input$ymean)
lines(matrix(m, nrow = 2), col = "blue")
m <- c(0, tmp$vectors[1,2]*sqrt(tmp$values[2] * qchisq(0.997,2)),
0, tmp$vectors[2,2]*sqrt(tmp$values[2] * qchisq(0.997,2))) +
c(input$xmean, input$xmean, input$ymean, input$ymean)
lines(matrix(m, nrow = 2), col = "blue")
# plot the data
if (input$showdata){
library(scales)
points(x, col=alpha("blue", 0.5))
}
# plot the regression line
if (input$showreg){
b <- input$rho * input$ystdev / input$xstdev
a <- input$ymean - b * input$xmean
library(scales)
abline(a = a, b = b, lty=3, lwd=3, col = alpha("red",0.8))
}
})
}
# Run the application
shinyApp(ui = ui, server = server)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment