Created
November 13, 2021 09:07
-
-
Save JimGrange/ad14c40c0e53b8bb2326353d2f59e7e8 to your computer and use it in GitHub Desktop.
Generate heatmaps of density estimates for NFL pass locations (https://bit.ly/3DdGac2)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# gaussian kernel function | |
gaussian_kernel <- function(u){ | |
(1 / sqrt(2 * pi)) * exp(-0.5 * u ^ 2) | |
} | |
# kernel density estimate function | |
kde <- function(n, data, x_limit, y_limit, h_x, h_y){ | |
x <- seq(from = x_limit[1], | |
to = x_limit[2], | |
length.out = n) | |
y <- seq(from = y_limit[1], | |
to = y_limit[2], | |
length.out = n) | |
z <- expand.grid(x, y) %>% | |
rename(x = Var1, y = Var2) %>% | |
mutate(density = 0) | |
for(i in 1:nrow(z)){ | |
d_x <- z$x[i] | |
d_y <- z$y[i] | |
k_x <- gaussian_kernel((d_x - data$x) / h_x) | |
k_y <- gaussian_kernel((d_y - data$y) / h_y) | |
k_xy <- sum(k_x * k_y) | |
z$density[i] <- (1 / nrow(data) * h_x * h_y) * k_xy | |
} | |
return(z) | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse) | |
library(patchwork) | |
# simulate data ----------------------------------------------------------- | |
# limits of the football field | |
x_limit <- c(-30, 30) | |
y_limit <- c(-10, 75) | |
# how many pass locations to simulate | |
n_samples <- 40 | |
# simulate the pass data | |
data <- tibble( | |
x = runif(n_samples, x_limit[1], x_limit[2]), | |
y = runif(n_samples, y_limit[1], y_limit[2]) | |
) | |
# kde estimates ----------------------------------------------------------- | |
# Scott’s rule of thumb heuristic for the bandwidth h | |
h_x <- 1.06 * | |
min(c(sd(data$x), (IQR(data$x) / 1.34))) * | |
(length(data$x) ^ -(1/5)) | |
h_y <- 1.06 * | |
min(c(sd(data$x), (IQR(data$x) / 1.34))) * | |
(length(data$x) ^ -(1/5)) | |
# get the density estimates | |
kde_estimate <- kde(100, | |
data, | |
x_limit, | |
y_limit, | |
h_x = h_x, | |
h_y = h_y) | |
# plotting ---------------------------------------------------------------- | |
# plot the pass data | |
data_plot <- data %>% | |
ggplot(aes(x, y)) + | |
geom_point(colour = "blue", | |
size = 3, | |
alpha = 0.7) + | |
scale_x_continuous(limits = c(-35, 35)) + | |
scale_y_continuous(limits = c(-20, 85)) | |
# plot as a heatmap | |
kde_plot <- kde_estimate %>% | |
ggplot(aes(x = x, y = y, fill = density)) + | |
geom_tile() + | |
scale_fill_gradient2(low = "#026837", | |
mid = "#fffdbc", | |
high="#aa0625", | |
midpoint = mean(kde_estimate$density)) + | |
scale_x_continuous(limits = c(-35, 35)) + | |
scale_y_continuous(limits = c(-20, 85)) | |
# data & KDE side by side | |
data_plot + kde_plot | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment