Skip to content

Instantly share code, notes, and snippets.

@JimGrange
Created November 13, 2021 09:07
Show Gist options
  • Save JimGrange/ad14c40c0e53b8bb2326353d2f59e7e8 to your computer and use it in GitHub Desktop.
Save JimGrange/ad14c40c0e53b8bb2326353d2f59e7e8 to your computer and use it in GitHub Desktop.
Generate heatmaps of density estimates for NFL pass locations (https://bit.ly/3DdGac2)
# gaussian kernel function
gaussian_kernel <- function(u){
(1 / sqrt(2 * pi)) * exp(-0.5 * u ^ 2)
}
# kernel density estimate function
kde <- function(n, data, x_limit, y_limit, h_x, h_y){
x <- seq(from = x_limit[1],
to = x_limit[2],
length.out = n)
y <- seq(from = y_limit[1],
to = y_limit[2],
length.out = n)
z <- expand.grid(x, y) %>%
rename(x = Var1, y = Var2) %>%
mutate(density = 0)
for(i in 1:nrow(z)){
d_x <- z$x[i]
d_y <- z$y[i]
k_x <- gaussian_kernel((d_x - data$x) / h_x)
k_y <- gaussian_kernel((d_y - data$y) / h_y)
k_xy <- sum(k_x * k_y)
z$density[i] <- (1 / nrow(data) * h_x * h_y) * k_xy
}
return(z)
}
library(tidyverse)
library(patchwork)
# simulate data -----------------------------------------------------------
# limits of the football field
x_limit <- c(-30, 30)
y_limit <- c(-10, 75)
# how many pass locations to simulate
n_samples <- 40
# simulate the pass data
data <- tibble(
x = runif(n_samples, x_limit[1], x_limit[2]),
y = runif(n_samples, y_limit[1], y_limit[2])
)
# kde estimates -----------------------------------------------------------
# Scott’s rule of thumb heuristic for the bandwidth h
h_x <- 1.06 *
min(c(sd(data$x), (IQR(data$x) / 1.34))) *
(length(data$x) ^ -(1/5))
h_y <- 1.06 *
min(c(sd(data$x), (IQR(data$x) / 1.34))) *
(length(data$x) ^ -(1/5))
# get the density estimates
kde_estimate <- kde(100,
data,
x_limit,
y_limit,
h_x = h_x,
h_y = h_y)
# plotting ----------------------------------------------------------------
# plot the pass data
data_plot <- data %>%
ggplot(aes(x, y)) +
geom_point(colour = "blue",
size = 3,
alpha = 0.7) +
scale_x_continuous(limits = c(-35, 35)) +
scale_y_continuous(limits = c(-20, 85))
# plot as a heatmap
kde_plot <- kde_estimate %>%
ggplot(aes(x = x, y = y, fill = density)) +
geom_tile() +
scale_fill_gradient2(low = "#026837",
mid = "#fffdbc",
high="#aa0625",
midpoint = mean(kde_estimate$density)) +
scale_x_continuous(limits = c(-35, 35)) +
scale_y_continuous(limits = c(-20, 85))
# data & KDE side by side
data_plot + kde_plot
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment