Skip to content

Instantly share code, notes, and snippets.

@jfaganUK
Created October 17, 2019 13:44
Show Gist options
  • Save jfaganUK/0f38d414d9a132d029c366ac310a2a06 to your computer and use it in GitHub Desktop.
Save jfaganUK/0f38d414d9a132d029c366ac310a2a06 to your computer and use it in GitHub Desktop.
Animating the optimization of least squared linear model
## Animate optim ordinary least squares
rm(list=ls())
gc()
library(ggplot2)
library(tidyverse)
library(foreign)
library(gganimate)
library(transformr)
library(data.table)
record1 <- read.spss('../data/Album Sales.sav', to.data.frame = T)
squared_error <- function(param) {
e <- record1$Adverts * param[1] + param[2]
es <- sum((e - record1$Sales)^2)
i <<- i + 1
params_l[[i]] <<- data.frame(i = i, squared_error = es, b1 = param[1], b0 = param[2])
return(es)
}
i <- 0
params_l <- list()
o <- optim(c(b1 = 100, b0 = -100), squared_error, control = list(trace = T))
params <- do.call('rbind', params_l)
record_anim <- lapply(1:nrow(params), function(i) {
b0 <- params$b0[i]
b1 <- params$b1[i]
d <- record1 %>%
mutate(sales_pred = b0 + b1 * Adverts) %>%
mutate(residual = Sales - sales_pred) %>%
mutate(residual2 = residual^2) %>%
mutate(residual2z = residual2 %>% scale %>% as.numeric) %>%
mutate(m_sales = mean(Sales)) %>%
mutate(param_b0 = b0, param_b1 = b1, state = i)
return(d)
}) %>% rbindlist
record_anim <- record_anim %>%
mutate(f = sprintf('sales = %1.3f budget + %1.3f', param_b1, param_b0)) %>%
mutate(f = ifelse(Adverts == min(Adverts), f, NA_character_))
p <- ggplot(record_anim) +
geom_rect(aes(xmin=Adverts, xmax = Adverts + residual,ymin = Sales,
ymax = sales_pred,fill = residual2z), alpha = 0.8) +
geom_point(aes(x = Adverts, y = Sales)) +
geom_line(aes(x = Adverts, y = sales_pred), color = 'black', size = 1) +
geom_text(x = 1000, y = 50, hjust = 0, aes(label = f)) +
scale_fill_viridis_c('Squared Residual', option = 'plasma') +
scale_x_continuous('Advertising Budget\n(thousands of £)') +
scale_y_continuous('Record Sales\n(thousands of units)') +
coord_equal(xlim = c(0,2200), ylim = c(0, 400), ratio = 1) +
labs(title = 'Estimating OLS Regression',
subtitle = 'Nelder-Mead minimizer step: {closest_state}',
caption = 'Jesse Fagan') +
theme_minimal() +
theme(legend.position = 'none') +
transition_states(state, transition_length = 3, state_length = 0, wrap = F) +
ease_aes('linear')
options(gganimate.dev_args = list(width = 1200, height = 300))
animate(p, fps = 30, duration = 20, end_pause = 100)
## the surface
N <- 100
# d1 <- data.frame(b1 = seq(min(params$b1), max(params$b1), length.out = N),
# b0 = seq(min(params$b0), max(params$b0), length.out = N))
d1 <- data.frame(b1 = seq(-20, 100, length.out = N),
b0 = seq(-100, 100, length.out = N))
d2 <- expand.grid(d1)
d2$squared_error <- sapply(1:nrow(d2), function(i) {
squared_error(c(d2$b1[i], d2$b0[i]))
})
ggplot() +
geom_contour(data = d2, bins = 20,
aes(x = b0, y = b1, z = squared_error))
ggplot() +
stat_contour(data = d2, geom = 'polygon',
aes(x = b0, y = b1, z = squared_error, fill = ..level..)) +
scale_fill_viridis_c(option = 'D')
ggplot() +
geom_tile(data = d2, aes(x = b0, y = b1, fill = squared_error)) +
scale_fill_viridis_c(option = 'D') +
geom_line(data = params, aes(x = b0, y = b1, color = i)) +
scale_color_viridis_c(option = 'A')
@crogger62
Copy link

Can you add the data file too?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment