Created
October 17, 2019 13:44
-
-
Save jfaganUK/0f38d414d9a132d029c366ac310a2a06 to your computer and use it in GitHub Desktop.
Animating the optimization of least squared linear model
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Animate optim ordinary least squares | |
rm(list=ls()) | |
gc() | |
library(ggplot2) | |
library(tidyverse) | |
library(foreign) | |
library(gganimate) | |
library(transformr) | |
library(data.table) | |
record1 <- read.spss('../data/Album Sales.sav', to.data.frame = T) | |
squared_error <- function(param) { | |
e <- record1$Adverts * param[1] + param[2] | |
es <- sum((e - record1$Sales)^2) | |
i <<- i + 1 | |
params_l[[i]] <<- data.frame(i = i, squared_error = es, b1 = param[1], b0 = param[2]) | |
return(es) | |
} | |
i <- 0 | |
params_l <- list() | |
o <- optim(c(b1 = 100, b0 = -100), squared_error, control = list(trace = T)) | |
params <- do.call('rbind', params_l) | |
record_anim <- lapply(1:nrow(params), function(i) { | |
b0 <- params$b0[i] | |
b1 <- params$b1[i] | |
d <- record1 %>% | |
mutate(sales_pred = b0 + b1 * Adverts) %>% | |
mutate(residual = Sales - sales_pred) %>% | |
mutate(residual2 = residual^2) %>% | |
mutate(residual2z = residual2 %>% scale %>% as.numeric) %>% | |
mutate(m_sales = mean(Sales)) %>% | |
mutate(param_b0 = b0, param_b1 = b1, state = i) | |
return(d) | |
}) %>% rbindlist | |
record_anim <- record_anim %>% | |
mutate(f = sprintf('sales = %1.3f budget + %1.3f', param_b1, param_b0)) %>% | |
mutate(f = ifelse(Adverts == min(Adverts), f, NA_character_)) | |
p <- ggplot(record_anim) + | |
geom_rect(aes(xmin=Adverts, xmax = Adverts + residual,ymin = Sales, | |
ymax = sales_pred,fill = residual2z), alpha = 0.8) + | |
geom_point(aes(x = Adverts, y = Sales)) + | |
geom_line(aes(x = Adverts, y = sales_pred), color = 'black', size = 1) + | |
geom_text(x = 1000, y = 50, hjust = 0, aes(label = f)) + | |
scale_fill_viridis_c('Squared Residual', option = 'plasma') + | |
scale_x_continuous('Advertising Budget\n(thousands of £)') + | |
scale_y_continuous('Record Sales\n(thousands of units)') + | |
coord_equal(xlim = c(0,2200), ylim = c(0, 400), ratio = 1) + | |
labs(title = 'Estimating OLS Regression', | |
subtitle = 'Nelder-Mead minimizer step: {closest_state}', | |
caption = 'Jesse Fagan') + | |
theme_minimal() + | |
theme(legend.position = 'none') + | |
transition_states(state, transition_length = 3, state_length = 0, wrap = F) + | |
ease_aes('linear') | |
options(gganimate.dev_args = list(width = 1200, height = 300)) | |
animate(p, fps = 30, duration = 20, end_pause = 100) | |
## the surface | |
N <- 100 | |
# d1 <- data.frame(b1 = seq(min(params$b1), max(params$b1), length.out = N), | |
# b0 = seq(min(params$b0), max(params$b0), length.out = N)) | |
d1 <- data.frame(b1 = seq(-20, 100, length.out = N), | |
b0 = seq(-100, 100, length.out = N)) | |
d2 <- expand.grid(d1) | |
d2$squared_error <- sapply(1:nrow(d2), function(i) { | |
squared_error(c(d2$b1[i], d2$b0[i])) | |
}) | |
ggplot() + | |
geom_contour(data = d2, bins = 20, | |
aes(x = b0, y = b1, z = squared_error)) | |
ggplot() + | |
stat_contour(data = d2, geom = 'polygon', | |
aes(x = b0, y = b1, z = squared_error, fill = ..level..)) + | |
scale_fill_viridis_c(option = 'D') | |
ggplot() + | |
geom_tile(data = d2, aes(x = b0, y = b1, fill = squared_error)) + | |
scale_fill_viridis_c(option = 'D') + | |
geom_line(data = params, aes(x = b0, y = b1, color = i)) + | |
scale_color_viridis_c(option = 'A') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Can you add the data file too?