This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Replace the raw release year with the film's age in years, measured
# against 2017, then drop the original `title_year` column.
num_df <- select(
  mutate(num_df, years_since_release = 2017 - title_year),
  -title_year
)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Mean IMDb score per release year, drawn as a line with points, a linear
# trend, and error bars spanning the central 95% of scores in each year
# (2.5th to 97.5th percentile).
# NOTE: this excerpt ends mid-chain; the trailing `+` is kept so that any
# layers following it still attach to the plot.
df %>%
  group_by(title_year) %>%
  summarise(
    mean_rating = mean(imdb_score),
    upper_rating = quantile(imdb_score, 0.975),
    # 0.025 mirrors the 0.975 upper tail; the previous 0.0275 made the
    # interval asymmetric.
    lower_rating = quantile(imdb_score, 0.025)
  ) %>%
  ggplot(aes(title_year, mean_rating)) +
  geom_line(colour = "dodger blue") +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", colour = "red", alpha = 0.6, se = FALSE) +
  # ymin takes the lower bound and ymax the upper bound (they were swapped).
  geom_errorbar(aes(ymin = lower_rating, ymax = upper_rating)) +
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Pairwise correlations of the columns in `num_df`, reshaped to long form:
# one row per (rowname, variable) pair. Columns 2 to 16 of the correlation
# table are gathered into `variable` / `correlation`.
correlations <- num_df %>%
  corrr::correlate() %>%
  gather(key = variable, value = correlation, 2:16) %>%
  select(rowname, variable, correlation) %>%
  mutate(
    # Label a pair "high" when the absolute correlation exceeds 0.5.
    high_correlation = ifelse(
      abs(correlation) > 0.5,
      "high",
      "not so high"
    )
  )
| correlations %>% | |
| ggplot(aes(reorder(rowname, correlation), reorder(variable, correlation), fill = correlation)) + | |
| geom_tile(alpha = 0.6, colour = "black") + | |
| geom_text(aes(label = round(correlation, 2), colour = high_correlation)) + | |
| theme_minimal() + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scale every column of `num_df`, draw 10 bootstrap replicates, and fit a
# linear model of `imdb_score` on all remaining columns within each
# replicate. The tidied coefficient tables (with confidence intervals) are
# stacked, and each term is flagged when its p-value is below 0.005.
fit <- as.data.frame(map(num_df, scale)) %>%
  bootstrap(10) %>%
  do(
    tidy(
      lm(imdb_score ~ ., data = .),
      conf.int = TRUE
    )
  ) %>%
  mutate(sig_0005 = p.value < 0.005)
| fit %>% | |
| filter(term != "(Intercept)") %>% | |
| group_by(term) %>% |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| fit %>% | |
| filter(term != "(Intercept)") %>% | |
| group_by(term) %>% | |
| summarise(estimate = mean(estimate), | |
| conf.low = mean(conf.low), | |
| conf.high = mean(conf.high), | |
| sig_0005 = ifelse(sum(sig_0005) > 6, "significant (< 0.005)", "not significant")) %>% | |
| ggplot(aes(y = reorder(term, estimate), x = estimate, colour = sig_0005)) + | |
| geom_point() + | |
| geom_errorbarh(aes(xmax = conf.high, xmin = conf.low)) + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(tidyverse) | |
| library(here) | |
| library(broom) | |
| library(corrr) | |
| library(forcats) | |
| library(stringr) | |
| library(lubridate) | |
| library(gridExtra) | |
| df <- read_csv("movie_metadata.csv") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(dplyr) | |
| library(ggplot2) | |
# Settings for the simulated regression data.
n <- 200      # sample size
# NOTE(review): `bias` and `slope` are presumably the true intercept and
# slope used to generate y; that step is not visible in this excerpt.
bias <- 4
slope <- 3.5

# Short alias for matrix multiplication.
dot <- base::`%*%`

# Predictor: n standard-normal draws, doubled.
x <- 2 * rnorm(n)

# Design matrix: the predictor in column 1, a column of ones in column 2.
x_b <- cbind(x, rep_len(1, n))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| for (iteration in seq_len(n_iterations)) { | |
| yhat <- dot(x_b, theta) # predict using weights in theta | |
| residuals_b <- yhat - y # calculate the residuals | |
| gradients <- 2/n * dot(t(x_b), residuals_b) # calculate the gradients of MSE w.r.t model weights | |
| theta <- theta - learning_rate * gradients # update theta | |
| sse_i[[iteration]] <- sum((y - dot(x_b, theta))**2) | |
| b0[[iteration]] <- theta[2] | |
| b1[[iteration]] <- theta[1] | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| p1 <- df %>% | |
| ggplot(aes(x=x, y=y)) + | |
| geom_abline(aes(intercept = b0, | |
| slope = b1, | |
| colour = -sse, | |
| frame = model_iter), | |
| data = model_i, | |
| alpha = .50 | |
| ) + | |
| geom_point(alpha = 0.4) + |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Predict responses for new inputs using the fitted weights.
#
# A column of ones is appended to `x`, and the resulting matrix is
# multiplied by `theta` via `dot`. Both `theta` and `dot` are looked up
# outside this function, so they must exist in the calling environment.
#
# x: numeric vector of predictor values.
# Returns the product as a one-column matrix with one row per element of x.
predict_from_theta <- function(x) {
  ones <- rep(1, length(x))
  design <- cbind(x, ones)
  dot(design, theta)
}
| predict_from_theta(rnorm(10)) | |
| [,1] | |
| [1,] -1.530065 |