Skip to content

Instantly share code, notes, and snippets.

@jbkunst
Created November 30, 2017 21:25
Show Gist options
  • Save jbkunst/cf574c9a6f99b98a9c95dac46756b91b to your computer and use it in GitHub Desktop.
Save jbkunst/cf574c9a6f99b98a9c95dac46756b91b to your computer and use it in GitHub Desktop.
# packages ----------------------------------------------------------------
rm(list = ls())
library(tidyverse)
theme_set(theme_minimal())
# data --------------------------------------------------------------------
n <- 500
df <- data_frame(
x1 = rnorm(n) + 1,
x2 = 5 + 2 * rnorm(n)
)
ggplot(df) +
geom_point(aes(x1, x2))
df <- df %>%
mutate(y = as.numeric(x1 > (x2 - 5)/2 + rnorm(n)))
ggplot(df) +
geom_point(aes(x1, x2, color = y))
ggplot(df) +
geom_point(aes(x1, y, color = y), alpha = 0.25)
df
df %>%
gather(key, value, -y) %>%
ggplot() +
geom_point(aes(value, y, color = y), alpha = 0.25) +
facet_wrap(~ key, scales = "free")
ggplot(df) +
geom_density(aes(x1, fill = y, group = y), alpha = 0.75)
# ajuste visual -----------------------------------------------------------
ggplot(df) +
geom_point(aes(x1, y, color = y), alpha = 0.25) +
geom_smooth(aes(x1, y), method = "lm")
ggplot(df) +
geom_point(aes(x1, y, color = y), alpha = 0.25) +
geom_smooth(aes(x1, y), method = "loess")
# logit -------------------------------------------------------------------
df2 <- data_frame(
s = seq(-10, 10, length.out = 101)
)
ggplot(df2) +
geom_line(aes(s, sin(s)))
ggplot(df2) +
geom_line(aes(s, 1 / (1 + exp(s))))
ggplot(df2) +
geom_line(aes(s, 1 / (1 + exp(-s))))
ggplot(df2) +
geom_line(aes(s, 1 / (1 + exp(-(2 + - 2 * s)))))
s <- seq(-10, 10, length.out = 101)
s2 <- seq(-5, 5, length.out = 6)
p <- expand.grid(x = s, b0 = s2, b1 = s2) %>%
tbl_df() %>%
mutate(y = 1 / (1 + exp(-(b0 + b1 * x)))) %>%
ggplot() +
geom_line(aes(x, y)) +
facet_grid(b0 ~ b1) +
theme(axis.title.x = element_blank(), axis.text = element_blank(), axis.ticks = element_blank())
p
p +
geom_point(data = df, aes(x1, y, color = y), alpha = 0.25) +
theme(legend.position = "none")
# ajuste parametros -------------------------------------------------------
m1 <- glm(y ~ x1, family = binomial(link = "logit"), data = df)
m1
c <- m1$coefficients
c
ggplot() +
geom_jitter(data = df, aes(x1, y, color = y), alpha = 0.2, height = 0.01) +
geom_line(data = df2, aes(s, 1 / (1 + exp(-(c[1] + c[2] * s))))) +
xlim(-2, 3)
m2 <- glm(y ~ x1 + x2, family = binomial(link = "logit"), data = df)
m2
c <- m2$coefficients
c
df_sur <- expand.grid(
x1 = seq(min(df$x1), max(df$x1), length.out = 100),
x2 = seq(min(df$x2), max(df$x2), length.out = 100)
) %>%
tbl_df() %>%
mutate(
y = predict(m2, newdata = ., type = "response")
)
df_sur
df_surm <- df_sur %>%
spread(x1, y) %>%
select(-x2) %>%
as.matrix()
library(plotly)
plot_ly() %>%
add_surface(x = unique(sort(df_sur$x1)), y = unique(sort(df_sur$x2)), z = df_surm) %>%
add_markers(data = df, x = ~x1, y = ~x2, z = ~y, color = ~y)
# evaluar -----------------------------------------------------------------
df <- df %>%
mutate(
p1 = predict(m1, newdata = ., type = "response"),
p2 = predict(m2, newdata = ., type = "response"),
l2 = predict(m2, newdata = ., type = "link")
)
ggplot(df) +
geom_density(aes(p1))
df %>%
select(y, starts_with("p")) %>%
gather(key, value, -y) %>%
ggplot() +
geom_density(aes(value)) +
facet_wrap(~ key)
df %>%
select(y, starts_with("p")) %>%
gather(key, value, -y) %>%
ggplot() +
geom_density(aes(value, group = y, fill = y), alpha = 0.75) +
facet_wrap(~ key)
# acumulada ---------------------------------------------------------------
x <- 1:10
ecdf_x <- ecdf(x)
plot(ecdf_x)
n <- 5000
plot(ecdf(runif(n)))
plot(ecdf(rnorm(n)))
plot(ecdf(rexp(n)), add = TRUE)
plot(ecdf(runif(n)), add = TRUE)
df <- df %>%
mutate()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment