Skip to content

Instantly share code, notes, and snippets.

@artemklevtsov
Last active April 9, 2018 13:02
Show Gist options
  • Save artemklevtsov/710191b96e87bdb14013712a93bfbf86 to your computer and use it in GitHub Desktop.
Save artemklevtsov/710191b96e87bdb14013712a93bfbf86 to your computer and use it in GitHub Desktop.
Telco-Customer-Churn
## ---- Загрузка пакетов ----
library(readr) # чтение данных
library(dplyr) # манипулации с данными
library(tidyr) # манипулации с данными
library(rsample) # разбиение выборки
library(recipes) # препроцессинг
library(yardstick) # метрики модели
library(glmnet) # логистическая регрессия с регуляризацией
## ---- Загрузка данных ----
data_url <- "https://community.watsonanalytics.com/wp-content/uploads/2015/03/WA_Fn-UseC_-Telco-Customer-Churn.csv"
all_data <- read_csv(data_url)
all_data <- all_data %>%
select(-customerID) %>%
drop_na() %>%
mutate_if(funs(any(grepl("No .* service", .))),
funs(replace(., which(grepl("No .* service", .)), "No"))) %>%
mutate(Churn = factor(Churn, c("No", "Yes"))) %>%
select(Churn, everything())
## ---- Разбиение выборки
set.seed(100)
train_test_split <- initial_split(all_data, prop = 0.8, strata = "Churn")
train_data <- training(train_test_split)
test_data <- testing(train_test_split)
## ---- Предобработка ----
# Создаём сценарий обработки
rec_obj <- recipe(Churn ~ ., data = train_data) %>%
step_ratio(TotalCharges, denom = denom_vars(tenure)) %>%
step_bin2factor(SeniorCitizen, levels = c("Yes", "No")) %>%
step_discretize(tenure, options = list(cuts = 4)) %>%
step_discretize(MonthlyCharges, options = list(cuts = 3)) %>%
step_regex(PaymentMethod, pattern = "automatic", result = "PaymentMethodAuto") %>%
step_bin2factor(PaymentMethodAuto, levels = c("Yes", "No")) %>%
step_BoxCox(TotalCharges) %>%
step_center(all_numeric(), -all_outcomes()) %>%
step_scale(all_numeric(), -all_outcomes()) %>%
step_dummy(all_nominal(), -all_outcomes()) %>%
step_zv(all_predictors(), -all_outcomes()) %>%
# step_lincomb(all_predictors()) %>%
# step_corr(all_numeric(), -all_outcomes(), threshold = 0.99) %>%
prep(data = train_data)
# Извлечение целевых переменных
train_lab <- train_data %>% pull(Churn)
test_lab <- test_data %>% pull(Churn)
# Применение сценария
train_prep_data <- bake(rec_obj, train_data) %>% select(-Churn)
test_prep_data <- bake(rec_obj, test_data) %>% select(-Churn)
## ---- Обучение модели ----
glm_fit <- cv.glmnet(as.matrix(train_prep_data), as.integer(train_lab == "Yes"),
family = "binomial", type.measure = "mse",
nfolds = 5)
## ---- Оценка модели ----
# Предсказания вероятности
train_pred <- predict(glm_fit, as.matrix(train_prep_data), s = "lambda.min", type="response")[, 1]
test_pred <- predict(glm_fit, as.matrix(test_prep_data), s = "lambda.min", type="response")[, 1]
test_pred_data <- data_frame(
target = test_lab,
glm = test_pred,
glm_bin = factor(test_pred >= 0.5, c(FALSE, TRUE), c("No", "Yes"))
)
# Метрики
roc_auc(test_pred_data, target, glm)
accuracy(test_pred_data, target, glm_bin)
precision(test_pred_data, target, glm_bin)
recall(test_pred_data, target, glm_bin)
conf_mat(test_pred_data, target, glm_bin)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment