Skip to content

Instantly share code, notes, and snippets.

@artemklevtsov
artemklevtsov / bin.R
Last active April 9, 2018 13:32
Binning (discretize) variables based on CART
library(recipes)
library(dplyr)
library(tibble)
step_bin <- function(recipe, ..., role = "predictor", trained = FALSE,
threshold = .05, n.group = NULL, woe = FALSE, objects = NULL,
skip = FALSE) {
if (threshold <= 0)
stop("`threshold` should be greater than zero", call. = FALSE)
if (threshold >= 1)
@artemklevtsov
artemklevtsov / Telco-Customer-Churn.R
Last active April 9, 2018 13:02
Telco-Customer-Churn
## ---- Загрузка пакетов ----
library(readr) # reading data
library(dplyr) # data manipulation
library(tidyr) # data manipulation
library(rsample) # splitting the sample
library(recipes) # preprocessing
library(yardstick) # model metrics
library(glmnet) # regularized logistic regression
@artemklevtsov
artemklevtsov / cramer.R
Last active March 1, 2018 14:22
Статистика Крамера
library(data.table)
#' @title Function for computing Cramér's statistic
#' @param x Categorical variable.
#' @param y Categorical variable.
#' @return Cramér's statistic (a number in the range from 0 to 1).
cramer <- function(x, y) {
# На случай сравнения переменной с самой собой
if (identical(x, y)) return(1.0)
# Наблюдаемые частоты
library(curl)
library(jsonlite)
library(data.table)
# Get data
# Opens a curl connection to the OpenDota heroStats endpoint and reads the
# response with stream_in(); verbose = FALSE is passed to stream_in().
# NOTE(review): stream_in() is presumably jsonlite's (loaded above) and expects
# line-delimited JSON -- confirm the endpoint's response format suits it.
hero_stats <- stream_in(curl("https://api.opendota.com/api/heroStats"), verbose = FALSE)
# Post processing
# Convert the result to a data.table (setDT() comes from data.table, loaded
# above; it is documented as converting by reference, without a copy).
setDT(hero_stats)
hero_roels <- dcast(
data = res[, .(roles = unlist(roles)), by = id],
@artemklevtsov
artemklevtsov / svn.R
Created July 14, 2017 14:20
Get R releases dates
library(xml2)
library(curl)
library(data.table)
library(lubridate)
# URL of the tags directory in the R project's SVN repository.
r_svn_url <- "https://svn.r-project.org/R/tags/"
# Build a curl handle that sends a PROPFIND request (a WebDAV method) with the
# header `Depth: 1` instead of a plain GET.
# NOTE(review): Depth 1 presumably limits the listing to the directory's
# immediate children, i.e. one entry per tag -- confirm against the response.
h <- handle_setheaders(new_handle(customrequest = "PROPFIND"), Depth="1")
# Perform the request and keep the whole response in memory.
req <- curl_fetch_memory(r_svn_url, handle = h)
# The response body is a raw vector: convert it to a string and parse it as XML.
doc <- read_xml(rawToChar(req$content))
# Collect the XML namespaces declared in the document (for later queries).
ns <- xml_ns(doc)
#include <Rcpp.h>
using namespace Rcpp;
RObject get_elem(const RObject& x, std::size_t i) {
switch(x.sexp_type()) {
case INTSXP: {
RObject res = wrap(INTEGER(x)[i]);
if (Rf_isFactor(x)) {
res.attr("class") = "factor";
res.attr("levels") = x.attr("levels");
// [[Rcpp::plugins("cpp11")]]
#include <Rcpp.h>
#include <functional>
using namespace Rcpp;
// [[Rcpp::export]]
std::vector<size_t> hash(const std::vector<std::string>& x) {
std::vector<size_t> res(x.size());
std::transform(x.begin(), x.end(), res.begin(), std::hash<std::string>());
// [[Rcpp::plugins("cpp11")]]
// [[Rcpp::depends("BH")]]
#include <Rcpp.h>
#include <boost/asio.hpp>
using namespace Rcpp;
using namespace boost::asio;
using namespace boost::asio::ip;
// [[Rcpp::plugins("cpp11")]]
#include <Rcpp.h>
using namespace Rcpp;
void append_string(std::vector<std::string>& s1, const std::string s2) {
if (!s2.empty()) {
std::transform(s1.begin(), s1.end(), s1.begin(),
[&] (std::string& s) { return s2 + '.' + s; });
}
plot.roc <- function(x, ...) {
ind <- which.max(x$tpr + 1 - x$fpr)
opar <- par(pty = "s")
on.exit(par(opar))
ticks <- seq(from = 0, to = 1, by = 0.1)
plot(x = x$fpr, y = x$tpr,
xlim = c(0, 1), ylim = c(0, 1),
type = "s", lwd = 2,
xlab = "False Posisitive Rate",
ylab = "True Positive Rate",